From 2e3b1d27b47808293e65bbb69885f0c7cda0ed26 Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Wed, 26 Jun 2024 01:21:15 +0100 Subject: [PATCH] [CPU] [ARM] FullyConnected: int8 support --- .../markup_dequantization_fuse.hpp | 32 ++++ .../src/low_precision.cpp | 3 + .../src/markup_dequantization_fuse.cpp | 38 +++++ .../src/mat_mul.cpp | 4 + .../src/network_helper.cpp | 9 +- src/plugins/intel_cpu/src/config.h | 5 +- src/plugins/intel_cpu/src/cpu_memory.cpp | 3 +- .../executors/acl/acl_common_executor.cpp | 77 ++++++++- .../executors/acl/acl_common_executor.hpp | 5 + .../executors/acl/acl_lowp_fullyconnected.cpp | 160 ++++++++++++++++++ .../executors/acl/acl_lowp_fullyconnected.hpp | 50 ++++++ .../src/nodes/executors/acl/acl_utils.hpp | 2 +- .../src/nodes/executors/debug_messages.hpp | 1 + .../fullyconnected_implementations.cpp | 39 +++++ .../arm/pass/mat_mul_decomposition.cpp | 39 +++++ .../arm/pass/mat_mul_decomposition.hpp | 19 +++ .../aarch64/pass/snippets_mark_skipped.cpp | 13 +- .../aarch64/pass/snippets_mark_skipped.hpp | 5 +- .../pass/snippets_mark_skipped_base.cpp | 63 +++++++ .../pass/snippets_mark_skipped_base.hpp | 22 +++ .../x64/pass/snippets_mark_skipped.hpp | 6 +- .../transformation_pipeline.cpp | 8 + .../intel_cpu/tests/functional/CMakeLists.txt | 4 +- .../fully_connected_transformation.cpp | 52 ++++++ .../{ => x64}/add_transformation.cpp | 0 .../assign_and_read_value_transformation.cpp | 0 .../batch_to_space_transformation.cpp | 0 .../{ => x64}/clamp_transformation.cpp | 0 .../{ => x64}/concat_transformation.cpp | 0 .../concat_with_child_and_output.cpp | 0 ...t_with_different_precision_on_children.cpp | 0 ...oncat_with_intermediate_transformation.cpp | 0 .../concat_with_neighbors_transformation.cpp | 0 .../concat_with_split_transformation.cpp | 0 ...nvolution_backprop_data_transformation.cpp | 0 .../convolution_qdq_transformation.cpp | 0 .../{ => x64}/convolution_transformation.cpp | 0 .../depth_to_space_transformation.cpp | 0 
...ntwise_branch_selection_transformation.cpp | 0 ...eliminate_fake_quantize_transformation.cpp | 0 .../fq_and_avg_pool_transformation.cpp | 0 .../fq_and_max_pool_transformation.cpp | 0 ...d_two_output_branches_with_convolution.cpp | 0 .../fq_precision_selection_transformation.cpp | 0 .../{ => x64}/fq_transformation.cpp | 0 .../fq_with_dq_not_optimal_transformation.cpp | 0 .../fully_connected_transformation.cpp | 5 +- .../{ => x64}/fuse_convert_transformation.cpp | 0 .../fuse_dequantize_to_fq_transformation.cpp | 0 ...fuse_fq_and_scale_shift_transformation.cpp | 0 .../fuse_multiply_to_fq_transformation.cpp | 0 .../fuse_subtract_to_fq_transformation.cpp | 0 .../{ => x64}/gather_transformation.cpp | 0 .../{ => x64}/gemm_transformation.cpp | 0 .../group_convolution_transformation.cpp | 0 .../groupconvolution_qdq_transformation.cpp | 0 .../{ => x64}/interpolate_transformation.cpp | 0 .../{ => x64}/mat_mul_transformation.cpp | 0 .../mat_mul_with_constant_transformation.cpp | 0 .../mat_mul_with_optimized_constant_fq.cpp | 0 .../move_fake_quantize_transformation.cpp | 0 .../multiply_to_group_convolution.cpp | 0 .../{ => x64}/multiply_transformation.cpp | 0 .../{ => x64}/multiply_with_one_parent.cpp | 0 .../{ => x64}/mvn_transformation.cpp | 0 .../{ => x64}/normalize_transformation.cpp | 0 .../{ => x64}/output_layers.cpp | 0 .../{ => x64}/output_layers_concat.cpp | 0 .../output_layers_concat_multi_channel.cpp | 0 .../{ => x64}/pad_transformation.cpp | 0 .../{ => x64}/prelu_transformation.cpp | 0 .../pull_reshape_through_dequantization.cpp | 0 .../recurrent_cell_transformation.cpp | 0 .../{ => x64}/reduce_max_transformation.cpp | 0 .../{ => x64}/reduce_mean_transformation.cpp | 0 .../{ => x64}/reduce_min_transformation.cpp | 0 .../{ => x64}/reduce_sum_transformation.cpp | 0 .../{ => x64}/relu_transformation.cpp | 0 .../{ => x64}/reshape_transformation.cpp | 0 .../shuffle_channels_transformation.cpp | 0 .../space_to_batch_transformation.cpp | 0 .../{ => 
x64}/split_transformation.cpp | 0 .../{ => x64}/squeeze_transformation.cpp | 0 .../strided_slice_transformation.cpp | 0 .../subtract_multiply_to_multiply_add.cpp | 0 .../{ => x64}/subtract_transformation.cpp | 0 .../transpose_after_matmul_transformation.cpp | 0 .../{ => x64}/transpose_transformation.cpp | 0 .../{ => x64}/unsqueeze_transformation.cpp | 0 .../variadic_split_transformation.cpp | 0 .../skip_tests_config.cpp | 2 + .../fully_connected_transformation.cpp | 3 +- .../fully_connected_transformation.hpp | 5 +- .../mat_mul_transformation.hpp | 2 + .../fully_connected_transformation.cpp | 31 +++- .../mat_mul_transformation.cpp | 21 ++- .../layer_transformation.hpp | 2 + .../layer_transformation.cpp | 16 +- .../include/ov_lpt_models/mat_mul.hpp | 7 +- .../ov_helpers/ov_lpt_models/src/mat_mul.cpp | 56 ++++-- 100 files changed, 756 insertions(+), 53 deletions(-) create mode 100644 src/common/low_precision_transformations/include/low_precision/markup_dequantization_fuse.hpp create mode 100644 src/common/low_precision_transformations/src/markup_dequantization_fuse.cpp create mode 100644 src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.cpp create mode 100644 src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.hpp create mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/mat_mul_decomposition.cpp create mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/mat_mul_decomposition.hpp create mode 100644 src/plugins/intel_cpu/src/transformations/snippets/common/pass/snippets_mark_skipped_base.cpp create mode 100644 src/plugins/intel_cpu/src/transformations/snippets/common/pass/snippets_mark_skipped_base.hpp create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/aarch64/fully_connected_transformation.cpp rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/add_transformation.cpp 
(100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/assign_and_read_value_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/batch_to_space_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/clamp_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/concat_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/concat_with_child_and_output.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/concat_with_different_precision_on_children.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/concat_with_intermediate_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/concat_with_neighbors_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/concat_with_split_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/convolution_backprop_data_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/convolution_qdq_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/convolution_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/depth_to_space_transformation.cpp (100%) rename 
src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/elementwise_branch_selection_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/eliminate_fake_quantize_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/fq_and_avg_pool_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/fq_and_max_pool_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/fq_and_two_output_branches_with_convolution.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/fq_precision_selection_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/fq_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/fq_with_dq_not_optimal_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/fully_connected_transformation.cpp (86%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/fuse_convert_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/fuse_dequantize_to_fq_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/fuse_fq_and_scale_shift_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/fuse_multiply_to_fq_transformation.cpp 
(100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/fuse_subtract_to_fq_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/gather_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/gemm_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/group_convolution_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/groupconvolution_qdq_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/interpolate_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/mat_mul_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/mat_mul_with_constant_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/mat_mul_with_optimized_constant_fq.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/move_fake_quantize_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/multiply_to_group_convolution.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/multiply_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/multiply_with_one_parent.cpp (100%) rename 
src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/mvn_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/normalize_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/output_layers.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/output_layers_concat.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/output_layers_concat_multi_channel.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/pad_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/prelu_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/pull_reshape_through_dequantization.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/recurrent_cell_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/reduce_max_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/reduce_mean_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/reduce_min_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/reduce_sum_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => 
x64}/relu_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/reshape_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/shuffle_channels_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/space_to_batch_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/split_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/squeeze_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/strided_slice_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/subtract_multiply_to_multiply_add.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/subtract_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/transpose_after_matmul_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/transpose_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/unsqueeze_transformation.cpp (100%) rename src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/{ => x64}/variadic_split_transformation.cpp (100%) diff --git a/src/common/low_precision_transformations/include/low_precision/markup_dequantization_fuse.hpp 
b/src/common/low_precision_transformations/include/low_precision/markup_dequantization_fuse.hpp new file mode 100644 index 00000000000000..6e4c17e436595b --- /dev/null +++ b/src/common/low_precision_transformations/include/low_precision/markup_dequantization_fuse.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "low_precision/lpt_visibility.hpp" +#include +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pattern/matcher.hpp" + +namespace ov { +namespace pass { +namespace low_precision { + +/** + * @ingroup ov_transformation_common_api + * @brief MarkupDequantizationFuse transformation marks not updatable dequantization operations for fusing. + * + * For more details about the transformation, refer to + * [MarkupBias](@ref openvino_docs_OV_UG_lpt_MarkupBias) page + * in the OpenVINO Developer Guide. + */ +class LP_TRANSFORMATIONS_API MarkupDequantizationFuse : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("MarkupBias", "0"); + MarkupDequantizationFuse(); +}; + +} // namespace low_precision +} // namespace pass +} // namespace ov \ No newline at end of file diff --git a/src/common/low_precision_transformations/src/low_precision.cpp b/src/common/low_precision_transformations/src/low_precision.cpp index e58374ed3e2b1a..3a18a6f51161d4 100644 --- a/src/common/low_precision_transformations/src/low_precision.cpp +++ b/src/common/low_precision_transformations/src/low_precision.cpp @@ -20,6 +20,7 @@ #include "low_precision/align_quantization_intervals.hpp" #include "low_precision/fake_quantize_decomposition.hpp" #include "low_precision/markup_bias.hpp" +#include "low_precision/markup_dequantization_fuse.hpp" #include "low_precision/markup_precisions.hpp" #include "low_precision/markup_can_be_quantized.hpp" #include "low_precision/markup_avg_pool_precision_preserved.hpp" @@ -208,6 +209,8 @@ bool ov::pass::low_precision::MarkupOptimizations::run_on_model(const 
std::share markup.register_pass(params.defaultPrecisions); } markup.register_pass(); + // TODO: debug only + markup.register_pass(); markup.run_passes(f); return false; } diff --git a/src/common/low_precision_transformations/src/markup_dequantization_fuse.cpp b/src/common/low_precision_transformations/src/markup_dequantization_fuse.cpp new file mode 100644 index 00000000000000..e5a06e56de20e9 --- /dev/null +++ b/src/common/low_precision_transformations/src/markup_dequantization_fuse.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/markup_dequantization_fuse.hpp" + +#include +#include "openvino/opsets/opset1.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" + +#include "itt.hpp" +#include "low_precision/rt_info/bias_attribute.hpp" + +using namespace ov::pass::low_precision; + +MarkupDequantizationFuse::MarkupDequantizationFuse() { + MATCHER_SCOPE(MarkupBias); + auto layer_m = ov::pass::pattern::wrap_type(ov::pass::pattern::has_static_rank()); + // TODO: getDequantization? 
+ auto bias_const_m = ov::pass::pattern::wrap_type(); + auto bias_m = ov::pass::pattern::wrap_type({layer_m, bias_const_m}); + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + const auto& const_shape = pattern_map.at(bias_const_m).get_shape(); + + const bool per_channel = std::count_if(const_shape.begin(), const_shape.end(), [](size_t x) { return x > 1; }) == 1; + if (ov::shape_size(const_shape) == 1 || per_channel) { + const auto bias = pattern_map.at(bias_m).get_node_shared_ptr(); + ov::mark_as_bias(bias); + } + + return false; + }; + + auto m = std::make_shared(bias_m, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/low_precision_transformations/src/mat_mul.cpp b/src/common/low_precision_transformations/src/mat_mul.cpp index 9155e9bf877783..12f02efa584b1c 100644 --- a/src/common/low_precision_transformations/src/mat_mul.cpp +++ b/src/common/low_precision_transformations/src/mat_mul.cpp @@ -12,6 +12,7 @@ #include "openvino/pass/pattern/op/or.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" +#include "low_precision/rt_info/bias_attribute.hpp" #include "low_precision/network_helper.hpp" #include "openvino/util/log.hpp" #include "itt.hpp" @@ -176,6 +177,9 @@ bool MatMulTransformation::transform(TransformationContext &context, ov::pass::p updateOutput(context, newMultiply, newMatMul); + // TODO: debug only + ov::mark_as_bias(newMultiply); + OPENVINO_DEBUG("LPT: done: ", newMatMul); return true; } diff --git a/src/common/low_precision_transformations/src/network_helper.cpp b/src/common/low_precision_transformations/src/network_helper.cpp index 2b26567f7a8307..6b52d84c9a9dd6 100644 --- a/src/common/low_precision_transformations/src/network_helper.cpp +++ b/src/common/low_precision_transformations/src/network_helper.cpp @@ -17,6 +17,7 @@ #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/layer_transformation.hpp" #include 
"low_precision/network_helper.hpp" +#include "low_precision/rt_info/bias_attribute.hpp" #include "low_precision/rt_info/intervals_alignment_attribute.hpp" #include "low_precision/rt_info/precision_preserved_attribute.hpp" #include "low_precision/rt_info/quantization_alignment_attribute.hpp" @@ -1183,7 +1184,7 @@ FakeQuantizeDequantization NetworkHelper::getDequantization(const std::shared_pt const std::shared_ptr multiply = ov::as_type_ptr(dataNode.get_node_shared_ptr()); std::shared_ptr multiplyConstant; if (multiply != nullptr) { - if (!FakeQuantizeDequantization::checkShape(multiply)) { + if (!FakeQuantizeDequantization::checkShape(multiply) || ov::marked_as_bias(multiply)) { return FakeQuantizeDequantization(); } @@ -1198,6 +1199,9 @@ FakeQuantizeDequantization NetworkHelper::getDequantization(const std::shared_pt std::shared_ptr subtractConvert; std::shared_ptr subtractConstant; if (subtract != nullptr) { + if (ov::marked_as_bias(subtract)) { + return FakeQuantizeDequantization(); + } if (!FakeQuantizeDequantization::checkShape(subtract)) { return FakeQuantizeDequantization(dataNode, nullptr, nullptr, nullptr, nullptr, multiply, multiplyConstant); } @@ -1211,6 +1215,9 @@ FakeQuantizeDequantization NetworkHelper::getDequantization(const std::shared_pt const std::shared_ptr convert = ov::as_type_ptr(dataNode.get_node_shared_ptr()); if (convert != nullptr) { + if (ov::marked_as_bias(convert)) { + return FakeQuantizeDequantization(); + } auto el_type = convert->input(0).get_element_type(); auto foundIt = std::find(defaultPrecisions.begin(), defaultPrecisions.end(), el_type); if (foundIt == defaultPrecisions.end() && diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 4ca6332c25c3cc..4cbb23570a90a5 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -44,7 +44,8 @@ struct Config { Unknown }; - bool collectPerfCounters = false; + // TODO: workaround to collect performance counters + bool 
collectPerfCounters = true; bool exclusiveAsyncRequests = false; SnippetsMode snippetsMode = SnippetsMode::Enable; std::string dumpToDot = {}; @@ -75,7 +76,7 @@ struct Config { std::set modelDistributionPolicy = {}; bool enableHyperThreading = true; bool changedHyperThreading = false; -#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) +#if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) || defined(OPENVINO_ARCH_ARM64) LPTransformsMode lpTransformsMode = LPTransformsMode::On; #else // Currently INT8 mode is not optimized on ARM / RISCV or other non-x86 platforms, fallback to FP32 mode. diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index ab454382f57d73..7e79e192fdd360 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -471,6 +471,7 @@ void DnnlMemoryMngr::notifyUpdate() { StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data, bool pads_zeroing) : m_eng(eng), m_pMemDesc(desc) { + OPENVINO_ASSERT(!desc->empty() || (desc->empty() && (data == nullptr))); if (desc->getPrecision() == element::string) { OPENVINO_THROW("[CPU] StaticMemory object cannot be created for string data."); } @@ -480,7 +481,7 @@ StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const vo m_size = m_pMemDesc->getCurrentMemSize(); - if (data) { + if (data || desc->empty()) { m_pMemMngr = std::make_shared(const_cast(data), m_size); } else { m_pMemMngr = std::make_shared(m_size); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_common_executor.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_common_executor.cpp index 5779147a5b3352..d2e88bd06c72d0 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_common_executor.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_common_executor.cpp @@ -3,6 +3,9 @@ // #include "acl_common_executor.hpp" + +#include + #include "acl_utils.hpp" 
#include "nodes/executors/memory_arguments.hpp" #include "utils/debug_capabilities.h" @@ -38,9 +41,9 @@ static void initACLTensorParams(const MemoryPtr& memoryPtr, } } -static std::shared_ptr initTensorInfo(const arm_compute::TensorShape& tensorShape, - const arm_compute::DataType& dataType, - const arm_compute::DataLayout& dataLayout) { +std::shared_ptr ACLCommonExecutor::initTensorInfo(const arm_compute::TensorShape& tensorShape, + const arm_compute::DataType& dataType, + const arm_compute::DataLayout& dataLayout) { std::shared_ptr aclMemoryInfo = nullptr; if (dataType != arm_compute::DataType::UNKNOWN) { aclMemoryInfo = std::make_shared( @@ -72,6 +75,9 @@ bool ACLCommonExecutor::update(const MemoryArgs &memory) { ACLTypes aclDataType{}; ACLLayouts aclDataLayout{}; for (auto& cpu_mem_ptr : memory) { + if (cpu_mem_ptr.second->getSize() == 0) { + continue; + } const ACLArgs index = argConvert.at(cpu_mem_ptr.first); initACLTensorParams(cpu_mem_ptr.second, aclTensorAttrs, aclMemoryShapes[index], @@ -108,18 +114,79 @@ bool ACLCommonExecutor::update(const MemoryArgs &memory) { configureThreadSafe([&] { iFunction = configureFunction(aclMemoryTensors); }); + +// for (auto& cpu_mem_ptr : memory) { +// const ACLArgs index = argConvert.at(cpu_mem_ptr.first); +// if (aclTensorAttrs.memoryUsageIndicator[index]) { +// aclMemoryTensors[index]->allocator()->import_memory(memory.at(cpu_mem_ptr.first)->getData()); +// } +// } return true; } +//namespace { +//std::ostream& operator<<(std::ostream& os, const arm_compute::ITensorInfo* tensor_info) { +// const auto data_type = tensor_info->data_type(); +// switch (data_type) { +// case arm_compute::DataType::S8: { +// return os << "S8"; +// } +// case arm_compute::DataType::QSYMM8: { +// return os << "QSYMM8"; +// } +// case arm_compute::DataType::QASYMM8: { +// return os << "QASYMM8"; +// } +// case arm_compute::DataType::QASYMM8_SIGNED: { +// return os << "QASYMM8_SIGNED"; +// } +// case arm_compute::DataType::S32: { +// return os 
<< "S32"; +// } +// case arm_compute::DataType::F32: { +// return os << "F32"; +// } +// default: { +// return os << "[UNKNOWN]"; +// } +// } +//} +//} // namespace + void ACLCommonExecutor::execute(const MemoryArgs &memory) { - // TODO: Move import_memory() to update() function - CVS-145871 for (auto& cpu_mem_ptr : memory) { const ACLArgs index = argConvert.at(cpu_mem_ptr.first); - if (aclTensorAttrs.memoryUsageIndicator[index]) { + if (aclMemoryTensors[index]) { aclMemoryTensors[index]->allocator()->import_memory(memory.at(cpu_mem_ptr.first)->getData()); } } + +// for (auto index = 0; index < aclMemoryTensors.size(); ++index) { +// const auto& tensor = aclMemoryTensors[index]; +// if ((tensor == nullptr) || (index == ACLArgs::ACL_DST)) { +// continue; +// } +// +// if (index == ACLArgs::ACL_SRC_0) { +// std::cout << "src0 "; +// } else if (index == ACLArgs::ACL_WEI) { +// std::cout << "src1 "; +// } else if (index == ACLArgs::ACL_BIAS) { +// std::cout << "biases "; +// } else { +// std::cout << "[UNKNOWN] "; +// } +// std::cout << tensor->info() << ":" << std::endl; +// tensor->print(std::cout); +// } + iFunction->run(); + +// { +// std::shared_ptr tensor = aclMemoryTensors[ACLArgs::ACL_DST]; +// std::cout << "dst " << tensor->info() << ":" << std::endl; +// tensor->print(std::cout); +// } } ACLCommonExecutor::~ACLCommonExecutor() { diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_common_executor.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_common_executor.hpp index 854130d6f884bb..97d632b8af1982 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_common_executor.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_common_executor.hpp @@ -47,6 +47,11 @@ class ACLCommonExecutor : public Executor { protected: ACLTensorAttrs aclTensorAttrs; + + virtual std::shared_ptr initTensorInfo(const arm_compute::TensorShape& tensorShape, + const arm_compute::DataType& dataType, + const arm_compute::DataLayout& dataLayout); + private: 
ACLTensors aclMemoryTensors; ACLFunction iFunction = nullptr; diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.cpp new file mode 100644 index 00000000000000..788e1a5092c553 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.cpp @@ -0,0 +1,160 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "acl_lowp_fullyconnected.hpp" + +#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" + +#include "nodes/executors/acl/acl_utils.hpp" +#include "nodes/executors/executor.hpp" +#include "nodes/executors/memory_arguments.hpp" +#include "utils/debug_capabilities.h" +#include "nodes/executors/debug_messages.hpp" +#include "nodes/executors/implementation_utils.hpp" +#include "acl_weights.hpp" +#include "acl_utils.hpp" + +namespace ov { +namespace intel_cpu { + +bool checkAndInitPostOps(const PostOps &postOps, arm_compute::GEMMInfo& info) { + // Add postops + if (!postOps.empty() && postOps.size() == 1) { + if (const auto activation = std::dynamic_pointer_cast(postOps[0])) { + auto activation_info = info.activation_info(); + auto result = getActivationLayerInfo(convertToEltwiseAlgorithm( + activation->type()), + activation_info, + activation->alpha(), + activation->beta(), + activation->gamma()); + if (result) { + info.set_activation_info(activation_info); + } + return result; + } + } + return false; +} + +void initFCAttrs(const FCAttrs &attrs, + ACLTensorAttrs& aclTensorAttrs, + ACLFCAttrs& aclfcAttrs, + const MemoryArgs &memory, + arm_compute::GEMMInfo& gemmInfo, + const PostOps &postOps) { + aclTensorAttrs.hasLayoutTypeNHWC = memory.at(ARG_SRC)->getDescPtr()->hasLayoutType(LayoutType::nspc); + // TODO: not completed + //fullyConnectedLayerInfo.weights_trained_layout = getAclDataLayoutByMemoryDesc(memory.at(ARG_WEI)->getDescPtr()); + aclfcAttrs.inputPrecision = 
memory.at(ARG_SRC)->getDescPtr()->getPrecision(); + // TODO: not completed + //fullyConnectedLayerInfo.transpose_weights = false; + aclfcAttrs.weightsNonTransposed = attrs.weightsNonTransposed; + + checkAndInitPostOps(postOps, gemmInfo); + + if (memory.at(ARG_SRC)->getPrecision() != memory.at(ARG_WEI)->getPrecision()) { + aclfcAttrs.isConvertedWeights = true; + } +} + +ACLLowpFullyConnectedExecutor::ACLLowpFullyConnectedExecutor(const FCAttrs &attrs, + const PostOps &postOps, + const MemoryArgs &memory, + const ExecutorContext::CPtr& context) : dequantizationScales(attrs.dequantizationScales) { + initFCAttrs(attrs, aclTensorAttrs, aclfcAttrs, memory, gemmInfo, postOps); + packedWeights = prepareWeightMemory(memory, context, attrs, aclfcAttrs, postOps); +} + +bool ACLLowpFullyConnectedExecutor::supports(const FCConfig &config) { + // TODO: check weights layout +// const auto attrs = static_cast(config.attrs); + + + const auto src0 = srcType(config); +// const auto src1 = weiType(config); +// const auto dst = dstType(config); + // TODO: check precisions + VERIFY(one_of(src0, ov::element::i8, ov::element::u8), UNSUPPORTED_SRC_PRECISIONS); + //VERIFY(postOpsNumbers(config) == 0, UNSUPPORTED_NUMBER_OF_POSTOPS); + VERIFY(one_of(srcRank(config), 2U, 3U, 4U), UNSUPPORTED_SRC_RANK); + VERIFY(one_of(weiRank(config), 2U, 3U, 4U), UNSUPPORTED_WEI_RANK); + VERIFY(static_cast(config.attrs).dequantizationScales.size() <= 1, UNSUPPORTED_PER_CHANNEL_QUANTIZATION); + return true; +} + +void ACLLowpFullyConnectedExecutor::updateTensorsShapes(ACLShapes& aclMemoryShapes) { + updateFCTensorsShapes(aclMemoryShapes); +} + +arm_compute::Status ACLLowpFullyConnectedExecutor::validateTensorsInfo(const ACLInfos & aclMemoryInfos) { + // TODO: debug only +// const auto src0 = aclMemoryInfos[ACLArgs::ACL_SRC_0].get(); +// const auto src1 = aclMemoryInfos[ACLArgs::ACL_WEI].get(); +// const auto dst = aclMemoryInfos[ACLArgs::ACL_DST].get(); + + // TODO: debug only + if 
(!dequantizationScales.empty()) { + auto& tensor_info = aclMemoryInfos[ACLArgs::ACL_SRC_0]; + tensor_info->set_quantization_info(arm_compute::QuantizationInfo(dequantizationScales[0])); + + auto& tensor_info_weights = aclMemoryInfos[ACLArgs::ACL_WEI]; + tensor_info_weights->set_quantization_info(arm_compute::QuantizationInfo(1.f)); + +// auto tensor = aclMemoryTensors[ACLArgs::ACL_SRC_0]; +// auto tensor_info = tensor->info(); +// tensor_info->set_quantization_info(arm_compute::QuantizationInfo(dequantizationScales[0])); + } + + const auto matMulValid = arm_compute::NEGEMMLowpMatrixMultiplyCore::validate( + aclMemoryInfos[ACLArgs::ACL_SRC_0].get(), + aclMemoryInfos[ACLArgs::ACL_WEI].get(), + nullptr, //aclMemoryInfos[ACLArgs::ACL_BIAS].get(), + aclMemoryInfos[ACLArgs::ACL_DST].get(), + gemmInfo); + return matMulValid; +} + +ACLFunction ACLLowpFullyConnectedExecutor::configureFunction(const ACLTensors & aclMemoryTensors) { + auto gemm = std::make_unique(); + gemm->configure( + aclMemoryTensors[ACLArgs::ACL_SRC_0].get(), + aclMemoryTensors[ACLArgs::ACL_WEI].get(), + nullptr, //aclMemoryTensors[ACLArgs::ACL_BIAS].get(), + aclMemoryTensors.at(ACLArgs::ACL_DST).get(), + gemmInfo); + + if (aclfcAttrs.isConvertedWeights || !aclfcAttrs.weightsNonTransposed) { + aclTensorAttrs.memoryUsageIndicator[ACLArgs::ACL_WEI] = false; + aclMemoryTensors[ACLArgs::ACL_WEI]->allocator()->import_memory(packedWeights->getData()); + } + return gemm; +} + +// TODO: move to ACLLowpExecutor +std::shared_ptr ACLLowpFullyConnectedExecutor::initTensorInfo( + const arm_compute::TensorShape& tensorShape, + const arm_compute::DataType& dataType, + const arm_compute::DataLayout& dataLayout) { + arm_compute::DataType result; + switch (dataType) { + case arm_compute::DataType::S8: { + result = arm_compute::DataType::QASYMM8_SIGNED; + break; + } + case arm_compute::DataType::U8: { + result = arm_compute::DataType::QASYMM8; + break; + } + default: { + result = dataType; + break; + } + } + + return 
ACLCommonExecutor::initTensorInfo(tensorShape, result, dataLayout); +} + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.hpp new file mode 100644 index 00000000000000..4d58c7908e61c4 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_lowp_fullyconnected.hpp @@ -0,0 +1,50 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "acl_common_executor.hpp" +#include "nodes/executors/fullyconnected_config.hpp" +#include "acl_weights.hpp" + +namespace ov { +namespace intel_cpu { + +class ACLLowpFullyConnectedExecutor : public ACLCommonExecutor { +public: + ACLLowpFullyConnectedExecutor(const FCAttrs& attrs, + const PostOps& postOps, + const MemoryArgs& memory, + const ExecutorContext::CPtr& context); + + static bool supports(const FCConfig& config); + + void updateTensorsShapes(ACLShapes& aclMemoryShapes) override; + + arm_compute::Status validateTensorsInfo(const ACLInfos & aclMemoryInfos) override; + + ACLFunction configureFunction(const ACLTensors & aclMemoryTensors) override; + + impl_desc_type implType() const override { + return impl_desc_type::gemm_acl; + } + +protected: + std::shared_ptr initTensorInfo(const arm_compute::TensorShape& tensorShape, + const arm_compute::DataType& dataType, + const arm_compute::DataLayout& dataLayout) override; + +private: + arm_compute::GEMMInfo gemmInfo; + arm_compute::WeightsInfo weightsInfo; + + MemoryCPtr packedWeights; + ACLFCAttrs aclfcAttrs; + std::vector dequantizationScales; +}; + +using ACLLowpFullyConnectedExecutorPtr = std::shared_ptr; + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp index cbf1c3cea1eb0c..adc31cd85cfe53 100644 --- 
a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_utils.hpp @@ -110,7 +110,7 @@ inline int axisCast(const std::size_t axis, const std::size_t shapeSize, ACLAxis * @param precision precision to be converted * @return ComputeLibrary DataType or UNKNOWN if precision is not mapped to DataType */ -inline arm_compute::DataType precisionToAclDataType(ov::element::Type precision) { +inline arm_compute::DataType precisionToAclDataType(const ov::element::Type& precision) { switch (precision) { case ov::element::i8: return arm_compute::DataType::S8; case ov::element::u8: return arm_compute::DataType::U8; diff --git a/src/plugins/intel_cpu/src/nodes/executors/debug_messages.hpp b/src/plugins/intel_cpu/src/nodes/executors/debug_messages.hpp index 26ae6ace59631b..206842014365a0 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/debug_messages.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/debug_messages.hpp @@ -18,6 +18,7 @@ #define UNSUPPORTED_DST_RANK " unsupported dst rank" #define UNSUPPORTED_DST_STRIDES " unsupported dst strides" #define HEURISTICS_MISMATCH " heuristics mismatch" +#define UNSUPPORTED_PER_CHANNEL_QUANTIZATION " unsupported per-channel quantization" #define VERIFY(condition, ...) 
\ do { \ diff --git a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp index 4a5b333b739321..32eb604d90e7ac 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp @@ -28,6 +28,7 @@ #if defined(OV_CPU_WITH_ACL) #include "nodes/executors/acl/acl_fullyconnected.hpp" +#include "nodes/executors/acl/acl_lowp_fullyconnected.hpp" #endif #if defined(OV_CPU_WITH_SHL) @@ -85,6 +86,14 @@ static const TypeMapping aclFCTypeMapping { {{_any, _any, _any, _any}, pt(just(), just(), just(), just())} }; +static const TypeMapping aclLowpFCTypeMapping { + // {src, wei, bia, dst} pt + {{_i8, _i8, _any, _f32}, pt(just(), just(), just(), just())}, + {{_i8, _i8, _any, _i32}, pt(just(), just(), just(), just())}, + //{{_u8, _u8, _any, _i32}, pt(just(), just(), bypass(), just())}, + {{_any, _any, _any, _any}, pt(just(), just(), just(), just())} +}; + static const MappingNotation dnnlConvolutionMappingNotation { ARG_SRC, ARG_WEI, ARG_BIAS, ARG_DST }; @@ -350,6 +359,36 @@ const std::vector>& getImplementations() { const ExecutorContext::CPtr context) { return std::make_shared(attrs, postOps, memory, context); }) + OV_CPU_INSTANCE_ACL( + "fullyconnected_acl_lowp", + ExecutorType::Acl, + OperationType::FullyConnected, + ShapeTolerance::Agnostic, + // supports + [](const FCConfig& config) -> bool { + VERIFY(noSparseDecompression(config), UNSUPPORTED_SPARSE_WEIGHTS); + VERIFY(noWeightsDecompression(config), UNSUPPORTED_WEIGHTS_DECOMPRESSION); + return ACLLowpFullyConnectedExecutor::supports(config); + }, + // requiresFallback + [](const FCConfig& config) -> ov::optional> { + return requiresFallbackCommon(config, + aclLowpFCTypeMapping, + aclFCLayoutConfig, + aclFullyConnectedMappingNotation); + }, + // acceptsShapes + [](const MemoryArgs& memory) -> bool { + // @todo create 
syntactic sugar (functor) for shape agnostic lambda + return true; + }, + // create + [](const FCAttrs& attrs, + const PostOps& postOps, + const MemoryArgs& memory, + const ExecutorContext::CPtr context) { + return std::make_shared(attrs, postOps, memory, context); + }) OV_CPU_INSTANCE_SHL( "fullyconnected_shl", ExecutorType::Shl, diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/mat_mul_decomposition.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/mat_mul_decomposition.cpp new file mode 100644 index 00000000000000..c7f2fea32ed09c --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/mat_mul_decomposition.cpp @@ -0,0 +1,39 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + + +#include "mat_mul_decomposition.hpp" + +#include "ov_ops/type_relaxed.hpp" + +#include "openvino/opsets/opset1.hpp" +#include "openvino/core/rt_info.hpp" + +ov::intel_cpu::MatMulDecomposition::MatMulDecomposition() { + auto matMul = ov::pass::pattern::wrap_type(); + + ov::matcher_pass_callback callback = [](ov::pass::pattern::Matcher& m) { + auto matMul = std::dynamic_pointer_cast(m.get_match_root()); + if (!matMul) { + return false; + } + + // TODO: is it possible to move to matcher? 
+ const auto in_type1 = matMul->get_input_element_type(0); + const auto in_type2 = matMul->get_input_element_type(1); + if (((in_type1 != element::i8) && (in_type1 != element::u8)) || ((in_type2 != element::i8) && (in_type2 != element::u8))) { + return false; // the i32 output set below is only meaningful when both inputs are 8-bit integers + } + + const auto newMatMul = matMul->clone_with_new_inputs({matMul->get_input_source_output(0), matMul->get_input_source_output(1)}); + // TODO: output type is hardcoded + newMatMul->set_output_type(0, element::i32, matMul->get_output_partial_shape(0)); + const auto convert = std::make_shared(newMatMul, element::f32); + replace_node(matMul, convert); + ov::copy_runtime_info(matMul, {newMatMul, convert}); + return true; + }; + + auto m = std::make_shared(matMul, "MatMulDecomposition"); + register_matcher(m, callback); +} \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/mat_mul_decomposition.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/mat_mul_decomposition.hpp new file mode 100644 index 00000000000000..447a3a0a85de49 --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/mat_mul_decomposition.hpp @@ -0,0 +1,19 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "openvino/pass/graph_rewrite.hpp" + +namespace ov { +namespace intel_cpu { + +class MatMulDecomposition: public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("MatMulDecomposition", "0"); + MatMulDecomposition(); +}; + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/snippets/aarch64/pass/snippets_mark_skipped.cpp b/src/plugins/intel_cpu/src/transformations/snippets/aarch64/pass/snippets_mark_skipped.cpp index 818f54983b2dfc..337d2df387fccd 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/aarch64/pass/snippets_mark_skipped.cpp +++
b/src/plugins/intel_cpu/src/transformations/snippets/aarch64/pass/snippets_mark_skipped.cpp @@ -7,6 +7,7 @@ #include "snippets/op/subgraph.hpp" #include "snippets/utils/utils.hpp" +#include "low_precision/rt_info/bias_attribute.hpp" #include "transformations/utils/utils.hpp" #include "transformations/utils.hpp" #include "utils/general_utils.h" @@ -72,7 +73,7 @@ bool isFullyConnected(const std::shared_ptr& node) { ov::op::util::is_on_constant_path(out_weights); } -bool SupportsFusingWithConvolution_Simple(const std::shared_ptr &node) { +bool SupportsFusingWithConvolution_Simple(const std::shared_ptr &node, const int channelAxis = DEFAULT_AXIS) { // Note: some other operations support this fusing (SoftPlus, Sqrt). // Skip them here, when they are supported by Snippets ARM. Ticket: 141170. return ov::is_type(node) || @@ -205,6 +206,11 @@ bool SnippetsMarkSkipped::run_on_model(const std::shared_ptr &m) { for (auto &node : m->get_ordered_ops()) { if (is_skipped_op(node)) continue; + + if (ov::marked_as_bias(node)) { + SetNodeFusingType(node, NodeFusingType::FusedWithMisc); + } + if (isSuitableConvolutionParent(node)) { // Initiate fusing chain SetNodeFusingType(node, NodeFusingType::FusedWithConvolution); @@ -231,7 +237,10 @@ bool SnippetsMarkSkipped::run_on_model(const std::shared_ptr &m) { PropagateIfHasOnlyChild(node, fusingChainType); } else if (isSuitableChildForFusingSimple(node)) { #if defined (OV_CPU_WITH_ACL) - if (one_of(fusingChainType, NodeFusingType::FusedWithConvolution, NodeFusingType::FusedWithBinaryConvolution)) { + if (one_of(fusingChainType, + NodeFusingType::FusedWithConvolution, + NodeFusingType::FusedWithBinaryConvolution, + NodeFusingType::FusedWithFC)) { PropagateIfHasOnlyChild(node, NodeFusingType::FusedTerminator); continue; } diff --git a/src/plugins/intel_cpu/src/transformations/snippets/aarch64/pass/snippets_mark_skipped.hpp b/src/plugins/intel_cpu/src/transformations/snippets/aarch64/pass/snippets_mark_skipped.hpp index 
7fdc7244d21de2..fdfdce489fe630 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/aarch64/pass/snippets_mark_skipped.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/aarch64/pass/snippets_mark_skipped.hpp @@ -4,7 +4,7 @@ #pragma once -#include "openvino/pass/graph_rewrite.hpp" +#include "transformations/snippets/common/pass/snippets_mark_skipped_base.hpp" namespace ov { namespace intel_cpu { @@ -14,10 +14,9 @@ namespace intel_cpu { * @brief Mark operations that should be ignored by snippets on tokenization stage. A typical example is eltwise operations * that will be fused into convolutions on plugin side. */ -class SnippetsMarkSkipped : public ov::pass::ModelPass { +class SnippetsMarkSkipped : public SnippetsMarkSkippedBase { public: OPENVINO_RTTI("SnippetsMarkSkipped", "0"); - SnippetsMarkSkipped() : ModelPass() {} bool run_on_model(const std::shared_ptr &) override; }; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/common/pass/snippets_mark_skipped_base.cpp b/src/plugins/intel_cpu/src/transformations/snippets/common/pass/snippets_mark_skipped_base.cpp new file mode 100644 index 00000000000000..4c892b00a50cfd --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/snippets/common/pass/snippets_mark_skipped_base.cpp @@ -0,0 +1,63 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include "snippets_mark_skipped_base.hpp" + +#include "snippets/pass/tokenization.hpp" +#include "snippets/op/subgraph.hpp" + +#include "transformations/utils/utils.hpp" +#include "transformations/utils.hpp" +#include "utils/general_utils.h" +#include "utils/cpu_utils.hpp" +#include "cpu/x64/cpu_isa_traits.hpp" + +#include "itt.hpp" + + +namespace ov { +namespace intel_cpu { + +bool SnippetsMarkSkippedBase::canBePerformedAsScaleShift(const std::shared_ptr &node, const int channelAxis) { + size_t fusingPort = 0; + size_t numNonConstInputs = 0; + ov::PartialShape dataShape; + for (size_t i = 0; i < 
node->get_input_size(); i++) { + const auto parent = node->get_input_node_shared_ptr(i); + if (!ov::is_type(parent)) { + fusingPort = i; + dataShape = node->get_input_partial_shape(i); + // only one non-const parent is allowed + if (++numNonConstInputs != 1) + return false; + } else { + // every const parent must have exactly one child + const auto out = parent->outputs(); + const bool has_only_child = (out.size() == 1) && (out[0].get_target_inputs().size() == 1); + if (!has_only_child) + return false; + } + } + + const auto isBroadcastableToDataInput = [&]() { + for (size_t i = 0; i < node->get_input_size(); i++) { + if (i == fusingPort) + continue; + const ov::PartialShape weightShape = node->get_input_partial_shape(i); + if (!isPerTensorOrPerChannelBroadcastable(dataShape.get_max_shape(), weightShape.get_max_shape(), channelAxis, true)) + return false; + } + return true; + }; + + // Prelu and MulAdd are still ignored + // isConvertablePowerStatic() is ignored + return (ov::is_type(node) || + ov::is_type(node) || + ov::is_type(node) || + ov::is_type(node)) && + isBroadcastableToDataInput(); +} + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/snippets/common/pass/snippets_mark_skipped_base.hpp b/src/plugins/intel_cpu/src/transformations/snippets/common/pass/snippets_mark_skipped_base.hpp new file mode 100644 index 00000000000000..a6982fe0f295da --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/snippets/common/pass/snippets_mark_skipped_base.hpp @@ -0,0 +1,22 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" + +namespace ov { +namespace intel_cpu { + +/** + * @interface SnippetsMarkSkippedBase + * @brief Base class to mark operations that should be ignored by snippets on tokenization stage. 
+ */ +class SnippetsMarkSkippedBase : public ov::pass::ModelPass { +protected: + bool canBePerformedAsScaleShift(const std::shared_ptr &node, const int channelAxis); +}; + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/snippets_mark_skipped.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/snippets_mark_skipped.hpp index 856e3e64736899..41a14ad4af662f 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/snippets_mark_skipped.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/snippets_mark_skipped.hpp @@ -4,7 +4,7 @@ #pragma once -#include "openvino/pass/graph_rewrite.hpp" +#include "transformations/snippets/common/pass/snippets_mark_skipped_base.hpp" namespace ov { namespace intel_cpu { @@ -14,10 +14,10 @@ namespace intel_cpu { * @brief Mark operations that should be ignored by snippets on tokenization stage. A typical example is eltwise operations * that will be fused into convolutions on plugin side. 
*/ -class SnippetsMarkSkipped : public ov::pass::ModelPass { +class SnippetsMarkSkipped : public SnippetsMarkSkippedBase { public: OPENVINO_RTTI("SnippetsMarkSkipped", "0"); - SnippetsMarkSkipped(bool enableBF16 = false) : ModelPass(), enableBF16(enableBF16) {} + SnippetsMarkSkipped(bool enableBF16 = false) : SnippetsMarkSkippedBase(), enableBF16(enableBF16) {} bool run_on_model(const std::shared_ptr &) override; private: bool enableBF16 = false; diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 3d12f7db9ab72c..fe0816288901df 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -116,6 +116,7 @@ #include "transformations/cpu_opset/convert_to_cpu_specific_opset.hpp" #if defined(OPENVINO_ARCH_ARM64) #include "transformations/snippets/aarch64/pass/snippets_mark_skipped.hpp" +#include "transformations/cpu_opset/arm/pass/mat_mul_decomposition.hpp" #else #include "transformations/snippets/x64/pass/snippets_mark_skipped.hpp" #endif @@ -722,7 +723,11 @@ void Transformations::Lpt(const std::vector& defaultPrecision }), PrecisionsRestriction::create({ {{0}, {ov::element::u8, ov::element::i8}}, +#if defined(OPENVINO_ARCH_ARM64) + {{1}, {ov::element::u8, ov::element::i8}} +#else {{1}, {ov::element::i8}} +#endif }), PrecisionsRestriction::create({ {{0, 1}, {ov::element::u8}} @@ -862,6 +867,9 @@ void Transformations::PostLpt() { auto symbolic_pipeline = CPU_REGISTER_PASS_COMMON(postLPTPassManager, ov::pass::SymbolicOptimizations, false); symbolic_pipeline->get_manager()->register_pass(); + // TODO: not necessary + //CPU_REGISTER_PASS_ARM64(postLPTPassManager, ov::intel_cpu::MatMulDecomposition); + postLPTPassManager.run_passes(model); } diff --git a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt index 
64fd58448749e8..cbc1909ae86e7a 100644 --- a/src/plugins/intel_cpu/tests/functional/CMakeLists.txt +++ b/src/plugins/intel_cpu/tests/functional/CMakeLists.txt @@ -48,6 +48,7 @@ if(NOT (ARM OR AARCH64)) list(APPEND EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests/instances/arm ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/arm + ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instances/low_precision_transformations/aarch64 ${CMAKE_CURRENT_SOURCE_DIR}/utils/arm) else() list(APPEND EXCLUDED_SOURCE_PATHS @@ -74,7 +75,8 @@ endif() if(NOT X86_64) list(APPEND EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests/instances/x64 - ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/x64) + ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/x64 + ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instances/low_precision_transformations/x64) endif() ov_add_test_target( diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/aarch64/fully_connected_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/aarch64/fully_connected_transformation.cpp new file mode 100644 index 00000000000000..3e8fdf92e828d0 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/aarch64/fully_connected_transformation.cpp @@ -0,0 +1,52 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "low_precision_transformations/fully_connected_transformation.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { +const std::vector netPrecisions = { + ov::element::f32 +}; + +const std::vector shapes = { + { + ov::PartialShape{ 1, 16 }, + ov::PartialShape{ 16, 8 }, + false, + false + }, + { + ov::PartialShape{ 1, 16 }, + ov::PartialShape{ 8, 16 }, + false, + true + }, + { + ov::PartialShape{ 16, 1 }, +
ov::PartialShape{ 16, 8 }, + true, + false + }, +}; + +const std::vector trasformationParamValues = { + LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams() +}; + +INSTANTIATE_TEST_SUITE_P(smoke_LPT, FullyConnectedTransformation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::ValuesIn(shapes), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn({ov::element::i8 /*, ov::element::u8*/}), + ::testing::ValuesIn({true, false}), + ::testing::Values("gemm_acl_i8")), + FullyConnectedTransformation::getTestCaseName); +} // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/add_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/add_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/add_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/add_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/assign_and_read_value_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/assign_and_read_value_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/assign_and_read_value_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/assign_and_read_value_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/batch_to_space_transformation.cpp 
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/batch_to_space_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/batch_to_space_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/batch_to_space_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/clamp_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/clamp_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/clamp_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/clamp_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/concat_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/concat_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/concat_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/concat_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/concat_with_child_and_output.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/concat_with_child_and_output.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/concat_with_child_and_output.cpp rename to 
src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/concat_with_child_and_output.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/concat_with_different_precision_on_children.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/concat_with_different_precision_on_children.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/concat_with_different_precision_on_children.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/concat_with_different_precision_on_children.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/concat_with_intermediate_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/concat_with_intermediate_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/concat_with_intermediate_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/concat_with_neighbors_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/concat_with_neighbors_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/concat_with_neighbors_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/concat_with_neighbors_transformation.cpp diff --git 
a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/concat_with_split_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/concat_with_split_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/concat_with_split_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/convolution_backprop_data_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/convolution_backprop_data_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/convolution_backprop_data_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/convolution_qdq_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/convolution_qdq_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/convolution_qdq_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/convolution_qdq_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp 
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/convolution_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/convolution_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/convolution_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/depth_to_space_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/depth_to_space_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/depth_to_space_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/depth_to_space_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/elementwise_branch_selection_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/elementwise_branch_selection_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/elementwise_branch_selection_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/elementwise_branch_selection_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/eliminate_fake_quantize_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/eliminate_fake_quantize_transformation.cpp similarity index 100% rename from 
src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/eliminate_fake_quantize_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/eliminate_fake_quantize_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fq_and_avg_pool_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fq_and_avg_pool_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fq_and_avg_pool_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fq_and_avg_pool_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fq_and_max_pool_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fq_and_max_pool_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fq_and_max_pool_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fq_and_max_pool_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fq_and_two_output_branches_with_convolution.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fq_and_two_output_branches_with_convolution.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fq_and_two_output_branches_with_convolution.cpp rename to 
src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fq_and_two_output_branches_with_convolution.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fq_precision_selection_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fq_precision_selection_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fq_precision_selection_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fq_precision_selection_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fq_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fq_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fq_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fq_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fq_with_dq_not_optimal_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fq_with_dq_not_optimal_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fq_with_dq_not_optimal_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fq_with_dq_not_optimal_transformation.cpp diff --git 
a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fully_connected_transformation.cpp similarity index 86% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fully_connected_transformation.cpp index 0368215a5cf5a4..ed9b51d141b59c 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fully_connected_transformation.cpp @@ -44,6 +44,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, FullyConnectedTransformation, ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(shapes), ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::ValuesIn(trasformationParamValues)), + ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn({ov::element::i8, ov::element::u8}), + ::testing::ValuesIn({true, false}), + ::testing::Values("")), FullyConnectedTransformation::getTestCaseName); } // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fuse_convert_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fuse_convert_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fuse_convert_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fuse_convert_transformation.cpp diff --git 
a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fuse_dequantize_to_fq_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fuse_dequantize_to_fq_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fuse_dequantize_to_fq_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fuse_dequantize_to_fq_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fuse_fq_and_scale_shift_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fuse_fq_and_scale_shift_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fuse_fq_and_scale_shift_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fuse_fq_and_scale_shift_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fuse_multiply_to_fq_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fuse_multiply_to_fq_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fuse_multiply_to_fq_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fuse_multiply_to_fq_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fuse_subtract_to_fq_transformation.cpp 
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fuse_subtract_to_fq_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/fuse_subtract_to_fq_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/fuse_subtract_to_fq_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/gather_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/gather_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/gather_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/gather_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/gemm_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/gemm_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/gemm_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/group_convolution_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/group_convolution_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/group_convolution_transformation.cpp rename to 
src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/group_convolution_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/groupconvolution_qdq_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/groupconvolution_qdq_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/interpolate_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/interpolate_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/interpolate_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/interpolate_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/mat_mul_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/mat_mul_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/mat_mul_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/mat_mul_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/mat_mul_with_constant_transformation.cpp 
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/mat_mul_with_constant_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/mat_mul_with_constant_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/mat_mul_with_constant_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/mat_mul_with_optimized_constant_fq.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/mat_mul_with_optimized_constant_fq.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/mat_mul_with_optimized_constant_fq.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/mat_mul_with_optimized_constant_fq.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/move_fake_quantize_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/move_fake_quantize_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/move_fake_quantize_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/move_fake_quantize_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/multiply_to_group_convolution.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/multiply_to_group_convolution.cpp similarity index 100% rename from 
src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/multiply_to_group_convolution.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/multiply_to_group_convolution.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/multiply_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/multiply_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/multiply_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/multiply_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/multiply_with_one_parent.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/multiply_with_one_parent.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/multiply_with_one_parent.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/multiply_with_one_parent.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/mvn_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/mvn_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/mvn_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/mvn_transformation.cpp diff --git 
a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/normalize_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/normalize_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/normalize_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/normalize_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/output_layers.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/output_layers.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/output_layers.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/output_layers.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/output_layers_concat.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/output_layers_concat.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/output_layers_concat.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/output_layers_concat.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/output_layers_concat_multi_channel.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/output_layers_concat_multi_channel.cpp similarity index 100% rename from 
src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/output_layers_concat_multi_channel.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/output_layers_concat_multi_channel.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/pad_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/pad_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/pad_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/pad_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/prelu_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/prelu_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/prelu_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/prelu_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/pull_reshape_through_dequantization.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/pull_reshape_through_dequantization.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/pull_reshape_through_dequantization.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/pull_reshape_through_dequantization.cpp diff --git 
a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/recurrent_cell_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/recurrent_cell_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/reduce_max_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/reduce_max_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/reduce_max_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/reduce_max_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/reduce_mean_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/reduce_mean_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/reduce_mean_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/reduce_mean_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/reduce_min_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/reduce_min_transformation.cpp similarity index 100% rename from 
src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/reduce_min_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/reduce_min_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/reduce_sum_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/reduce_sum_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/reduce_sum_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/reduce_sum_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/relu_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/relu_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/relu_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/relu_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/reshape_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/reshape_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/reshape_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/reshape_transformation.cpp diff --git 
a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/shuffle_channels_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/shuffle_channels_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/shuffle_channels_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/shuffle_channels_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/space_to_batch_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/space_to_batch_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/space_to_batch_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/space_to_batch_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/split_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/split_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/split_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/split_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/squeeze_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/squeeze_transformation.cpp similarity index 100% rename from 
src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/squeeze_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/squeeze_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/strided_slice_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/strided_slice_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/strided_slice_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/strided_slice_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/subtract_multiply_to_multiply_add.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/subtract_multiply_to_multiply_add.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/subtract_multiply_to_multiply_add.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/subtract_multiply_to_multiply_add.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/subtract_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/subtract_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/subtract_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/subtract_transformation.cpp diff --git 
a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/transpose_after_matmul_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/transpose_after_matmul_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/transpose_after_matmul_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/transpose_after_matmul_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/transpose_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/transpose_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/transpose_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/transpose_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/unsqueeze_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/unsqueeze_transformation.cpp similarity index 100% rename from src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/unsqueeze_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/unsqueeze_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/variadic_split_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/variadic_split_transformation.cpp similarity index 100% rename from 
src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/variadic_split_transformation.cpp rename to src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/x64/variadic_split_transformation.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 477c6190694205..0716bd8eb3e953 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -463,8 +463,10 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(smoke_TestsDFT_(1|2|3|4)d/DFTLayerTest.Inference.*)"); // Issue 88764, 91647, 108802: accuracy issue retVector.emplace_back(R"(MultipleLSTMCellTest/MultipleLSTMCellTest.CompareWithRefs.*)"); +#if !defined(OPENVINO_ARCH_ARM64) // int8 / code-generation specific retVector.emplace_back(R"(smoke_LPT.*)"); +#endif // Compressed weights are not supported retVector.emplace_back(R"(smoke_MatMulCompressedWeights.*)"); retVector.emplace_back(R"(smoke_MatMulSharedCompressedWeights.*)"); diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp index 71978473696a0b..67f83c29968b51 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/low_precision_transformations/fully_connected_transformation.cpp @@ -45,6 +45,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, FullyConnectedTransformation, ::testing::ValuesIn(netPrecisions), ::testing::ValuesIn(shapes), 
::testing::Values(ov::test::utils::DEVICE_GPU), - ::testing::ValuesIn(trasformationParamValues)), + ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn({ov::element::i8, ov::element::u8})), FullyConnectedTransformation::getTestCaseName); } // namespace diff --git a/src/tests/functional/plugin/shared/include/low_precision_transformations/fully_connected_transformation.hpp b/src/tests/functional/plugin/shared/include/low_precision_transformations/fully_connected_transformation.hpp index 731ce44224e33b..a05061ea79baf0 100644 --- a/src/tests/functional/plugin/shared/include/low_precision_transformations/fully_connected_transformation.hpp +++ b/src/tests/functional/plugin/shared/include/low_precision_transformations/fully_connected_transformation.hpp @@ -20,7 +20,10 @@ typedef std::tuple< ov::element::Type, MatMulShapes, std::string, - ov::pass::low_precision::LayerTransformation::Params> FullyConnectedTransformationParams; + ov::pass::low_precision::LayerTransformation::Params, + ov::element::Type, + bool, + std::string> FullyConnectedTransformationParams; namespace LayerTestsDefinitions { diff --git a/src/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_transformation.hpp b/src/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_transformation.hpp index 3d394c1e45674c..74dd7450130360 100644 --- a/src/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_transformation.hpp +++ b/src/tests/functional/plugin/shared/include/low_precision_transformations/mat_mul_transformation.hpp @@ -19,8 +19,10 @@ class MatMulTransformationTestValues { ov::builder::subgraph::FakeQuantizeOnData fqOnData1; ov::Shape inputShape2; ov::builder::subgraph::FakeQuantizeOnData fqOnData2; + // TODO: remove, not used std::string expectedKernelName; std::string expectedRuntimePrecision; + bool requantization; }; typedef std::tuple< diff --git 
a/src/tests/functional/plugin/shared/src/low_precision_transformations/fully_connected_transformation.cpp b/src/tests/functional/plugin/shared/src/low_precision_transformations/fully_connected_transformation.cpp index f72f6d90333613..3e535cb67693e7 100644 --- a/src/tests/functional/plugin/shared/src/low_precision_transformations/fully_connected_transformation.cpp +++ b/src/tests/functional/plugin/shared/src/low_precision_transformations/fully_connected_transformation.cpp @@ -20,14 +20,20 @@ std::string FullyConnectedTransformation::getTestCaseName(const testing::TestPar MatMulShapes shapes; std::string targetDevice; ov::pass::low_precision::LayerTransformation::Params params; - std::tie(precision, shapes, targetDevice, params) = obj.param; + ov::element::Type weightsType; + bool prelu; + std::string expectedPrimitiveType; + std::tie(precision, shapes, targetDevice, params, weightsType, prelu, expectedPrimitiveType) = obj.param; std::ostringstream result; result << - get_test_case_name_by_params(precision, shapes.inputA, targetDevice, params) << - shapes.inputB << "_" << + get_test_case_name_by_params(precision, shapes.inputA, targetDevice, params) << + shapes.inputB << "_" << shapes.transposeA << "_" << - shapes.transposeB; + shapes.transposeB << "_" << + weightsType << "_" << + prelu << "_" << + expectedPrimitiveType; return result.str(); } @@ -36,7 +42,10 @@ void FullyConnectedTransformation::SetUp() { ov::element::Type precision; MatMulShapes shapes; ov::pass::low_precision::LayerTransformation::Params params; - std::tie(precision, shapes, targetDevice, params) = this->GetParam(); + ov::element::Type weightsType; + bool prelu; + std::string expectedPrimitiveType; + std::tie(precision, shapes, targetDevice, params, weightsType, prelu, expectedPrimitiveType) = this->GetParam(); init_input_shapes({ shapes.inputA, shapes.inputB }); @@ -45,12 +54,22 @@ void FullyConnectedTransformation::SetUp() { shapes.inputA, shapes.inputB, shapes.transposeA, - shapes.transposeB); 
+ shapes.transposeB, + weightsType == ov::element::i8, + prelu); } TEST_P(FullyConnectedTransformation, CompareWithRefImpl) { SKIP_IF_CURRENT_TEST_IS_DISABLED(); run(); + + const auto actualPrecision = get_runtime_precision_by_type("FullyConnected"); + auto expectedPrecision = std::get<4>(GetParam()); + EXPECT_EQ(actualPrecision, expectedPrecision.to_string()); + + auto expectedPrimitiveType = std::get<6>(GetParam()); + const std::string actualPrimitiveType = get_property_by_type("FullyConnected", "primitiveType"); + EXPECT_EQ(expectedPrimitiveType, actualPrimitiveType); }; } // namespace LayerTestsDefinitions diff --git a/src/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp b/src/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp index bc1ce628deb245..d1d4ea94c6b01b 100644 --- a/src/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp +++ b/src/tests/functional/plugin/shared/src/low_precision_transformations/mat_mul_transformation.cpp @@ -27,10 +27,11 @@ std::string MatMulTransformation::getTestCaseName(const testing::TestParamInfo(GetParam()); - const auto actualType = get_runtime_precision(params.expectedKernelName); + const auto& actualType = get_runtime_precision_by_type("MatMul"); + const auto expected = std::get<3>(GetParam()); + EXPECT_EQ(expected.expectedRuntimePrecision, actualType); - EXPECT_EQ(actualType, params.expectedRuntimePrecision); + const auto& actualPrimitiveType = get_property_by_type("MatMul", "primitiveType"); + const auto expectedPrimitiveType = "gemm_acl_i8"; + EXPECT_EQ(expectedPrimitiveType, actualPrimitiveType); } TEST_P(MatMulTransformation, CompareWithRefImpl) { diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/low_precision_transformations/layer_transformation.hpp 
b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/low_precision_transformations/layer_transformation.hpp index 10a70f3bc04ee0..b9da9ff8af4833 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/low_precision_transformations/layer_transformation.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/low_precision_transformations/layer_transformation.hpp @@ -49,6 +49,8 @@ class LayerTransformation : virtual public ov::test::SubgraphBaseTest { // get runtime precision by operation type std::string get_runtime_precision_by_type(const std::string& layerType); + std::string get_property_by_type(const std::string& layerTypeName, const std::string& propertyName); + // get runtime precision by operation friendly name which can be fused std::string get_runtime_precision_by_fused_name(const std::string& layerName); diff --git a/src/tests/functional/shared_test_classes/src/base/low_precision_transformations/layer_transformation.cpp b/src/tests/functional/shared_test_classes/src/base/low_precision_transformations/layer_transformation.cpp index 49e7b0581cae76..0a5e59b9f23e66 100644 --- a/src/tests/functional/shared_test_classes/src/base/low_precision_transformations/layer_transformation.cpp +++ b/src/tests/functional/shared_test_classes/src/base/low_precision_transformations/layer_transformation.cpp @@ -7,6 +7,7 @@ #include #include +#include "openvino/util/common_util.hpp" namespace LayerTestsUtils { ov::pass::low_precision::LayerTransformation::Params LayerTransformationParamsNGraphFactory::createParamsU8I8AndI8() { @@ -60,14 +61,14 @@ std::string LayerTransformation::get_test_case_name_by_params( namespace { template -std::string find_node_by_runtime_precision(const ov::CompiledModel& execNet, IsNodeF is_node_f) { +std::string find_node_by_runtime_precision(const ov::CompiledModel& execNet, IsNodeF is_node_f, const std::string& propertyName = "runtimePrecision") { const std::shared_ptr& 
execFunction = execNet.get_runtime_model(); for (const auto& op : execFunction->get_ops()) { if (!is_node_f(op)) continue; const ov::RTMap& rtInfo = op->get_rt_info(); - const auto& it = rtInfo.find("runtimePrecision"); + const auto& it = rtInfo.find(propertyName); OPENVINO_ASSERT(it != rtInfo.end(), "Runtime precision is not found for node: ", op->get_friendly_name()); return it->second.as(); } @@ -94,6 +95,17 @@ std::string LayerTransformation::get_runtime_precision_by_type(const std::string return find_node_by_runtime_precision(compiledModel, is_node_f); } +std::string LayerTransformation::get_property_by_type(const std::string& layerTypeName, const std::string& propertyName) { + auto is_node_f = [&layerTypeName](const std::shared_ptr& op) { + const auto& rtInfo = op->get_rt_info(); + const auto& typeIt = rtInfo.find("layerType"); + + OPENVINO_ASSERT(typeIt != rtInfo.end(), "Layer is not found for type: ", layerTypeName); + return typeIt->second.as() == layerTypeName; + }; + return ov::util::to_lower(find_node_by_runtime_precision(compiledModel, is_node_f, propertyName)); +} + namespace { bool has_layer(const std::string& names, const std::string& layer_name) { size_t beginPosition = 0ul; diff --git a/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/mat_mul.hpp b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/mat_mul.hpp index 787e1f6ebe8bd4..30675d550cc64f 100644 --- a/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/mat_mul.hpp +++ b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/mat_mul.hpp @@ -27,14 +27,17 @@ class MatMulFunction { const ov::PartialShape inputShape1, const ov::PartialShape inputShape2, const bool transpose1, - const bool transpose2); + const bool transpose2, + const bool signedOnWeights, + const bool relu); static std::shared_ptr getOriginal( const ov::element::Type precision, const ov::Shape& inputShape1, const FakeQuantizeOnData& fqOnData1, const ov::Shape& inputShape2, - const FakeQuantizeOnData& 
fqOnData2); + const FakeQuantizeOnData& fqOnData2, + const bool requantization = false); static std::shared_ptr getOriginal(const ov::element::Type netPrecision, const ov::PartialShape& inputShape1, diff --git a/src/tests/ov_helpers/ov_lpt_models/src/mat_mul.cpp b/src/tests/ov_helpers/ov_lpt_models/src/mat_mul.cpp index 1b1351ef1b3399..ac04a6ec1e970f 100644 --- a/src/tests/ov_helpers/ov_lpt_models/src/mat_mul.cpp +++ b/src/tests/ov_helpers/ov_lpt_models/src/mat_mul.cpp @@ -54,12 +54,18 @@ std::shared_ptr MatMulFunction::getOriginal( const ov::PartialShape inputShape1, const ov::PartialShape inputShape2, const bool transpose1, - const bool transpose2) { + const bool transpose2, + const bool signedOnWeights, + const bool relu) { const auto paramNode = std::make_shared(precision, inputShape1); const std::vector constShapes(inputShape1.rank().get_length(), 1ul); - const auto fakeQuantizeOnAcitvations = ov::test::utils::make_fake_quantize( - paramNode, precision, 256ul, constShapes, - { 0.f }, { 255.f / 4.f }, { 0.f }, { 255.f / 4.f }); + const auto fakeQuantizeOnAcitvations = signedOnWeights ? 
+ ov::test::utils::make_fake_quantize( + paramNode, precision, 256ul, constShapes, + { -128.f / 4.f }, { 127.f / 4.f }, { -128.f / 4.f }, { 127.f / 4.f }) : + ov::test::utils::make_fake_quantize( + paramNode, precision, 256ul, constShapes, + { 0.f }, { 255.f / 4.f }, { 0.f }, { 255.f / 4.f }); fakeQuantizeOnAcitvations->set_friendly_name("fakeQuantizeOnAcitvations"); auto weightsConst = std::make_shared( @@ -71,14 +77,19 @@ std::shared_ptr MatMulFunction::getOriginal( { -128.f / 8.f }, { 127.f / 8.f }, { -128.f / 8.f }, { 127.f / 8.f }); fakeQuantizeOnWeights->set_friendly_name("fakeQuantizeOnWeights"); - const std::shared_ptr fullyConnected = std::make_shared( + std::shared_ptr parent = std::make_shared( fakeQuantizeOnAcitvations->output(0), fakeQuantizeOnWeights->output(0), transpose1, transpose2); - fullyConnected->set_friendly_name("fullyConnected"); + parent->set_friendly_name("fullyConnected"); - ov::ResultVector results{ std::make_shared(fullyConnected) }; + if (relu) { + parent = std::make_shared(parent); + parent->set_friendly_name("relu"); + } + + ov::ResultVector results{ std::make_shared(parent) }; std::shared_ptr function = std::make_shared( results, ov::ParameterVector{ paramNode }, @@ -93,21 +104,40 @@ std::shared_ptr MatMulFunction::getOriginal( const ov::Shape& inputShape1, const FakeQuantizeOnData& fqOnData1, const ov::Shape& inputShape2, - const FakeQuantizeOnData& fqOnData2) { + const FakeQuantizeOnData& fqOnData2, + const bool requantization) { const std::shared_ptr input1 = std::make_shared(precision, inputShape1); input1->set_friendly_name("input1"); const std::shared_ptr input2 = std::make_shared(precision, inputShape2); input2->set_friendly_name("input2"); - const std::shared_ptr matMul = std::make_shared( - makeFakeQuantize(input1, precision, fqOnData1), - makeFakeQuantize(input2, precision, fqOnData2), + std::shared_ptr parent1 = input1; + if (!fqOnData1.empty()) { + parent1 = makeFakeQuantize(parent1, precision, fqOnData1); + } + + 
std::shared_ptr parent2 = input2; + if (!fqOnData2.empty()) { + parent2 = makeFakeQuantize(parent2, precision, fqOnData2); + } + + std::shared_ptr parent = std::make_shared( + parent1, + parent2, false, false); - matMul->set_friendly_name("matMul"); + parent->set_friendly_name("matMul"); + + if (requantization) { + parent = makeFakeQuantize(parent, precision, fqOnData1); + parent = std::make_shared( + parent, + std::make_shared(ov::element::f32, Shape{1}, std::vector{0.f})); + parent->set_friendly_name("prelu"); + } - std::shared_ptr result = std::make_shared(matMul); + std::shared_ptr result = std::make_shared(parent); std::shared_ptr function = std::make_shared( ov::ResultVector{ result },