Commit 473aec5

[CPU] [ARM] FullyConnected fp16 issue reproducer

eshoguli committed Aug 14, 2024
1 parent 55ffb33 commit 473aec5
Showing 72 changed files with 244 additions and 36 deletions.
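In short, the commit: adds a new CPU low-precision-transformation (LPT) test instance for FullyConnected that expects the ACL gemm_acl_i8 primitive, extends the shared FullyConnectedTransformation test with weights-precision, activation, and expected-primitive-type parameters, and extends the MatMulFunction::getOriginal helper to build signed or per-channel weight quantization and an optional trailing Relu.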
3 changes: 2 additions & 1 deletion src/plugins/intel_cpu/tests/functional/CMakeLists.txt
@@ -74,7 +74,8 @@ endif()
if(NOT X86_64)
list(APPEND EXCLUDED_SOURCE_PATHS
${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests/instances/x64
- ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/x64)
+ ${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/x64
+ ${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instances/low_precision_transformations/x64)
endif()

ov_add_test_target(
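This exclusion keeps x64-only low-precision-transformation test instances out of non-x86 builds; the instance added below targets the ARM Compute Library path (gemm_acl_i8), so it appears intended for exactly those non-x86 (e.g. ARM) builds.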
@@ -0,0 +1,98 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "low_precision_transformations/fully_connected_transformation.hpp"
#include "common_test_utils/test_constants.hpp"

using namespace LayerTestsDefinitions;

namespace {
const std::vector<ov::element::Type> netPrecisions = {
ov::element::f32
};

const std::vector<MatMulShapes> shapes = {
{
ov::PartialShape{ 2, 8 },
ov::PartialShape{ 3, 8 },
false,
true
},
// {
// ov::PartialShape{ 1, 1, 16 },
// ov::PartialShape{ 1, 16, 8 },
// false,
// false
// },
// // transposeB <= here
// {
// ov::PartialShape{ 1, 16 },
// ov::PartialShape{ 8, 16 },
// false,
// true
// },
// {
// ov::PartialShape{ 16, 1 },
// ov::PartialShape{ 16, 8 },
// true,
// false
// },
// {
// ov::PartialShape{ 1, 16, 1 },
// ov::PartialShape{ 1, 16, 8 },
// true,
// false
// },
//// // MatMul_101
//// {
//// ov::PartialShape{ 1, 128, 768 },
//// ov::PartialShape{ 3072, 768 }, // after transpose: 768 x 3072
//// false,
//// true
//// },
};

const std::vector<ov::pass::low_precision::LayerTransformation::Params> trasformationParamValues = {
LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams()
};

const std::vector<FullyConnectedParams> activations = {
// TODO: failed if transposeB = true: accuracy check
{
true, // activation
false, // perChannel
"fullyConnected,fullyConnected/DequantizationMultiply,relu"
},
//// // TODO: failed if transposeB = true: fp32 execution, FQ is not decomposed
//// {
//// true, // activation
//// true, // perChannel
//// "fullyConnected,fullyConnected/DequantizationMultiply,relu"
//// },
// {
// false, // activation
// false, // perChannel
// "fullyConnected_original,fullyConnected"
// },
//// // TODO: failed if transposeB = true: fp32 execution, FQ is not decomposed
//// {
//// false, // activation
//// true, // perChannel
//// "fullyConnected_original,fullyConnected"
//// }
};

INSTANTIATE_TEST_SUITE_P(smoke_LPT, FullyConnectedTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::ValuesIn(shapes),
::testing::Values(ov::test::utils::DEVICE_CPU),
::testing::ValuesIn(trasformationParamValues),
::testing::ValuesIn({ov::element::i8 /*, ov::element::u8*/}),
::testing::ValuesIn(activations),
::testing::Values("gemm_acl_i8")),
FullyConnectedTransformation::getTestCaseName);
} // namespace
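Only one shape pair ({ 2, 8 } x { 3, 8 } with transposeB = true) and one FullyConnectedParams variant are enabled; the commented-out cases document known failures (TODO: accuracy mismatch when transposeB = true, and fp32 execution with per-channel weights because the FakeQuantize is not decomposed), which is consistent with this commit being an issue reproducer.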
@@ -16,11 +16,21 @@ class MatMulShapes
bool transposeB;
};

class FullyConnectedParams {
public:
bool activation;
bool perChannelWeights;
std::string originalLayersNames;
};

typedef std::tuple<
ov::element::Type,
MatMulShapes,
std::string,
- ov::pass::low_precision::LayerTransformation::Params> FullyConnectedTransformationParams;
+ ov::pass::low_precision::LayerTransformation::Params,
+ ov::element::Type,
+ FullyConnectedParams,
+ std::string> FullyConnectedTransformationParams;

namespace LayerTestsDefinitions {

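For orientation, here is a minimal sketch (not part of the commit) of the single enabled parameter set as the seven-field tuple declared above would carry it. The values come from the CPU instance file earlier; the aggregate initializers assume MatMulShapes and FullyConnectedParams keep only public data members, and the header above is assumed to be included:

    // Sketch only: field order must match FullyConnectedTransformationParams.
    FullyConnectedTransformationParams example{
        ov::element::f32,                                   // network precision
        MatMulShapes{ov::PartialShape{2, 8}, ov::PartialShape{3, 8}, false, true},
        "CPU",                                              // target device
        LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParams(),
        ov::element::i8,                                    // weights precision
        FullyConnectedParams{true, false,
                             "fullyConnected,fullyConnected/DequantizationMultiply,relu"},
        "gemm_acl_i8"};                                     // expected primitive type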
@@ -5,12 +5,13 @@
#include "low_precision_transformations/fully_connected_transformation.hpp"

#include <memory>
+ #include <string>
#include <tuple>
#include <vector>
- #include <string>


#include "common_test_utils/common_utils.hpp"
#include "openvino/util/common_util.hpp"
#include "ov_lpt_models/mat_mul.hpp"

namespace LayerTestsDefinitions {
@@ -20,14 +21,22 @@ std::string FullyConnectedTransformation::getTestCaseName(const testing::TestPar
MatMulShapes shapes;
std::string targetDevice;
ov::pass::low_precision::LayerTransformation::Params params;
- std::tie(precision, shapes, targetDevice, params) = obj.param;
+ ov::element::Type weightsType;
+ FullyConnectedParams activation;
+ std::string expectedPrimitiveType;
+ std::tie(precision, shapes, targetDevice, params, weightsType, activation, expectedPrimitiveType) = obj.param;

std::ostringstream result;
result <<
get_test_case_name_by_params(precision, shapes.inputA, targetDevice, params) <<
shapes.inputB << "_" <<
shapes.transposeA << "_" <<
- shapes.transposeB;
+ shapes.transposeB << "_" <<
+ weightsType << "_" <<
+ "Activation=" << activation.activation << "_" <<
+ "perChannelWeights=" << activation.perChannelWeights << "_" <<
+ activation.originalLayersNames << "_" <<
+ expectedPrimitiveType;

return result.str();
}
@@ -36,7 +45,10 @@ void FullyConnectedTransformation::SetUp() {
ov::element::Type precision;
MatMulShapes shapes;
ov::pass::low_precision::LayerTransformation::Params params;
- std::tie(precision, shapes, targetDevice, params) = this->GetParam();
+ ov::element::Type weightsType;
+ FullyConnectedParams activation;
+ std::string expectedPrimitiveType;
+ std::tie(precision, shapes, targetDevice, params, weightsType, activation, expectedPrimitiveType) = this->GetParam();

init_input_shapes({ shapes.inputA, shapes.inputB });

@@ -45,12 +57,31 @@
shapes.inputA,
shapes.inputB,
shapes.transposeA,
- shapes.transposeB);
+ shapes.transposeB,
+ weightsType == ov::element::i8,
+ activation.perChannelWeights,
+ activation.activation);

ov::pass::Serialize(
"/Users/eshoguli/projects/openvino_matmul/test.original.xml",
"/Users/eshoguli/projects/openvino_matmul/test.original.bin").run_on_model(function);
}

TEST_P(FullyConnectedTransformation, CompareWithRefImpl) {
SKIP_IF_CURRENT_TEST_IS_DISABLED();
run();

// const auto& activation = std::get<5>(GetParam());
// const auto originalLayersNames = get_property_by_type("FullyConnected", "originalLayersNames");
// EXPECT_EQ(ov::util::to_lower(activation.originalLayersNames), originalLayersNames);
//
// const auto& actualPrecision = get_runtime_precision_by_type("FullyConnected");
// const auto expectedPrecision = std::get<4>(GetParam());
// EXPECT_EQ(actualPrecision, expectedPrecision.to_string());
//
// const auto& expectedPrimitiveType = std::get<6>(GetParam());
// const std::string actualPrimitiveType = get_property_by_type("FullyConnected", "primitiveType");
// EXPECT_EQ(expectedPrimitiveType, actualPrimitiveType);
};

} // namespace LayerTestsDefinitions
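As committed, CompareWithRefImpl only builds and runs the model; the originalLayersNames, runtime-precision, and primitiveType assertions remain commented out. Note also that SetUp unconditionally serializes the model to absolute paths under /Users/eshoguli/ — a machine-specific debugging aid that would need to be removed or made configurable before merging.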
@@ -24,17 +24,21 @@ class MatMulFunction

static std::shared_ptr<ov::Model> getOriginal(
const ov::element::Type precision,
- const ov::PartialShape inputShape1,
- const ov::PartialShape inputShape2,
+ const ov::PartialShape& inputShape1,
+ const ov::PartialShape& inputShape2,
const bool transpose1,
- const bool transpose2);
+ const bool transpose2,
+ const bool signedWeights,
+ const bool perChannelWeights,
+ const bool relu);

static std::shared_ptr<ov::Model> getOriginal(
const ov::element::Type precision,
const ov::Shape& inputShape1,
const FakeQuantizeOnData& fqOnData1,
const ov::Shape& inputShape2,
- const FakeQuantizeOnData& fqOnData2);
+ const FakeQuantizeOnData& fqOnData2,
+ const bool requantization = false);

static std::shared_ptr<ov::Model> getOriginal(const ov::element::Type netPrecision,
const ov::PartialShape& inputShape1,
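A hedged usage sketch of the extended builder follows (not part of the commit). Argument values mirror the enabled CPU test case; the ov::builder::subgraph namespace is assumed from the ov_lpt_models helpers, and ov_lpt_models/mat_mul.hpp is assumed to be included:

    // Sketch only: build the reproducer model directly via the new signature.
    std::shared_ptr<ov::Model> model = ov::builder::subgraph::MatMulFunction::getOriginal(
        ov::element::f32,
        ov::PartialShape{2, 8},   // inputShape1: activations
        ov::PartialShape{3, 8},   // inputShape2: weights (transposed by MatMul)
        false,                    // transpose1
        true,                     // transpose2
        true,                     // signedWeights: signed (i8-style) quantization intervals
        false,                    // perChannelWeights: single per-tensor interval
        true);                    // relu: append a Relu after the MatMul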
110 changes: 87 additions & 23 deletions src/tests/ov_helpers/ov_lpt_models/src/mat_mul.cpp
@@ -49,36 +49,81 @@ std::shared_ptr<ov::Model> MatMulFunction::getOriginal(
return function;
}

namespace {
template <typename T>
std::vector<T> generate_values(const ov::Shape& shape) {
std::vector<T> values(ov::shape_size(shape));
for (size_t i = 0; i < values.size(); ++i) {
values[i] = static_cast<T>(static_cast<T>(i) / 10.0);
}
return values;
}

std::vector<float> generate_dequantization_values(
const ov::Shape& shape,
const size_t levels,
const bool low) {
const auto shape_size = ov::shape_size(shape);
std::vector<float> values(shape_size);
for (size_t i = 0; i < shape_size; ++i) {
values[i] = low ? -128.f / (static_cast<float>(i) + 1.f) : 127.f / (static_cast<float>(i) + 1.f);
}
return values;
}
} // namespace

std::shared_ptr<ov::Model> MatMulFunction::getOriginal(
const ov::element::Type precision,
- const ov::PartialShape inputShape1,
- const ov::PartialShape inputShape2,
+ const ov::PartialShape& inputShape1,
+ const ov::PartialShape& inputShape2,
const bool transpose1,
- const bool transpose2) {
+ const bool transpose2,
+ const bool signedOnWeights,
+ const bool perChannelWeights,
+ const bool relu) {
const auto paramNode = std::make_shared<ov::opset1::Parameter>(precision, inputShape1);
const std::vector<size_t> constShapes(inputShape1.rank().get_length(), 1ul);
- const auto fakeQuantizeOnAcitvations = ov::test::utils::make_fake_quantize(
- paramNode, precision, 256ul, constShapes,
- { 0.f }, { 255.f / 4.f }, { 0.f }, { 255.f / 4.f });
+ const auto fakeQuantizeOnAcitvations = signedOnWeights ?
+ ov::test::utils::make_fake_quantize(
+ paramNode, precision, 256ul, constShapes,
+ { -128.f / 4.f }, { 127.f / 4.f }, { -128.f / 4.f }, { 127.f / 4.f }) :
+ ov::test::utils::make_fake_quantize(
+ paramNode, precision, 256ul, constShapes,
+ { 0.f }, { 255.f / 4.f }, { 0.f }, { 255.f / 4.f });
fakeQuantizeOnAcitvations->set_friendly_name("fakeQuantizeOnAcitvations");

const size_t channel = inputShape2[inputShape2.size() - 2].get_length();
auto weightsConst = std::make_shared<ov::op::v0::Constant>(
precision,
inputShape2.to_shape(),
- std::vector<float>({ 1.f }));
- const auto fakeQuantizeOnWeights = ov::test::utils::make_fake_quantize(
- weightsConst, precision, 256ul, { 1ul, 1ul },
- { -128.f / 8.f }, { 127.f / 8.f }, { -128.f / 8.f }, { 127.f / 8.f });
+ generate_values<float>(inputShape2.to_shape()));
+
+ const auto fakeQuantizeOnWeights = perChannelWeights ?
+ ov::test::utils::make_fake_quantize(
+ weightsConst, precision, 256ul,
+ Shape{ channel, 1 },
+ generate_dequantization_values(Shape{ channel, 1 }, 256ul, true),
+ generate_dequantization_values(Shape{ channel, 1 }, 256ul, false),
+ generate_dequantization_values(Shape{ channel, 1 }, 256ul, true),
+ generate_dequantization_values(Shape{ channel, 1 }, 256ul, false)) :
+ ov::test::utils::make_fake_quantize(
+ weightsConst, precision, 256ul, { 1ul, 1ul },
+ { -128.f / 8.f }, { 127.f / 8.f }, { -128.f / 8.f }, { 127.f / 8.f });
fakeQuantizeOnWeights->set_friendly_name("fakeQuantizeOnWeights");

- const std::shared_ptr<ov::opset1::MatMul> fullyConnected = std::make_shared<ov::opset1::MatMul>(
+ std::shared_ptr<Node> parent = std::make_shared<ov::opset1::MatMul>(
fakeQuantizeOnAcitvations->output(0),
fakeQuantizeOnWeights->output(0),
transpose1,
transpose2);
- fullyConnected->set_friendly_name("fullyConnected");
+ parent->set_friendly_name("fullyConnected");

if (relu) {
parent = std::make_shared<ov::opset1::Relu>(parent);
parent->set_friendly_name("relu");
}

- ov::ResultVector results{ std::make_shared<ov::opset1::Result>(fullyConnected) };
+ ov::ResultVector results{ std::make_shared<ov::opset1::Result>(parent) };
std::shared_ptr<ov::Model> function = std::make_shared<ov::Model>(
results,
ov::ParameterVector{ paramNode },
@@ -93,21 +138,40 @@ std::shared_ptr<ov::Model> MatMulFunction::getOriginal(
const ov::Shape& inputShape1,
const FakeQuantizeOnData& fqOnData1,
const ov::Shape& inputShape2,
- const FakeQuantizeOnData& fqOnData2) {
+ const FakeQuantizeOnData& fqOnData2,
+ const bool requantization) {
const std::shared_ptr<ov::opset1::Parameter> input1 = std::make_shared<ov::opset1::Parameter>(precision, inputShape1);
input1->set_friendly_name("input1");

const std::shared_ptr<ov::opset1::Parameter> input2 = std::make_shared<ov::opset1::Parameter>(precision, inputShape2);
input2->set_friendly_name("input2");

- const std::shared_ptr<ov::opset1::MatMul> matMul = std::make_shared<ov::opset1::MatMul>(
- makeFakeQuantize(input1, precision, fqOnData1),
- makeFakeQuantize(input2, precision, fqOnData2),
+ std::shared_ptr<ov::Node> parent1 = input1;
+ if (!fqOnData1.empty()) {
+ parent1 = makeFakeQuantize(parent1, precision, fqOnData1);
+ }
+
+ std::shared_ptr<ov::Node> parent2 = input2;
+ if (!fqOnData2.empty()) {
+ parent2 = makeFakeQuantize(parent2, precision, fqOnData2);
+ }
+
+ std::shared_ptr<Node> parent = std::make_shared<ov::opset1::MatMul>(
+ parent1,
+ parent2,
false,
false);
- matMul->set_friendly_name("matMul");
+ parent->set_friendly_name("matMul");

if (requantization) {
parent = makeFakeQuantize(parent, precision, fqOnData1);
parent = std::make_shared<ov::opset1::PRelu>(
parent,
std::make_shared<ov::opset1::Constant>(ov::element::f32, Shape{1}, std::vector<float>{0.f}));
parent->set_friendly_name("prelu");
}

- std::shared_ptr<ov::opset1::Result> result = std::make_shared<ov::opset1::Result>(matMul);
+ std::shared_ptr<ov::opset1::Result> result = std::make_shared<ov::opset1::Result>(parent);

std::shared_ptr<ov::Model> function = std::make_shared<ov::Model>(
ov::ResultVector{ result },
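To make the per-channel branch concrete: for the enabled { 3, 8 } weight shape, channel = 3, and generate_dequantization_values produces per-output-channel intervals [-128/(i+1), 127/(i+1)]. A standalone sketch (not part of the commit) that reproduces the arithmetic:

    // Standalone illustration of generate_dequantization_values() for channel = 3.
    #include <cstdio>

    int main() {
        const int channel = 3;  // inputShape2 = {3, 8}: dimension at index size-2
        for (int i = 0; i < channel; ++i) {
            const float low = -128.f / (static_cast<float>(i) + 1.f);
            const float high = 127.f / (static_cast<float>(i) + 1.f);
            std::printf("row %d: [%g, %g]\n", i, low, high);
        }
        // Prints: row 0: [-128, 127]; row 1: [-64, 63.5]; row 2: [-42.6667, 42.3333]
        return 0;
    }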
