From 2c77ebbf31b4439d05424b75fc0a052ad05a27f9 Mon Sep 17 00:00:00 2001
From: Vladislav Golubev
Date: Mon, 25 Sep 2023 16:37:21 +0200
Subject: [PATCH] Group & NF4 decompression temporarily disabled

---
 .../transformations/transformation_pipeline.cpp | 19 +++++++++++++------
 .../src/matmul_weights_decompression.cpp        |  4 ++++
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
index b35828e754515f..d8bb3454918f8c 100644
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -209,9 +209,14 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
     } else {
         // We need to fuse Transpose to MatMul to have a simpler callback for the next transformation
         CPU_REGISTER_PASS_COMMON(manager, ov::pass::TransposeMatMul);
+        const ov::element::TypeVector decompression_precisions{
+            ov::element::u8,
+            // TODO: Uncomment when group decompression is supported
+            // ov::element::nf4
+        };
         // MarkDequantizationSubgraph is used even in non-LPT pipeline on X64 platforms
         // in order to keep compressed MatMul weights with decompression operations as is
-        CPU_REGISTER_PASS_X64(manager, ov::pass::MarkDequantizationSubgraph, ov::element::TypeVector{ov::element::u8, ov::element::nf4}, true);
+        CPU_REGISTER_PASS_X64(manager, ov::pass::MarkDequantizationSubgraph, decompression_precisions, true);
         CPU_SET_CALLBACK_X64(manager, [](const_node_ptr &node) -> bool {
             auto get_single_consumer = [](const_node_ptr &node) -> std::shared_ptr<ov::Node> {
                 const auto consumers = node->get_output_target_inputs(0);
@@ -226,12 +231,14 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
 
             if (ov::is_type<ov::opset1::MatMul>(consumer)) {
                 return false;
-            } else if (ov::is_type<ov::opset1::Reshape>(consumer)) {
-                consumer = get_single_consumer(consumer);
-                if (consumer != nullptr && ov::is_type<ov::opset1::MatMul>(consumer)) {
-                    return false;
-                }
             }
+            // TODO: Uncomment when group decompression is supported
+            // else if (ov::is_type<ov::opset1::Reshape>(consumer)) {
+            //     consumer = get_single_consumer(consumer);
+            //     if (consumer != nullptr && ov::is_type<ov::opset1::MatMul>(consumer)) {
+            //         return false;
+            //     }
+            // }
             return true;
         }, ov::pass::MarkDequantizationSubgraph);
     }
diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp
index 1e805f8833dbf2..eae5e3e0c15cd8 100644
--- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp
+++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp
@@ -246,6 +246,10 @@ class MatmulWeightsDecompression : public testing::WithParamInterface<MatmulWei
     void check_results() {
         const auto& test_param = GetParam();
         const auto& weights_precision = std::get<1>(test_param);
+        // TODO: remove this condition when group decompression is supported
+        if (weights_precision == ov::element::nf4 || std::get<0>(test_param).weights_group_size != 1) {
+            return;
+        }
         bool weights_found = false;
         for (const auto& n : compiledModel.get_runtime_model()->get_ordered_ops()) {
             if (n->get_friendly_name() == "Compressed_weights") {
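
For reference, the pattern this patch controls is a weights decompression subgraph of the form
Constant(u8) -> Convert -> Subtract(zero point) -> Multiply(scale) feeding a MatMul: the callback
returns false (keeps the weights compressed) only for that direct per-channel case, while the
commented-out branch would additionally look through the Reshape that grouped decompression inserts
between the Multiply and the MatMul. Below is a minimal sketch of the per-channel pattern that
remains enabled, assuming the ov::opset1 C++ API; the helper name, shapes, and constant values are
illustrative and not part of the patch:

    // Sketch (illustrative, not part of the patch): per-channel u8 weights
    // decompression feeding a MatMul directly, the shape that
    // MarkDequantizationSubgraph keeps as is on x64 after this change.
    #include <memory>
    #include "openvino/core/model.hpp"
    #include "openvino/opsets/opset1.hpp"

    std::shared_ptr<ov::Model> make_compressed_matmul_sketch() {
        using namespace ov::opset1;
        auto data = std::make_shared<Parameter>(ov::element::f32, ov::Shape{1, 16});
        // Compressed weights stay u8; the pass keeps the Convert/Subtract/Multiply chain.
        auto weights = Constant::create(ov::element::u8, ov::Shape{16, 32}, {1});
        weights->set_friendly_name("Compressed_weights");
        auto convert = std::make_shared<Convert>(weights, ov::element::f32);
        auto zero_point = Constant::create(ov::element::f32, ov::Shape{1, 32}, {8.f});
        auto subtract = std::make_shared<Subtract>(convert, zero_point);
        auto scale = Constant::create(ov::element::f32, ov::Shape{1, 32}, {0.1f});
        auto multiply = std::make_shared<Multiply>(subtract, scale);  // single consumer below
        auto matmul = std::make_shared<MatMul>(data, multiply);       // direct MatMul consumer
        return std::make_shared<ov::Model>(ov::OutputVector{matmul->output(0)},
                                           ov::ParameterVector{data});
    }

In the grouped layout the weights, scales, and zero points carry an extra group dimension and a
Reshape collapses it right before the MatMul, which is why re-enabling group decompression means
restoring the Reshape branch in the callback above and dropping the early return in the test.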