From 2c77ebbf31b4439d05424b75fc0a052ad05a27f9 Mon Sep 17 00:00:00 2001
From: Vladislav Golubev
Date: Mon, 25 Sep 2023 16:37:21 +0200
Subject: [PATCH] Group & NF4 decompression temporarily disabled

---
 .../transformations/transformation_pipeline.cpp | 19 +++++++++++++------
 .../src/matmul_weights_decompression.cpp        |  4 ++++
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
index b35828e754515f..d8bb3454918f8c 100644
--- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
+++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -209,9 +209,14 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
     } else {
         // We need to fuse Transpose to MatMul to have a simpler callback for the next transformation
         CPU_REGISTER_PASS_COMMON(manager, ov::pass::TransposeMatMul);
+        const ov::element::TypeVector decompression_precisions{
+            ov::element::u8,
+            // TODO: Uncomment when group decompression is supported
+            // ov::element::nf4
+        };
         // MarkDequantizationSubgraph is used even in non-LPT pipeline on X64 platforms
         // in order to keep compressed MatMul weights with decompression operations as is
-        CPU_REGISTER_PASS_X64(manager, ov::pass::MarkDequantizationSubgraph, ov::element::TypeVector{ov::element::u8, ov::element::nf4}, true);
+        CPU_REGISTER_PASS_X64(manager, ov::pass::MarkDequantizationSubgraph, decompression_precisions, true);
         CPU_SET_CALLBACK_X64(manager, [](const_node_ptr &node) -> bool {
             auto get_single_consumer = [](const_node_ptr &node) -> std::shared_ptr<ov::Node> {
                 const auto consumers = node->get_output_target_inputs(0);
@@ -226,12 +231,14 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
 
             if (ov::is_type<ov::opset1::MatMul>(consumer)) {
                 return false;
-            } else if (ov::is_type<ov::opset1::Reshape>(consumer)) {
-                consumer = get_single_consumer(consumer);
-                if (consumer != nullptr && ov::is_type<ov::opset1::MatMul>(consumer)) {
-                    return false;
-                }
             }
+            // TODO: Uncomment when group decompression is supported
+            // else if (ov::is_type<ov::opset1::Reshape>(consumer)) {
+            //     consumer = get_single_consumer(consumer);
+            //     if (consumer != nullptr && ov::is_type<ov::opset1::MatMul>(consumer)) {
+            //         return false;
+            //     }
+            // }
             return true;
         }, ov::pass::MarkDequantizationSubgraph);
     }
diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp
index 1e805f8833dbf2..eae5e3e0c15cd8 100644
--- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp
+++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/matmul_weights_decompression.cpp
@@ -246,6 +246,10 @@ class MatmulWeightsDecompression : public testing::WithParamInterface<MatmulWei
     void check_results() {
         const auto& test_param = GetParam();
         const auto& weights_precision = std::get<1>(test_param);
+        // TODO: remove this condition when group decompression is supported
+        if (weights_precision == ov::element::nf4 || std::get<0>(test_param).weights_group_size != 1) {
+            return;
+        }
         bool weights_found = false;
         for (const auto& n : compiledModel.get_runtime_model()->get_ordered_ops()) {
             if (n->get_friendly_name() == "Compressed_weights") {
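
For reference, the pattern this patch controls is a weights decompression subgraph of the form
Constant(u8) -> Convert -> Subtract(zero point) -> Multiply(scale) feeding a MatMul: the callback
returns false (keeps the weights compressed) only for that direct per-channel case, while the
commented-out branch would additionally look through the Reshape that grouped decompression inserts
between the Multiply and the MatMul. Below is a minimal sketch of the per-channel pattern that
remains enabled, assuming the ov::opset1 C++ API; the helper name, shapes, and constant values are
illustrative and not part of the patch:

    // Sketch (illustrative, not part of the patch): per-channel u8 weights
    // decompression feeding a MatMul directly, the shape that
    // MarkDequantizationSubgraph keeps as is on x64 after this change.
    #include <memory>
    #include "openvino/core/model.hpp"
    #include "openvino/opsets/opset1.hpp"

    std::shared_ptr<ov::Model> make_compressed_matmul_sketch() {
        using namespace ov::opset1;
        auto data = std::make_shared<Parameter>(ov::element::f32, ov::Shape{1, 16});
        // Compressed weights stay u8; the pass keeps the Convert/Subtract/Multiply chain.
        auto weights = Constant::create(ov::element::u8, ov::Shape{16, 32}, {1});
        weights->set_friendly_name("Compressed_weights");
        auto convert = std::make_shared<Convert>(weights, ov::element::f32);
        auto zero_point = Constant::create(ov::element::f32, ov::Shape{1, 32}, {8.f});
        auto subtract = std::make_shared<Subtract>(convert, zero_point);
        auto scale = Constant::create(ov::element::f32, ov::Shape{1, 32}, {0.1f});
        auto multiply = std::make_shared<Multiply>(subtract, scale);  // single consumer below
        auto matmul = std::make_shared<MatMul>(data, multiply);       // direct MatMul consumer
        return std::make_shared<ov::Model>(ov::OutputVector{matmul->output(0)},
                                           ov::ParameterVector{data});
    }

In the grouped layout the weights, scales, and zero points carry an extra group dimension and a
Reshape collapses it right before the MatMul, which is why re-enabling group decompression means
restoring the Reshape branch in the callback above and dropping the early return in the test.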