MatmulWeightsDecompression tests extended with group decompression and nf4 precision
v-Golubev · Oct 6, 2023 · 32ff879 · 32ff879
1 parent a67c106
commit 32ff879
Show file tree

Hide file tree

Showing 3 changed files with 145 additions and 78 deletions.
diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp
@@ -210,7 +210,7 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
         // We need to fuse Transpose to MatMul to have a simpler callback for the next transformation
         CPU_REGISTER_PASS_COMMON(manager, ov::pass::TransposeMatMul);
         // MarkDequantizationSubgraph is used even in non-LPT pipeline on X64 platforms
-        // in order to keep compressed u8 MatMul weights with decompression operations as is
+        // in order to keep compressed MatMul weights with decompression operations as is
         CPU_REGISTER_PASS_X64(manager, ov::pass::MarkDequantizationSubgraph, ov::element::TypeVector{ov::element::u8, ov::element::nf4}, true);
         CPU_SET_CALLBACK_X64(manager, [](const_node_ptr &node) -> bool {
             auto get_single_consumer = [](const_node_ptr &node) -> std::shared_ptr<ov::Node> {