From 26a31b3f95db604cab8fc107204f19c1e61ba94e Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Tue, 19 Dec 2023 10:52:39 +0400 Subject: [PATCH] Revert "[GPU] Apply is_non_decompression_multiply() callback only for compressed models (#21719)" This reverts commit 032ac898e21c4008fb8c8dc2da5778c1b3ab4afe. --- .../src/plugin/transformations_pipeline.cpp | 48 ++++++++----------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index dbb7c2c79e2d5b..18cc248cde836a 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -191,8 +191,9 @@ void TransformationsPipeline::apply(std::shared_ptr func) { bool enableInt8; bool unroll_loop = config.get_property(ov::intel_gpu::enable_loop_unrolling); { - ov::pass::Manager initial_transformations_manager; - initial_transformations_manager.set_per_pass_validation(false); + ov::pass::Manager manager; + auto pass_config = manager.get_pass_config(); + manager.set_per_pass_validation(false); // Temporary solution, global rt info cleanup is needed for (auto& node : func->get_ops()) { @@ -201,8 +202,13 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); - initial_transformations_manager.register_pass(); - initial_transformations_manager.register_pass(); + if (enableInt8) { + manager.register_pass( + std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }); + } + + manager.register_pass(); + manager.register_pass(); precisions_map fp_convert_precision_map = { {ov::element::f64, ov::element::f32} @@ -251,19 +257,19 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } type_to_fuse_map empty_fuse_map = {}; - initial_transformations_manager.register_pass(); + manager.register_pass(); // fuse softmax, MVN patterns, so that they will not be marked as precision sensitive in ConvertPrecision - initial_transformations_manager.register_pass(); - initial_transformations_manager.register_pass(); + manager.register_pass(); + manager.register_pass(); // decompose MVNs that sre not supported in GPU, so that they will be marked as precision sensitive in ConvertPrecision - initial_transformations_manager.register_pass(); + manager.register_pass(); // Run these broadcast optimizations earlier to ensure that those are executed before NopElimination/ConstantFolding - initial_transformations_manager.register_pass(); - initial_transformations_manager.register_pass(); + manager.register_pass(); + manager.register_pass(); - initial_transformations_manager.register_pass(); - initial_transformations_manager.get_pass_config()->set_callback( + manager.register_pass(); + pass_config->set_callback( [](const_node_ptr& node) -> bool { auto next_node = node->get_output_target_inputs(0).begin()->get_node(); if (is_type(next_node)) { @@ -272,22 +278,10 @@ void TransformationsPipeline::apply(std::shared_ptr func) { return !is_type(next_node); }); - initial_transformations_manager.register_pass(ov::element::TypeVector{ov::element::u8, - ov::element::u4, - ov::element::i4}, true); - - // Ignore nodes that are not related to FullyConnected and allow ConstantFolding to be applied to them - initial_transformations_manager.get_pass_config()->set_callback(is_non_supported_decompression_op); - initial_transformations_manager.run_passes(func); - - ov::pass::Manager manager; - auto pass_config = manager.get_pass_config(); - + manager.register_pass(ov::element::TypeVector{ov::element::u8, ov::element::u4, ov::element::i4}, true); // Need to check if transfomrations work correctly for mixed models with both compression and quantization at the same time. - if (enableInt8) { - manager.register_pass( - std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }); - } + // Ignore nodes that are not related to FullyConnected and allow ConstantFolding to be applied to them + pass_config->set_callback(is_non_supported_decompression_op); manager.register_pass();