diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 3ea765527accff..5bda8db96a775c 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -187,9 +187,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { bool enableInt8; bool unroll_loop = config.get_property(ov::intel_gpu::enable_loop_unrolling); { - ov::pass::Manager manager; - auto pass_config = manager.get_pass_config(); - manager.set_per_pass_validation(false); + ov::pass::Manager initial_transformations_manager; + initial_transformations_manager.set_per_pass_validation(false); // Temporary solution, global rt info cleanup is needed for (auto& node : func->get_ops()) { @@ -198,13 +197,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); - if (enableInt8) { - manager.register_pass( - std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }); - } - - manager.register_pass(); - manager.register_pass(); + initial_transformations_manager.register_pass(); + initial_transformations_manager.register_pass(); precisions_map fp_convert_precision_map = { {ov::element::f64, ov::element::f32} @@ -253,19 +247,19 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } type_to_fuse_map empty_fuse_map = {}; - manager.register_pass(); + initial_transformations_manager.register_pass(); // fuse softmax, MVN patterns, so that they will not be marked as precision sensitive in ConvertPrecision - manager.register_pass(); - manager.register_pass(); + initial_transformations_manager.register_pass(); + initial_transformations_manager.register_pass(); // decompose MVNs that sre not supported in GPU, so that they will be marked as precision sensitive in 
ConvertPrecision - manager.register_pass(); + initial_transformations_manager.register_pass(); // Run these broadcast optimizations earlier to ensure that those are executed before NopElimination/ConstantFolding - manager.register_pass(); - manager.register_pass(); + initial_transformations_manager.register_pass(); + initial_transformations_manager.register_pass(); - manager.register_pass(); - pass_config->set_callback( + initial_transformations_manager.register_pass(); + initial_transformations_manager.get_pass_config()->set_callback( [](const_node_ptr& node) -> bool { auto next_node = node->get_output_target_inputs(0).begin()->get_node(); if (is_type(next_node)) { @@ -274,9 +268,22 @@ void TransformationsPipeline::apply(std::shared_ptr func) { return !is_type(next_node); }); - manager.register_pass(ov::element::TypeVector{ov::element::u8, ov::element::u4, ov::element::i4}, true); + initial_transformations_manager.register_pass(ov::element::TypeVector{ov::element::u8, + ov::element::u4, + ov::element::i4}, true); + // Ignore nodes that are not related to FullyConnected and allow ConstantFolding to be applied to them - pass_config->set_callback(is_non_decompression_multiply); + initial_transformations_manager.get_pass_config()->set_callback(is_non_decompression_multiply); + initial_transformations_manager.run_passes(func); + + ov::pass::Manager manager; + auto pass_config = manager.get_pass_config(); + + // Need to check if transformations work correctly for mixed models with both compression and quantization at the same time. + if (enableInt8) { + manager.register_pass( + std::vector{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 }); + } const bool keep_precision_sensitive_in_fp32_1 = true; const bool convert_input_output_precision = false;