diff --git a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp
index bddaf81e31a067..5755dc51dc34ea 100644
--- a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp
+++ b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp
@@ -44,7 +44,7 @@ void set_rt_info(const PatternValueMap& pt_map,
     }
 };
 
-void swap_nodes(const PatternValueMap& pt_map,
+bool swap_nodes(const PatternValueMap& pt_map,
                 const std::shared_ptr<Node>& first,
                 const std::shared_ptr<Node>& second) {
     if (pt_map.count(first) && pt_map.count(second)) {
@@ -59,7 +59,9 @@ void swap_nodes(const PatternValueMap& pt_map,
         }
         first_node->validate_and_infer_types();
         second_node->validate_and_infer_types();
+        return true;
     }
+    return false;
 }
 
 }  // namespace
@@ -89,7 +91,7 @@ ov::pass::MarkDequantization::MarkDequantization(const element::TypeVector& precisions,
         auto input = pt_map.at(input_pattern);
         const auto multiply = m.get_match_root();
 
-        if (transformation_callback(multiply)) {
+        if (!check_precision(input.get_element_type(), precisions) || transformation_callback(multiply)) {
             return false;
         }
 
@@ -117,9 +119,9 @@ ov::pass::MarkDequantization::MarkDequantization(const element::TypeVector& precisions,
         set_rt_info(pt_map, enable_constant_folding, converts_to_unmark, precisions);
 
         // Move Reshape/Unsqueeze ops up to fold them in ConstantFolding.
-        swap_nodes(pt_map, zp_convert_pattern, zp_reshape_pattern);
-        swap_nodes(pt_map, scale_convert_pattern, scale_reshape_pattern);
-        return false;
+        auto changed = swap_nodes(pt_map, zp_convert_pattern, zp_reshape_pattern);
+        changed = swap_nodes(pt_map, scale_convert_pattern, scale_reshape_pattern) || changed;
+        return changed;
     };
 
     auto m = std::make_shared<pattern::Matcher>(multiply_pattern, "MarkDequantization");
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index b0a767eec013bb..0317327b589fdb 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -291,10 +291,12 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func);
         enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && is_model_quantized;
-        if (enableInt8) {
-            manager.register_pass<ov::pass::MarkDequantization>(
-                std::vector<ov::element::Type>{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 });
-        }
+
+        // if (enableInt8) {  Why do we need this check? According to line 378 we do this marking anyway.
+        manager.register_pass<ov::pass::MarkDequantization>(
+            std::vector<ov::element::Type>{ ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4 },
+            !device_info.supports_immad);
+        // }
 
         manager.register_pass<ov::pass::InitNodeInfo>();
         manager.register_pass<ov::pass::EinsumDecomposition>();
@@ -373,7 +375,8 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         // it expects to have the same data type for weights and zero points (apply it only for u8 data type, since other compression
         // types are not supported by oneDNN)
         manager.register_pass<ov::pass::MarkDequantization>(supported_woq_types, !device_info.supports_immad);
-        pass_config->set_callback<ov::pass::MarkDequantization>([&](const std::shared_ptr<const ov::Node> node) {
+        pass_config->set_callback<ov::pass::MarkDequantization,
+                                  ov::pass::KeepConstPrecision>([&](const std::shared_ptr<const ov::Node> node) {
             return !is_decompression_multiply(node, device_info.supports_immad);
         });
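
Note on the `changed` accumulation in the last hunk of mark_dequantization_subgraph.cpp: the `swap_nodes` call has to sit on the left of `||`. With `changed = changed || swap_nodes(...)`, the scale-branch swap is silently skipped whenever the zero-point branch already modified the graph, because `||` short-circuits its right-hand side. Below is a minimal standalone C++ sketch, not OpenVINO code; `swap_zp`/`swap_scale` are hypothetical stand-ins for the two `swap_nodes` calls:

    #include <iostream>

    // Hypothetical stand-ins for the two swap_nodes() calls: each returns
    // true when it rewires the graph, and each must run regardless of the
    // other's result.
    static bool swap_zp()    { std::cout << "zp swap ran\n";    return true; }
    static bool swap_scale() { std::cout << "scale swap ran\n"; return true; }

    int main() {
        // Buggy form: once `changed` is already true, `||` short-circuits
        // and the right-hand call never executes, so only "zp swap ran"
        // is printed.
        bool changed = swap_zp();
        changed = changed || swap_scale();  // swap_scale() is skipped

        // Fixed form: the helper is evaluated first, so it always runs,
        // and the flag is folded in afterwards. Both lines print.
        changed = swap_zp();
        changed = swap_scale() || changed;  // swap_scale() always runs

        return changed ? 0 : 1;
    }

This is also why the matcher callback now ends with `return changed;` instead of an unconditional `return false;`: a MatcherPass callback is expected to report whether it modified the graph, so GraphRewrite can track whether another rewrite iteration is needed.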