From e637a9f0bd434658923a9a8b11e1936938c4dcff Mon Sep 17 00:00:00 2001 From: "River.Li" Date: Thu, 26 Dec 2024 14:16:22 +0800 Subject: [PATCH 1/2] [GPU] fix property overwritten issue --- src/plugins/intel_gpu/src/runtime/execution_config.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index a698ec7eb6c5a0..11fff7873f81ca 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -250,12 +250,16 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) { } // Enable KV-cache compression by default for non-systolic platforms - if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) { + if (!is_set_by_user(ov::hint::kv_cache_precision) && + internal_properties.find(ov::hint::kv_cache_precision.name()) == internal_properties.end() && + !info.supports_immad) { set_property(ov::hint::kv_cache_precision(ov::element::i8)); } // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && + internal_properties.find(ov::hint::dynamic_quantization_group_size.name()) == internal_properties.end() && + !info.supports_immad) { set_property(ov::hint::dynamic_quantization_group_size(32)); } From b7329788f013e3d3716f0ce08d632754585cd56d Mon Sep 17 00:00:00 2001 From: "River.Li" Date: Thu, 2 Jan 2025 13:17:19 +0800 Subject: [PATCH 2/2] Update --- .../intel_gpu/runtime/execution_config.hpp | 3 ++ .../src/runtime/execution_config.cpp | 34 +++++++++++-------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 5e059b17da0e97..70a04f0b0c3a99 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -150,6 +150,7 @@ class ExecutionConfig { void apply_performance_hints(const cldnn::device_info& info); void apply_priority_hints(const cldnn::device_info& info); void apply_debug_options(const cldnn::device_info& info); + void update_specific_default_properties(const cldnn::device_info& info); template void apply_rt_info_property(const ov::Property& property, const ov::RTMap& rt_info) { @@ -167,6 +168,8 @@ class ExecutionConfig { std::map supported_properties; std::map property_validators; + + bool specific_default_properties_is_set = false; }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 11fff7873f81ca..3b1376d19b4fea 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -229,7 +229,27 @@ void ExecutionConfig::apply_hints(const cldnn::device_info& info) { apply_debug_options(info); } +void ExecutionConfig::update_specific_default_properties(const cldnn::device_info& info) { + // These default properties should be set once. + if (specific_default_properties_is_set) + return; + specific_default_properties_is_set = true; + + // Enable KV-cache compression by default for non-systolic platforms + if (get_property(ov::hint::kv_cache_precision) == ov::element::undefined && !info.supports_immad) { + set_property(ov::hint::kv_cache_precision(ov::element::i8)); + } + + // Enable dynamic quantization by default for non-systolic platforms + if (get_property(ov::hint::dynamic_quantization_group_size) == 0 && !info.supports_immad) { + set_property(ov::hint::dynamic_quantization_group_size(32)); + } +} + void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) { + // Update specific default properties, call once before internal_properties updated. + update_specific_default_properties(info); + // Copy internal properties before applying hints to ensure that // a property set by hint won't be overriden by a value in user config. // E.g num_streams=AUTO && hint=THROUGHPUT @@ -249,20 +269,6 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) { set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); } - // Enable KV-cache compression by default for non-systolic platforms - if (!is_set_by_user(ov::hint::kv_cache_precision) && - internal_properties.find(ov::hint::kv_cache_precision.name()) == internal_properties.end() && - !info.supports_immad) { - set_property(ov::hint::kv_cache_precision(ov::element::i8)); - } - - // Enable dynamic quantization by default for non-systolic platforms - if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && - internal_properties.find(ov::hint::dynamic_quantization_group_size.name()) == internal_properties.end() && - !info.supports_immad) { - set_property(ov::hint::dynamic_quantization_group_size(32)); - } - user_properties.clear(); }