Skip to content

Commit

Permalink
[GPU] Parse runtime_options from model RT info and apply to config
Browse files Browse the repository at this point in the history
Signed-off-by: Vladimir Paramuzov <[email protected]>
  • Loading branch information
vladimir-paramuzov committed Dec 4, 2024
1 parent 398f703 commit 0fe547a
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ class ExecutionConfig {

void apply_user_properties(const cldnn::device_info& info);

// Note that an RT info property value has lower priority than a value set by the user via core.set_property or passed to the compile_model call.
// So this method should be called after all user properties have been set, but before the apply_user_properties() call.
void apply_rt_info(const ov::RTMap& rt_info);

std::string to_string() const;

protected:
Expand All @@ -147,6 +151,16 @@ class ExecutionConfig {
void apply_priority_hints(const cldnn::device_info& info);
void apply_debug_options(const cldnn::device_info& info);

// Copies a single property from the model RT info into the user properties.
//
// Nothing happens when the property has already been set by the user, or when
// `rt_info` has no entry whose key equals `property.name()`. Otherwise the stored
// value is converted to `T` and registered through set_user_property().
template <typename T, PropertyMutability mutability>
void apply_rt_info_property(const ov::Property<T, mutability>& property, const ov::RTMap& rt_info) {
    // A value explicitly provided by the user takes precedence over RT info.
    if (is_set_by_user(property))
        return;

    const auto entry = rt_info.find(property.name());
    if (entry == rt_info.end())
        return;

    set_user_property(property(entry->second.template as<T>()));
}

private:
ov::AnyMap internal_properties;
ov::AnyMap user_properties;
Expand Down
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/src/plugin/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<

ExecutionConfig config = m_configs_map.at(device_id);
config.set_user_property(orig_config);
config.apply_rt_info(model->get_rt_info<ov::AnyMap>("runtime_options"));
config.apply_user_properties(context->get_engine().get_device_info());

set_cache_info(model, config);
Expand Down Expand Up @@ -278,6 +279,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>&

ExecutionConfig config = m_configs_map.at(device_id);
config.set_user_property(orig_config);
config.apply_rt_info(model->get_rt_info<ov::AnyMap>("runtime_options"));
config.apply_user_properties(ctx->get_engine().get_device_info());

ProgramBuilder prog(ctx->get_engine(), config);
Expand Down
6 changes: 6 additions & 0 deletions src/plugins/intel_gpu/src/runtime/execution_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,12 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
user_properties.clear();
}

// Applies the runtime options found in the given RT info map to this config.
// Each supported hint (kv_cache_precision, dynamic_quantization_group_size and
// activations_scale_factor) is forwarded to apply_rt_info_property(), which looks
// the property up by name and leaves values already set by the user untouched.
void ExecutionConfig::apply_rt_info(const ov::RTMap& rt_info) {
apply_rt_info_property(ov::hint::kv_cache_precision, rt_info);
apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info);
apply_rt_info_property(ov::hint::activations_scale_factor, rt_info);
}

std::string ExecutionConfig::to_string() const {
std::stringstream s;
s << "internal properties:\n";
Expand Down
86 changes: 86 additions & 0 deletions src/plugins/intel_gpu/tests/functional/behavior/properties.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "openvino/runtime/properties.hpp"
#include "base/ov_behavior_test_utils.hpp"
#include "openvino/runtime/core.hpp"
#include "common_test_utils/subgraph_builders/conv_pool_relu.hpp"

namespace {

// Fixture for the GPU property tests below: SetUp() assigns a newly built
// conv-pool-relu model to `model` before each test body runs.
class TestPropertiesGPU : public ::testing::Test {
public:
// Model handed to core.compile_model() by the tests; assigned in SetUp().
std::shared_ptr<ov::Model> model;

void SetUp() override {
// NOTE(review): presumably bails out early when the current test is listed as
// disabled - confirm against the macro definition.
SKIP_IF_CURRENT_TEST_IS_DISABLED();
model = ov::test::utils::make_conv_pool_relu();
}
};

// No user-provided values: the runtime options stored in the model RT info are
// expected to be reported back as properties of the compiled model.
TEST_F(TestPropertiesGPU, RTInfoPropertiesWithDefault) {
    ov::Core core;
    ov::Any kv_precision;
    ov::Any group_size;
    ov::Any scale_factor;
    ov::CompiledModel compiled;

    // Runtime options embedded into the model under the "runtime_options" RT info section.
    model->set_rt_info("f16", "runtime_options", ov::hint::kv_cache_precision.name());
    model->set_rt_info("0", "runtime_options", ov::hint::dynamic_quantization_group_size.name());
    model->set_rt_info("8.0", "runtime_options", ov::hint::activations_scale_factor.name());

    // Compile on GPU and read the effective property values back.
    OV_ASSERT_NO_THROW(compiled = core.compile_model(model, ov::test::utils::DEVICE_GPU));
    OV_ASSERT_NO_THROW(kv_precision = compiled.get_property(ov::hint::kv_cache_precision));
    OV_ASSERT_NO_THROW(group_size = compiled.get_property(ov::hint::dynamic_quantization_group_size));
    OV_ASSERT_NO_THROW(scale_factor = compiled.get_property(ov::hint::activations_scale_factor));

    // The RT info values must be in effect.
    ASSERT_EQ(kv_precision.as<ov::element::Type>(), ov::element::f16);
    ASSERT_EQ(group_size.as<uint64_t>(), 0);
    ASSERT_EQ(scale_factor.as<float>(), 8.0f);
}

// Values set through core.set_property() are expected to override the runtime
// options stored in the model RT info.
TEST_F(TestPropertiesGPU, RTInfoPropertiesWithUserValuesFromCore) {
    ov::Core core;
    ov::Any kv_precision;
    ov::Any group_size;
    ov::Any scale_factor;
    ov::CompiledModel compiled;

    // Runtime options embedded into the model under the "runtime_options" RT info section.
    model->set_rt_info("f16", "runtime_options", ov::hint::kv_cache_precision.name());
    model->set_rt_info("0", "runtime_options", ov::hint::dynamic_quantization_group_size.name());
    model->set_rt_info("8.0", "runtime_options", ov::hint::activations_scale_factor.name());

    // Conflicting values supplied by the user on the core object.
    core.set_property(ov::hint::kv_cache_precision(ov::element::u8));
    core.set_property(ov::hint::dynamic_quantization_group_size(16));
    core.set_property(ov::hint::activations_scale_factor(4.0f));

    // Compile on GPU and read the effective property values back.
    OV_ASSERT_NO_THROW(compiled = core.compile_model(model, ov::test::utils::DEVICE_GPU));
    OV_ASSERT_NO_THROW(kv_precision = compiled.get_property(ov::hint::kv_cache_precision));
    OV_ASSERT_NO_THROW(group_size = compiled.get_property(ov::hint::dynamic_quantization_group_size));
    OV_ASSERT_NO_THROW(scale_factor = compiled.get_property(ov::hint::activations_scale_factor));

    // The user-provided values must win over the RT info values.
    ASSERT_EQ(kv_precision.as<ov::element::Type>(), ov::element::u8);
    ASSERT_EQ(group_size.as<uint64_t>(), 16);
    ASSERT_EQ(scale_factor.as<float>(), 4.0f);
}

// Values passed directly to compile_model() are expected to override the runtime
// options stored in the model RT info.
TEST_F(TestPropertiesGPU, RTInfoPropertiesWithUserValuesFromCompileModel) {
    ov::Core core;
    ov::Any kv_precision;
    ov::Any group_size;
    ov::Any scale_factor;
    ov::CompiledModel compiled;

    // Runtime options embedded into the model under the "runtime_options" RT info section.
    model->set_rt_info("f16", "runtime_options", ov::hint::kv_cache_precision.name());
    model->set_rt_info("0", "runtime_options", ov::hint::dynamic_quantization_group_size.name());
    model->set_rt_info("8.0", "runtime_options", ov::hint::activations_scale_factor.name());

    // Conflicting values supplied by the user as compile-time configuration.
    ov::AnyMap user_config;
    user_config[ov::hint::kv_cache_precision.name()] = "u8";
    user_config[ov::hint::dynamic_quantization_group_size.name()] = "16";
    user_config[ov::hint::activations_scale_factor.name()] = "4.0";

    // Compile on GPU with the user configuration and read the effective values back.
    OV_ASSERT_NO_THROW(compiled = core.compile_model(model, ov::test::utils::DEVICE_GPU, user_config));
    OV_ASSERT_NO_THROW(kv_precision = compiled.get_property(ov::hint::kv_cache_precision));
    OV_ASSERT_NO_THROW(group_size = compiled.get_property(ov::hint::dynamic_quantization_group_size));
    OV_ASSERT_NO_THROW(scale_factor = compiled.get_property(ov::hint::activations_scale_factor));

    // The user-provided values must win over the RT info values.
    ASSERT_EQ(kv_precision.as<ov::element::Type>(), ov::element::u8);
    ASSERT_EQ(group_size.as<uint64_t>(), 16);
    ASSERT_EQ(scale_factor.as<float>(), 4.0f);
}

} // namespace

0 comments on commit 0fe547a

Please sign in to comment.