[AUTO] Load cached model to target device W/O CPU accelerating #24618

Merged
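For context, a minimal usage sketch of the scenario this change targets, assuming a local model.xml, a writable cache directory, and a machine exposing a GPU plugin (paths and device names are illustrative, not taken from this PR). With caching enabled, a repeat compile on AUTO should load the cached blob straight onto the selected device instead of using the CPU as a temporary accelerator.

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    core.set_property(ov::cache_dir("model_cache"));  // illustrative cache path

    auto model = core.read_model("model.xml");  // assumed to exist locally

    // First run: AUTO may serve early requests on CPU while the GPU compiles,
    // and blobs for both devices can land in the cache directory.
    // Later runs: the cached GPU blob is loaded directly, so the CPU
    // "accelerating" stage is skipped.
    auto compiled = core.compile_model(model,
                                       "AUTO",
                                       ov::device::priorities("GPU", "CPU"),
                                       ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));

    auto request = compiled.create_infer_request();
    return 0;
}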
44 changes: 15 additions & 29 deletions src/plugins/auto/src/auto_schedule.cpp
@@ -133,7 +133,6 @@ void AutoSchedule::init() {
if (m_compile_context[ACTUALDEVICE].m_is_enabled) {
LOG_INFO_TAG("select device:%s", m_compile_context[ACTUALDEVICE].m_device_info.device_name.c_str());
bool is_actual_cpu = m_compile_context[ACTUALDEVICE].m_device_info.device_name.find("CPU") != std::string::npos;
- bool is_actual_gpu = m_compile_context[ACTUALDEVICE].m_device_info.device_name.find("GPU") != std::string::npos;
// if Actual device is CPU or perf_hint is cumulative, disabled m_compile_context[CPU], only use
// m_compile_context[ACTUALDEVICE]
if (is_actual_cpu || !m_context->m_startup_fallback) {
@@ -148,29 +147,11 @@ void AutoSchedule::init() {
// limit the threads num for compiling
auto device = m_compile_context[ACTUALDEVICE].m_device_info.device_name;
auto& device_config = m_compile_context[ACTUALDEVICE].m_device_info.config;
- if (is_actual_gpu) {
-     int max_threads = 0;
-     try {
-         max_threads = m_context->m_ov_core->get_property(device, ov::compilation_num_threads);
-     } catch (const ov::Exception&) {
-         LOG_DEBUG_TAG("cannot get MAX_NUM_THREADS from GPU");
-     }
-     if (max_threads == static_cast<int>(std::thread::hardware_concurrency())) {
-         int thread_num = max_threads / 2;
-         m_compile_context[ACTUALDEVICE].m_device_info.config.insert(
-             ov::compilation_num_threads(thread_num));
-         LOG_DEBUG_TAG("gpu streams number for compiling: %d", thread_num);
-     } else {
-         // user set the compiling threads num
-         // use the user's val anyway
-         LOG_DEBUG_TAG("user defined compiling threads: %d", max_threads);
-     }
- }
std::string cache_dir = device_config.count(ov::cache_dir.name())
? device_config[ov::cache_dir.name()].as<std::string>()
: m_context->m_ov_core->get_property("", ov::cache_dir);

- if (!m_context->m_is_set_startup_fallback && !cache_dir.empty()) {
+ if (m_context->m_startup_fallback && !cache_dir.empty()) {
const auto properties =
m_context->m_ov_core->create_compile_config(ov::DeviceIDParser(device).get_device_name(),
device_config);
@@ -323,15 +304,20 @@ void AutoSchedule::try_to_compile_model(AutoCompileContext& context, const std::
device_config.find(ov::compilation_num_threads.name()) != device_config.end());
if (cur_dev_is_gpu && m_compile_context[CPU].m_is_enabled && !is_already_set_gpu) {
device_config.insert(ov::intel_gpu::hint::host_task_priority(ov::hint::Priority::HIGH));
- auto proc_type_table = get_org_proc_type_table();
- int compilation_num_threads = proc_type_table[0][MAIN_CORE_PROC] != 0
-                                   ? proc_type_table[0][MAIN_CORE_PROC]
-                                   : proc_type_table[0][EFFICIENT_CORE_PROC];
- if (device_config.insert(ov::compilation_num_threads(compilation_num_threads)).second)
-     LOG_DEBUG_TAG("gpu streams number for compiling: %d", compilation_num_threads);
- else
-     LOG_DEBUG_TAG("user defined compiling threads: %d",
-                   device_config[ov::compilation_num_threads.name()].as<int32_t>());
+ int max_threads = 0;
+ try {
+     m_context->m_ov_core->get_property(device, ov::compilation_num_threads);
+     auto proc_type_table = get_org_proc_type_table();
+     max_threads = proc_type_table[0][MAIN_CORE_PROC] != 0 ? proc_type_table[0][MAIN_CORE_PROC]
+                                                           : proc_type_table[0][EFFICIENT_CORE_PROC];
+     if (device_config.insert(ov::compilation_num_threads(max_threads)).second)
+         LOG_DEBUG_TAG("gpu streams number for compiling: %d", max_threads);
+     else
+         LOG_DEBUG_TAG("user defined compiling threads: %d",
+                       device_config[ov::compilation_num_threads.name()].as<int32_t>());
+ } catch (const ov::Exception&) {
+     LOG_DEBUG_TAG("cannot get MAX_NUM_THREADS from GPU");
+ }
}
}
try {
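A note on the compile-thread logic moved into try_to_compile_model above: when the application has already set ov::compilation_num_threads, the insert() fails and the user value is kept; otherwise AUTO derives a count from the processor-type table. A rough sketch of pinning the value from application code, assuming AUTO forwards the property into the GPU compile config as the scheduler code above suggests (the value 4 and the paths are illustrative):

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // illustrative path

    // A user-supplied compile-thread count takes precedence over the value
    // AUTO would otherwise derive from the core counts.
    auto compiled = core.compile_model(model,
                                       "AUTO",
                                       ov::device::priorities("GPU", "CPU"),
                                       ov::compilation_num_threads(4));
    return 0;
}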
1 change: 0 additions & 1 deletion src/plugins/auto/src/common.hpp
@@ -207,7 +207,6 @@ class ScheduleContext : public std::enable_shared_from_this<ScheduleContext> {
bool m_need_perf_counters;
bool m_batching_disabled = false;
bool m_startup_fallback = true;
- bool m_is_set_startup_fallback = false;
bool m_runtime_fallback = true;
bool m_bind_buffer = false;
std::shared_ptr<ov::Model> m_model;
1 change: 0 additions & 1 deletion src/plugins/auto/src/plugin.cpp
@@ -436,7 +436,6 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model_impl(const std::string
OPENVINO_ASSERT(auto_s_context->m_ov_core);
auto_s_context->m_log_tag = get_device_name();
auto_s_context->m_model_precision = model_precision;
- auto_s_context->m_is_set_startup_fallback = load_config.is_set_by_user(ov::intel_auto::enable_startup_fallback);
auto_s_context->m_startup_fallback = load_config.get_property(ov::intel_auto::enable_startup_fallback);
auto_s_context->m_runtime_fallback = load_config.get_property(ov::intel_auto::enable_runtime_fallback);
auto_s_context->m_bind_buffer = load_config.get_property(ov::intel_auto::device_bind_buffer);
45 changes: 44 additions & 1 deletion src/plugins/auto/tests/functional/behavior/caching_test.cpp
@@ -32,7 +32,7 @@ TEST_F(AutoFuncTests, compiled_with_cache_enabled) {
core.set_property(ov::cache_dir(""));
}

- TEST_F(AutoFuncTests, load_cached_model_to_actual_device_and_disable_CPU_accelerating) {
+ TEST_F(AutoFuncTests, load_cached_model_to_actual_device_and_disable_CPU_accelerating_default_startup_fallback) {
core.set_property(ov::cache_dir(cache_path));
core.set_property("MOCK_GPU", ov::device::id("test")); // device id for cache property distinguish with MOCK_CPU
{
@@ -74,6 +74,49 @@ TEST_F(AutoFuncTests, load_cached_model_to_actual_device_and_disable_CPU_acceler
core.set_property(ov::cache_dir(""));
}

+ TEST_F(AutoFuncTests, load_cached_model_to_actual_device_and_disable_CPU_accelerating_set_startup_fallback) {
+     core.set_property(ov::cache_dir(cache_path));
+     core.set_property("MOCK_GPU", ov::device::id("test"));  // device id for cache property distinguish with MOCK_CPU
+     {
+         auto compiled_model = core.compile_model(model_cannot_batch,
+                                                  "AUTO",
+                                                  {ov::device::priorities("MOCK_GPU", "MOCK_CPU"),
+                                                   ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)});
+     }
+     // No cached model for actual device
+     // will cache model for both actual device and CPU plugin
+     ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 2);
+     ov::test::utils::removeFilesWithExt(cache_path, "blob");
+     {
+         auto compiled_model = core.compile_model(
+             model_cannot_batch,
+             "AUTO",
+             {ov::device::priorities("MOCK_GPU"), ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)});
+     }
+     {
+         auto compiled_model = core.compile_model(model_cannot_batch,
+                                                  "AUTO",
+                                                  {ov::device::priorities("MOCK_GPU", "MOCK_CPU"),
+                                                   ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
+                                                   ov::intel_auto::enable_startup_fallback(true)});
+     }
+     // cached model exists for actual device
+     // will reuse cached model for actual device without CPU accelerating(No cached model for CPU)
+     ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 1);
+     core.set_property("MOCK_GPU", ov::device::id("test_regenerate"));
+     {
+         auto compiled_model = core.compile_model(model_cannot_batch,
+                                                  "AUTO",
+                                                  {ov::device::priorities("MOCK_GPU", "MOCK_CPU"),
+                                                   ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
+                                                   ov::intel_auto::enable_startup_fallback(false)});
+     }
+     // model hash id changed for actual device
+     // will cache 2 models for actual device and no cached model for CPU
+     ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 2);
+     core.set_property(ov::cache_dir(""));
+ }

TEST_F(AutoFuncTests, compiled_with_cache_enabled_batch_enabled) {
#ifdef ENABLE_AUTO_BATCH
core.set_property(ov::cache_dir(cache_path));
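To mirror the new functional test with real plugins, a hedged sketch of disabling the startup CPU fallback explicitly; with it off, only the selected device compiles the model, so only that device's blob should appear in the cache directory (device names, paths, and the intel_auto header path are assumptions taken from current OpenVINO sources, not from this PR):

#include <openvino/openvino.hpp>
#include <openvino/runtime/auto/properties.hpp>  // ov::intel_auto properties; adjust if the header path differs

int main() {
    ov::Core core;
    core.set_property(ov::cache_dir("model_cache"));  // illustrative cache path
    auto model = core.read_model("model.xml");

    // With startup fallback disabled, AUTO compiles only on the selected
    // device (GPU here), so no CPU blob is written to the cache.
    auto compiled = core.compile_model(model,
                                       "AUTO",
                                       ov::device::priorities("GPU", "CPU"),
                                       ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
                                       ov::intel_auto::enable_startup_fallback(false));
    return 0;
}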