Skip to content

Commit

Permalink
[AUTO] Load cached model to target device W/O CPU accelerating (#24618)
Browse files Browse the repository at this point in the history
### Details:
- Updated the logic so that AUTO loads the cached model directly to the GPU
only when a GPU cached blob already exists and
`ov::intel_auto::enable_startup_fallback` is enabled

### Tickets:
 - CVS-138574
  • Loading branch information
yangwang201911 authored May 23, 2024
1 parent 4b0868c commit c904b8e
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 32 deletions.
44 changes: 15 additions & 29 deletions src/plugins/auto/src/auto_schedule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ void AutoSchedule::init() {
if (m_compile_context[ACTUALDEVICE].m_is_enabled) {
LOG_INFO_TAG("select device:%s", m_compile_context[ACTUALDEVICE].m_device_info.device_name.c_str());
bool is_actual_cpu = m_compile_context[ACTUALDEVICE].m_device_info.device_name.find("CPU") != std::string::npos;
bool is_actual_gpu = m_compile_context[ACTUALDEVICE].m_device_info.device_name.find("GPU") != std::string::npos;
// if Actual device is CPU or perf_hint is cumulative, disabled m_compile_context[CPU], only use
// m_compile_context[ACTUALDEVICE]
if (is_actual_cpu || !m_context->m_startup_fallback) {
Expand All @@ -148,29 +147,11 @@ void AutoSchedule::init() {
// limit the threads num for compiling
auto device = m_compile_context[ACTUALDEVICE].m_device_info.device_name;
auto& device_config = m_compile_context[ACTUALDEVICE].m_device_info.config;
if (is_actual_gpu) {
int max_threads = 0;
try {
max_threads = m_context->m_ov_core->get_property(device, ov::compilation_num_threads);
} catch (const ov::Exception&) {
LOG_DEBUG_TAG("cannot get MAX_NUM_THREADS from GPU");
}
if (max_threads == static_cast<int>(std::thread::hardware_concurrency())) {
int thread_num = max_threads / 2;
m_compile_context[ACTUALDEVICE].m_device_info.config.insert(
ov::compilation_num_threads(thread_num));
LOG_DEBUG_TAG("gpu streams number for compiling: %d", thread_num);
} else {
// user set the compiling threads num
// use the user's val anyway
LOG_DEBUG_TAG("user defined compiling threads: %d", max_threads);
}
}
std::string cache_dir = device_config.count(ov::cache_dir.name())
? device_config[ov::cache_dir.name()].as<std::string>()
: m_context->m_ov_core->get_property("", ov::cache_dir);

if (!m_context->m_is_set_startup_fallback && !cache_dir.empty()) {
if (m_context->m_startup_fallback && !cache_dir.empty()) {
const auto properties =
m_context->m_ov_core->create_compile_config(ov::DeviceIDParser(device).get_device_name(),
device_config);
Expand Down Expand Up @@ -323,15 +304,20 @@ void AutoSchedule::try_to_compile_model(AutoCompileContext& context, const std::
device_config.find(ov::compilation_num_threads.name()) != device_config.end());
if (cur_dev_is_gpu && m_compile_context[CPU].m_is_enabled && !is_already_set_gpu) {
device_config.insert(ov::intel_gpu::hint::host_task_priority(ov::hint::Priority::HIGH));
auto proc_type_table = get_org_proc_type_table();
int compilation_num_threads = proc_type_table[0][MAIN_CORE_PROC] != 0
? proc_type_table[0][MAIN_CORE_PROC]
: proc_type_table[0][EFFICIENT_CORE_PROC];
if (device_config.insert(ov::compilation_num_threads(compilation_num_threads)).second)
LOG_DEBUG_TAG("gpu streams number for compiling: %d", compilation_num_threads);
else
LOG_DEBUG_TAG("user defined compiling threads: %d",
device_config[ov::compilation_num_threads.name()].as<int32_t>());
int max_threads = 0;
try {
m_context->m_ov_core->get_property(device, ov::compilation_num_threads);
auto proc_type_table = get_org_proc_type_table();
max_threads = proc_type_table[0][MAIN_CORE_PROC] != 0 ? proc_type_table[0][MAIN_CORE_PROC]
: proc_type_table[0][EFFICIENT_CORE_PROC];
if (device_config.insert(ov::compilation_num_threads(max_threads)).second)
LOG_DEBUG_TAG("gpu streams number for compiling: %d", max_threads);
else
LOG_DEBUG_TAG("user defined compiling threads: %d",
device_config[ov::compilation_num_threads.name()].as<int32_t>());
} catch (const ov::Exception&) {
LOG_DEBUG_TAG("cannot get MAX_NUM_THREADS from GPU");
}
}
}
try {
Expand Down
1 change: 0 additions & 1 deletion src/plugins/auto/src/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,6 @@ class ScheduleContext : public std::enable_shared_from_this<ScheduleContext> {
bool m_need_perf_counters;
bool m_batching_disabled = false;
bool m_startup_fallback = true;
bool m_is_set_startup_fallback = false;
bool m_runtime_fallback = true;
bool m_bind_buffer = false;
std::shared_ptr<ov::Model> m_model;
Expand Down
1 change: 0 additions & 1 deletion src/plugins/auto/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,6 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model_impl(const std::string
OPENVINO_ASSERT(auto_s_context->m_ov_core);
auto_s_context->m_log_tag = get_device_name();
auto_s_context->m_model_precision = model_precision;
auto_s_context->m_is_set_startup_fallback = load_config.is_set_by_user(ov::intel_auto::enable_startup_fallback);
auto_s_context->m_startup_fallback = load_config.get_property(ov::intel_auto::enable_startup_fallback);
auto_s_context->m_runtime_fallback = load_config.get_property(ov::intel_auto::enable_runtime_fallback);
auto_s_context->m_bind_buffer = load_config.get_property(ov::intel_auto::device_bind_buffer);
Expand Down
45 changes: 44 additions & 1 deletion src/plugins/auto/tests/functional/behavior/caching_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ TEST_F(AutoFuncTests, compiled_with_cache_enabled) {
core.set_property(ov::cache_dir(""));
}

TEST_F(AutoFuncTests, load_cached_model_to_actual_device_and_disable_CPU_accelerating) {
TEST_F(AutoFuncTests, load_cached_model_to_actual_device_and_disable_CPU_accelerating_default_startup_fallback) {
core.set_property(ov::cache_dir(cache_path));
core.set_property("MOCK_GPU", ov::device::id("test")); // device id for cache property distinguish with MOCK_CPU
{
Expand Down Expand Up @@ -74,6 +74,49 @@ TEST_F(AutoFuncTests, load_cached_model_to_actual_device_and_disable_CPU_acceler
core.set_property(ov::cache_dir(""));
}

// Verifies AUTO's interaction between model caching and an explicitly
// user-set ov::intel_auto::enable_startup_fallback:
//  - with the hint enabled and a cached blob already present for the actual
//    device (GPU), AUTO reuses that blob and does not engage the CPU
//    accelerator (no CPU blob is produced);
//  - with the hint disabled, only the actual device is compiled/cached.
TEST_F(AutoFuncTests, load_cached_model_to_actual_device_and_disable_CPU_accelerating_set_startup_fallback) {
    core.set_property(ov::cache_dir(cache_path));
    core.set_property("MOCK_GPU", ov::device::id("test"));  // device id makes the cache key differ from MOCK_CPU's
    {
        auto compiled_model = core.compile_model(model_cannot_batch,
                                                 "AUTO",
                                                 {ov::device::priorities("MOCK_GPU", "MOCK_CPU"),
                                                  ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)});
    }
    // No cached model existed for the actual device, so AUTO compiles (and
    // caches) a model for both the actual device and the CPU plugin.
    ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 2);
    ov::test::utils::removeFilesWithExt(cache_path, "blob");
    // Recreate only the GPU blob (GPU-only priority list) so the next step
    // starts with a cached model for the actual device but none for CPU.
    {
        auto compiled_model = core.compile_model(
            model_cannot_batch,
            "AUTO",
            {ov::device::priorities("MOCK_GPU"), ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)});
    }
    {
        auto compiled_model = core.compile_model(model_cannot_batch,
                                                 "AUTO",
                                                 {ov::device::priorities("MOCK_GPU", "MOCK_CPU"),
                                                  ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
                                                  ov::intel_auto::enable_startup_fallback(true)});
    }
    // A cached model exists for the actual device, so AUTO reuses it without
    // CPU accelerating (no cached model is created for CPU) — still 1 blob.
    ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 1);
    // Changing the device id changes the cache hash for the actual device,
    // so its previous blob no longer matches.
    core.set_property("MOCK_GPU", ov::device::id("test_regenerate"));
    {
        auto compiled_model = core.compile_model(model_cannot_batch,
                                                 "AUTO",
                                                 {ov::device::priorities("MOCK_GPU", "MOCK_CPU"),
                                                  ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT),
                                                  ov::intel_auto::enable_startup_fallback(false)});
    }
    // The model hash id changed for the actual device: a second GPU blob is
    // cached, and with startup fallback disabled no CPU blob is produced.
    ASSERT_EQ(ov::test::utils::listFilesWithExt(cache_path, "blob").size(), 2);
    // Reset the global cache dir so subsequent tests are unaffected.
    core.set_property(ov::cache_dir(""));
}

TEST_F(AutoFuncTests, compiled_with_cache_enabled_batch_enabled) {
#ifdef ENABLE_AUTO_BATCH
core.set_property(ov::cache_dir(cache_path));
Expand Down

0 comments on commit c904b8e

Please sign in to comment.