From 31c88946b19ef774fd1020b643839ce833173d27 Mon Sep 17 00:00:00 2001
From: Taylor Yeonbok Lee
Date: Tue, 2 Apr 2024 13:15:13 +0900
Subject: [PATCH] Fix memory pool to assign slot if the request layout can occupy > 50 % of the slot size

---
 .../include/intel_gpu/runtime/memory_pool.hpp | 6 ++++--
 src/plugins/intel_gpu/src/graph/primitive_inst.cpp | 10 +++++++++-
 src/plugins/intel_gpu/src/runtime/memory_pool.cpp | 13 ++++++++-----
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp
index bf95dadf4f0883..19855d2e03cd9c 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp
@@ -109,7 +109,8 @@ class memory_pool {
                           const std::set& restrictions,
                           allocation_type type,
                           bool reusable = true,
-                          bool reset = true); // get from pool or create memory allocation
+                          bool reset = true,
+                          bool is_dynamic = false); // get from pool or create memory allocation
     memory_ptr get_memory(const layout& layout, allocation_type type, bool reset = true);
     memory_ptr get_from_non_padded_pool(const layout& layout,
                                         const primitive_id& prim_id,
@@ -117,7 +118,8 @@ class memory_pool {
                                         uint32_t network_id,
                                         const std::set&,
                                         allocation_type type,
-                                        bool reset = true);
+                                        bool reset = true,
+                                        bool is_dynamic = false);
     memory_ptr get_from_padded_pool(const layout& layout,
                                     const primitive_id& prim_id,
                                     size_t unique_id,
diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
index 57256345afdc43..faadbe31015ab6 100644
--- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
+++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -162,7 +162,15 @@ static memory::ptr get_memory_from_pool(engine& _engine,
     if (_node.get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool)) {
         if (curr_memory != nullptr)
             pool.release_memory(curr_memory, _node.get_unique_id(), _node.id(), net_id);
-        return pool.get_memory(layout, _node.id(), _node.get_unique_id(), net_id, memory_dependencies, type, reusable_across_network, reset);
+        return pool.get_memory(layout,
+                               _node.id(),
+                               _node.get_unique_id(),
+                               net_id,
+                               memory_dependencies,
+                               type,
+                               reusable_across_network,
+                               reset,
+                               _node.is_dynamic());
     }
     return pool.get_memory(layout, type, reset);
 }
diff --git a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp
index 2000e32b25a151..8b4b82d3de4b3f 100644
--- a/src/plugins/intel_gpu/src/runtime/memory_pool.cpp
+++ b/src/plugins/intel_gpu/src/runtime/memory_pool.cpp
@@ -121,16 +121,18 @@ memory::ptr memory_pool::get_from_non_padded_pool(const layout& layout,
                                                   uint32_t network_id,
                                                   const std::set& restrictions,
                                                   allocation_type type,
-                                                  bool reset) {
+                                                  bool reset,
+                                                  bool is_dynamic) {
     auto it = _non_padded_pool.lower_bound(layout.bytes_count());
     while (it != _non_padded_pool.end()) {
-        if (it->second._network_id == network_id &&
+        if ((!is_dynamic || (layout.bytes_count() > it->second._memory->get_layout().bytes_count() * 0.5)) &&
+            (it->second._network_id == network_id &&
             it->second._type == type &&
             it->second._memory->get_layout().format != format::fs_b_yx_fsv32 &&
             layout.format != format::fs_b_yx_fsv32 &&
             ((layout.format != format::b_fs_yx_fsv32 && layout.format != format::b_fs_zyx_fsv32) ||
              (layout.feature() % 32 == 0)) &&
-            !has_conflict(it->second._users, restrictions, network_id)) {
+            !has_conflict(it->second._users, restrictions, network_id))) {
             it->second._users.insert(memory_user(unique_id, network_id, prim_id));
             auto ret_mem = _engine->reinterpret_buffer(*it->second._memory, layout);
             GPU_DEBUG_CODE(ret_mem->from_memory_pool = true);
@@ -228,7 +230,8 @@ memory::ptr memory_pool::get_memory(const layout& layout,
                                     const std::set& restrictions,
                                     allocation_type type,
                                     bool reusable_across_network,
-                                    bool reset) {
+                                    bool reset,
+                                    bool is_dynamic) {
     bool do_reuse = reusable_across_network;
     GPU_DEBUG_GET_INSTANCE(debug_config);
     GPU_DEBUG_IF(debug_config->disable_memory_reuse) {
@@ -238,7 +241,7 @@ memory::ptr memory_pool::get_memory(const layout& layout,
         // reusable within the same network
         if (!layout.format.is_image() && layout.data_padding == padding{{0, 0, 0, 0}, 0}) {
             // non-padded buffers
-            return get_from_non_padded_pool(layout, prim_id, unique_id, network_id, restrictions, type, reset);
+            return get_from_non_padded_pool(layout, prim_id, unique_id, network_id, restrictions, type, reset, is_dynamic);
         } else if (!layout.format.is_image()) {
             // padded buffers
             return get_from_padded_pool(layout, prim_id, unique_id, network_id, restrictions, type);