From ff5a4631d848c44f100b1236fe6f592ab84943a3 Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Thu, 5 Sep 2024 08:00:05 +0200
Subject: [PATCH] [CPU] Add interface to release compiled model internal memory (#26262)

### Details:
Port https://github.com/openvinotoolkit/openvino/pull/26390 to master

### Tickets:
 - CVS-145873

---
 .../openvino/runtime/icompiled_model.hpp | 6 +
 .../openvino/runtime/compiled_model.hpp | 9 +
 src/inference/src/cpp/compiled_model.cpp | 4 +
 src/inference/src/dev/icompiled_model.cpp | 4 +
 src/plugins/intel_cpu/src/compiled_model.cpp | 8 +
 src/plugins/intel_cpu/src/compiled_model.h | 2 +
 src/plugins/intel_cpu/src/cpu_memory.cpp | 209 +++------
 src/plugins/intel_cpu/src/cpu_memory.h | 158 +++----
 .../intel_cpu/src/dnnl_extension_utils.cpp | 15 +-
 src/plugins/intel_cpu/src/dnnl_scratch_pad.h | 6 +-
 src/plugins/intel_cpu/src/edge.cpp | 11 +-
 src/plugins/intel_cpu/src/edge.h | 2 +-
 src/plugins/intel_cpu/src/graph.cpp | 289 ++++--------
 src/plugins/intel_cpu/src/graph.h | 23 +-
 src/plugins/intel_cpu/src/graph_context.cpp | 4 +-
 src/plugins/intel_cpu/src/graph_context.h | 7 +
 src/plugins/intel_cpu/src/infer_request.cpp | 50 +-
 src/plugins/intel_cpu/src/infer_request.h | 14 +-
 src/plugins/intel_cpu/src/memory_control.cpp | 428 ++++++++++++++++++
 src/plugins/intel_cpu/src/memory_control.hpp | 72 +++
 .../memory_desc/cpu_blocked_memory_desc.cpp | 21 +-
 src/plugins/intel_cpu/src/node.cpp | 50 +-
 src/plugins/intel_cpu/src/nodes/bucketize.cpp | 12 +-
 src/plugins/intel_cpu/src/nodes/concat.cpp | 16 +-
 src/plugins/intel_cpu/src/nodes/conv.cpp | 18 +-
 src/plugins/intel_cpu/src/nodes/deconv.cpp | 24 +-
 src/plugins/intel_cpu/src/nodes/def_conv.cpp | 20 +-
 .../intel_cpu/src/nodes/depth_to_space.cpp | 8 +-
 .../executors/dnnl/dnnl_fullyconnected.hpp | 4 +-
 .../src/nodes/extract_image_patches.cpp | 8 +-
 src/plugins/intel_cpu/src/nodes/eye.cpp | 4 +-
 src/plugins/intel_cpu/src/nodes/gather.cpp | 14 +-
 src/plugins/intel_cpu/src/nodes/gather_nd.cpp | 12 +-
 .../intel_cpu/src/nodes/gather_tree.cpp | 16 +-
 .../intel_cpu/src/nodes/grid_sample.cpp | 12 +-
 src/plugins/intel_cpu/src/nodes/grn.cpp | 8 +-
 src/plugins/intel_cpu/src/nodes/input.cpp | 8 +-
 .../intel_cpu/src/nodes/interpolate.cpp | 28 +-
 src/plugins/intel_cpu/src/nodes/lrn.cpp | 8 +-
 src/plugins/intel_cpu/src/nodes/matmul.cpp | 12 +-
 src/plugins/intel_cpu/src/nodes/memory.cpp | 68 ++-
 src/plugins/intel_cpu/src/nodes/memory.hpp | 4 +-
 .../intel_cpu/src/nodes/multinomial.cpp | 8 +
 .../intel_cpu/src/nodes/multinomial.hpp | 2 +
 src/plugins/intel_cpu/src/nodes/mvn.cpp | 8 +-
 src/plugins/intel_cpu/src/nodes/normalize.cpp | 4 +-
 src/plugins/intel_cpu/src/nodes/pad.cpp | 21 +-
 src/plugins/intel_cpu/src/nodes/pooling.cpp | 8 +-
 src/plugins/intel_cpu/src/nodes/reduce.cpp | 8 +-
 src/plugins/intel_cpu/src/nodes/reorder.cpp | 45 +-
 src/plugins/intel_cpu/src/nodes/reorder.h | 5 +-
 .../intel_cpu/src/nodes/reverse_sequence.cpp | 12 +-
 src/plugins/intel_cpu/src/nodes/rnn.cpp | 2 +-
 src/plugins/intel_cpu/src/nodes/roi_align.cpp | 8 +-
 .../intel_cpu/src/nodes/roi_pooling.cpp | 12 +-
 src/plugins/intel_cpu/src/nodes/roll.cpp | 16 +-
 .../intel_cpu/src/nodes/shuffle_channels.cpp | 8 +-
 .../intel_cpu/src/nodes/space_to_depth.cpp | 8 +-
 src/plugins/intel_cpu/src/nodes/split.cpp | 20 +-
 src/plugins/intel_cpu/src/nodes/split.h | 5 +-
 .../intel_cpu/src/nodes/tensoriterator.cpp | 46 +-
 .../intel_cpu/src/nodes/tensoriterator.h | 5 +-
 src/plugins/intel_cpu/src/nodes/topk.cpp | 8 +-
 src/plugins/intel_cpu/src/nodes/transpose.cpp | 8 +-
 src/plugins/intel_cpu/src/nodes/unique.cpp | 8 +-
 .../intel_cpu/src/partitioned_mem_mgr.cpp | 24 +-
 .../intel_cpu/src/partitioned_mem_mgr.h | 19 +-
 src/plugins/intel_cpu/src/proxy_mem_mgr.cpp | 54 +--
 src/plugins/intel_cpu/src/proxy_mem_mgr.h | 26 +-
 .../src/utils/debug_capabilities.cpp | 2 +-
 .../ov_executable_network/release_memory.cpp | 128 ++++++
 .../intel_cpu/tests/unit/cpu_tensor_test.cpp | 3 +-
 .../intel_cpu/tests/unit/dnnl_memory_test.cpp | 6 +-
 .../graph/merge_transpose_reorder_test.cpp | 3 +-
 .../src/node_builders/convolution.cpp | 16 +-
 75 files changed, 1428 insertions(+), 824 deletions(-)
 create mode 100644 src/plugins/intel_cpu/src/memory_control.cpp
 create mode 100644 src/plugins/intel_cpu/src/memory_control.hpp
 create mode 100644 src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/release_memory.cpp

diff --git a/src/inference/dev_api/openvino/runtime/icompiled_model.hpp b/src/inference/dev_api/openvino/runtime/icompiled_model.hpp
index eca22b3b0036f3..01f7b556da909f 100644
--- a/src/inference/dev_api/openvino/runtime/icompiled_model.hpp
+++ b/src/inference/dev_api/openvino/runtime/icompiled_model.hpp
@@ -134,6 +134,12 @@ class OPENVINO_RUNTIME_API ICompiledModel : public std::enable_shared_from_this<ICompiledModel> {
      */
     ov::SoPtr<ov::IRemoteContext> get_context() const;
 
+    /**
+     * @brief Release intermediate memory
+     *
+     */
+    virtual void release_memory();
+
     virtual ~ICompiledModel() = default;
 
 private:
diff --git a/src/inference/include/openvino/runtime/compiled_model.hpp b/src/inference/include/openvino/runtime/compiled_model.hpp
index 7fb005ee999f84..33aae94a2bd2cd 100644
--- a/src/inference/include/openvino/runtime/compiled_model.hpp
+++ b/src/inference/include/openvino/runtime/compiled_model.hpp
@@ -200,6 +200,15 @@ class OPENVINO_RUNTIME_API CompiledModel {
         return get_property(property.name()).template as<T>();
     }
 
+    /**
+     * @brief Release intermediate memory.
+     *
+     * This method forces the compiled model to release memory allocated for intermediate structures, e.g. caches,
+     * tensors, temporary buffers, etc., when possible.
+     *
+     */
+    void release_memory();
+
     /**
      * @brief Returns pointer to device-specific shared context
      * on a remote accelerator device that was used to create this CompiledModel.
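The hunks above introduce the user-facing API. The following is a minimal usage sketch and is not part of the patch: the model path and device name are placeholders, and everything except `release_memory()` is the pre-existing OpenVINO 2.0 API. The intent, per the CPU plugin changes further down, is to call it while no inference is in flight; released buffers are re-allocated lazily on the next `infer()`.

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // "model.xml" is a placeholder path used for illustration only.
    ov::CompiledModel compiled = core.compile_model("model.xml", "CPU");
    ov::InferRequest request = compiled.create_infer_request();

    // (setting input tensors is omitted for brevity)
    request.infer();  // intermediate buffers get allocated for this graph

    // The application goes idle: ask the plugin to drop intermediate memory
    // (scratch buffers, internal tensors) where possible.
    compiled.release_memory();

    // The next inference transparently re-allocates whatever was released.
    request.infer();
    return 0;
}
```

In the CPU implementation later in this patch, `CompiledModel::release_memory()` locks each graph and calls `NetworkMemoryControl::releaseMemory()`, while `Graph::Infer()` re-allocates via `m_pMemoryControl->allocateMemory()` when it finds the memory not allocated.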
diff --git a/src/inference/src/cpp/compiled_model.cpp b/src/inference/src/cpp/compiled_model.cpp index 14ae5b98d1826a..c780bbee1e991d 100644 --- a/src/inference/src/cpp/compiled_model.cpp +++ b/src/inference/src/cpp/compiled_model.cpp @@ -145,6 +145,10 @@ Any CompiledModel::get_property(const std::string& name) const { }); } +void CompiledModel::release_memory() { + OV_COMPILED_MODEL_CALL_STATEMENT(_impl->release_memory()); +} + RemoteContext CompiledModel::get_context() const { OV_COMPILED_MODEL_CALL_STATEMENT({ auto ctx = _impl->get_context(); diff --git a/src/inference/src/dev/icompiled_model.cpp b/src/inference/src/dev/icompiled_model.cpp index 0079826cdeb1b5..b1cbedac1632ab 100644 --- a/src/inference/src/dev/icompiled_model.cpp +++ b/src/inference/src/dev/icompiled_model.cpp @@ -147,3 +147,7 @@ ov::SoPtr ov::ICompiledModel::get_context() const { void ov::ICompiledModel::set_model_shared_object(ov::Model& model, const std::shared_ptr& shared_object) { model.m_shared_object = shared_object; } + +void ov::ICompiledModel::release_memory() { + // nothing to do +} diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index fb474e129cd3bb..72943b837f1f3b 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -342,5 +342,13 @@ void CompiledModel::export_model(std::ostream& modelStream) const { serializer << m_model; } +void CompiledModel::release_memory() { + for (auto&& graph : m_graphs) { + GraphGuard::Lock graph_lock{graph}; + auto ctx = graph_lock._graph.getGraphContext(); + ctx->getNetworkMemoryControl()->releaseMemory(); + } +} + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/compiled_model.h b/src/plugins/intel_cpu/src/compiled_model.h index facd9ef3698ca7..faedf1ae5a744c 100644 --- a/src/plugins/intel_cpu/src/compiled_model.h +++ b/src/plugins/intel_cpu/src/compiled_model.h @@ -49,6 +49,8 @@ class CompiledModel : public ov::ICompiledModel { "Set property to Core::compile_model during compilation"); }; + void release_memory() override; + private: std::shared_ptr create_sync_infer_request() const override; friend class SyncInferRequest; diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index ba78cb5360243f..8e5fe8d72fd1f2 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -67,7 +67,7 @@ namespace { Memory::Memory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data, bool pads_zeroing) : m_eng(eng), m_pMemDesc(desc), - m_mgrHandle(std::make_shared(make_unique()), this), + m_blockHandle(std::make_shared(make_unique()), this), dnnlMemHandle(this) { if (desc->getPrecision() == element::string) { OPENVINO_THROW("[CPU] Memory object cannot be created for string data."); @@ -78,18 +78,18 @@ Memory::Memory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data, bo Memory::Memory(const dnnl::engine& eng, const MemoryDesc& desc, const void* data, bool pads_zeroing) : Memory::Memory(eng, desc.clone(), data, pads_zeroing) {} -Memory::Memory(const dnnl::engine& eng, MemoryDescPtr desc, MemoryMngrPtr mngr) : - m_eng(eng), m_pMemDesc(desc), m_mgrHandle(mngr, this), dnnlMemHandle(this) { +Memory::Memory(const dnnl::engine& eng, MemoryDescPtr desc, MemoryBlockPtr block) : + m_eng(eng), m_pMemDesc(desc), m_blockHandle(block, this), dnnlMemHandle(this) { if (desc->getPrecision() == element::string) { OPENVINO_THROW("[CPU] Memory object can't be 
created for string data."); } - bool memAllocated = m_mgrHandle->getRawPtr(); + bool memAllocated = m_blockHandle->getRawPtr(); create(desc, nullptr, !memAllocated); } -Memory::Memory(const dnnl::engine& eng, const MemoryDesc& desc, MemoryMngrPtr mngr) : - Memory::Memory(eng, desc.clone(), mngr) {} +Memory::Memory(const dnnl::engine& eng, const MemoryDesc& desc, MemoryBlockPtr block) : + Memory::Memory(eng, desc.clone(), block) {} size_t Memory::getSize() const { auto size = getDesc().getCurrentMemSize(); @@ -113,9 +113,9 @@ void Memory::create(MemoryDescPtr desc, const void* data, bool pads_zeroing) { } auto memSize = m_pMemDesc->getCurrentMemSize(); if (nullptr != data) { - m_mgrHandle->setExtBuff(const_cast(data), memSize); + m_blockHandle->setExtBuff(const_cast(data), memSize); } else { - m_mgrHandle->resize(memSize); + m_blockHandle->resize(memSize); } } @@ -146,7 +146,7 @@ void Memory::redefineDesc(MemoryDescPtr desc) { void Memory::update() { if (dnnlMemHandle.isInit()) { auto prim = dnnlMemHandle.getPrim(); - prim.set_data_handle(m_mgrHandle->getRawPtr()); + prim.set_data_handle(m_blockHandle->getRawPtr()); } } @@ -185,22 +185,6 @@ dnnl::memory Memory::DnnlMemPrimHandle::getPrim() const { return m_prim; } -bool Memory::isAllocated() const noexcept { - if (m_mgrHandle->getRawPtr()) { - return true; - } - if (!m_pMemDesc) { - return false; - } - if (!(m_pMemDesc->isDefined())) { - return true; - } - if (m_pMemDesc->getCurrentMemSize() == 0) { - return true; - } - return false; -} - void* Memory::getData() const { void* data = getDataNoThrow(); if (data == nullptr && @@ -210,17 +194,17 @@ void* Memory::getData() const { return data; } -void* MemoryMngrWithReuse::getRawPtr() const noexcept { +void* MemoryBlockWithReuse::getRawPtr() const noexcept { return m_data.get(); } -void MemoryMngrWithReuse::setExtBuff(void *ptr, size_t size) { +void MemoryBlockWithReuse::setExtBuff(void *ptr, size_t size) { m_useExternalStorage = true; m_memUpperBound = size; m_data = decltype(m_data)(ptr, release); } -bool MemoryMngrWithReuse::resize(size_t size) { +bool MemoryBlockWithReuse::resize(size_t size) { constexpr int cacheLineSize = 64; bool sizeChanged = false; if (size > m_memUpperBound) { @@ -235,63 +219,26 @@ bool MemoryMngrWithReuse::resize(size_t size) { if (numa_node >= 0) { if (!mbind_move(ptr, size, numa_node)) { - DEBUG_LOG("MemoryMngrWithReuse move_memory to node ", numa_node, " failed\n"); + DEBUG_LOG("MemoryBlockWithReuse move_memory to node ", numa_node, " failed\n"); } } } return sizeChanged; } -bool MemoryMngrWithReuse::hasExtBuffer() const noexcept { +bool MemoryBlockWithReuse::hasExtBuffer() const noexcept { return m_useExternalStorage; } -void MemoryMngrWithReuse::release(void *ptr) {} - -void MemoryMngrWithReuse::destroy(void *ptr) { - dnnl::impl::free(ptr); -} - -void* MemoryMngrRealloc::getRawPtr() const noexcept { - return m_data.get(); +void MemoryBlockWithReuse::free() { + m_data = decltype(m_data)(nullptr, release); + m_memUpperBound = 0ul; + m_useExternalStorage = false; } -void MemoryMngrRealloc::setExtBuff(void *ptr, size_t size) { - m_useExternalStorage = true; - m_memUpperBound = size; - m_data = decltype(m_data)(ptr, release); -} - -bool MemoryMngrRealloc::resize(size_t size) { - constexpr int cacheLineSize = 64; - constexpr size_t growFactor = 2; - bool sizeChanged = false; - if (size > m_memUpperBound) { - size *= growFactor; - void *ptr = dnnl::impl::malloc(size, cacheLineSize); - if (!ptr) { - OPENVINO_THROW("Failed to allocate ", size, " bytes of memory"); - } 
+void MemoryBlockWithReuse::release(void *ptr) {} - if (auto src = m_data.get()) { - std::memcpy(ptr, src, m_memUpperBound); - } - - m_memUpperBound = size; - m_useExternalStorage = false; - m_data = decltype(m_data)(ptr, destroy); - sizeChanged = true; - } - return sizeChanged; -} - -bool MemoryMngrRealloc::hasExtBuffer() const noexcept { - return m_useExternalStorage; -} - -void MemoryMngrRealloc::release(void *ptr) {} - -void MemoryMngrRealloc::destroy(void *ptr) { +void MemoryBlockWithReuse::destroy(void *ptr) { dnnl::impl::free(ptr); } @@ -302,7 +249,7 @@ StringMemory::StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc OPENVINO_THROW("[CPU] StringMemory supports String type only."); } - m_manager = std::make_shared(); + m_memoryBlock = std::make_shared(); if (!m_mem_desc->isDefined()) { return; @@ -312,9 +259,9 @@ StringMemory::StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc if (data != nullptr) { auto not_const_data = const_cast(data); - m_manager->setExtBuff(reinterpret_cast(not_const_data), string_size); + m_memoryBlock->setExtBuff(reinterpret_cast(not_const_data), string_size); } else { - m_manager->resize(string_size); + m_memoryBlock->resize(string_size); } } @@ -327,7 +274,7 @@ void StringMemory::load(const IMemory& src, bool ftz) const { } void* StringMemory::getData() const { - return m_manager->getRawPtr(); + return m_memoryBlock->getRawPtr(); } void StringMemory::redefineDesc(MemoryDescPtr desc) { @@ -340,30 +287,14 @@ void StringMemory::redefineDesc(MemoryDescPtr desc) { m_mem_desc = desc; const auto string_size = m_mem_desc->getShape().getElementsCount(); - m_manager->resize(string_size); + m_memoryBlock->resize(string_size); } void StringMemory::nullify() { - auto data_ptr = m_manager->getStringPtr(); + auto data_ptr = m_memoryBlock->getStringPtr(); if (data_ptr != nullptr) { - std::fill(data_ptr, data_ptr + m_manager->getStrLen(), OvString()); - } -} - -bool StringMemory::isAllocated() const noexcept { - if (getData()) { - return true; - } - if (!m_mem_desc) { - return false; - } - if (!(m_mem_desc->isDefined())) { - return true; - } - if (m_mem_desc->getCurrentMemSize() == 0) { - return true; + std::fill(data_ptr, data_ptr + m_memoryBlock->getStrLen(), OvString()); } - return false; } size_t StringMemory::getSize() const { // In bytes @@ -374,25 +305,25 @@ size_t StringMemory::getSize() const { // In bytes return size; } -MemoryMngrPtr StringMemory::getMemoryMngr() const { - OPENVINO_THROW("Unexpected call of StringMemory::getMemoryMngr()"); +MemoryBlockPtr StringMemory::getMemoryBlock() const { + OPENVINO_THROW("Unexpected call of StringMemory::getMemoryBlock()"); } dnnl::memory StringMemory::getPrimitive() const { OPENVINO_THROW("Unexpected call of StringMemory::getPrimitive()"); } -void StringMemory::StringMemoryMngr::setExtBuff(OvString* ptr, size_t size) { +void StringMemory::StringMemoryBlock::setExtBuff(OvString* ptr, size_t size) { m_use_external_storage = true; m_str_upper_bound = size; m_data = decltype(m_data)(ptr, release); } -StringMemory::OvString* StringMemory::StringMemoryMngr::getStringPtr() const noexcept { +StringMemory::OvString* StringMemory::StringMemoryBlock::getStringPtr() const noexcept { return m_data.get(); } -bool StringMemory::StringMemoryMngr::resize(size_t size) { +bool StringMemory::StringMemoryBlock::resize(size_t size) { bool sizeChanged = false; if (size > m_str_upper_bound) { if (size > PTRDIFF_MAX) { @@ -411,58 +342,58 @@ bool StringMemory::StringMemoryMngr::resize(size_t size) { return 
sizeChanged; } -bool StringMemory::StringMemoryMngr::hasExtBuffer() const noexcept { +bool StringMemory::StringMemoryBlock::hasExtBuffer() const noexcept { return m_use_external_storage; } -size_t StringMemory::StringMemoryMngr::getStrLen() const noexcept { +size_t StringMemory::StringMemoryBlock::getStrLen() const noexcept { return m_str_upper_bound; } -void StringMemory::StringMemoryMngr::destroy(OvString* ptr) { +void StringMemory::StringMemoryBlock::destroy(OvString* ptr) { delete[] ptr; } -void* StringMemory::StringMemoryMngr::getRawPtr() const noexcept { +void* StringMemory::StringMemoryBlock::getRawPtr() const noexcept { return reinterpret_cast(m_data.get()); } -/////////////// DnnlMemoryMngr /////////////// +/////////////// DnnlMemoryBlock /////////////// -void* DnnlMemoryMngr::getRawPtr() const noexcept { - return m_pMemMngr->getRawPtr(); +void* DnnlMemoryBlock::getRawPtr() const noexcept { + return m_pMemBlock->getRawPtr(); } -void DnnlMemoryMngr::setExtBuff(void *ptr, size_t size) { - m_pMemMngr->setExtBuff(ptr, size); +void DnnlMemoryBlock::setExtBuff(void *ptr, size_t size) { + m_pMemBlock->setExtBuff(ptr, size); notifyUpdate(); } -bool DnnlMemoryMngr::resize(size_t size) { - bool sizeChanged = m_pMemMngr->resize(size); +bool DnnlMemoryBlock::resize(size_t size) { + bool sizeChanged = m_pMemBlock->resize(size); if (sizeChanged) { notifyUpdate(); } return sizeChanged; } -bool DnnlMemoryMngr::hasExtBuffer() const noexcept { - return m_pMemMngr->hasExtBuffer(); +bool DnnlMemoryBlock::hasExtBuffer() const noexcept { + return m_pMemBlock->hasExtBuffer(); } -void DnnlMemoryMngr::registerMemory(Memory* memPtr) { +void DnnlMemoryBlock::registerMemory(Memory* memPtr) { if (memPtr) { m_setMemPtrs.insert(memPtr); } } -void DnnlMemoryMngr::unregisterMemory(Memory* memPtr) { +void DnnlMemoryBlock::unregisterMemory(Memory* memPtr) { if (memPtr) { m_setMemPtrs.erase(memPtr); } } -void DnnlMemoryMngr::notifyUpdate() { +void DnnlMemoryBlock::notifyUpdate() { for (auto& item : m_setMemPtrs) { if (item) { item->update(); @@ -482,9 +413,9 @@ StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const vo m_size = m_pMemDesc->getCurrentMemSize(); if (data) { - m_pMemMngr = std::make_shared(const_cast(data), m_size); + m_pMemBlock = std::make_shared(const_cast(data), m_size); } else { - m_pMemMngr = std::make_shared(m_size); + m_pMemBlock = std::make_shared(m_size); } try { @@ -495,7 +426,7 @@ StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const vo m_prim = dnnl::memory(dnnl_desc->getDnnlDesc(), m_eng, DNNL_MEMORY_NONE); // // ======================== - m_prim.set_data_handle(m_pMemMngr->getRawPtr()); + m_prim.set_data_handle(m_pMemBlock->getRawPtr()); } catch (const std::exception& exc) { dnnlErrorCtx = exc.what(); @@ -505,10 +436,6 @@ StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const vo StaticMemory::StaticMemory(const dnnl::engine& eng, const MemoryDesc& desc, const void* data, bool pads_zeroing) : StaticMemory::StaticMemory(eng, desc.clone(), data, pads_zeroing) {} -bool StaticMemory::isAllocated() const noexcept { - return 0 == m_size || getData() != nullptr; -} - const MemoryDesc& StaticMemory::getDesc() const { return *m_pMemDesc; } @@ -518,7 +445,7 @@ MemoryDescPtr StaticMemory::getDescPtr() const { } void* StaticMemory::getData() const { - return m_pMemMngr->getRawPtr(); + return m_pMemBlock->getRawPtr(); } size_t StaticMemory::getSize() const { @@ -544,8 +471,8 @@ void StaticMemory::load(const IMemory& src, bool ftz) 
const { transferData(src, *this, ftz); } -MemoryMngrPtr StaticMemory::getMemoryMngr() const { - return m_pMemMngr; +MemoryBlockPtr StaticMemory::getMemoryBlock() const { + return m_pMemBlock; } //oneDNN specifics for backward compatibility @@ -562,38 +489,38 @@ void StaticMemory::nullify() { memset(dataPtr, 0, getSize()); } -StaticMemory::StaticMemoryMngr::StaticMemoryMngr(size_t size) : m_size(size) { - memMngrImpl.resize(m_size); +StaticMemory::StaticMemoryBlock::StaticMemoryBlock(size_t size) : m_size(size) { + memBlockImpl.resize(m_size); } -StaticMemory::StaticMemoryMngr::StaticMemoryMngr(void* data, size_t size) : m_size(size) { - memMngrImpl.setExtBuff(data, m_size); +StaticMemory::StaticMemoryBlock::StaticMemoryBlock(void* data, size_t size) : m_size(size) { + memBlockImpl.setExtBuff(data, m_size); } -void* StaticMemory::StaticMemoryMngr::getRawPtr() const noexcept { - return memMngrImpl.getRawPtr(); +void* StaticMemory::StaticMemoryBlock::getRawPtr() const noexcept { + return memBlockImpl.getRawPtr(); } -void StaticMemory::StaticMemoryMngr::setExtBuff(void* ptr, size_t size) { - OPENVINO_THROW("Unexpected: StaticMemoryMngr may not be modified"); +void StaticMemory::StaticMemoryBlock::setExtBuff(void* ptr, size_t size) { + OPENVINO_THROW("Unexpected: StaticMemoryBlock may not be modified"); } -bool StaticMemory::StaticMemoryMngr::resize(size_t size) { +bool StaticMemory::StaticMemoryBlock::resize(size_t size) { if (size != m_size) { - OPENVINO_THROW("Unexpected: StaticMemoryMngr may not resize the memory"); + OPENVINO_THROW("Unexpected: StaticMemoryBlock may not resize the memory"); } return false; } -bool StaticMemory::StaticMemoryMngr::hasExtBuffer() const noexcept { - return memMngrImpl.hasExtBuffer(); +bool StaticMemory::StaticMemoryBlock::hasExtBuffer() const noexcept { + return memBlockImpl.hasExtBuffer(); } -void StaticMemory::StaticMemoryMngr::registerMemory(Memory* memPtr) { +void StaticMemory::StaticMemoryBlock::registerMemory(Memory* memPtr) { //do nothing } -void StaticMemory::StaticMemoryMngr::unregisterMemory(Memory* memPtr) { +void StaticMemory::StaticMemoryBlock::unregisterMemory(Memory* memPtr) { //do nothing } diff --git a/src/plugins/intel_cpu/src/cpu_memory.h b/src/plugins/intel_cpu/src/cpu_memory.h index f62c2f052575f6..70e6713e36b886 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.h +++ b/src/plugins/intel_cpu/src/cpu_memory.h @@ -29,16 +29,16 @@ namespace ov { namespace intel_cpu { class Memory; -class ProxyMemoryMngr; +class ProxyMemoryBlock; /** - * @interface IMemoryMngr + * @interface IMemoryBlock * @brief An interface to memory control object */ -class IMemoryMngr { +class IMemoryBlock { public: - virtual ~IMemoryMngr() = default; + virtual ~IMemoryBlock() = default; /** * @brief Accessor to underlying memory buffer @@ -68,15 +68,16 @@ class IMemoryMngr { }; /** - * @brief An implementation of the mem manager where memory reallocation occurs only if a bigger buffer is requested. + * @brief An implementation of the mem block where memory reallocation occurs only if a bigger buffer is requested. 
*/ -class MemoryMngrWithReuse : public IMemoryMngr { +class MemoryBlockWithReuse : public IMemoryBlock { public: - MemoryMngrWithReuse(int numa_node = -1) : m_data(nullptr, release), numa_node(numa_node) {} + MemoryBlockWithReuse(int numa_node = -1) : m_data(nullptr, release), numa_node(numa_node) {} void* getRawPtr() const noexcept override; void setExtBuff(void* ptr, size_t size) override; bool resize(size_t size) override; bool hasExtBuffer() const noexcept override; + void free(); private: bool m_useExternalStorage = false; @@ -88,24 +89,7 @@ class MemoryMngrWithReuse : public IMemoryMngr { static void destroy(void *ptr); }; -class MemoryMngrRealloc : public IMemoryMngr { -public: - MemoryMngrRealloc() : m_data(nullptr, release) {} - void* getRawPtr() const noexcept override; - void setExtBuff(void* ptr, size_t size) override; - bool resize(size_t size) override; - bool hasExtBuffer() const noexcept override; - -private: - bool m_useExternalStorage = false; - size_t m_memUpperBound = 0ul; - std::unique_ptr m_data; - - static void release(void *ptr); - static void destroy(void *ptr); -}; - -class IMemoryMngrObserver : public IMemoryMngr { +class IMemoryBlockObserver : public IMemoryBlock { public: virtual void registerMemory(Memory* memPtr) = 0; virtual void unregisterMemory(Memory* memPtr) = 0; @@ -114,9 +98,9 @@ class IMemoryMngrObserver : public IMemoryMngr { /** * @brief A proxy object that additionally implements observer pattern */ -class DnnlMemoryMngr : public IMemoryMngrObserver { +class DnnlMemoryBlock : public IMemoryBlockObserver { public: - explicit DnnlMemoryMngr(std::unique_ptr mngr) : m_pMemMngr(std::move(mngr)) {} + explicit DnnlMemoryBlock(std::unique_ptr memBlock) : m_pMemBlock(std::move(memBlock)) {} void* getRawPtr() const noexcept override; void setExtBuff(void* ptr, size_t size) override; bool resize(size_t size) override; @@ -129,49 +113,49 @@ class DnnlMemoryMngr : public IMemoryMngrObserver { private: std::unordered_set m_setMemPtrs; - std::unique_ptr m_pMemMngr; + std::unique_ptr m_pMemBlock; }; -using MemoryMngrPtr = std::shared_ptr; -using MemoryMngrCPtr = std::shared_ptr; +using MemoryBlockPtr = std::shared_ptr; +using MemoryBlockCPtr = std::shared_ptr; -class DnnlMemMngrHandle { +class DnnlMemBlockHandle { public: - DnnlMemMngrHandle(MemoryMngrPtr pMgr, Memory* pMem) : m_pMgr(pMgr), m_pMem(pMem) { - if (m_pMgr) { - m_pMgr->registerMemory(m_pMem); + DnnlMemBlockHandle(MemoryBlockPtr pBlock, Memory* pMem) : m_pMemBlock(pBlock), m_pMem(pMem) { + if (m_pMemBlock) { + m_pMemBlock->registerMemory(m_pMem); } } - DnnlMemMngrHandle(const DnnlMemMngrHandle&) = delete; - DnnlMemMngrHandle& operator= (const DnnlMemMngrHandle&) = delete; + DnnlMemBlockHandle(const DnnlMemBlockHandle&) = delete; + DnnlMemBlockHandle& operator= (const DnnlMemBlockHandle&) = delete; - DnnlMemMngrHandle(DnnlMemMngrHandle&& source) { - std::swap(m_pMgr, source.m_pMgr); + DnnlMemBlockHandle(DnnlMemBlockHandle&& source) { + std::swap(m_pMemBlock, source.m_pMemBlock); std::swap(m_pMem, source.m_pMem); } - DnnlMemMngrHandle& operator= (DnnlMemMngrHandle&& rhs) { - std::swap(m_pMgr, rhs.m_pMgr); + DnnlMemBlockHandle& operator= (DnnlMemBlockHandle&& rhs) { + std::swap(m_pMemBlock, rhs.m_pMemBlock); std::swap(m_pMem, rhs.m_pMem); return *this; } - ~DnnlMemMngrHandle() { - if (m_pMgr) { - m_pMgr->unregisterMemory(m_pMem); + ~DnnlMemBlockHandle() { + if (m_pMemBlock) { + m_pMemBlock->unregisterMemory(m_pMem); } } - MemoryMngrPtr get() const { - return m_pMgr; + MemoryBlockPtr get() const { + return 
m_pMemBlock; } - MemoryMngrPtr::element_type* operator->() const noexcept { - return m_pMgr.get(); + MemoryBlockPtr::element_type* operator->() const noexcept { + return m_pMemBlock.get(); } private: - MemoryMngrPtr m_pMgr = nullptr; + MemoryBlockPtr m_pMemBlock = nullptr; Memory* m_pMem = nullptr; }; @@ -179,8 +163,6 @@ class IMemory { public: virtual ~IMemory() = default; - virtual bool isAllocated() const noexcept = 0; - virtual const MemoryDesc& getDesc() const = 0; virtual MemoryDescPtr getDescPtr() const = 0; @@ -200,13 +182,22 @@ class IMemory { virtual const VectorDims& getStaticDims() const = 0; // Redefines descriptor. The memory descriptor will be replaced with the new one. - // Memory will not be reallocated if the new tensor size is less or equal the upper bound. + // Memory will not be reallocated according to the dynamic memory block policy // Caution!!! This action invalidates the previous data layout. The old data may become unreachable. virtual void redefineDesc(MemoryDescPtr desc) = 0; virtual void load(const IMemory& src, bool ftz = true) const = 0; - virtual MemoryMngrPtr getMemoryMngr() const = 0; + virtual MemoryBlockPtr getMemoryBlock() const = 0; + + virtual void nullify() = 0; + + bool isDefined() const noexcept { + if (auto desc = getDescPtr()) { + return desc->isDefined(); + } + return false; + } //oneDNN specifics for backward compatibility virtual dnnl::memory getPrimitive() const = 0; @@ -219,8 +210,6 @@ class IMemory { return DnnlExtensionUtils::ElementTypeToDataType(getDesc().getPrecision()); } - virtual void nullify() = 0; - template ::value && !std::is_reference::value, int>::type = 0, typename std::enable_if::value, int>::type = 0> @@ -229,10 +218,10 @@ class IMemory { class StaticMemory final : public IMemory { public: - class StaticMemoryMngr : public IMemoryMngrObserver { + class StaticMemoryBlock : public IMemoryBlockObserver { public: - explicit StaticMemoryMngr(size_t size); - StaticMemoryMngr(void* data, size_t size); + explicit StaticMemoryBlock(size_t size); + StaticMemoryBlock(void* data, size_t size); void* getRawPtr() const noexcept override; void setExtBuff(void* ptr, size_t size) override; bool resize(size_t size) override; @@ -242,10 +231,10 @@ class StaticMemory final : public IMemory { private: size_t m_size = 0; - MemoryMngrWithReuse memMngrImpl; + MemoryBlockWithReuse memBlockImpl; }; - using MemMngrPtr = std::shared_ptr; + using MemBlockPtr = std::shared_ptr; public: StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = true); @@ -257,8 +246,6 @@ class StaticMemory final : public IMemory { StaticMemory(Memory&&) = delete; StaticMemory& operator= (StaticMemory&&) = delete; - bool isAllocated() const noexcept override; - const MemoryDesc& getDesc() const override; MemoryDescPtr getDescPtr() const override; @@ -273,7 +260,7 @@ class StaticMemory final : public IMemory { void load(const IMemory& src, bool ftz = true) const override; - MemoryMngrPtr getMemoryMngr() const override; + MemoryBlockPtr getMemoryBlock() const override; //oneDNN specifics for backward compatibility dnnl::memory getPrimitive() const override; @@ -285,7 +272,7 @@ class StaticMemory final : public IMemory { MemoryDescPtr m_pMemDesc; size_t m_size; dnnl::memory m_prim; - MemMngrPtr m_pMemMngr; + MemBlockPtr m_pMemBlock; std::string dnnlErrorCtx; }; @@ -293,8 +280,8 @@ class Memory : public IMemory { public: Memory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = true); 
Memory(const dnnl::engine& eng, const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); - Memory(const dnnl::engine& eng, MemoryDescPtr desc, MemoryMngrPtr mngr); - Memory(const dnnl::engine& eng, const MemoryDesc& desc, MemoryMngrPtr mbgr); + Memory(const dnnl::engine& eng, MemoryDescPtr desc, MemoryBlockPtr block); + Memory(const dnnl::engine& eng, const MemoryDesc& desc, MemoryBlockPtr block); Memory(const Memory&) = delete; Memory& operator= (const Memory&) = delete; @@ -304,8 +291,6 @@ class Memory : public IMemory { dnnl::memory getPrimitive() const override; - bool isAllocated() const noexcept override; - const MemoryDesc& getDesc() const override { return *m_pMemDesc; } @@ -326,9 +311,6 @@ class Memory : public IMemory { return getDesc().getShape().getStaticDims(); } - // Redefines descriptor. The memory descriptor will be replaced with the new one. - // Memory will not be reallocated if the new tensor size is less or equal the upper bound. - // Caution!!! This action invalidates the previous data layout. The old data may become unreachable. void redefineDesc(MemoryDescPtr desc) override; void load(const IMemory& src, bool ftz = true) const override; @@ -338,13 +320,13 @@ class Memory : public IMemory { return m_eng; } - MemoryMngrPtr getMemoryMngr() const override { - return m_mgrHandle.get(); + MemoryBlockPtr getMemoryBlock() const override { + return m_blockHandle.get(); } private: - friend DnnlMemoryMngr; - friend ProxyMemoryMngr; + friend DnnlMemoryBlock; + friend ProxyMemoryBlock; private: void update(); @@ -355,7 +337,7 @@ class Memory : public IMemory { private: dnnl::engine m_eng; MemoryDescPtr m_pMemDesc; - DnnlMemMngrHandle m_mgrHandle; + DnnlMemBlockHandle m_blockHandle; bool m_padsZeroing = true; class DnnlMemPrimHandle { public: @@ -373,7 +355,7 @@ class Memory : public IMemory { } dnnlMemHandle; void* getDataNoThrow() const noexcept { - return m_mgrHandle->getRawPtr(); + return m_blockHandle->getRawPtr(); } }; @@ -381,9 +363,9 @@ class StringMemory : public IMemory { public: using OvString = ov::element_type_traits::value_type; - class StringMemoryMngr { + class StringMemoryBlock { public: - StringMemoryMngr() : m_data(nullptr, release) {} + StringMemoryBlock() : m_data(nullptr, release) {} OvString* getStringPtr() const noexcept; void setExtBuff(OvString* ptr, size_t size); size_t getStrLen() const noexcept; @@ -400,20 +382,18 @@ class StringMemory : public IMemory { static void destroy(OvString* ptr); }; - using StringMemoryMngrPtr = std::shared_ptr; + using StringMemoryBlockPtr = std::shared_ptr; StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc, const void* data = nullptr); StringMemory(const dnnl::engine& engine, const MemoryDesc& desc, const void* data = nullptr) : StringMemory(engine, desc.clone(), data) {} - StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc, const StringMemoryMngrPtr& manager) - : m_engine(engine), m_mem_desc(desc), m_manager(manager) {} - - StringMemory(const dnnl::engine& engine, const MemoryDesc& desc, const StringMemoryMngrPtr& manager) - : StringMemory(engine, desc.clone(), manager) {} + StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc, const StringMemoryBlockPtr& block) + : m_engine(engine), m_mem_desc(desc), m_memoryBlock(block) {} - bool isAllocated() const noexcept override; + StringMemory(const dnnl::engine& engine, const MemoryDesc& desc, const StringMemoryBlockPtr& block) + : StringMemory(engine, desc.clone(), block) {} const MemoryDesc& getDesc() const 
override { return *m_mem_desc; @@ -439,10 +419,10 @@ class StringMemory : public IMemory { void load(const IMemory& src, bool ftz = false) const override; - MemoryMngrPtr getMemoryMngr() const override; + MemoryBlockPtr getMemoryBlock() const override; - StringMemoryMngrPtr getStringMemoryMngrPtr() const { - return m_manager; + StringMemoryBlockPtr getStringMemoryBlockPtr() const { + return m_memoryBlock; } dnnl::memory getPrimitive() const override; @@ -452,7 +432,7 @@ class StringMemory : public IMemory { private: dnnl::engine m_engine; MemoryDescPtr m_mem_desc; - StringMemoryMngrPtr m_manager; + StringMemoryBlockPtr m_memoryBlock; }; using MemoryPtr = std::shared_ptr; diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp index ca5e6ab6c1438a..d60ede1decf6c1 100644 --- a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp @@ -171,6 +171,18 @@ DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const_dnnl_memory_desc_t de } } +static size_t sub_byte_data_type_multiplier(dnnl::memory::data_type dataType) { + switch (dataType) { + case dnnl::memory::data_type::nf4: + case dnnl::memory::data_type::s4: + case dnnl::memory::data_type::u4: + case dnnl::memory::data_type::f4_e2m1: + return 2; + default: + return 1; + } +} + size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const dnnl::memory::desc& desc) { auto tmpDesc = desc; @@ -181,7 +193,8 @@ size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const dnnl::memory::desc& desc) if (size == DNNL_RUNTIME_SIZE_VAL) return MemoryDesc::UNDEFINED_SIZE; - size += offset0 * sizeOfDataType(tmpDesc.get_data_type()); + size += div_up(offset0 * sizeOfDataType(tmpDesc.get_data_type()), + sub_byte_data_type_multiplier(tmpDesc.get_data_type())); return size; } diff --git a/src/plugins/intel_cpu/src/dnnl_scratch_pad.h b/src/plugins/intel_cpu/src/dnnl_scratch_pad.h index a589b9dbb0cf71..6f356e58c4770b 100644 --- a/src/plugins/intel_cpu/src/dnnl_scratch_pad.h +++ b/src/plugins/intel_cpu/src/dnnl_scratch_pad.h @@ -13,16 +13,16 @@ namespace ov { namespace intel_cpu { class DnnlScratchPad { - MemoryMngrPtr mgrPtr; + MemoryBlockPtr blockPtr; dnnl::engine eng; public: DnnlScratchPad(const dnnl::engine& eng, int numa_node = -1) : eng(eng) { - mgrPtr = std::make_shared(make_unique(numa_node)); + blockPtr = std::make_shared(make_unique(numa_node)); } MemoryPtr createScratchPadMem(const MemoryDescPtr& md) { - return std::make_shared(eng, md, mgrPtr); + return std::make_shared(eng, md, blockPtr); } }; diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index c193cb1641285b..c314718bb82416 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -273,14 +273,14 @@ void Edge::allocate(const void* mem_ptr) { allocateCommon(allocateFunc); } -void Edge::allocate(MemoryMngrPtr memMngr) { - if (!memMngr) { - OPENVINO_THROW("Unexpected: Memory manager ptr is NULL"); +void Edge::allocate(MemoryBlockPtr memBlock) { + if (!memBlock) { + OPENVINO_THROW("Unexpected: Memory block ptr is NULL"); } auto allocateFunc = [OV_CAPTURE_CPY_AND_THIS](const MemoryDesc& inputDesc) -> MemoryPtr { auto parentPtr = getParent(); - return std::make_shared(parentPtr->getEngine(), inputDesc, memMngr); + return std::make_shared(parentPtr->getEngine(), inputDesc, memBlock); }; allocateCommon(allocateFunc); @@ -533,11 +533,12 @@ EdgePtr Edge::getBaseEdge(int look) { bool Edge::inPlace(LOOK look) const { int inputNum = 
getInputNum(); - int outputNum = getOutputNum(); if (look & LOOK_UP) { if (getParent()->inPlaceOutPort(inputNum) >= 0) return true; } + + int outputNum = getOutputNum(); if (look & LOOK_DOWN) { if (getChild()->inPlaceInputPort(outputNum) >= 0) return true; diff --git a/src/plugins/intel_cpu/src/edge.h b/src/plugins/intel_cpu/src/edge.h index e9f26a2d8955b4..29cb8113943cd3 100644 --- a/src/plugins/intel_cpu/src/edge.h +++ b/src/plugins/intel_cpu/src/edge.h @@ -52,7 +52,7 @@ class Edge { void init(); void allocate(const void* mem_ptr = nullptr); - void allocate(MemoryMngrPtr memMngr); + void allocate(MemoryBlockPtr memBlock); void externalAllocate(WeightsSharing::Ptr weightsCache); void reuse(MemoryPtr ptr); void validate(); diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 0175ddfae1b644..e0573e310ac86c 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -44,8 +44,6 @@ #include #include "common/primitive_desc_iface.hpp" -#include "openvino/runtime/memory_solver.hpp" - #include "openvino/runtime/threading/cpu_streams_executor.hpp" #include "openvino/core/parallel.hpp" @@ -57,9 +55,6 @@ using namespace dnnl; namespace ov { namespace intel_cpu { -typedef std::unordered_set edge_cluster_t; -typedef std::vector edge_clusters_t; - Graph::~Graph() { CPU_DEBUG_CAP_ENABLE(summary_perf(*this)); } @@ -92,7 +87,6 @@ void Graph::CreateGraph(const std::vector& graphNodes, m_stream = dnnl::stream(getEngine()); this->_name = std::move(name); - this->reuse_io_tensors = false; this->graphNodes = graphNodes; this->graphEdges = graphEdges; @@ -118,7 +112,6 @@ template void Graph::CreateGraph(const std::shared_ptr&, const void Graph::Replicate(const std::shared_ptr &model) { OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::intel_cpu_LT, "Graph::Replicate", "ov::Model"); this->_name = model->get_friendly_name(); - this->reuse_io_tensors = false; // Map data object onto producer node std::map, NodePtr> op2node; @@ -646,53 +639,8 @@ static inline bool isConstOutput(EdgePtr edge) { return edge->getParent()->isConstant() && !edge->getChild()->isConstant(); } -static edge_clusters_t findEdgeClusters(const std::vector & graphEdges) { - typedef std::unordered_map edge_cluster_idx_map_t; - - edge_clusters_t edge_clusters; - edge_cluster_idx_map_t edge_cluster_indices; - - for (auto &edge : graphEdges) { - auto edge_it = edge_cluster_indices.find(edge); - if (edge_it != edge_cluster_indices.end()) - continue; // edge is visited - - size_t cluster_idx = edge_clusters.size(); - EdgePtr last_shared_edge = nullptr; - - // find cluster index - for (auto shared_edge = edge->getSharedEdge(std::nothrow); - shared_edge; - shared_edge = shared_edge->getSharedEdge(std::nothrow)) { - auto shared_edge_it = edge_cluster_indices.find(shared_edge); - if (shared_edge_it != edge_cluster_indices.end()) { - cluster_idx = shared_edge_it->second; - last_shared_edge = shared_edge; - break; - } - } - - // add shared edges to cluster - edge_cluster_indices.emplace(edge, cluster_idx); - - if (cluster_idx == edge_clusters.size()) - edge_clusters.emplace_back(edge_cluster_t { edge }); - else - edge_clusters[cluster_idx].emplace(edge); - - for (auto shared_edge = edge->getSharedEdge(std::nothrow); - shared_edge != last_shared_edge; - shared_edge = shared_edge->getSharedEdge(std::nothrow)) { - edge_cluster_indices.emplace(shared_edge, cluster_idx); - edge_clusters[cluster_idx].emplace(shared_edge); - } - } - - return edge_clusters; -} - void 
Graph::AllocateWithReuse(const std::vector& syncNodesInds) { - edge_clusters_t edge_clusters = findEdgeClusters(graphEdges); + edgeClusters edge_clusters = MemoryControl::findEdgeClusters(graphEdges); size_t remaining_edge_clusters_count = edge_clusters.size(); @@ -709,7 +657,7 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { // Special allocation for string tensors if (edge->getDesc().getPrecision() == element::string && edge->getStatus() == Edge::Status::NeedAllocation) { - StringMemory::StringMemoryMngrPtr mngr; + StringMemory::StringMemoryBlockPtr memBlcok; if (edge->getParent()->isConstant()) { if (edge->getParent()->getType() == Type::Input) { auto constNode = static_cast(edge->getParent().get()); @@ -720,11 +668,11 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { auto stringMemory = dynamic_cast(edge->getMemoryPtr().get()); OPENVINO_ASSERT(stringMemory, "[CPU] Edge between nodes '", edge->getParent()->getName(), "' and '", edge->getChild()->getName(), "' must have StringMemory."); - mngr = stringMemory->getStringMemoryMngrPtr(); + memBlcok = stringMemory->getStringMemoryBlockPtr(); } else { auto memory = std::make_shared(getEngine(), edge->getDesc()); edge->reuse(memory); - mngr = memory->getStringMemoryMngrPtr(); + memBlcok = memory->getStringMemoryBlockPtr(); } for (auto& edge_c : cluster) { if (edge_c == edge) { @@ -732,7 +680,7 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { } OPENVINO_ASSERT(edge_c->getDesc().getPrecision() == element::string, "All edges in the cluster must be string."); if (edge_c->getStatus() == Edge::Status::NotAllocated) { - auto memory = std::make_shared(getEngine(), edge_c->getDesc(), mngr); + auto memory = std::make_shared(getEngine(), edge_c->getDesc(), memBlcok); edge_c->reuse(memory); } else { OPENVINO_THROW("[CPU] String tensors allocation in the cluster. 
Edge between nodes '", edge_c->getParent()->getName(), "' and '", @@ -764,77 +712,108 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { } } - const int64_t alignment = 32; // 32 bytes + // Markup the memory regions + std::vector memoryRegions; + memoryRegions.reserve(remaining_edge_clusters_count); + + for (size_t i = 0; i < remaining_edge_clusters_count; ++i) { + MemoryRegion reg = {std::numeric_limits::max(), + 0, + 0, + static_cast(i), + MemoryRegion::RegionType::VARIABLE, + MemoryRegion::AllocType::UNKNOWN}; - // Markup the boxes - std::vector definedBoxes; - std::vector undefinedBoxes; - for (size_t i = 0; i < remaining_edge_clusters_count; i++) { - ov::MemorySolver::Box box = { std::numeric_limits::max(), 0, 0, static_cast(i) }; int64_t boxSize = 0; + bool isConst = false, isOutput = false, isInput = false; for (auto &edge : edge_clusters[i]) { - int e_start = edge->getParent()->execIndex; - int e_finish = edge->getChild()->execIndex; + int e_start = edge->getParent()->getExecIndex(); + int e_finish = edge->getChild()->getExecIndex(); + + auto&& desc = edge->getDesc(); - if (boxSize != -1 && edge->getDesc().isDefined()) { - int64_t e_size = edge->getDesc().getCurrentMemSize(); // size in bytes (from the beginning of data to the last element) + if (boxSize != -1 && desc.isDefined()) { + int64_t e_size = desc.getCurrentMemSize(); // size in bytes (from the beginning of data to the last element) boxSize = std::max(e_size, boxSize); } else { boxSize = -1; } - box.start = std::min(e_start, box.start); - box.finish = std::max(e_finish, box.finish); - } + reg.start = std::min(e_start, reg.start); + reg.finish = std::max(e_finish, reg.finish); + + auto allocType = + desc.getPrecision() == element::string ? MemoryRegion::AllocType::STRING : MemoryRegion::AllocType::POD; + + if (reg.alloc_type != allocType && MemoryRegion::AllocType::UNKNOWN != reg.alloc_type) { + OPENVINO_THROW("Different allocation types in the same memory region"); + } + reg.alloc_type = allocType; - // Constant data are filled once on load. - // So we need it untouchable during all execution time - // -1 is a place holder for a max timestamp. 
- bool isConst = false, isOutput = false, isInput = false; - for (auto &edge : edge_clusters[i]) { isConst |= isConstOutput(edge); isOutput |= edge->getChild()->getType() == Type::Output; isInput |= edge->getParent()->getType() == Type::Input; } - if (reuse_io_tensors) { - if (isInput | isConst) box.start = 0; - if (isOutput | isConst) box.finish = -1; - } else { - if (isInput | isOutput | isConst) { - box.start = 0; - box.finish = -1; + reg.size = boxSize; + + if (isConst) { + reg.type = MemoryRegion::RegionType::CONSTANT; + } else if (isInput) { + if (isOutput) { + reg.type = MemoryRegion::RegionType::IO; + } else { + reg.type = MemoryRegion::RegionType::INPUT; } + } else if (isOutput) { + reg.type = MemoryRegion::RegionType::OUTPUT; } - if (boxSize != -1) { - box.size = div_up(boxSize, alignment); - definedBoxes.push_back(box); - } else { - box.size = boxSize; - undefinedBoxes.push_back(box); - } + memoryRegions.push_back(reg); } - // Process defined boxes (static shapes) - ov::MemorySolver staticMemSolver(definedBoxes); - size_t total_size = static_cast(staticMemSolver.solve()) * alignment; - - memWorkspace = std::make_shared(getEngine(), DnnlBlockedMemoryDesc(ov::element::i8, Shape(VectorDims{total_size}))); + // special processing of the dynamic output edges + auto it = std::remove_if(memoryRegions.begin(), memoryRegions.end(), [&](const MemoryRegion& region) { + if (region.size >= 0 || !one_of(region.type, MemoryRegion::RegionType::OUTPUT, MemoryRegion::RegionType::IO)) { + return false; + } + bool result = false; + for (auto& edge : edge_clusters[region.id]) { + auto child = edge->getChild(); + if (child->getType() == Type::Output && edge->getStatus() == Edge::Status::NeedAllocation) { + auto proxyMemBlock = std::make_shared(); + DEBUG_LOG("ProxyMemoryBlock ", proxyMemBlock, " ", this); + edge->allocate(proxyMemBlock); + + // Store the output memory blocks. + // So that, the infer requests can be able to access them. + int count = 0; + for (auto& output : outputNodesMap) { + if (output.second == child) { + outputNodesMemBlocksMap[output.first] = proxyMemBlock; + count++; + } + } + // sometimes there are unused output ports. + OPENVINO_ASSERT(count <= 1, "CPU plugin cannot find output node. count ", count); + result = true; + } + } + return result; + }); - if (edge_clusters.empty()) - return; + memoryRegions.erase(it, memoryRegions.end()); - auto* workspace_ptr = static_cast(memWorkspace->getData()); + //Set up the memory control subsystem. + this->m_pMemoryControl = &(getGraphContext()->getNetworkMemoryControl()->createMemoryControlUnit(syncNodesInds)); + auto memoryBlocks = m_pMemoryControl->insert(memoryRegions); - for (const auto& box : definedBoxes) { + // attach all the not yet allocated edges to the memory contol + for (auto&& item : memoryBlocks) { int count = 0; - for (auto& edge : edge_clusters[box.id]) { + for (auto&& edge : edge_clusters[item.first]) { if (edge->getStatus() == Edge::Status::NeedAllocation) { - int64_t offset = staticMemSolver.get_offset(box.id); - // !! Fallback to individual memory allocation !! - // if you like to check infer without reuse just call this function without arguments. - edge->allocate(workspace_ptr + offset * alignment); // alignment in byte + edge->allocate(item.second); // TODO: WA for some test (like strided_slice_test) which use tensors with // shapes {0}. And it is implicitly converted into {1} tensor. 
@@ -848,92 +827,7 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { OPENVINO_ASSERT(count == 1); } - //Process undefined boxes (dynamic shapes) - if (!undefinedBoxes.empty()) { - // Use proxy memory manager for output edges - for (const auto& box : undefinedBoxes) { - for (auto& edge : edge_clusters[box.id]) { - const auto child = edge->getChild(); - if (child->getType() == Type::Output && - edge->getStatus() == Edge::Status::NeedAllocation) { - auto proxyMemMngr = - std::make_shared(); - DEBUG_LOG("ProxyMemoryMngr ", proxyMemMngr, " ", this); - edge->allocate(proxyMemMngr); - - // Store the output memory managers. - // So that, the infer requests can be able to access them. - int count = 0; - for (auto &output : outputNodesMap) { - if (output.second == child) { - outputNodesMemMngrMap[output.first] = proxyMemMngr; - count++; - } - } - // sometimes there are unused output ports. - OPENVINO_ASSERT(count <= 1, "CPU plugin cannot find output node. count ", count); - } - } - } - - if (!syncNodesInds.empty()) { - //We have to extend the lifespan of tensors that are crossing a sync point border in order to save - //the intermediate computation results from possible loss due to the tensor resize - for (auto& box : undefinedBoxes) { - if (-1 == box.finish) { - continue; - } - auto itr_upper = std::upper_bound(syncNodesInds.begin(), syncNodesInds.end(), box.finish, [](int y, int x) { return y <= x;}); - auto itr_lower = std::lower_bound(syncNodesInds.begin(), syncNodesInds.end(), box.start); - if (itr_lower != itr_upper) { // across sections - if (itr_upper == syncNodesInds.end()) { - box.finish = -1; - } else { - box.finish = *itr_upper; - } - } - } - } - - ov::MemorySolver::normalize_boxes(undefinedBoxes); - - std::vector> groups; //groups of nonoverlapping boxes - constexpr bool enableMemReuse = true; // set false to disable mem reuse for debug purposes - if (enableMemReuse) { - groups.push_back({undefinedBoxes.front()}); - for (size_t i = 1; i < undefinedBoxes.size(); ++i) { - const auto& box = undefinedBoxes[i]; - bool groupFound = false; - for (auto& group : groups) { - const auto& lastBox = group.back(); - if (lastBox.start > box.finish || lastBox.finish < box.start) { - group.push_back(box); - groupFound = true; - break; - } - } - - if (!groupFound) { - groups.push_back({box}); - } - } - } else { - for (auto& box : undefinedBoxes) { - groups.push_back({box}); - } - } - for (auto& group : groups) { - auto grpMemMngr = - std::make_shared(make_unique()); - for (auto& box : group) { - for (auto& edge : edge_clusters[box.id]) { - if (edge->getStatus() == Edge::Status::NeedAllocation) { - edge->allocate(grpMemMngr); - } - } - } - } - } + m_pMemoryControl->allocateMemory(); // Resolve all other edges with status NotAllocated and in-place for (auto& cluster : edge_clusters) { @@ -957,7 +851,7 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { } else { auto sharedEdge = edge->getSharedEdge(); auto sharedEdgeParent = sharedEdge->getParent(); - edge->allocate(sharedEdge->getMemoryPtr()->getMemoryMngr()); + edge->allocate(sharedEdge->getMemoryPtr()->getMemoryBlock()); DEBUG_LOG(*edge, " sharedEdge with ", *sharedEdge); } } @@ -1002,13 +896,6 @@ bool Graph::ProcessDynNodes() { const bool containsDynamicNodes = std::any_of(graphNodes.begin(), graphNodes.end(), [](const NodePtr& node) { return node->isDynamicNode(); }); - // In case of dynamic shapes, tensors may be resized due to the shapes variations. 
- // If the input tensor is included to memory reuse, it means that its memory manager is shared with other tensors in the graph, which in turn may cause data - // loss when one of the tensors down the graph requests mem resize, while the input data have not been yet read by the consumers. To avoid such situations - // we disable io mem reuse for the case of dynamic shapes. - if (containsDynamicNodes) { - this->reuse_io_tensors = false; - } return containsDynamicNodes; } @@ -1385,6 +1272,14 @@ void Graph::Infer(SyncInferRequest* request) { DEBUG_LOG("Infer graph: ", GetName(), ". Status: ", static_cast(status)); const int numaId = GetNumaNodeId(context); + if (!m_pMemoryControl) { + OPENVINO_THROW("Memory control unit is not initilized in graph: ", GetName()); + } + + if (!m_pMemoryControl->allocated()) { + m_pMemoryControl->allocateMemory(); + } + switch (status) { case Status::ReadyDynamic: InferDynamic(request, numaId, UpdateNodes(m_executableGraphNodes)); diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index 58dc9fd92faf48..3f9debefe7e06c 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -10,6 +10,7 @@ #include "node.h" #include "edge.h" #include "graph_context.h" +#include "memory_control.hpp" #include "openvino/runtime/profiling_info.hpp" #include @@ -40,6 +41,8 @@ class Graph { }; Graph() = default; + Graph(Graph&&) = default; + Graph& operator=(Graph&&) = default; ~Graph(); @@ -204,10 +207,6 @@ class Graph { // values mean increment it within each Infer() call int infer_count = -1; - bool reuse_io_tensors = true; - - MemoryPtr memWorkspace; - std::vector graphNodes; std::vector graphEdges; @@ -253,12 +252,21 @@ class Graph { friend class intel_cpu::SyncInferRequest; friend std::shared_ptr dump_graph_as_ie_ngraph_net(const Graph &graph); +private: + using event_t = void (Graph::*)(void); + +private: + void EnforceInferencePrecision(); + void EnforceBF16(); + void insertReorder(EdgePtr& edge, bool isOptimized, std::unordered_set& uniqueLayerNames); + void insertConvert(EdgePtr& edge); + private: // TODO: change std::map to std::unordered_map std::map inputNodesMap; std::map outputNodesMap; - std::unordered_map outputNodesMemMngrMap; + std::unordered_map outputNodesMemBlocksMap; // these node pointers (from graphNodes) are to avoid regular checking for // constantness of nodes in Infer methods and calls of @@ -269,10 +277,7 @@ class Graph { GraphContext::CPtr context; dnnl::stream m_stream; - void EnforceInferencePrecision(); - void EnforceBF16(); - void insertReorder(EdgePtr& edge, bool isOptimized, std::unordered_set& uniqueLayerNames); - void insertConvert(EdgePtr& edge); + MemoryControl* m_pMemoryControl = nullptr; }; using GraphPtr = std::shared_ptr; diff --git a/src/plugins/intel_cpu/src/graph_context.cpp b/src/plugins/intel_cpu/src/graph_context.cpp index 2699a8854afb80..e200766fa4791c 100644 --- a/src/plugins/intel_cpu/src/graph_context.cpp +++ b/src/plugins/intel_cpu/src/graph_context.cpp @@ -4,6 +4,7 @@ #include "dnnl_types.h" #include "graph_context.h" #include "nodes/memory.hpp" +#include "memory_control.hpp" namespace ov { namespace intel_cpu { @@ -18,7 +19,8 @@ GraphContext::GraphContext(const Config& config, isGraphQuantizedFlag(isGraphQuantized), streamExecutor(streamExecutor), subMemoryManager(sub_memory_manager), - memoryStatesRegister(std::make_shared()) { + memoryStatesRegister(std::make_shared()), + networkMemoryControl(std::make_shared()) { rtParamsCache = 
std::make_shared(config.rtCacheCapacity); // primitive/executors can be shared across sub-stream // but scratch pad cannot be shared. diff --git a/src/plugins/intel_cpu/src/graph_context.h b/src/plugins/intel_cpu/src/graph_context.h index 138ccebe0f9a40..db2b126213978c 100644 --- a/src/plugins/intel_cpu/src/graph_context.h +++ b/src/plugins/intel_cpu/src/graph_context.h @@ -18,6 +18,8 @@ namespace node { class MemoryStatesRegister; } // namespace node +class NetworkMemoryControl; + class GraphContext { public: typedef std::shared_ptr Ptr; @@ -76,6 +78,10 @@ class GraphContext { return memoryStatesRegister; } + const std::shared_ptr& getNetworkMemoryControl() const { + return networkMemoryControl; + } + private: Config config; // network-level config @@ -97,6 +103,7 @@ class GraphContext { int numNumaNodes = 1; std::shared_ptr memoryStatesRegister; + std::shared_ptr networkMemoryControl; }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index 2d382c00508287..85bc5c2fe38f45 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -162,13 +162,13 @@ static inline void change_edge_ptr(const EdgePtr& edge, ov::SoPtr& OPENVINO_ASSERT(mem != nullptr, "Edge with name '", edge->name(), "' doesn't have allocated memory object."); if (tensor->get_element_type() == element::string) { - auto memMngr = dynamic_cast(mem.get())->getStringMemoryMngrPtr(); - OPENVINO_ASSERT(memMngr); - memMngr->setExtBuff(tensor->data(), tensor->get_size()); + auto memBlock = dynamic_cast(mem.get())->getStringMemoryBlockPtr(); + OPENVINO_ASSERT(memBlock); + memBlock->setExtBuff(tensor->data(), tensor->get_size()); } else { - auto memMngr = mem->getMemoryMngr(); - OPENVINO_ASSERT(memMngr); - memMngr->setExtBuff(tensor->data(), tensor->get_byte_size()); + auto memBlock = mem->getMemoryBlock(); + OPENVINO_ASSERT(memBlock); + memBlock->setExtBuff(tensor->data(), tensor->get_byte_size()); } } @@ -241,11 +241,11 @@ void SyncInferRequest::change_default_ptr() { auto output = outputNodesMap.find(it.first); OPENVINO_ASSERT(outputNodesMap.end() != output, "Cannot find output tensor with index: ", it.first); auto parentEdge = output->second->getParentEdgeAt(0); - if (parentEdge->getMemory().getData() == static_cast(it.second->data())) + void* const outputRawPtr = parentEdge->getMemory().getData(); + if (outputRawPtr == static_cast(it.second->data())) continue; bool canBeInPlace = true; - void* defaultPtr = parentEdge->getMemory().getData(); // Cannot be in-place after concat because concat is using different ptrs without offsets auto parent = parentEdge->getParent(); NodePtr previousParent; @@ -267,7 +267,7 @@ void SyncInferRequest::change_default_ptr() { if (!e) OPENVINO_THROW("Node ", parent->getName(), " contains empty parent edge"); - if (e->getMemory().getData() == defaultPtr) { + if (parent_port == parent->inPlaceInputPort(e->getOutputNum())) { parent = e->getParent(); parent_port = e->getInputNum(); break; @@ -279,13 +279,13 @@ void SyncInferRequest::change_default_ptr() { } if (Graph::Status::ReadyDynamic == m_graph->getStatus()) { - const auto &outMemMngrMap = m_graph->outputNodesMemMngrMap; - for (auto&& item : outMemMngrMap) { + const auto &outMemBlocksMap = m_graph->outputNodesMemBlocksMap; + for (auto&& item : outMemBlocksMap) { const auto& name = item.first; - // share intel_cpu::Tensor to Graph by injecting to corresponding ProxyMemoryMngr instance. 
- auto outputMemMngr = item.second; - OPENVINO_ASSERT(outputMemMngr, "proxy mem manager for output ", name, " is empty."); + // share intel_cpu::Tensor to Graph by injecting to corresponding ProxyMemoryBlock instance. + auto outputMemBlock = item.second; + OPENVINO_ASSERT(outputMemBlock, "proxy mem block for output ", name, " is empty."); auto controlBlockItr = m_outputControlBlocks.find(name); @@ -296,15 +296,15 @@ void SyncInferRequest::change_default_ptr() { //avoid cyclic memory use auto&& controlBlock = controlBlockItr->second; - std::shared_ptr memMngr = inputPtrs.count(controlBlock.rawPtr()) ? // same memory is used on the input and output - controlBlock.nextMemMngr() : // then swap internal buffer to avoid data corruption - controlBlock.currentMemMngr(); // else reuse the existing buffer + std::shared_ptr memBlock = inputPtrs.count(controlBlock.rawPtr()) ? // same memory is used on the input and output + controlBlock.nextMemBlock() : // then swap internal buffer to avoid data corruption + controlBlock.currentMemBlock(); // else reuse the existing buffer - outputMemMngr->setMemMngrResize(memMngr); - DEBUG_LOG("reset proxy ", outputMemMngr, ", actual ", controlBlock.currentMemMngr(), " graph ", m_graph, " inferrequest ", this); + outputMemBlock->setMemBlockResize(memBlock); + DEBUG_LOG("reset proxy ", outputMemBlock, ", actual ", controlBlock.currentMemBlock(), " graph ", m_graph, " inferrequest ", this); DEBUG_LOG(name, ", tensor ", controlBlock.tensor()); } else { - outputMemMngr->reset(); // switch to the internal memory since memory sharing is no longer possible + outputMemBlock->reset(); // switch to the internal memory since memory sharing is no longer possible } } } @@ -553,8 +553,8 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyn DEBUG_LOG(port_index, ", tensor ", control_block.tensor(), - ", memmngr ", - control_block.tensor()->get_memory()->getMemoryMngr(), + ", memBlock ", + control_block.tensor()->get_memory()->getMemoryBlock(), "memory object ", control_block.tensor()->get_memory().get()); @@ -598,8 +598,8 @@ void SyncInferRequest::push_input_data() { SyncInferRequest::OutputControlBlock::OutputControlBlock(const ov::element::Type& precision, const Shape& shape) { dnnl::engine eng(dnnl::engine::kind::cpu, 0); - m_buffers[m_buffIndx] = std::make_shared(); - m_proxyMemMngr = std::make_shared(m_buffers[m_buffIndx]); + m_buffers[m_buffIndx] = std::make_shared(); + m_proxyMemBlock = std::make_shared(m_buffers[m_buffIndx]); VectorDims memDims; if (shape.isDynamic()) { // this is a WA since the ITensor doesn't allow dyn shapes @@ -613,7 +613,7 @@ SyncInferRequest::OutputControlBlock::OutputControlBlock(const ov::element::Type CpuBlockedMemoryDescPtr desc = std::make_shared(precision, Shape{memDims}); - auto memory = std::make_shared(eng, desc, m_proxyMemMngr); + auto memory = std::make_shared(eng, desc, m_proxyMemBlock); m_tensor = std::make_shared(memory); } diff --git a/src/plugins/intel_cpu/src/infer_request.h b/src/plugins/intel_cpu/src/infer_request.h index 2e097c87673a37..a9def63d359744 100644 --- a/src/plugins/intel_cpu/src/infer_request.h +++ b/src/plugins/intel_cpu/src/infer_request.h @@ -50,7 +50,7 @@ class SyncInferRequest : public ov::ISyncInferRequest { private: class OutputControlBlock { public: - using MemMngrPtr = std::shared_ptr; + using MemBlockPtr = std::shared_ptr; public: OutputControlBlock(const ov::element::Type& precision, const Shape& shape); @@ -69,26 +69,26 @@ class SyncInferRequest : public ov::ISyncInferRequest { 
return m_tensor->get_memory()->getData(); } - MemMngrPtr currentMemMngr() const { + MemBlockPtr currentMemBlock() const { return m_buffers[m_buffIndx]; } - MemMngrPtr nextMemMngr() { + MemBlockPtr nextMemBlock() { m_buffIndx ^= 0x1; if (!m_buffers[m_buffIndx]) { - m_buffers[m_buffIndx] = std::make_shared(); + m_buffers[m_buffIndx] = std::make_shared(); } return m_buffers[m_buffIndx]; } void update() { - m_proxyMemMngr->setMemMngrResize(currentMemMngr()); + m_proxyMemBlock->setMemBlockResize(currentMemBlock()); } private: std::shared_ptr m_tensor = nullptr; - ProxyMemoryMngrPtr m_proxyMemMngr = nullptr; - std::array m_buffers; + ProxyMemoryBlockPtr m_proxyMemBlock = nullptr; + std::array m_buffers; int m_buffIndx = 0; }; diff --git a/src/plugins/intel_cpu/src/memory_control.cpp b/src/plugins/intel_cpu/src/memory_control.cpp new file mode 100644 index 00000000000000..0f202c296891c1 --- /dev/null +++ b/src/plugins/intel_cpu/src/memory_control.cpp @@ -0,0 +1,428 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "memory_control.hpp" + +#include + +#include "node.h" +#include "openvino/runtime/memory_solver.hpp" + +namespace ov { +namespace intel_cpu { + +namespace { + +class StaticPartitionMemoryBlock : public IMemoryBlockObserver { +public: + StaticPartitionMemoryBlock(MemoryBlockPtr pBlock, ptrdiff_t offset) + : m_pBlock(pBlock), m_offset(offset) { + OPENVINO_ASSERT(m_pBlock, "Memory block is uninitialized"); + } + + void* getRawPtr() const noexcept override { + return static_cast(m_pBlock->getRawPtr()) + m_offset; + } + void setExtBuff(void* ptr, size_t size) override { + OPENVINO_THROW("Unexpected setExtBuff call to StaticPartitionMemoryBlock"); + } + bool resize(size_t size) override { + // don't pass over as it's static memory + return false; + } + bool hasExtBuffer() const noexcept override { + return m_pBlock->hasExtBuffer(); + } + void registerMemory(Memory* memPtr) override { + m_pBlock->registerMemory(memPtr); + } + void unregisterMemory(Memory* memPtr) override { + m_pBlock->unregisterMemory(memPtr); + } + +private: + MemoryBlockPtr m_pBlock; + ptrdiff_t m_offset = 0; +}; + +class MemoryBlockWithRelease : public IMemoryBlockObserver { +public: + MemoryBlockWithRelease() { + auto pInternalMem = make_unique(); + m_pInternalMem = pInternalMem.get(); + m_pBlock = std::make_shared(std::move(pInternalMem)); + } + + void* getRawPtr() const noexcept override { + return m_pBlock->getRawPtr(); + } + void setExtBuff(void* ptr, size_t size) override { + m_pBlock->setExtBuff(ptr, size); + } + bool resize(size_t size) override { + return m_pBlock->resize(size); + } + bool hasExtBuffer() const noexcept override { + return m_pBlock->hasExtBuffer(); + } + void registerMemory(Memory* memPtr) override { + m_pBlock->registerMemory(memPtr); + } + void unregisterMemory(Memory* memPtr) override { + m_pBlock->unregisterMemory(memPtr); + } + void free() { + m_pInternalMem->free(); + } + +private: + MemoryBlockPtr m_pBlock; + MemoryBlockWithReuse* m_pInternalMem; +}; + +class IMemoryManager { +public: + virtual ~IMemoryManager() = default; + virtual void insert(const MemoryRegion& reg) = 0; + virtual const MemoryControl::MemoryBlockMap& lastSolution() = 0; + virtual void allocate() = 0; + virtual void release() = 0; +}; + +using MemoryManagerPtr = std::shared_ptr; + +template +std::shared_ptr makeDnnlMemoryBlock(Args&&... 
args) { + return std::make_shared(make_unique(std::forward(args)...)); +} + +class MemoryManagerIO : public IMemoryManager { +public: + void insert(const MemoryRegion& reg) override { + m_blocks.insert({reg.id, makeDnnlMemoryBlock()}); + } + + const MemoryControl::MemoryBlockMap& lastSolution() override { + return m_blocks; + } + + void allocate() override { + // nothing to do + } + void release() override { + // nothing to do + } + +private: + MemoryControl::MemoryBlockMap m_blocks; +}; + +class MemoryManagerStatic : public IMemoryManager { +public: + void insert(const MemoryRegion& reg) override { + m_boxes.emplace_back(MemorySolver::Box{reg.start, reg.finish, reg.size, reg.id}); + } + + const MemoryControl::MemoryBlockMap& lastSolution() override { + if (!m_boxes.empty() && m_blocks.empty()) { + solve(); + } + return m_blocks; + } + +private: + void solve() { + constexpr size_t alignment = 32; + std::for_each(m_boxes.begin(), m_boxes.end(), [=](MemorySolver::Box& box) { + box.size = div_up(box.size, alignment); + }); + + ov::MemorySolver staticMemSolver(m_boxes); + m_totalSize = static_cast(staticMemSolver.solve()) * alignment; + + m_workspace = std::make_shared(); + + for (const auto& box : m_boxes) { + int64_t offset = staticMemSolver.get_offset(box.id); + auto memoryBlock = std::make_shared(m_workspace, offset * alignment); + m_blocks[box.id] = std::move(memoryBlock); + } + m_boxes.clear(); + } + + void allocate() override { + if (m_workspace) m_workspace->resize(m_totalSize); + } + void release() override { + if (m_workspace) m_workspace->free(); + } + +private: + MemoryControl::MemoryBlockMap m_blocks; + std::vector m_boxes; + std::shared_ptr m_workspace; + size_t m_totalSize = 0; +}; + +class MemoryManageNonOverlapingSets : public IMemoryManager { +public: + MemoryManageNonOverlapingSets(std::vector syncInds) : m_syncInds(std::move(syncInds)) {} + void insert(const MemoryRegion& reg) override { + MemorySolver::Box box = {reg.start, reg.finish, reg.size, reg.id}; + if (-1 != reg.finish) { + //We have to extend the lifespan of tensors that are crossing a sync point border in order to save + //the intermediate computation results from possible loss due to the tensor resize + auto itr_upper = + std::upper_bound(m_syncInds.begin(), m_syncInds.end(), box.finish, [](int y, int x) { + return y <= x; + }); + auto itr_lower = std::lower_bound(m_syncInds.begin(), m_syncInds.end(), box.start); + if (itr_lower != itr_upper) { // across sections + if (itr_upper == m_syncInds.end()) { + box.finish = -1; + } else { + box.finish = *itr_upper; + } + } + } + m_boxes.emplace_back(std::move(box)); + } + + const MemoryControl::MemoryBlockMap& lastSolution() override { + if (!m_boxes.empty() && m_blocks.empty()) { + solve(); + m_blocks = MemoryControl::MemoryBlockMap{m_internalBlocks.begin(), m_internalBlocks.end()}; + } + return m_blocks; + } + +private: + void solve() { + ov::MemorySolver::normalize_boxes(m_boxes); + + std::vector> groups; //groups of nonoverlapping boxes + groups.push_back({m_boxes.front()}); + for (size_t i = 1; i < m_boxes.size(); ++i) { + const auto& box = m_boxes[i]; + bool groupFound = false; + for (auto& group : groups) { + const auto& lastBox = group.back(); + if (lastBox.start > box.finish || lastBox.finish < box.start) { + group.push_back(box); + groupFound = true; + break; + } + } + + if (!groupFound) { + groups.push_back({box}); + } + } + for (auto& group : groups) { + auto grpMemBlock = std::make_shared(); + for (auto& box : group) { + m_internalBlocks[box.id] = 
grpMemBlock; + } + } + m_boxes.clear(); + } + + void allocate() override { + // nothing to do + } + void release() override { + for (auto&& item : m_internalBlocks) { + item.second->free(); + } + } + +private: + MemoryControl::MemoryBlockMap m_blocks; + std::unordered_map> + m_internalBlocks; + std::vector m_boxes; + std::vector m_syncInds; +}; + +} // namespace + +class MemoryControl::RegionHandler { +public: + using Condition = std::function; + +public: + RegionHandler(Condition cond, MemoryManagerPtr memManager) + : m_cond(std::move(cond)), + m_memManager(std::move(memManager)) {} + + bool insert(const MemoryRegion& reg) { + if (!m_cond(reg)) { + return false; + } + + m_memManager->insert(reg); + return true; + } + + const MemoryControl::MemoryBlockMap& lastSolution() const { + return m_memManager->lastSolution(); + } + + void allocate() { + m_memManager->allocate(); + } + + void release() { + m_memManager->release(); + } + +private: + Condition m_cond; + MemoryManagerPtr m_memManager; +}; + +namespace { + +template +MemoryControl::RegionHandlerPtr buildHandler(F&& f, Args&&... args) { + return std::make_shared(std::forward(f), + std::make_shared(std::forward(args)...)); +} + +} // namespace + +MemoryControl::MemoryControl(std::vector syncInds) { + // init handlers + + // handler for dynamic tensors + m_handlers.emplace_back(buildHandler([](const MemoryRegion& reg) { + if (reg.size < 0 || MemoryRegion::RegionType::VARIABLE != reg.type || + MemoryRegion::AllocType::POD != reg.alloc_type) { + return false; + } + return true; + })); + + // handler for static tensors + m_handlers.emplace_back(buildHandler([](const MemoryRegion& reg) { + if (reg.size >= 0 || MemoryRegion::RegionType::VARIABLE != reg.type || + MemoryRegion::AllocType::POD != reg.alloc_type) { + return false; + } + return true; + }, std::move(syncInds))); + + // handler for I/O tensors, so far simply individual blocks + m_handlers.emplace_back(buildHandler([](const MemoryRegion& reg) { + if (MemoryRegion::RegionType::VARIABLE == reg.type || reg.alloc_type != MemoryRegion::AllocType::POD) { + return false; + } + return true; + })); +} + +void MemoryControl::insert(const MemoryRegion& region) { + for (auto&& handler : m_handlers) { + if (handler->insert(region)) { + return; + } + } + OPENVINO_THROW("No suitable handler was found for the given memory region"); +} + +MemoryControl::MemoryBlockMap MemoryControl::insert(const std::vector& regions) { + for (auto&& region : regions) { + insert(region); + } + + MemoryControl::MemoryBlockMap blocksMap; + blocksMap.reserve(regions.size()); + + for (auto&& handler : m_handlers) { + auto&& solution = handler->lastSolution(); + for (auto&& item : solution) { + auto res = blocksMap.insert(item); + OPENVINO_ASSERT(res.second, "Memory solutions have non-unique entries"); + } + } + + return blocksMap; +} + +void MemoryControl::allocateMemory() { + for (auto&& handler : m_handlers) { + handler->allocate(); + } + m_allocated = true; +} + +void MemoryControl::releaseMemory() { + for (auto&& handler : m_handlers) { + handler->release(); + } + m_allocated = false; +} + +edgeClusters MemoryControl::findEdgeClusters(const std::vector& graphEdges) { + typedef std::unordered_map edge_cluster_idx_map_t; + + edgeClusters edge_clusters; + edge_cluster_idx_map_t edge_cluster_indices; + + for (auto& edge : graphEdges) { + auto edge_it = edge_cluster_indices.find(edge); + if (edge_it != edge_cluster_indices.end()) + continue; // edge is visited + + size_t cluster_idx = edge_clusters.size(); + EdgePtr
last_shared_edge = nullptr; + + // find cluster index + for (auto shared_edge = edge->getSharedEdge(std::nothrow); shared_edge; + shared_edge = shared_edge->getSharedEdge(std::nothrow)) { + auto shared_edge_it = edge_cluster_indices.find(shared_edge); + if (shared_edge_it != edge_cluster_indices.end()) { + cluster_idx = shared_edge_it->second; + last_shared_edge = shared_edge; + break; + } + } + + // add shared edges to cluster + edge_cluster_indices.emplace(edge, cluster_idx); + + if (cluster_idx == edge_clusters.size()) + edge_clusters.emplace_back(edgeCluster{edge}); + else + edge_clusters[cluster_idx].emplace(edge); + + for (auto shared_edge = edge->getSharedEdge(std::nothrow); shared_edge != last_shared_edge; + shared_edge = shared_edge->getSharedEdge(std::nothrow)) { + edge_cluster_indices.emplace(shared_edge, cluster_idx); + edge_clusters[cluster_idx].emplace(shared_edge); + } + } + + return edge_clusters; +} + +MemoryControl& NetworkMemoryControl::createMemoryControlUnit(std::vector syncInds) { + m_controlUnits.emplace_back(std::unique_ptr(new MemoryControl(syncInds))); + return *(m_controlUnits.back()); +} + +void NetworkMemoryControl::allocateMemory() { + for (auto&& item : m_controlUnits) { + item->allocateMemory(); + } +} + +void NetworkMemoryControl::releaseMemory() { + for (auto&& item : m_controlUnits) { + item->releaseMemory(); + } +} + +} // namespace intel_cpu +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/memory_control.hpp b/src/plugins/intel_cpu/src/memory_control.hpp new file mode 100644 index 00000000000000..ce4dc90890f3fa --- /dev/null +++ b/src/plugins/intel_cpu/src/memory_control.hpp @@ -0,0 +1,72 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "edge.h" + +namespace ov { +namespace intel_cpu { + +using edgeCluster = std::unordered_set; +using edgeClusters = std::vector; + +struct MemoryRegion { + int start; // Execution order index of first use. + int finish; // Execution order index of last use. 
-1 means inf + int64_t size; // size in bytes + int64_t id; // ID unique for each region + + enum class RegionType : uint8_t { VARIABLE, CONSTANT, INPUT, OUTPUT, IO } type; + enum class AllocType : uint8_t { POD, STRING, UNKNOWN } alloc_type; +}; + +class MemoryControl { +public: + class RegionHandler; + + using RegionHandlerPtr = std::shared_ptr; + using MemoryBlockMap = std::unordered_map; + +public: + static edgeClusters findEdgeClusters(const std::vector& graphEdges); + + MemoryBlockMap insert(const std::vector& regions); + + bool allocated() const { + return m_allocated; + } + + void allocateMemory(); + void releaseMemory(); + +private: + explicit MemoryControl(std::vector syncInds); + void insert(const MemoryRegion& region); + + friend class NetworkMemoryControl; + +private: + std::vector m_syncInds; + std::vector m_handlers; + bool m_allocated = false; +}; + +class NetworkMemoryControl { +public: + NetworkMemoryControl() = default; + MemoryControl& createMemoryControlUnit(std::vector syncInds); + + void allocateMemory(); + void releaseMemory(); + +private: + using value_type = std::unique_ptr; + +private: + std::vector m_controlUnits; +}; + +} // namespace intel_cpu +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp b/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp index cb458b09c06eae..d1c50d0048c57d 100644 --- a/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp +++ b/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp @@ -121,9 +121,26 @@ size_t CpuBlockedMemoryDesc::getCurrentMemSizeImp() const { e_size += (getBlockDims()[j] - 1) * getStrides()[j]; } - e_size *= getPrecision() == ov::element::u1 ? 1 : getPrecision().size(); + const auto prc = getPrecision(); - return e_size; + if (prc == ov::element::u1) { + return e_size; + } + + auto byte_size = e_size * prc.bitwidth(); + + if (one_of(prc, ov::element::u3, ov::element::u6)) { + constexpr size_t storage_unit_size = 24; + byte_size += storage_unit_size - 1; + byte_size /= storage_unit_size; + byte_size *= 3; + } else { + constexpr size_t storage_unit_size = 8; + byte_size += storage_unit_size - 1; + byte_size /= storage_unit_size; + } + + return byte_size; } size_t CpuBlockedMemoryDesc::getMaxMemSize() const { diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 3ea8584884c449..31c4a0d2a5b54d 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -365,9 +365,9 @@ void Node::resolveInPlaceEdges(Edge::LOOK look) { " Could not find an allocated edge to resolve in-place for node: ", getName()); - auto baseMemMngr = (*itr)->getMemory().getMemoryMngr(); - auto memMngr = std::make_shared(baseMemMngr); - auto newMem = std::make_shared(getEngine(), selected_pd->getConfig().inConfs[i].getMemDesc(), memMngr); + auto baseMemBlock = (*itr)->getMemory().getMemoryBlock(); + auto memBlock = std::make_shared(baseMemBlock); + auto newMem = std::make_shared(getEngine(), selected_pd->getConfig().inConfs[i].getMemDesc(), memBlock); parentEdge->reuse(newMem); } } @@ -378,15 +378,15 @@ void Node::resolveInPlaceEdges(Edge::LOOK look) { if (inplaceInpIndx < 0) continue; - auto baseMemMngr = getParentEdgeAt(inplaceInpIndx)->getMemory().getMemoryMngr(); - auto memMngr = std::make_shared(baseMemMngr); + auto baseMemBlock = getParentEdgeAt(inplaceInpIndx)->getMemory().getMemoryBlock(); + auto memBlock = std::make_shared(baseMemBlock); const auto& childEdges = 
getChildEdgesAtPort(i); for (auto& childEdge : childEdges) { OPENVINO_ASSERT(childEdge->getStatus() == Edge::Status::NotAllocated, " Unexpected inplace resolve call to an allocated edge: ", childEdge->name()); - auto newMem = std::make_shared(getEngine(), selected_pd->getConfig().outConfs[i].getMemDesc(), memMngr); + auto newMem = std::make_shared(getEngine(), selected_pd->getConfig().outConfs[i].getMemDesc(), memBlock); childEdge->reuse(newMem); } } @@ -545,6 +545,17 @@ std::vector Node::getAvailableFormatsForDims(const Shape &di return {memory::format_tag::any}; } +static void fetchRawMemory(const MemoryPtr& mem) { + // TODO: conceptually fetchRawMemory is a very bad solution + if (mem->getDesc().getPrecision() == element::string) { + return; + } + auto block = mem->getMemoryBlock(); + if (mem->isDefined()) { + block->resize(mem->getSize()); + } +} + void Node::updateShapes() { OPENVINO_ASSERT(isDynamicNode(), "Node::updateShapes() is called to a static shape node of type: ", @@ -557,6 +568,26 @@ void Node::updateShapes() { if (ShapeInferStatus::success == result.status) { redefineOutputMemory(result.dims); } + } else { + //guard check for internal dynamic nodes to avoid possible overestimation of the required memory size + if (shapeInference && FULL_PORT_MASK == shapeInference->get_port_mask()) + return; + + for (auto&& edge : getChildEdges()) { + auto edge_ptr = edge.lock(); + CPU_NODE_ASSERT(edge_ptr, " has null edge"); + if (edge_ptr->inPlace(Edge::LOOK_UP)) { + continue; + } + + auto mem = edge_ptr->getMemoryPtr(); + CPU_NODE_ASSERT(mem, " has null output memory"); + + if (mem->getShape().hasZeroDims()) { + continue; + } + fetchRawMemory(mem); + } } } catch (const std::exception& exp) { THROW_CPU_NODE_ERR(exp.what()); @@ -638,6 +669,9 @@ void Node::redefineOutputMemory(const size_t port, const VectorDims& new_output_ const auto& curr_desc = edges[0]->getMemory().getDesc(); if (curr_desc.getShape().isStatic() && curr_desc.getShape().getStaticDims() == new_shape) { + for (auto&& edge : edges) { + fetchRawMemory(edge->getMemoryPtr()); + } return; } @@ -1508,7 +1542,7 @@ bool Node::isInputTensorAtPortEmpty(size_t port) const { auto edge = getParentEdgeAt(port); if (one_of(edge->getStatus(), Edge::Status::Allocated, Edge::Status::Validated)) { auto&& mem = edge->getMemory(); - if (mem.isAllocated()) { + if (mem.isDefined()) { return mem.getShape().hasZeroDims(); } } @@ -1523,7 +1557,7 @@ bool Node::isOutputTensorAtPortEmpty(size_t port) const { return outputShapes[port].hasZeroDims(); } auto&& mem = getChildEdgeAt(port)->getMemory(); - if (mem.isAllocated()) { + if (mem.isDefined()) { return mem.getShape().hasZeroDims(); } return false; diff --git a/src/plugins/intel_cpu/src/nodes/bucketize.cpp b/src/plugins/intel_cpu/src/nodes/bucketize.cpp index 4d91bdbb8fac1d..a71255c0d531e4 100644 --- a/src/plugins/intel_cpu/src/nodes/bucketize.cpp +++ b/src/plugins/intel_cpu/src/nodes/bucketize.cpp @@ -189,12 +189,12 @@ void Bucketize::prepareParams() { auto inputTensorMemPtr = getSrcMemoryAtPort(INPUT_TENSOR_PORT); auto inputBinsMemPtr = getSrcMemoryAtPort(INPUT_BINS_PORT); auto dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory didn't allocate."); - if (!inputTensorMemPtr || !inputTensorMemPtr->isAllocated()) - OPENVINO_THROW("Input tensor didn't allocate."); - if (!inputBinsMemPtr || !inputBinsMemPtr->isAllocated()) - OPENVINO_THROW("Input bins didn't allocate."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + 
OPENVINO_THROW("Destination memory is undefined."); + if (!inputTensorMemPtr || !inputTensorMemPtr->isDefined()) + OPENVINO_THROW("Input tensor is undefined."); + if (!inputBinsMemPtr || !inputBinsMemPtr->isDefined()) + OPENVINO_THROW("Input bins is undefined."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW("Preferable primitive descriptor is not set."); diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index f02863b6f707aa..6ca87ab31e6b37 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -328,8 +328,8 @@ void Concat::prepareParams() { return; const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory didn't allocate."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination memory is undefined."); auto dstMemDesc = dstMemPtr->getDescWithType(); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW("Preferable primitive descriptor is not set."); @@ -375,9 +375,9 @@ void Concat::prepareParams() { nelemTotal = 0; for (size_t i = 0; i < getParentEdges().size(); i++) { const auto& srcMemPtr = getSrcMemoryAtPort(i); - if (!srcMemPtr || !srcMemPtr->isAllocated()) { + if (!srcMemPtr || !srcMemPtr->isDefined()) { auto parent = getParentEdgeAt(i)->getParent(); - OPENVINO_THROW("Source memory from ", parent->getName(), " didn't allocate for node ", getName(), "."); + OPENVINO_THROW("Source memory from ", parent->getName(), " is undefined for node ", getName(), "."); } if (canExecRef) { @@ -694,8 +694,8 @@ void Concat::resolveInPlaceEdges(Edge::LOOK look) { auto itr = std::find_if(edges.begin(), edges.end(), [](const EdgePtr& edge) { return edge->getStatus() == Edge::Status::Allocated; }); OPENVINO_ASSERT(itr != edges.end(), " Could not find allocated child edge for concat node: " , getName()); - auto baseMemMngr = (*itr)->getMemory().getMemoryMngr(); - OPENVINO_ASSERT(baseMemMngr != nullptr, " NULL base memory manager in concat node: " , getName()); + auto baseMemBlock = (*itr)->getMemory().getMemoryBlock(); + OPENVINO_ASSERT(baseMemBlock != nullptr, " NULL base memory block in concat node: ", getName()); ptrdiff_t offset = 0; for (size_t i = 0; i < numberOfInputs; ++i) { @@ -714,8 +714,8 @@ void Concat::resolveInPlaceEdges(Edge::LOOK look) { auto memDesc = selected_pd->getConfig().inConfs[i].getMemDesc(); MemoryPtr newMem; if (partDim != 0) { - auto memMngr = std::make_shared(baseMemMngr, baseDim, offset, partDim); - newMem = std::make_shared(getEngine(), memDesc, memMngr); + auto memBlock = std::make_shared(baseMemBlock, baseDim, offset, partDim); + newMem = std::make_shared(getEngine(), memDesc, memBlock); } else { // empty tensor, no need to reference a part, default memory is enough newMem = std::make_shared(getEngine(), memDesc); diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 2422e2d3bb041c..cbdb35db271622 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -1233,17 +1233,17 @@ void Convolution::prepareParams() { auto srcMemPtr = getSrcMemoryAtPort(0); auto wghMemPtr = getSrcMemoryAtPort(1); auto dstMemPtr = getOutputMemory(); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory was not allocated."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW("Input memory was not allocated."); - if (!wghMemPtr || 
!wghMemPtr->isAllocated()) - OPENVINO_THROW("Weight memory was not allocated."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination memory is undefined."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW("Input memory is undefined."); + if (!wghMemPtr || !wghMemPtr->isDefined()) + OPENVINO_THROW("Weight memory is undefined."); MemoryPtr biasMemPtr = nullptr; if (withBiases) { biasMemPtr = getSrcMemoryAtPort(2); - if (!biasMemPtr || !biasMemPtr->isAllocated()) - OPENVINO_THROW("Input memory didn't allocate."); + if (!biasMemPtr || !biasMemPtr->isDefined()) + OPENVINO_THROW("Bias memory is undefined."); } const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); @@ -1531,7 +1531,7 @@ void Convolution::executeDynamicImpl(dnnl::stream strm) { const auto& sumInpMem = getParentEdgeAt(sumPortNum)->getMemory(); auto inp1 = subgraph->getInput(1); auto inp1Mem = inp1->getDstMemoryAtPort(0); - inp1Mem->getMemoryMngr()->setExtBuff(sumInpMem.getData(), sumInpMem.getSize()); + inp1Mem->getMemoryBlock()->setExtBuff(sumInpMem.getData(), sumInpMem.getSize()); subgraph->infer(); diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp index d3f1ae0ba691a5..57046a0a06d55b 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.cpp +++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp @@ -270,9 +270,9 @@ void Deconvolution::createDnnlCompatibleWeights() { Shape(dnnlCompatibleWeiDims), blockedDims, order); - // Create the memory with the edge memory mgr. In the case of the weight memory changes when inference, + // Create the memory with the edge memory block. In case the weight memory changes during inference, // dnnlCompatibleWeights memory would be updated automatically via update inform mechanism. - dnnlCompatibleWeights = std::make_shared(getEngine(), desc, blob->getMemoryMngr()); + dnnlCompatibleWeights = std::make_shared(getEngine(), desc, blob->getMemoryBlock()); } bool Deconvolution::canBeExecutedInInt8() const { @@ -812,12 +812,12 @@ void Deconvolution::prepareParams() { auto srcMemPtr = getSrcMemoryAtPort(0); auto wghMemPtr = getSrcMemoryAtPort(1); auto dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory has not been allocated."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW("Input memory has not been allocated."); - if (!wghMemPtr || !wghMemPtr->isAllocated()) - OPENVINO_THROW("Weight memory has not been allocated."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination memory is undefined."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW("Input memory is undefined."); + if (!wghMemPtr || !wghMemPtr->isDefined()) + OPENVINO_THROW("Weight memory is undefined."); auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) OPENVINO_THROW("Preferable primitive descriptor is not set for node ", getName(), "."); @@ -869,8 +869,8 @@ void Deconvolution::prepareParams() { if (withBiases) { biasMemPtr = getSrcMemoryAtPort(biasPort); - if (!biasMemPtr || !biasMemPtr->isAllocated()) - OPENVINO_THROW("Bias memory memory didn't allocate."); + if (!biasMemPtr || !biasMemPtr->isDefined()) + OPENVINO_THROW("Bias memory is undefined."); biasDesc = biasMemPtr->getDescWithType(); } bool is1x1PaddingAsymmetric = false; @@ -1094,8 +1094,8 @@ std::vector Deconvolution::readOutputSpatialDims() const { OPENVINO_THROW("Can't get output spatial dims.
Inputs number = ", getParentEdges().size()); } const auto &shapeMemPtr = getSrcMemoryAtPort(2); - if (!shapeMemPtr || !shapeMemPtr->isAllocated()) { - OPENVINO_THROW("'output_shape' input memory is not allocated."); + if (!shapeMemPtr || !shapeMemPtr->isDefined()) { + OPENVINO_THROW("'output_shape' input memory is undefined."); } const auto spDimsNum = getInputShapeAtPort(0).getRank() - 2; if (shapeMemPtr->getStaticDims()[0] != spDimsNum) { diff --git a/src/plugins/intel_cpu/src/nodes/def_conv.cpp b/src/plugins/intel_cpu/src/nodes/def_conv.cpp index 97fe2a2b2da08b..eb56902c653e99 100644 --- a/src/plugins/intel_cpu/src/nodes/def_conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/def_conv.cpp @@ -1184,19 +1184,19 @@ void DeformableConvolution::prepareParams() { auto offMemPtr = getSrcMemoryAtPort(OFF_ID); auto weiMemPtr = getSrcMemoryAtPort(WEI_ID); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate destination memory"); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate input memory"); - if (!offMemPtr || !offMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate offsets shape memory"); - if (!weiMemPtr || !weiMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate weights memory"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined destination memory"); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory"); + if (!offMemPtr || !offMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined offsets shape memory"); + if (!weiMemPtr || !weiMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined weights memory"); if (getOriginalInputsNumber() > 3) { auto modMemPtr = getSrcMemoryAtPort(MOD_ID); - if (!modMemPtr || !modMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate modulations memory"); + if (!modMemPtr || !modMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined modulations memory"); } auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); diff --git a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp index bc374a236c78fd..15d521a423c7e7 100644 --- a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp +++ b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp @@ -162,10 +162,10 @@ void DepthToSpace::initSupportedPrimitiveDescriptors() { void DepthToSpace::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_ERROR("has not allocated destination memory"); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - THROW_ERROR("has not allocated input memory"); + if (!dstMemPtr) + THROW_ERROR("has null destination memory"); + if (!srcMemPtr) + THROW_ERROR("has null input memory"); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR("has unidentified preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp index 95ba35cb8f3c47..266e78b3d46c77 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp @@ -99,7 +99,7 @@ class DnnlFCExecutor : public Executor { resetSrcMemoryDataHandle = true; // create 2D memory without underlying buffer and 
reset to the actual memory in scope of 'execute' call m_primArgs[DNNL_ARG_SRC] = - dnnl::memory(primMemDesc->getDnnlDesc(), m_context->getEngine(), memory->getData()); + dnnl::memory(primMemDesc->getDnnlDesc(), m_context->getEngine(), DNNL_MEMORY_NONE); } } @@ -111,7 +111,7 @@ class DnnlFCExecutor : public Executor { resetDstMemoryDataHandle = true; // create 2D memory without underlying buffer and reset to the actual memory in scope of 'execute' call m_primArgs[DNNL_ARG_DST] = - dnnl::memory(primMemDesc->getDnnlDesc(), m_context->getEngine(), memory->getData()); + dnnl::memory(primMemDesc->getDnnlDesc(), m_context->getEngine(), DNNL_MEMORY_NONE); } } diff --git a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp index 185ff6bfd216d5..7c3dda49931451 100644 --- a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp +++ b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp @@ -372,10 +372,10 @@ ExtractImagePatches::ExtractImagePatches(const std::shared_ptr& op, co void ExtractImagePatches::prepareParams() { const auto& srcMemPtr0 = getSrcMemoryAtPort(0); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr0 || !srcMemPtr0->isAllocated()) - OPENVINO_THROW("Input memory has not been allocated."); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory has not been allocated."); + if (!srcMemPtr0 || !srcMemPtr0->isDefined()) + OPENVINO_THROW("Input memory is undefined."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination memory is undefined."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW("Preferable primitive descriptor is not set."); diff --git a/src/plugins/intel_cpu/src/nodes/eye.cpp b/src/plugins/intel_cpu/src/nodes/eye.cpp index f32f67279b75d0..f1e78b04510914 100644 --- a/src/plugins/intel_cpu/src/nodes/eye.cpp +++ b/src/plugins/intel_cpu/src/nodes/eye.cpp @@ -104,8 +104,8 @@ void Eye::executeSpecified() { const size_t colNum = getColNum(); const int64_t shift = getDiagIndex(); auto outPtr = getDstMemoryAtPort(0); - if (!outPtr || !outPtr ->isAllocated()) - THROW_ERROR(errorPrefix, "Destination memory didn't allocate."); + if (!outPtr || !outPtr ->isDefined()) + THROW_ERROR(errorPrefix, "Destination memory is undefined."); T *dst = outPtr->getDataAs(); const size_t batchVolume = getBatchVolume(getBatchShape()); diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp index 434a32073aca4f..94debfba1901ab 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather.cpp @@ -338,11 +338,11 @@ bool Gather::needPrepareParams() const { void Gather::prepareParams() { auto dataMemPtr = getSrcMemoryAtPort(GATHER_DATA); - if (!dataMemPtr || !dataMemPtr->isAllocated()) - THROW_ERROR(" has not allocated input data memory."); + if (!dataMemPtr || !dataMemPtr->isDefined()) + THROW_ERROR(" has undefined input data memory."); auto idxMemPtr = getSrcMemoryAtPort(GATHER_INDICES); - if (!idxMemPtr || !idxMemPtr->isAllocated()) - THROW_ERROR(" has not allocated input indices memory."); + if (!idxMemPtr || !idxMemPtr->isDefined()) + THROW_ERROR(" has undefined input indices memory."); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR(" has unidentified preferable primitive descriptor."); @@ -945,7 +945,7 @@ void Gather::resolveInPlaceEdges(Edge::LOOK look) { "Gather node: ", getName(), " can not use inPlace memory with splitting on dynamic dimention"); - 
auto baseMemMngr = getParentEdgeAt(inplaceInpIndx)->getMemory().getMemoryMngr(); + auto baseMemBlock = getParentEdgeAt(inplaceInpIndx)->getMemory().getMemoryBlock(); const auto index = constIndices.front(); const ptrdiff_t offset = index < 0 ? baseDim + index : index; const auto& childEdges = getChildEdgesAtPort(outputPort); @@ -956,8 +956,8 @@ void Gather::resolveInPlaceEdges(Edge::LOOK look) { " with type ", getTypeStr()); - auto memMngr = std::make_shared(baseMemMngr, baseDim, offset); - auto newMem = std::make_shared(getEngine(), config.outConfs[outputPort].getMemDesc(), memMngr); + auto memBlock = std::make_shared(baseMemBlock, baseDim, offset); + auto newMem = std::make_shared(getEngine(), config.outConfs[outputPort].getMemDesc(), memBlock); childEdge->reuse(newMem); } diff --git a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp index 86723215a35ad3..2dc91dd12559f2 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp @@ -84,12 +84,12 @@ void GatherND::prepareParams() { auto srcMemPtr = getSrcMemoryAtPort(GATHERND_DATA); auto idxMemPtr = getSrcMemoryAtPort(GATHERND_INDEXES); auto dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - THROW_ERROR(" has not allocated input memory of 'data'."); - if (!idxMemPtr || !idxMemPtr->isAllocated()) - THROW_ERROR(" has not allocated input memory of 'indices'."); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_ERROR(" has not allocated output memory."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + THROW_ERROR(" has undefined input memory of 'data'."); + if (!idxMemPtr || !idxMemPtr->isDefined()) + THROW_ERROR(" has undefined input memory of 'indices'."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + THROW_ERROR(" has undefined output memory."); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR(" has unidentified preferable primitive descriptor."); diff --git a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp index 8a14220b165f69..f318290defbf82 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp @@ -98,14 +98,14 @@ void GatherTree::prepareParams() { const auto& maxSeqLenMemPtr = getSrcMemoryAtPort(GATHER_TREE_MAX_SEQ_LEN); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!stepIdxMemPtr || !stepIdxMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated input memory of 'step_ids'."); - if (!parentIdxMemPtr || !parentIdxMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated input memory of 'parent_ids'."); - if (!maxSeqLenMemPtr || !maxSeqLenMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated input memory of 'max_seq_len'."); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated output memory."); + if (!stepIdxMemPtr || !stepIdxMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory of 'step_ids'."); + if (!parentIdxMemPtr || !parentIdxMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory of 'parent_ids'."); + if (!maxSeqLenMemPtr || !maxSeqLenMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory of 'max_seq_len'."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined output memory."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW(errorPrefix, " has 
unidentified preferable primitive descriptor."); diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp index eb143a1ad55199..618d6b39105689 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp @@ -182,14 +182,14 @@ void GridSample::createPrimitive() { void GridSample::prepareParams() { auto dataMemPtr = getSrcMemoryAtPort(IN_DATA); - if (!dataMemPtr || !dataMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has not allocated input data memory."); + if (!dataMemPtr || !dataMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined input data memory."); auto gridMemPtr = getSrcMemoryAtPort(IN_GRID); - if (!gridMemPtr || !gridMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has not allocated input grid memory."); + if (!gridMemPtr || !gridMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined input grid memory."); auto dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has not allocated output memory."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined output memory."); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_CPU_NODE_ERR("has unidentified preferable primitive descriptor."); diff --git a/src/plugins/intel_cpu/src/nodes/grn.cpp b/src/plugins/intel_cpu/src/nodes/grn.cpp index 83e554acf8b255..f20e7d6b90a012 100644 --- a/src/plugins/intel_cpu/src/nodes/grn.cpp +++ b/src/plugins/intel_cpu/src/nodes/grn.cpp @@ -61,10 +61,10 @@ void GRN::prepareParams() { const auto& dataMemPtr = getSrcMemoryAtPort(0); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!dataMemPtr || !dataMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated input memory"); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated output memory"); + if (!dataMemPtr || !dataMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined output memory"); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW(errorPrefix, " has unidentified preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index 424cb7784fd8d5..ea659ec1e31b84 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -451,14 +451,14 @@ void Input::initSupportedPrimitiveDescriptors() { void Input::createPrimitive() { for (size_t i = 0; i < getChildEdges().size(); i++) { auto dstMemPtr = getDstMemoryAtPort(i); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has unallocated memory object at port ", i, + if (!dstMemPtr) + THROW_CPU_NODE_ERR("has null memory object at port ", i, " to node ", getChildEdgeAt(i)->getChild()->getName(), "."); } for (size_t i = 0; i < getParentEdges().size(); i++) { auto srcMemPtr = getSrcMemoryAtPort(i); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has unallocated memory object at port ", i, + if (!srcMemPtr) + THROW_CPU_NODE_ERR("has null memory object at port ", i, " from node ", getParentEdgeAt(i)->getParent()->getName(), "."); } diff --git a/src/plugins/intel_cpu/src/nodes/interpolate.cpp b/src/plugins/intel_cpu/src/nodes/interpolate.cpp index a04834fd55f1ae..e61fccd3328639 100644 --- a/src/plugins/intel_cpu/src/nodes/interpolate.cpp +++ b/src/plugins/intel_cpu/src/nodes/interpolate.cpp 
@@ -2261,27 +2261,27 @@ void Interpolate::prepareParams() { } auto dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate destination memory"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined destination memory"); auto srcMemPtr = getSrcMemoryAtPort(DATA_ID); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate input memory"); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory"); if (interpAttrs.shapeCalcMode == InterpolateShapeCalcMode::sizes) { auto tsMemPtr = getSrcMemoryAtPort(TARGET_SHAPE_ID); - if (!tsMemPtr || !tsMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate target shape memory"); + if (!tsMemPtr || !tsMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined target shape memory"); } else { auto scaleMemPtr = getSrcMemoryAtPort(get_scale_id()); - if (!scaleMemPtr || !scaleMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate scales memory"); + if (!scaleMemPtr || !scaleMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined scales memory"); } if (isAxesSpecified) { auto axesMemPtr = getSrcMemoryAtPort(get_axis_id()); - if (!axesMemPtr || !axesMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate axes memory"); + if (!axesMemPtr || !axesMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined axes memory"); } const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); @@ -2377,10 +2377,10 @@ void Interpolate::prepareParams() { void Interpolate::createPrimitive() { auto srcMemPtr = getSrcMemoryAtPort(DATA_ID); auto dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate input memory"); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate destination memory"); + if (!srcMemPtr) + OPENVINO_THROW(errorPrefix, " has null input memory"); + if (!dstMemPtr) + OPENVINO_THROW(errorPrefix, " has null destination memory"); if (dstMemPtr->getDesc().hasLayoutType(LayoutType::ncsp)) { interpAttrs.layout = InterpolateLayoutType::planar; diff --git a/src/plugins/intel_cpu/src/nodes/lrn.cpp b/src/plugins/intel_cpu/src/nodes/lrn.cpp index 3f8a83d36e36fe..a26b58798b0dbd 100644 --- a/src/plugins/intel_cpu/src/nodes/lrn.cpp +++ b/src/plugins/intel_cpu/src/nodes/lrn.cpp @@ -163,10 +163,10 @@ std::shared_ptr Lrn::getSrcMemDesc(const dnnl::primitive_desc &prim_ void Lrn::prepareParams() { auto srcMemPtr = getSrcMemoryAtPort(0); auto dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " input memory did not allocate"); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, "destination memory did not allocate"); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " input memory is undefined"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, "destination memory is undefined"); const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) diff --git a/src/plugins/intel_cpu/src/nodes/matmul.cpp b/src/plugins/intel_cpu/src/nodes/matmul.cpp index 2841e6f100afb7..50cb3353612996 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.cpp +++ b/src/plugins/intel_cpu/src/nodes/matmul.cpp @@ -538,10 +538,10 @@ void MatMul::prepareParams() { 
auto dstMemPtr = getDstMemoryAtPort(0); auto src0MemPtr = getSrcMemoryAtPort(0); auto src1MemPtr = getSrcMemoryAtPort(1); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate destination memory"); - if (!src0MemPtr || !src0MemPtr->isAllocated() || !src1MemPtr || !src1MemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate input memory"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined destination memory"); + if (!src0MemPtr || !src0MemPtr->isDefined() || !src1MemPtr || !src1MemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory"); const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) @@ -576,8 +576,8 @@ void MatMul::prepareParams() { DnnlMemoryDescPtr dnnlBiasMemDesc = nullptr; if (withBiases) { auto biasMemory = getSrcMemoryAtPort(2); - if (!biasMemory || !biasMemory->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate bias memory"); + if (!biasMemory || !biasMemory->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined bias memory"); dnnlBiasMemDesc = biasMemory->getDescWithType(); } diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp index f372259b783e50..e66b148c6f99ee 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory.cpp @@ -21,11 +21,34 @@ namespace node { namespace { class MemoryStub : public IMemory { public: - MemoryStub(const dnnl::engine& eng, const MemoryDescPtr& pMemDesc) : m_eng(eng), m_pMemDesc(pMemDesc) {} + class MemoryBlockStub : public IMemoryBlockObserver { + void* getRawPtr() const noexcept override { + return nullptr; + } + void setExtBuff(void* ptr, size_t size) override { + // pass + } + bool resize(size_t size) override { + // pass + return false; + } + bool hasExtBuffer() const noexcept override { + // pass + return false; + } + void registerMemory(Memory* memPtr) override { + // pass + } + void unregisterMemory(Memory* memPtr) override { + // pass + } + }; - bool isAllocated() const noexcept override { - return true; - } +public: + MemoryStub(const dnnl::engine& eng, const MemoryDescPtr& pMemDesc) + : m_eng(eng), + m_pMemDesc(pMemDesc), + m_pMemoryBlock(std::make_shared()) {} const MemoryDesc& getDesc() const override { return *m_pMemDesc; @@ -59,8 +82,8 @@ class MemoryStub : public IMemory { OPENVINO_THROW("Unexpected call MemoryStub::load()"); } - MemoryMngrPtr getMemoryMngr() const override { - OPENVINO_THROW("Unexpected call MemoryStub::getMemoryMngr()"); + MemoryBlockPtr getMemoryBlock() const override { + return m_pMemoryBlock; } dnnl::memory getPrimitive() const override { @@ -74,6 +97,7 @@ class MemoryStub : public IMemory { private: dnnl::engine m_eng; MemoryDescPtr m_pMemDesc; + std::shared_ptr m_pMemoryBlock; }; } // namespace @@ -233,8 +257,8 @@ void MemoryOutput::resolveInPlaceEdges(Edge::LOOK look) { " Unexpected inplace resolve call to an allocated edge: ", parentEdge->name()); auto memDesc = selected_pd->getConfig().inConfs.front().getMemDesc(); - memMngr = std::make_shared(); - auto edgeMem = std::make_shared(getEngine(), memDesc, memMngr); + memBlock = std::make_shared(); + auto edgeMem = std::make_shared(getEngine(), memDesc, memBlock); parentEdge->reuse(edgeMem); } @@ -251,13 +275,13 @@ void MemoryOutput::assignExtMemory(const MemoryPtr& mem, const MemoryDescPtr& me getName(), " assigned state has null base mem desc ptr"); - if (!memMngr) { return; } //nothing to do, 
edge memory isn't under control + if (!memBlock) { return; } //nothing to do, edge memory isn't under control auto inpDesc = getBaseMemDescAtInputPort(0); if (inpDesc->isCompatible(*extMemDesc)) { - memMngr->setMemMngrResize(assignedMem->getMemoryMngr()); + memBlock->setMemBlockResize(assignedMem->getMemoryBlock()); } else { - memMngr->reset(); + memBlock->reset(); } } @@ -569,20 +593,20 @@ void MemoryInput::runDynamic(dnnl::stream strm) { getName(), " assigned state has null memory ptr"); - // check whether we can share memory manager + // check whether we can share memory block const auto& stateDims = assignedMem->getStaticDims(); const bool hasZeroDims = std::count(std::begin(stateDims), std::end(stateDims), 0) > 0; auto internDesc = getBaseMemDescAtOutputPort(0)->cloneWithNewDims(stateDims, hasZeroDims); - OPENVINO_ASSERT(memMngr, + OPENVINO_ASSERT(memBlock, "MemoryInput ", getName(), - " has uninitialized memory manager."); + " has uninitialized memory block."); if (internDesc->isCompatible(assignedMem->getDesc())) { - memMngr->setMemMngr(assignedMem->getMemoryMngr()); + memBlock->setMemBlock(assignedMem->getMemoryBlock()); } else { - memMngr->reset(); + memBlock->reset(); } const bool processInitGraph = needInitGraphProcessing(); @@ -619,15 +643,15 @@ void MemoryInput::runStatic(dnnl::stream strm) { auto internDesc = getBaseMemDescAtOutputPort(0); - OPENVINO_ASSERT(memMngr, + OPENVINO_ASSERT(memBlock, "MemoryInput ", getName(), - " has uninitialized memory manager."); + " has uninitialized memory block."); if (internDesc->isCompatible(assignedMem->getDesc())) { - memMngr->setMemMngr(assignedMem->getMemoryMngr()); + memBlock->setMemBlock(assignedMem->getMemoryBlock()); } else { - memMngr->reset(); + memBlock->reset(); } const auto processInitGraph = needInitGraphProcessing(); @@ -653,13 +677,13 @@ void MemoryInput::resolveInPlaceEdges(Edge::LOOK look) { " failed getSelectedPrimitiveDescriptor() call, preferable primitive descriptor is not set"); auto memDesc = selected_pd->getConfig().outConfs.front().getMemDesc(); - memMngr = std::make_shared(); + memBlock = std::make_shared(); for (auto&& edge : getChildEdgesAtPort(0)) { // always only one child port OPENVINO_ASSERT(one_of(edge->getStatus(), Edge::Status::Uninitialized, Edge::Status::NotAllocated), " Unexpected inplace resolve call to an allocated edge: ", edge->name()); - auto edgeMem = std::make_shared(getEngine(), memDesc, memMngr); + auto edgeMem = std::make_shared(getEngine(), memDesc, memBlock); edge->reuse(edgeMem); } } diff --git a/src/plugins/intel_cpu/src/nodes/memory.hpp b/src/plugins/intel_cpu/src/nodes/memory.hpp index f804259b431402..88b6a3d1250f0f 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.hpp +++ b/src/plugins/intel_cpu/src/nodes/memory.hpp @@ -103,7 +103,7 @@ class MemoryOutput : public MemoryOutputBase { private: MemoryPtr assignedMem = nullptr; MemoryDescPtr extMemDesc = nullptr; // used for resize - ProxyMemoryMngrPtr memMngr = nullptr; + ProxyMemoryBlockPtr memBlock = nullptr; }; class MemoryOutputStub : public MemoryOutputBase { @@ -186,7 +186,7 @@ class MemoryInput : public MemoryInputBase { bool needInitGraphProcessing() const; private: - ProxyMemoryMngrPtr memMngr = nullptr; + ProxyMemoryBlockPtr memBlock = nullptr; }; class MemoryInputSDPA : public MemoryInputBase { diff --git a/src/plugins/intel_cpu/src/nodes/multinomial.cpp b/src/plugins/intel_cpu/src/nodes/multinomial.cpp index 38413c145f1cd9..24958b4e2b980d 100644 --- a/src/plugins/intel_cpu/src/nodes/multinomial.cpp +++ 
b/src/plugins/intel_cpu/src/nodes/multinomial.cpp @@ -77,6 +77,14 @@ bool Multinomial::needPrepareParams() const { return true; } +void Multinomial::createPrimitive() { + if (!m_const_inputs[NUM_SAMPLES_PORT]) { + CPU_NODE_ASSERT(isDynamicNode(), "is static while the samples input is a variable"); + return; // avoid reading non initialized data from the NUM_SAMPLES_PORT input + } + Node::createPrimitive(); +} + void Multinomial::prepareParams() { const auto& probs_shape = getParentEdgeAt(PROBS_PORT)->getMemory().getStaticDims(); const auto& num_samples_shape = getParentEdgeAt(NUM_SAMPLES_PORT)->getMemory().getStaticDims(); diff --git a/src/plugins/intel_cpu/src/nodes/multinomial.hpp b/src/plugins/intel_cpu/src/nodes/multinomial.hpp index d4e1562a34add2..611b70503f5dba 100644 --- a/src/plugins/intel_cpu/src/nodes/multinomial.hpp +++ b/src/plugins/intel_cpu/src/nodes/multinomial.hpp @@ -28,6 +28,8 @@ class Multinomial : public Node { bool needPrepareParams() const override; void prepareParams() override; + void createPrimitive() override; + bool isExecutable() const override; void execute(dnnl::stream strm) override; void executeDynamicImpl(dnnl::stream strm) override; diff --git a/src/plugins/intel_cpu/src/nodes/mvn.cpp b/src/plugins/intel_cpu/src/nodes/mvn.cpp index e8d99c7947fcfc..cc6054a6e7717a 100644 --- a/src/plugins/intel_cpu/src/nodes/mvn.cpp +++ b/src/plugins/intel_cpu/src/nodes/mvn.cpp @@ -2017,10 +2017,10 @@ void MVN::MVNRefExecutor::exec(const uint8_t *src_data, uint8_t *dst_data, const void MVN::prepareParams() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory didn't allocate."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW("Input memory didn't allocate."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination memory is undefined."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW("Input memory is undefined."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW("Preferable primitive descriptor is not set."); diff --git a/src/plugins/intel_cpu/src/nodes/normalize.cpp b/src/plugins/intel_cpu/src/nodes/normalize.cpp index 0f61e7c717d4bc..ca52e572b73ea8 100644 --- a/src/plugins/intel_cpu/src/nodes/normalize.cpp +++ b/src/plugins/intel_cpu/src/nodes/normalize.cpp @@ -884,9 +884,9 @@ void NormalizeL2::setPostOps(dnnl::primitive_attr& kernel_attrs, const VectorDim void NormalizeL2::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(DATA); auto srcMemPtr = getSrcMemoryAtPort(DATA); - if (!dstMemPtr || !dstMemPtr->isAllocated()) + if (!dstMemPtr) THROW_ERROR("can't get destination memory"); - if (!srcMemPtr || !srcMemPtr->isAllocated()) + if (!srcMemPtr) THROW_ERROR("can't get input memory"); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR("has nullable preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/pad.cpp b/src/plugins/intel_cpu/src/nodes/pad.cpp index dbc31f0f112738..10cdb2a19b771f 100644 --- a/src/plugins/intel_cpu/src/nodes/pad.cpp +++ b/src/plugins/intel_cpu/src/nodes/pad.cpp @@ -183,7 +183,20 @@ void Pad::createPrimitive() { dstMemory.push_back(getDstMemoryAtPort(0)); } if (inputShapesDefined() && isExecutable() && !shapeHasDataDependency) { + // WA to prevent reading uninitialized data in case of the pad value is a parameter + MemoryCPtr padValue = srcMemory.size() > PAD_VALUE_ID ? 
srcMemory[PAD_VALUE_ID] : nullptr; + if (padValue && !getParentEdgeAt(PAD_VALUE_ID)->getParent()->isConstant()) { + // set artificial zero memory just to avoid reading garbage from the uninitialized input + auto tmpPadValue = std::make_shared(getEngine(), padValue->getDescPtr()); + tmpPadValue->nullify(); + srcMemory[PAD_VALUE_ID] = tmpPadValue; + } prepareParams(); + if (padValue) { + // restore original memory object + srcMemory[PAD_VALUE_ID] = padValue; + } + updateLastInputDims(); } } @@ -217,10 +230,10 @@ void Pad::PadExecutor::paramsInitialization(const PadAttrs& attrs, params.attrs = attrs; auto& srcMemPtr = srcMemory[DATA_ID]; auto& dstMemPtr = dstMemory[DATA_ID]; - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, "has not allocated source memory."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, "has not allocated destination memory."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, "has undefined destination memory."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, "has undefined source memory."); const auto srcBlockMemDesc = srcMemPtr->getDescWithType(); const auto dstBlockMemDesc = dstMemPtr->getDescWithType(); const auto& srcDims = srcBlockMemDesc->getBlockDims(); diff --git a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp index 299ba4d15f4b6a..71e5c38f0e0a79 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp @@ -394,10 +394,10 @@ void Pooling::prepareParams() { if (useACL) { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory didn't allocate."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW("Input memory didn't allocate."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination memory is undefined."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW("Input memory is undefined."); std::vector srcMemoryDescs; for (size_t i = 0; i < getOriginalInputsNumber(); i++) { diff --git a/src/plugins/intel_cpu/src/nodes/reduce.cpp b/src/plugins/intel_cpu/src/nodes/reduce.cpp index ca474073dd34f6..b40c50f957514f 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/reduce.cpp @@ -2172,10 +2172,10 @@ void Reduce::createPrimitive() { } auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(REDUCE_DATA); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated destination memory."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocate input memory."); + if (!dstMemPtr) + OPENVINO_THROW(errorPrefix, " has null destination memory."); + if (!srcMemPtr) + OPENVINO_THROW(errorPrefix, " has null input memory."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW(errorPrefix, " has nullable preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp index e1015783356f69..9b521cdb3b57c7 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp @@ -184,10 +184,10 @@ void Reorder::prepareParams() { auto srcMemPtr = getSrcMemoryAtPort(0); auto dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has 
unallocated destination memory object."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has unallocated input memory object."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined destination memory object."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined input memory object."); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_CPU_NODE_ERR("does not have preferable primitive descriptor."); @@ -243,30 +243,25 @@ void Reorder::prepareParams() { } } if (!canUseNcsp2Nspc && !canUseNspc2Ncsp) { - if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has unallocated destination memory object."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has unallocated input memory object."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined destination memory object."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined input memory object."); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_CPU_NODE_ERR("does not have preferable primitive descriptor."); - createReorderPrimitive(srcMemPtr->getDescWithType()->getDnnlDesc(), srcMemPtr->getData(), - dstMemPtr->getDescWithType()->getDnnlDesc(), dstMemPtr->getData()); + createReorderPrimitive(srcMemPtr->getDescWithType(), + dstMemPtr->getDescWithType()); } } -void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc, - void* srcPtr, - const dnnl::memory::desc& dstDesc, - void* dstPtr) { +void Reorder::createReorderPrimitive(const DnnlMemoryDescPtr& srcDesc, const DnnlMemoryDescPtr& dstDesc) { auto selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) THROW_CPU_NODE_ERR("does not have preferable primitive descriptor."); const auto engine = getEngine(); - src_blocked = std::make_shared(engine, DnnlExtensionUtils::makeDescriptor(srcDesc), srcPtr, false); - dst_blocked = std::make_shared(engine, DnnlExtensionUtils::makeDescriptor(dstDesc), dstPtr, false); - auto src_desc = src_blocked->getPrimitive().get_desc(); + auto src_desc = srcDesc->getDnnlDesc(); if (!src_permutation.empty()) { CPU_NODE_ASSERT(src_permutation.size() == static_cast(src_desc.get_ndims()), "src_permutation size (", @@ -282,7 +277,7 @@ void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc, src_desc = src_desc.permute_axes(src_permutation); } - auto dst_desc = dst_blocked->getPrimitive().get_desc(); + auto dst_desc = dstDesc->getDnnlDesc(); // TODO: We should keep shape consistency for const and expected shape for node. // If it requires reshape operation it should explicitly injected into graph. @@ -295,17 +290,13 @@ void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc, // perform such conversion if the source tensor can be reshaped to the destination rank. This is // useful in situations when rank in IR does not much rank that is required by the oneDNN primitive, // but the input tensor can be reshaped (e.g. weights for grouped convolutions, biases etc.) 
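Editorial note on the rank-adjustment branch above: the comment describes re-expressing a plain (dense, row-major) source tensor with the destination dims before building the reorder. The snippet below is a minimal, self-contained oneDNN sketch of that idea only; it is illustrative, not part of the patch, and the concrete dims are made up.

#include <oneapi/dnnl/dnnl.hpp>

int main() {
    using namespace dnnl;
    engine eng(engine::kind::cpu, 0);
    stream strm(eng);

    // A dense 2D source buffer, e.g. weights stored as {64, 9}.
    memory::desc src2d({64, 9}, memory::data_type::f32, memory::format_tag::ab);
    memory src_mem(src2d, eng);

    // The destination expects rank 4, e.g. grouped-convolution-like weights {64, 1, 3, 3}.
    memory::desc dst4d({64, 1, 3, 3}, memory::data_type::f32, memory::format_tag::abcd);
    memory dst_mem(dst4d, eng);

    // Re-describe the same dense buffer with the destination dims and a plain format tag,
    // then let an ordinary reorder handle the copy/conversion.
    memory::desc src_as_4d({64, 1, 3, 3}, memory::data_type::f32, memory::format_tag::abcd);
    memory src_view(src_as_4d, eng, src_mem.get_data_handle());

    reorder(src_view, dst_mem).execute(strm, src_view, dst_mem);
    strm.wait();
    return 0;
}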
- if (src_blocked->getDesc().hasLayoutType(LayoutType::ncsp) && - src_blocked->getShape().getRank() != dst_blocked->getShape().getRank()) { - const auto newDims = dst_blocked->getStaticDims(); + if (srcDesc->hasLayoutType(LayoutType::ncsp) && srcDesc->getShape().getRank() != dstDesc->getShape().getRank()) { + const auto newDims = dstDesc->getShape().getStaticDims(); const auto newFormat = DnnlExtensionUtils::GetPlainFormatByRank(newDims.size()); - auto newDesc = dnnl::memory::desc(DnnlExtensionUtils::convertToDnnlDims(newDims), - src_blocked->getDataType(), - newFormat); - src_blocked = std::make_shared(getEngine(), DnnlExtensionUtils::makeDescriptor(newDesc), srcPtr, false); - - src_desc = src_blocked->getPrimitive().get_desc(); + src_desc = dnnl::memory::desc(DnnlExtensionUtils::convertToDnnlDims(newDims), + DnnlExtensionUtils::ElementTypeToDataType(srcDesc->getPrecision()), + newFormat); } DEBUG_LOG("CreateReorderPrimitive is called for node", getName(), " src desc: ", src_desc, " dst_desc: ", dst_desc); diff --git a/src/plugins/intel_cpu/src/nodes/reorder.h b/src/plugins/intel_cpu/src/nodes/reorder.h index cb99caa07bdfa6..ab94b60b6a4a18 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.h +++ b/src/plugins/intel_cpu/src/nodes/reorder.h @@ -61,9 +61,6 @@ class Reorder : public Node { std::vector src_permutation; - MemoryPtr dst_blocked; - MemoryPtr src_blocked; - bool isOptimized = false; bool isNspc2NcspCase = false; @@ -73,7 +70,7 @@ class Reorder : public Node { void optimizedNspc2Ncsp(); void optimizedNcsp2Nspc(); - void createReorderPrimitive(const dnnl::memory::desc &srcDesc, void* srcPtr, const dnnl::memory::desc &dstDesc, void* dstPtr); + void createReorderPrimitive(const DnnlMemoryDescPtr& srcDesc, const DnnlMemoryDescPtr& dstDesc); void prepareReorderAsTranspose(MemoryDescPtr parentDesc, MemoryDescPtr childDesc); TransposeExecutorPtr transposeExecutor; diff --git a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp index 8c637a3896fd91..b51eab4bef393e 100644 --- a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp +++ b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp @@ -84,12 +84,12 @@ void ReverseSequence::prepareParams() { const auto& seqLengthsMemPtr = getSrcMemoryAtPort(REVERSESEQUENCE_LENGTHS); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!dataMemPtr || !dataMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated input memory of 'data'"); - if (!seqLengthsMemPtr || !seqLengthsMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated input memory of 'seq_lengths'"); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated output memory"); + if (!dataMemPtr || !dataMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory of 'data'"); + if (!seqLengthsMemPtr || !seqLengthsMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory of 'seq_lengths'"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined output memory"); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW(errorPrefix, " has unidentified preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp index 221549234072bb..cab8bb3ad46325 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp @@ -1200,7 +1200,7 @@ Node::AttrPtr RNN::initPrimitiveAttr() { void 
RNN::prepareParams() { for (size_t i = 0; i < wIdx; i++) { auto memPtr = getSrcMemoryAtPort(i); - if (!memPtr || !memPtr->isAllocated()) + if (!memPtr || !memPtr->isDefined()) THROW_CPU_NODE_ERR("has uninitialized memory at port ", i); } if ((is_cell && DC != getParentEdgeAt(0)->getMemory().getDesc().getShape().getStaticDims()[1]) || diff --git a/src/plugins/intel_cpu/src/nodes/roi_align.cpp b/src/plugins/intel_cpu/src/nodes/roi_align.cpp index 3bc988784aa563..1de72415e1cbc0 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_align.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_align.cpp @@ -816,10 +816,10 @@ void ROIAlign::initSupportedPrimitiveDescriptors() { void ROIAlign::createPrimitive() { auto srcMemPtr = getSrcMemoryAtPort(0); auto dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate input memory"); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate destination memory"); + if (!srcMemPtr) + OPENVINO_THROW(errorPrefix, " has null input memory"); + if (!dstMemPtr) + OPENVINO_THROW(errorPrefix, " has null destination memory"); if (!roi_align_kernel) { ROIAlignLayoutType selectedLayout = ROIAlignLayoutType::nspc; diff --git a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp index 1b05dc3673325f..7458457c7b540e 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp @@ -504,12 +504,12 @@ void ROIPooling::prepareParams() { const auto& srcMemPtr0 = getSrcMemoryAtPort(0); const auto& srcMemPtr1 = getSrcMemoryAtPort(0); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr0 || !srcMemPtr0->isAllocated()) - OPENVINO_THROW("Input memory has not been allocated."); - if (!srcMemPtr1 || !srcMemPtr1->isAllocated()) - OPENVINO_THROW("Input memory has not been allocated."); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination has not been allocated."); + if (!srcMemPtr0 || !srcMemPtr0->isDefined()) + OPENVINO_THROW("Input memory is undefined."); + if (!srcMemPtr1 || !srcMemPtr1->isDefined()) + OPENVINO_THROW("Input memory is undefined."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination is undefined."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW("Preferable primitive descriptor is not set."); diff --git a/src/plugins/intel_cpu/src/nodes/roll.cpp b/src/plugins/intel_cpu/src/nodes/roll.cpp index 6f6ad7edc20d65..6f75361c13c37f 100644 --- a/src/plugins/intel_cpu/src/nodes/roll.cpp +++ b/src/plugins/intel_cpu/src/nodes/roll.cpp @@ -102,14 +102,14 @@ void Roll::prepareParams() { const auto& axesMemPtr = getSrcMemoryAtPort(AXES_INDEX); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!dataMemPtr || !dataMemPtr->isAllocated()) - OPENVINO_THROW(layerErrorPrefix, " has not allocated input memory of 'data'"); - if (!shiftMemPtr || !shiftMemPtr->isAllocated()) - OPENVINO_THROW(layerErrorPrefix, " has not allocated input memory of 'shift'"); - if (!axesMemPtr || !axesMemPtr->isAllocated()) - OPENVINO_THROW(layerErrorPrefix, " has not allocated input memory of 'axes'"); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(layerErrorPrefix, " has not allocated output memory"); + if (!dataMemPtr || !dataMemPtr->isDefined()) + OPENVINO_THROW(layerErrorPrefix, " has undefined input memory of 'data'"); + if (!shiftMemPtr || !shiftMemPtr->isDefined()) + OPENVINO_THROW(layerErrorPrefix, " has undefined 
input memory of 'shift'"); + if (!axesMemPtr || !axesMemPtr->isDefined()) + OPENVINO_THROW(layerErrorPrefix, " has undefined input memory of 'axes'"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(layerErrorPrefix, " has undefined output memory"); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW(layerErrorPrefix, " has unidentified preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp index 72ecca666c30dd..dd45a639e98847 100644 --- a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp +++ b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp @@ -128,10 +128,10 @@ void ShuffleChannels::initSupportedPrimitiveDescriptors() { void ShuffleChannels::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_SHCH_ERROR("has not allocated destination memory"); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - THROW_SHCH_ERROR("has not allocated input memory"); + if (!dstMemPtr) + THROW_SHCH_ERROR("has null destination memory"); + if (!srcMemPtr) + THROW_SHCH_ERROR("has null input memory"); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_SHCH_ERROR("has unidentified preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp index 40d344ec64cb07..6753a9510bdc2f 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp +++ b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp @@ -166,10 +166,10 @@ void SpaceToDepth::initSupportedPrimitiveDescriptors() { void SpaceToDepth::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_ERROR("has not allocated destination memory"); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - THROW_ERROR("has not allocated input memory"); + if (!dstMemPtr) + THROW_ERROR("has null destination memory"); + if (!srcMemPtr) + THROW_ERROR("has null input memory"); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR("has unidentified preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp index 5eafc402621008..157dbfd84a7f6c 100644 --- a/src/plugins/intel_cpu/src/nodes/split.cpp +++ b/src/plugins/intel_cpu/src/nodes/split.cpp @@ -232,10 +232,16 @@ bool Split::needPrepareParams() const { return needShapeInfer(); } +void Split::createPrimitive() { + if (outputShapesDefined()) { + Node::createPrimitive(); + } +} + void Split::prepareParams() { const auto &srcMemPtr = getSrcMemoryAtPort(0); - if (!srcMemPtr || !srcMemPtr->isAllocated()) { - THROW_ERROR("has not allocated input memory"); + if (!srcMemPtr || !srcMemPtr->isDefined()) { + THROW_ERROR("has undefined input memory"); } if (!constSplitLengths) { @@ -249,8 +255,8 @@ void Split::prepareParams() { std::vector outDescs; for (size_t port = 0; port < outputShapes.size(); ++port) { const auto &outMemPtr = this->getDstMemoryAtPort(port); - if (!outMemPtr || !outMemPtr->isAllocated()) { - THROW_ERROR("has not allocated destination memory"); + if (!outMemPtr || !outMemPtr->isDefined()) { + THROW_ERROR("has undefined destination memory"); } if (outMemPtr->getShape().hasZeroDims()) { @@ -541,7 +547,7 @@ void Split::resolveInPlaceEdges(Edge::LOOK look) { " Split node: ", getName(), " can not use inPlace memory with splitting 
on dynamic dimension"); - auto baseMemMngr = getParentEdgeAt(inplaceInpIndx)->getMemory().getMemoryMngr(); + auto baseMemBlock = getParentEdgeAt(inplaceInpIndx)->getMemory().getMemoryBlock(); ptrdiff_t offset = 0; for (size_t i = 0; i < numberOfOutputs; ++i) { auto partDim = outputShapes[i].getDims()[axis]; @@ -560,8 +566,8 @@ void Split::resolveInPlaceEdges(Edge::LOOK look) { auto memDesc = selected_pd->getConfig().outConfs[i].getMemDesc(); MemoryPtr newMem; if (partDim != 0) { - auto memMngr = std::make_shared(baseMemMngr, baseDim, offset, partDim); - newMem = std::make_shared(getEngine(), memDesc, memMngr); + auto memBlock = std::make_shared(baseMemBlock, baseDim, offset, partDim); + newMem = std::make_shared(getEngine(), memDesc, memBlock); } else { // empty tensor, no need to reference a part, default memory is enough newMem = std::make_shared(getEngine(), memDesc); diff --git a/src/plugins/intel_cpu/src/nodes/split.h b/src/plugins/intel_cpu/src/nodes/split.h index 02af229541f9bf..0782594bcf9989 100644 --- a/src/plugins/intel_cpu/src/nodes/split.h +++ b/src/plugins/intel_cpu/src/nodes/split.h @@ -28,7 +28,10 @@ class Split : public Node { bool needPrepareParams() const override; bool needShapeInfer() const override; void prepareParams() override; - void executeDynamicImpl(dnnl::stream strm) override { execute(strm); } + void createPrimitive() override; + void executeDynamicImpl(dnnl::stream strm) override { + execute(strm); + } void resolveInPlaceEdges(Edge::LOOK look) override; private: diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp index 3d9f7a2217dd97..9a3b9788b838d2 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp @@ -54,12 +54,9 @@ static NodeConfig make_plain_config(const std::shared_ptr& op) { } static void redefineToMemories(const std::vector& to_mems, MemoryDescPtr new_desc) { - const auto &currDesc = to_mems.front()->getDesc(); - if (currDesc.getShape().isDynamic() || currDesc.getShape().getStaticDims() != new_desc->getShape().getStaticDims()) { - // TODO : check the entire dstMemPtrs usage considering the proper memory sharing - for (size_t j = 0; j < to_mems.size(); j++) { - to_mems[j]->redefineDesc(new_desc); - } + // TODO : check the entire dstMemPtrs usage considering the proper memory sharing + for (size_t j = 0; j < to_mems.size(); j++) { + to_mems[j]->redefineDesc(new_desc); } } @@ -517,7 +514,11 @@ void TensorIterator::createPrimitive() { if (runAsDynamic()) prepareDynamicBuffers(); - Node::createPrimitive(); + if (inputShapesDefined() && (getAlgorithm() == Algorithm::TensorIteratorLoop || needPrepareParams())) { + constexpr bool compileStage = true; + prepareParamsImpl(compileStage); + updateLastInputDims(); + } } bool TensorIterator::needPrepareParams() const { @@ -541,10 +542,15 @@ bool TensorIterator::needPrepareParams() const { // Thus, sliced input shapes and body input shapes are equal but iteration counts are different. 
So we should update trip count return Node::needPrepareParams(); } - void TensorIterator::prepareParams() { - prepareTripCount(); - prepareInitialCond(); + // due to specific createPrimitive implementation this method is called only during inference + constexpr bool compileStage = false; + prepareParamsImpl(compileStage); +} + +void TensorIterator::prepareParamsImpl(const bool compileStage) { + prepareTripCount(compileStage); + prepareInitialCond(compileStage); first_mappers.clear(); before_mappers.clear(); @@ -714,22 +720,30 @@ void TensorIterator::prepareContinueCond() { } } -void TensorIterator::prepareInitialCond() { +void TensorIterator::prepareInitialCond(const bool compileStage) { if (loopExecutionConditionIdx != -1 || !initial_cond_check) { - auto mem = getSrcMemoryAtPort(loopExecutionConditionIdx); + auto edge = getParentEdgeAt(loopExecutionConditionIdx); + auto mem = edge->getMemoryPtr(); initial_cond_check.reset(new asBoolCheck(mem)); - lastUsedCond = initial_cond_check->getStatus(); + if (IMPLICATION(compileStage, edge->getParent()->isConstant())) + lastUsedCond = initial_cond_check->getStatus(); } } -void TensorIterator::prepareTripCount() { +void TensorIterator::prepareTripCount(const bool compileStage) { + bool read_data = false; if (loopTripCountIdx == -1) { trip_count_check.reset(new staticValueCheck(getNumIteration(inputPortMap, outputPortMap))); + read_data = true; } else { - auto mem = getSrcMemoryAtPort(loopTripCountIdx); + auto edge = getParentEdgeAt(loopTripCountIdx); + auto mem = edge->getMemoryPtr(); trip_count_check.reset(new asIntCheck(mem)); + read_data = IMPLICATION(compileStage, edge->getParent()->isConstant()); + } + if (read_data) { + lastUsedTripCount = trip_count_check->getStatus(); } - lastUsedTripCount = trip_count_check->getStatus(); } /* *==============* *==============* *==============* *==============* *==============* */ diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.h b/src/plugins/intel_cpu/src/nodes/tensoriterator.h index 4a8229605e2103..f8a8110c3fae48 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.h +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.h @@ -130,14 +130,15 @@ class TensorIterator : public Node { void prepareDynamicBuffers(); void prepareLoopBodyCurrentIteration(); void prepareContinueCond(); - void prepareInitialCond(); - void prepareTripCount(); + void prepareInitialCond(const bool compileStage); + void prepareTripCount(const bool compileStage); /* Dynamic support */ void reshapeSubgraphInput(); void reshapeAndFillOutput(dnnl::stream strm); bool checkForInputAndBodyShapesInequality() const; int getNumIteration(const std::vector& inputPortMap, const std::vector& outputPortMap) const; + void prepareParamsImpl(const bool compileStage); /* run dynamic subgraph inside a static node */ bool runAsDynamic() const; diff --git a/src/plugins/intel_cpu/src/nodes/topk.cpp b/src/plugins/intel_cpu/src/nodes/topk.cpp index fc2d13adfc0df0..04602d5bfe69a6 100644 --- a/src/plugins/intel_cpu/src/nodes/topk.cpp +++ b/src/plugins/intel_cpu/src/nodes/topk.cpp @@ -1980,10 +1980,10 @@ void TopK::preset_params() { void TopK::prepareParams() { auto dstMemPtr = getDstMemoryAtPort(TOPK_DATA); auto srcMemPtr = getSrcMemoryAtPort(TOPK_DATA); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated destination memory."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocate input memory."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + 
OPENVINO_THROW(errorPrefix, " has undefined destination memory."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW(errorPrefix, " has nullable preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/transpose.cpp b/src/plugins/intel_cpu/src/nodes/transpose.cpp index 9a958be2ead5e4..38712e04c50719 100644 --- a/src/plugins/intel_cpu/src/nodes/transpose.cpp +++ b/src/plugins/intel_cpu/src/nodes/transpose.cpp @@ -200,10 +200,10 @@ void Transpose::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(INPUT_DATA_IDX); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory was not allocated."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW("Input memory was not allocated."); + if (!dstMemPtr) + OPENVINO_THROW("Destination memory is null."); + if (!srcMemPtr) + OPENVINO_THROW("Input memory is null."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW("Preferable primitive descriptor was not set."); diff --git a/src/plugins/intel_cpu/src/nodes/unique.cpp b/src/plugins/intel_cpu/src/nodes/unique.cpp index 130213dfcb8703..a0a0cd95d000f3 100644 --- a/src/plugins/intel_cpu/src/nodes/unique.cpp +++ b/src/plugins/intel_cpu/src/nodes/unique.cpp @@ -91,14 +91,14 @@ void Unique::createPrimitive() { void Unique::prepareParams() { auto dataMemPtr = getSrcMemoryAtPort(IN_DATA); - if (!dataMemPtr || !dataMemPtr->isAllocated()) { - THROW_ERROR(" has not allocated input data memory."); + if (!dataMemPtr) { + THROW_ERROR(" has null input data memory."); } for (int i = 0; i < 4; i++) { if (definedOutputs[i]) { auto dstMemPtr = getDstMemoryAtPort(i); - if (!dstMemPtr || !dstMemPtr->isAllocated()) { - THROW_ERROR(" has not allocated output memory at port ", i); + if (!dstMemPtr) { + THROW_ERROR(" has null output memory at port ", i); } } } diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp index d962546bf367f9..bd4376c8a2812e 100644 --- a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp @@ -6,28 +6,28 @@ using namespace ov::intel_cpu; -void* PartitionedMemoryMngr::getRawPtr() const noexcept { - return static_cast(m_pMngr->getRawPtr()) + m_offset_blocks * m_size / m_size_blocks; +void* PartitionedMemoryBlock::getRawPtr() const noexcept { + return static_cast(m_pBlock->getRawPtr()) + m_offset_chunks * m_size / m_size_chunks; } -void PartitionedMemoryMngr::setExtBuff(void* ptr, size_t size) { - m_pMngr->setExtBuff(ptr, size); +void PartitionedMemoryBlock::setExtBuff(void* ptr, size_t size) { + m_pBlock->setExtBuff(ptr, size); } -bool PartitionedMemoryMngr::resize(size_t size) { +bool PartitionedMemoryBlock::resize(size_t size) { m_size = size; - return m_pMngr->resize(m_size * m_total_blocks / m_size_blocks); + return m_pBlock->resize(m_size * m_total_chunks / m_size_chunks); } -bool PartitionedMemoryMngr::hasExtBuffer() const noexcept { - return m_pMngr->hasExtBuffer(); +bool PartitionedMemoryBlock::hasExtBuffer() const noexcept { + return m_pBlock->hasExtBuffer(); } -void PartitionedMemoryMngr::registerMemory(Memory* memPtr) { - m_pMngr->registerMemory(memPtr); +void PartitionedMemoryBlock::registerMemory(Memory* memPtr) { + m_pBlock->registerMemory(memPtr); } -void PartitionedMemoryMngr::unregisterMemory(Memory* memPtr) { - 
m_pMngr->unregisterMemory(memPtr); +void PartitionedMemoryBlock::unregisterMemory(Memory* memPtr) { + m_pBlock->unregisterMemory(memPtr); } diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h index 2b7b5568bbc93d..58179ce5b04d55 100644 --- a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h @@ -10,15 +10,14 @@ namespace ov { namespace intel_cpu { /** - * This is a memory manager that represents a view on a partition inside a continuous memory block controlled by - * another memory manager. + * This is a memory block that represents a view on a subblock inside another continuous dynamic memory block * */ -class PartitionedMemoryMngr : public IMemoryMngrObserver { +class PartitionedMemoryBlock : public IMemoryBlockObserver { public: - PartitionedMemoryMngr(MemoryMngrPtr pMngr, size_t total_blocks = 1, ptrdiff_t offset_blocks = 0, size_t size_blocks = 1) - : m_pMngr(pMngr), m_total_blocks(total_blocks), m_offset_blocks(offset_blocks), m_size_blocks(size_blocks) { - OPENVINO_ASSERT(m_pMngr, "Memory manager is uninitialized"); + PartitionedMemoryBlock(MemoryBlockPtr pBlock, size_t total_chunks = 1, ptrdiff_t offset_chunks = 0, size_t size_chunks = 1) + : m_pBlock(pBlock), m_total_chunks(total_chunks), m_offset_chunks(offset_chunks), m_size_chunks(size_chunks) { + OPENVINO_ASSERT(m_pBlock, "Memory block is uninitialized"); } void* getRawPtr() const noexcept override; @@ -29,10 +28,10 @@ class PartitionedMemoryMngr : public IMemoryMngrObserver { void unregisterMemory(Memory* memPtr) override; private: - MemoryMngrPtr m_pMngr; - size_t m_total_blocks = 1; // size of the parent memory in abstract blocks - ptrdiff_t m_offset_blocks = 0; // offset from the beginning of the external memory in abstract blocks - size_t m_size_blocks = 1; // size of the viewed partition in abstract blocks + MemoryBlockPtr m_pBlock; + size_t m_total_chunks = 1; // size of the parent memory in abstract chunks + ptrdiff_t m_offset_chunks = 0; // offset from the beginning of the external memory in abstract chunks + size_t m_size_chunks = 1; // size of the viewed partition in abstract chunks size_t m_size = 0; // size of the viewed partition in bytes }; diff --git a/src/plugins/intel_cpu/src/proxy_mem_mgr.cpp b/src/plugins/intel_cpu/src/proxy_mem_mgr.cpp index fee56e70560895..1ab2f639985e67 100644 --- a/src/plugins/intel_cpu/src/proxy_mem_mgr.cpp +++ b/src/plugins/intel_cpu/src/proxy_mem_mgr.cpp @@ -7,75 +7,75 @@ using namespace ov::intel_cpu; -void ProxyMemoryMngr::setMemMngr(std::shared_ptr pMngr) { - OPENVINO_ASSERT(pMngr, "Attempt to set null memory manager to a ProxyMemoryMngr object"); - if (m_pMngr == pMngr) { +void ProxyMemoryBlock::setMemBlock(std::shared_ptr pBlock) { + OPENVINO_ASSERT(pBlock, "Attempt to set null memory block to a ProxyMemoryBlock object"); + if (m_pMemBlock == pBlock) { return; } - m_pMngr = pMngr; + m_pMemBlock = pBlock; notifyUpdate(); } -void ProxyMemoryMngr::setMemMngrResize(std::shared_ptr pMngr) { - OPENVINO_ASSERT(pMngr, "Attempt to set null memory manager to a ProxyMemoryMngr object"); - if (m_pMngr == pMngr) { +void ProxyMemoryBlock::setMemBlockResize(std::shared_ptr pBlock) { + OPENVINO_ASSERT(pBlock, "Attempt to set null memory block to a ProxyMemoryBlock object"); + if (m_pMemBlock == pBlock) { return; } - m_pMngr = pMngr; - m_pMngr->resize(m_size); + m_pMemBlock = pBlock; + m_pMemBlock->resize(m_size); notifyUpdate(); } -void ProxyMemoryMngr::reset() { - if (!m_pOrigMngr) { - 
m_pOrigMngr = std::make_shared(); +void ProxyMemoryBlock::reset() { + if (!m_pOrigBlock) { + m_pOrigBlock = std::make_shared(); } - if (m_pMngr == m_pOrigMngr) { + if (m_pMemBlock == m_pOrigBlock) { return; } - m_pMngr = m_pOrigMngr; - m_pMngr->resize(m_size); + m_pMemBlock = m_pOrigBlock; + m_pMemBlock->resize(m_size); notifyUpdate(); } -void* ProxyMemoryMngr::getRawPtr() const noexcept { - return m_pMngr->getRawPtr(); +void* ProxyMemoryBlock::getRawPtr() const noexcept { + return m_pMemBlock->getRawPtr(); } -void ProxyMemoryMngr::setExtBuff(void* ptr, size_t size) { - m_pMngr->setExtBuff(ptr, size); +void ProxyMemoryBlock::setExtBuff(void* ptr, size_t size) { + m_pMemBlock->setExtBuff(ptr, size); notifyUpdate(); } -bool ProxyMemoryMngr::resize(size_t size) { - auto res = m_pMngr->resize(size); - DEBUG_LOG(this, ", ", m_pMngr, " size ", m_size, " -> ", size, " resized? ", res, " RawPtr ", getRawPtr()); +bool ProxyMemoryBlock::resize(size_t size) { + auto res = m_pMemBlock->resize(size); + DEBUG_LOG(this, ", ", m_pMemBlock, " size ", m_size, " -> ", size, " resized? ", res, " RawPtr ", getRawPtr()); m_size = size; notifyUpdate(); return res; } -bool ProxyMemoryMngr::hasExtBuffer() const noexcept { - return m_pMngr->hasExtBuffer(); +bool ProxyMemoryBlock::hasExtBuffer() const noexcept { + return m_pMemBlock->hasExtBuffer(); } -void ProxyMemoryMngr::registerMemory(Memory* memPtr) { +void ProxyMemoryBlock::registerMemory(Memory* memPtr) { if (memPtr) { m_setMemPtrs.insert(memPtr); } } -void ProxyMemoryMngr::unregisterMemory(Memory* memPtr) { +void ProxyMemoryBlock::unregisterMemory(Memory* memPtr) { if (memPtr) { m_setMemPtrs.erase(memPtr); } } -void ProxyMemoryMngr::notifyUpdate() { +void ProxyMemoryBlock::notifyUpdate() { for (auto& item : m_setMemPtrs) { if (item) { item->update(); diff --git a/src/plugins/intel_cpu/src/proxy_mem_mgr.h b/src/plugins/intel_cpu/src/proxy_mem_mgr.h index 0788cef280ec96..9ce35887ef250e 100644 --- a/src/plugins/intel_cpu/src/proxy_mem_mgr.h +++ b/src/plugins/intel_cpu/src/proxy_mem_mgr.h @@ -12,12 +12,12 @@ namespace intel_cpu { /** * @brief A proxy object that additionally implements observer pattern */ -class ProxyMemoryMngr : public IMemoryMngrObserver { +class ProxyMemoryBlock : public IMemoryBlockObserver { public: - ProxyMemoryMngr() : m_pOrigMngr(std::make_shared()), m_pMngr(m_pOrigMngr) {} - explicit ProxyMemoryMngr(std::shared_ptr pMngr) { - OPENVINO_ASSERT(pMngr, "Memory manager is uninitialized"); - m_pMngr = pMngr; + ProxyMemoryBlock() : m_pOrigBlock(std::make_shared()), m_pMemBlock(m_pOrigBlock) {} + explicit ProxyMemoryBlock(std::shared_ptr pBlock) { + OPENVINO_ASSERT(pBlock, "Memory block is uninitialized"); + m_pMemBlock = pBlock; } void* getRawPtr() const noexcept override; @@ -28,26 +28,26 @@ class ProxyMemoryMngr : public IMemoryMngrObserver { void registerMemory(Memory* memPtr) override; void unregisterMemory(Memory* memPtr) override; - void setMemMngr(std::shared_ptr pMngr); - void setMemMngrResize(std::shared_ptr pMngr); + void setMemBlock(std::shared_ptr pBlock); + void setMemBlockResize(std::shared_ptr pBlock); void reset(); private: void notifyUpdate(); - // We keep the original MemMngr as may fallback to copy output. - std::shared_ptr m_pOrigMngr = nullptr; - std::shared_ptr m_pMngr = nullptr; + // We keep the original MemBlock as may fallback to copy output. 
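Editorial note: to make the ProxyMemoryBlock changes above easier to follow, here is a deliberately simplified, standalone sketch of the proxy-plus-observer pattern they implement. The names below are illustrative, not the plugin's API: consumers hold the proxy, the backing storage can be swapped in from outside or reset to a private fallback, and registered observers are notified so they can refresh any cached raw pointers.

#include <cstddef>
#include <iostream>
#include <memory>
#include <unordered_set>
#include <vector>

struct Storage {                           // stand-in for a real memory block
    std::vector<char> buf;
    void* data() { return buf.data(); }
    void resize(std::size_t n) { buf.resize(n); }
};

struct Observer {                          // stand-in for Memory objects watching the block
    virtual void update() = 0;
    virtual ~Observer() = default;
};

class ProxyStorage {
public:
    ProxyStorage() : m_orig(std::make_shared<Storage>()), m_cur(m_orig) {}

    void set(std::shared_ptr<Storage> s) { // start sharing somebody else's storage
        if (!s || s == m_cur) return;
        m_cur = std::move(s);
        notify();
    }
    void reset() {                         // fall back to the private storage
        if (m_cur == m_orig) return;
        m_cur = m_orig;
        m_cur->resize(m_size);
        notify();
    }
    void resize(std::size_t n) { m_size = n; m_cur->resize(n); notify(); }
    void* data() { return m_cur->data(); }

    void attach(Observer* o) { if (o) m_observers.insert(o); }
    void detach(Observer* o) { m_observers.erase(o); }

private:
    void notify() { for (auto* o : m_observers) o->update(); }

    std::shared_ptr<Storage> m_orig;       // kept so reset() always has a fallback
    std::shared_ptr<Storage> m_cur;
    std::unordered_set<Observer*> m_observers;
    std::size_t m_size = 0;
};

struct PrintingObserver : Observer {       // refreshes (here: just prints) on every change
    void update() override { std::cout << "backing storage changed\n"; }
};

int main() {
    ProxyStorage proxy;
    PrintingObserver obs;
    proxy.attach(&obs);
    proxy.resize(64);                        // observer fires: size changed
    proxy.set(std::make_shared<Storage>());  // observer fires: storage swapped
    proxy.reset();                           // observer fires: back to private storage
    return 0;
}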
+ std::shared_ptr m_pOrigBlock = nullptr; + std::shared_ptr m_pMemBlock = nullptr; std::unordered_set m_setMemPtrs; // WA: resize stage might not work because there is no shape change, - // but the underlying actual memory manager changes. + // but the underlying actual memory block changes. size_t m_size = 0ul; }; -using ProxyMemoryMngrPtr = std::shared_ptr; -using ProxyMemoryMngrCPtr = std::shared_ptr; +using ProxyMemoryBlockPtr = std::shared_ptr; +using ProxyMemoryBlockCPtr = std::shared_ptr; } // namespace intel_cpu } // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp index 230705af88bb06..69b3da9be00227 100644 --- a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp +++ b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp @@ -654,7 +654,7 @@ std::string to_string(const T* values, size_t N, size_t maxsize) { std::ostream& operator<<(std::ostream& os, const IMemory& mem) { const auto& desc = mem.getDesc(); os << desc; - if (mem.isAllocated()) { + if (mem.isDefined()) { os << " ["; if (desc.getPrecision() == ov::element::i32) { os << to_string(mem.getDataAs(), mem.getSize() / sizeof(int32_t), 256); diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/release_memory.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/release_memory.cpp new file mode 100644 index 00000000000000..b5b2b38c34098e --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/release_memory.cpp @@ -0,0 +1,128 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "openvino/runtime/core.hpp" +#include "openvino/runtime/compiled_model.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "common_test_utils/node_builders/convolution.hpp" +#include "common_test_utils/node_builders/constant.hpp" + +using namespace ov::test; + +namespace { +class MemoryReleaseTest : public testing::WithParamInterface, public SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + bool dyn_shapes = obj.param; + return dyn_shapes ? 
"dyn_shapes" : "static_shapes"; + } + +public: + void SetUp() override { + auto net_prc = ov::element::f32; + targetDevice = utils::DEVICE_CPU; + + bool dyn_shapes = this->GetParam(); + + InputShape input_shape; + + if (dyn_shapes) { + input_shape = {{1, 2048, -1}, {{1, 2048, 7}, {1, 2048, 10}}}; + } else { + input_shape = {{}, {{1, 2048, 7}}}; + } + + init_input_shapes({input_shape}); + + auto param = std::make_shared(net_prc, inputDynamicShapes.front()); + + //convolution params + static const ov::Shape kernel_1x1 = {1}; + static const ov::Shape kernel_3x3 = {3}; + static const ov::Shape dilations_1x1 = {1}; + static const ov::Shape strides_1x1 = {1}; + + static const ov::op::PadType pad_type = ov::op::PadType::EXPLICIT; + + static const std::vector zero_pads_begin = {0}; + static const std::vector zero_pads_end = {0}; + + static const std::vector unit_pads_begin = {1}; + static const std::vector unit_pads_end = {1}; + + auto relu0 = std::make_shared(param); + + auto conv1 = utils::make_convolution(relu0, + net_prc, + kernel_1x1, + strides_1x1, + zero_pads_begin, + zero_pads_end, + dilations_1x1, + pad_type, + 512, + true); + + auto relu1 = std::make_shared(conv1); + + auto conv2 = utils::make_convolution(relu1, + net_prc, + kernel_3x3, + strides_1x1, + unit_pads_begin, + unit_pads_end, + dilations_1x1, + pad_type, + 512, + true); + + auto relu2 = std::make_shared(conv2); + + auto conv3 = utils::make_convolution(relu2, + net_prc, + kernel_1x1, + strides_1x1, + zero_pads_begin, + zero_pads_end, + dilations_1x1, + pad_type, + 2048, + true); + + auto add = std::make_shared(conv3, relu0); + + auto axis = utils::make_constant(ov::element::i32, {1}, std::vector({2})); + + auto reduce = std::make_shared(add, axis, true); + + function = std::make_shared(ov::OutputVector{reduce}, ov::ParameterVector{param}); + } +}; + +TEST_P(MemoryReleaseTest, ConsequitiveRelease) { + compile_model(); + for (const auto& targetStaticShapeVec : targetStaticShapes) { + generate_inputs(targetStaticShapeVec); + validate(); + } + compiledModel.release_memory(); + for (const auto& targetStaticShapeVec : targetStaticShapes) { + generate_inputs(targetStaticShapeVec); + validate(); + } +} + +INSTANTIATE_TEST_SUITE_P(smoke_release_memory, + MemoryReleaseTest, + ::testing::Values(true, false), + MemoryReleaseTest::getTestCaseName); + +} // namespace + +// TBD: +// a few infer requests one graph +// a few infer request a few graphs +// a few infer request parallel release calls \ No newline at end of file diff --git a/src/plugins/intel_cpu/tests/unit/cpu_tensor_test.cpp b/src/plugins/intel_cpu/tests/unit/cpu_tensor_test.cpp index 76794b56531bd8..9b766319569995 100644 --- a/src/plugins/intel_cpu/tests/unit/cpu_tensor_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/cpu_tensor_test.cpp @@ -63,7 +63,6 @@ class MockIMemory : public IMemory { MockIMemory(MemoryDescPtr desc) : m_pMemDesc(desc) {} MockIMemory(const MemoryDesc& desc) : m_pMemDesc(desc.clone()) {} - MOCK_METHOD(bool, isAllocated, (), (const, noexcept, override)); MOCK_METHOD(MemoryDesc&, getDesc, (), (const, override)); MOCK_METHOD(MemoryDescPtr, getDescPtr, (), (const, override)); @@ -73,7 +72,7 @@ class MockIMemory : public IMemory { MOCK_METHOD(void, redefineDesc, (MemoryDescPtr), (override)); MOCK_METHOD(void, load, (const IMemory&, bool), (const, override)); - MOCK_METHOD(MemoryMngrPtr, getMemoryMngr, (), (const, override)); + MOCK_METHOD(MemoryBlockPtr, getMemoryBlock, (), (const, override)); MOCK_METHOD(dnnl::memory, getPrimitive, (), (const, override)); 
MOCK_METHOD(void, nullify, (), (override)); diff --git a/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp b/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp index c73db6c8a28df8..9e0bce6d444f4f 100644 --- a/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp @@ -55,7 +55,7 @@ TEST(MemoryTest, ConcurrentResizeGetPrimitive) { dnnl::memory dnnl_mem; auto desc = std::make_shared(ov::element::f32, Shape{10, 2}); Memory cpu_mem1(eng, desc); - Memory cpu_mem2(eng, desc, cpu_mem1.getMemoryMngr()); + Memory cpu_mem2(eng, desc, cpu_mem1.getMemoryBlock()); auto desc2 = std::make_shared(ov::element::f32, Shape{10, 20}); std::atomic lock{true}; @@ -84,7 +84,7 @@ TEST(StaticMemoryTest, UnsupportedDnnlPrecision) { CpuBlockedMemoryDesc memDescSupportedPrc(ov::element::f32, {5, 4, 7, 10}); MemoryPtr testMemory; OV_ASSERT_NO_THROW(testMemory = std::make_shared(eng, memDescSupportedPrc)); - ASSERT_TRUE(testMemory->isAllocated()); + ASSERT_TRUE(testMemory->isDefined()); dnnl::memory dnnl_memory; void* raw_data_ptr = nullptr; OV_ASSERT_NO_THROW(raw_data_ptr = testMemory->getData()); @@ -94,7 +94,7 @@ TEST(StaticMemoryTest, UnsupportedDnnlPrecision) { CpuBlockedMemoryDesc memDescUnSupportedPrc(ov::element::i64, {5, 4, 7, 10}); OV_ASSERT_NO_THROW(testMemory = std::make_shared(eng, memDescUnSupportedPrc)); - ASSERT_TRUE(testMemory->isAllocated()); + ASSERT_TRUE(testMemory->isDefined()); raw_data_ptr = nullptr; OV_ASSERT_NO_THROW(raw_data_ptr = testMemory->getData()); ASSERT_FALSE(nullptr == raw_data_ptr); diff --git a/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp b/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp index f959c90f770214..003aca979398fb 100644 --- a/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp @@ -75,7 +75,8 @@ class MergeTransposeReorderCPUTest : public testing::WithParamInterface(Config(), nullptr, false); + Config conf; + m_context = std::make_shared(conf, nullptr, false); const auto replication_result = CreateModelAndReplicate(shape, params.firstNodeLayout, params.firstNodeInplaceDirection, diff --git a/src/tests/test_utils/common_test_utils/src/node_builders/convolution.cpp b/src/tests/test_utils/common_test_utils/src/node_builders/convolution.cpp index 23ce1f80f30bde..a0e79f6f9a8e11 100644 --- a/src/tests/test_utils/common_test_utils/src/node_builders/convolution.cpp +++ b/src/tests/test_utils/common_test_utils/src/node_builders/convolution.cpp @@ -45,11 +45,13 @@ std::shared_ptr make_convolution(const ov::Output& in, auto_pad); if (add_biases) { std::shared_ptr biases_weights_node; + const size_t rank = in.get_partial_shape().rank().get_length(); + ov::Shape bias_shape(rank, 1); + bias_shape[1] = num_out_channels; if (!biases_weights.empty()) { - biases_weights_node = - std::make_shared(type, ov::Shape{1, num_out_channels, 1, 1}, biases_weights); + biases_weights_node = std::make_shared(type, bias_shape, biases_weights); } else { - auto tensor = create_and_fill_tensor(type, ov::Shape{1, num_out_channels, 1, 1}, 9, 1); + auto tensor = create_and_fill_tensor(type, bias_shape, 9, 1); biases_weights_node = std::make_shared(tensor); } @@ -82,11 +84,13 @@ std::shared_ptr make_convolution(const ov::Output& in_data, auto_pad); if (add_biases) { std::shared_ptr biases_weights_node; + const size_t rank = in_data.get_partial_shape().rank().get_length(); + ov::Shape bias_shape(rank, 
1); + bias_shape[1] = num_out_channels; if (!biases_weights.empty()) { - biases_weights_node = - std::make_shared(type, ov::Shape{1, num_out_channels, 1, 1}, biases_weights); + biases_weights_node = std::make_shared(type, bias_shape, biases_weights); } else { - auto tensor = create_and_fill_tensor(type, ov::Shape{1, num_out_channels, 1, 1}, 9, 1); + auto tensor = create_and_fill_tensor(type, bias_shape, 9, 1); biases_weights_node = std::make_shared(tensor); }
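Editorial note: the convolution builder change above replaces the hard-coded {1, C, 1, 1} bias shape with one derived from the input rank, so 1D and 3D convolutions also get a correctly broadcastable bias. A standalone restatement of that rule (an illustrative helper, not part of the test utils):

#include <cstddef>
#include <vector>

// Per-channel convolution bias: an all-ones shape of the same rank as the input with
// the channel count at dimension 1, so it broadcasts over batch and spatial dims.
// Assumes rank >= 2, as the convolution input always has batch and channel dims.
std::vector<std::size_t> make_bias_shape(std::size_t rank, std::size_t num_out_channels) {
    std::vector<std::size_t> bias_shape(rank, 1);   // e.g. rank 3 -> {1, 1, 1}
    bias_shape[1] = num_out_channels;               //            -> {1, C, 1}
    return bias_shape;
}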