From f211b01505486d4e936b345a6e6d6c3acd7dddc3 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Mon, 5 Aug 2024 13:11:31 +0200 Subject: [PATCH 01/36] Rename mem manager to mem block --- src/plugins/intel_cpu/src/cpu_memory.cpp | 173 +++++++----------- src/plugins/intel_cpu/src/cpu_memory.h | 138 ++++++-------- src/plugins/intel_cpu/src/dnnl_scratch_pad.h | 6 +- src/plugins/intel_cpu/src/edge.cpp | 8 +- src/plugins/intel_cpu/src/edge.h | 2 +- src/plugins/intel_cpu/src/graph.cpp | 32 ++-- src/plugins/intel_cpu/src/graph.h | 2 +- src/plugins/intel_cpu/src/infer_request.cpp | 44 ++--- src/plugins/intel_cpu/src/infer_request.h | 14 +- src/plugins/intel_cpu/src/node.cpp | 12 +- src/plugins/intel_cpu/src/nodes/concat.cpp | 8 +- src/plugins/intel_cpu/src/nodes/conv.cpp | 2 +- src/plugins/intel_cpu/src/nodes/deconv.cpp | 4 +- src/plugins/intel_cpu/src/nodes/gather.cpp | 6 +- src/plugins/intel_cpu/src/nodes/memory.cpp | 36 ++-- src/plugins/intel_cpu/src/nodes/memory.hpp | 4 +- src/plugins/intel_cpu/src/nodes/split.cpp | 6 +- .../intel_cpu/src/partitioned_mem_mgr.cpp | 24 +-- .../intel_cpu/src/partitioned_mem_mgr.h | 19 +- src/plugins/intel_cpu/src/proxy_mem_mgr.cpp | 54 +++--- src/plugins/intel_cpu/src/proxy_mem_mgr.h | 26 +-- 21 files changed, 278 insertions(+), 342 deletions(-) diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index ab454382f57d73..8229c0b1605b42 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -66,7 +66,7 @@ namespace { Memory::Memory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data, bool pads_zeroing) : m_eng(eng), m_pMemDesc(desc), - m_mgrHandle(std::make_shared(make_unique()), this), + m_blockHandle(std::make_shared(make_unique()), this), dnnlMemHandle(this) { if (desc->getPrecision() == element::string) { OPENVINO_THROW("[CPU] Memory object cannot be created for string data."); @@ -77,18 +77,18 @@ Memory::Memory(const dnnl::engine& eng, 
MemoryDescPtr desc, const void* data, bo Memory::Memory(const dnnl::engine& eng, const MemoryDesc& desc, const void* data, bool pads_zeroing) : Memory::Memory(eng, desc.clone(), data, pads_zeroing) {} -Memory::Memory(const dnnl::engine& eng, MemoryDescPtr desc, MemoryMngrPtr mngr) : - m_eng(eng), m_pMemDesc(desc), m_mgrHandle(mngr, this), dnnlMemHandle(this) { +Memory::Memory(const dnnl::engine& eng, MemoryDescPtr desc, MemoryBlockPtr block) : + m_eng(eng), m_pMemDesc(desc), m_blockHandle(block, this), dnnlMemHandle(this) { if (desc->getPrecision() == element::string) { OPENVINO_THROW("[CPU] Memory object can't be created for string data."); } - bool memAllocated = m_mgrHandle->getRawPtr(); + bool memAllocated = m_blockHandle->getRawPtr(); create(desc, nullptr, !memAllocated); } -Memory::Memory(const dnnl::engine& eng, const MemoryDesc& desc, MemoryMngrPtr mngr) : - Memory::Memory(eng, desc.clone(), mngr) {} +Memory::Memory(const dnnl::engine& eng, const MemoryDesc& desc, MemoryBlockPtr block) : + Memory::Memory(eng, desc.clone(), block) {} size_t Memory::getSize() const { auto size = getDesc().getCurrentMemSize(); @@ -112,9 +112,9 @@ void Memory::create(MemoryDescPtr desc, const void* data, bool pads_zeroing) { } auto memSize = m_pMemDesc->getCurrentMemSize(); if (nullptr != data) { - m_mgrHandle->setExtBuff(const_cast(data), memSize); + m_blockHandle->setExtBuff(const_cast(data), memSize); } else { - m_mgrHandle->resize(memSize); + m_blockHandle->resize(memSize); } } @@ -145,7 +145,7 @@ void Memory::redefineDesc(MemoryDescPtr desc) { void Memory::update() { if (dnnlMemHandle.isInit()) { auto prim = dnnlMemHandle.getPrim(); - prim.set_data_handle(m_mgrHandle->getRawPtr()); + prim.set_data_handle(m_blockHandle->getRawPtr()); } } @@ -185,7 +185,7 @@ dnnl::memory Memory::DnnlMemPrimHandle::getPrim() const { } bool Memory::isAllocated() const noexcept { - if (m_mgrHandle->getRawPtr()) { + if (m_blockHandle->getRawPtr()) { return true; } if (!m_pMemDesc) { @@ -209,17 
+209,17 @@ void* Memory::getData() const { return data; } -void* MemoryMngrWithReuse::getRawPtr() const noexcept { +void* MemoryBlockWithReuse::getRawPtr() const noexcept { return m_data.get(); } -void MemoryMngrWithReuse::setExtBuff(void *ptr, size_t size) { +void MemoryBlockWithReuse::setExtBuff(void *ptr, size_t size) { m_useExternalStorage = true; m_memUpperBound = size; m_data = decltype(m_data)(ptr, release); } -bool MemoryMngrWithReuse::resize(size_t size) { +bool MemoryBlockWithReuse::resize(size_t size) { constexpr int cacheLineSize = 64; bool sizeChanged = false; if (size > m_memUpperBound) { @@ -234,63 +234,20 @@ bool MemoryMngrWithReuse::resize(size_t size) { if (numa_node >= 0) { if (!mbind_move(ptr, size, numa_node)) { - DEBUG_LOG("MemoryMngrWithReuse move_memory to node ", numa_node, " failed\n"); + DEBUG_LOG("MemoryBlockWithReuse move_memory to node ", numa_node, " failed\n"); } } } return sizeChanged; } -bool MemoryMngrWithReuse::hasExtBuffer() const noexcept { +bool MemoryBlockWithReuse::hasExtBuffer() const noexcept { return m_useExternalStorage; } -void MemoryMngrWithReuse::release(void *ptr) {} +void MemoryBlockWithReuse::release(void *ptr) {} -void MemoryMngrWithReuse::destroy(void *ptr) { - dnnl::impl::free(ptr); -} - -void* MemoryMngrRealloc::getRawPtr() const noexcept { - return m_data.get(); -} - -void MemoryMngrRealloc::setExtBuff(void *ptr, size_t size) { - m_useExternalStorage = true; - m_memUpperBound = size; - m_data = decltype(m_data)(ptr, release); -} - -bool MemoryMngrRealloc::resize(size_t size) { - constexpr int cacheLineSize = 64; - constexpr size_t growFactor = 2; - bool sizeChanged = false; - if (size > m_memUpperBound) { - size *= growFactor; - void *ptr = dnnl::impl::malloc(size, cacheLineSize); - if (!ptr) { - OPENVINO_THROW("Failed to allocate ", size, " bytes of memory"); - } - - if (auto src = m_data.get()) { - std::memcpy(ptr, src, m_memUpperBound); - } - - m_memUpperBound = size; - m_useExternalStorage = false; - 
m_data = decltype(m_data)(ptr, destroy); - sizeChanged = true; - } - return sizeChanged; -} - -bool MemoryMngrRealloc::hasExtBuffer() const noexcept { - return m_useExternalStorage; -} - -void MemoryMngrRealloc::release(void *ptr) {} - -void MemoryMngrRealloc::destroy(void *ptr) { +void MemoryBlockWithReuse::destroy(void *ptr) { dnnl::impl::free(ptr); } @@ -301,7 +258,7 @@ StringMemory::StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc OPENVINO_THROW("[CPU] StringMemory supports String type only."); } - m_manager = std::make_shared(); + m_memoryBlock = std::make_shared(); if (!m_mem_desc->isDefined()) { return; @@ -311,9 +268,9 @@ StringMemory::StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc if (data != nullptr) { auto not_const_data = const_cast(data); - m_manager->setExtBuff(reinterpret_cast(not_const_data), string_size); + m_memoryBlock->setExtBuff(reinterpret_cast(not_const_data), string_size); } else { - m_manager->resize(string_size); + m_memoryBlock->resize(string_size); } } @@ -326,7 +283,7 @@ void StringMemory::load(const IMemory& src, bool ftz) const { } void* StringMemory::getData() const { - return m_manager->getRawPtr(); + return m_memoryBlock->getRawPtr(); } void StringMemory::redefineDesc(MemoryDescPtr desc) { @@ -339,13 +296,13 @@ void StringMemory::redefineDesc(MemoryDescPtr desc) { m_mem_desc = desc; const auto string_size = m_mem_desc->getShape().getElementsCount(); - m_manager->resize(string_size); + m_memoryBlock->resize(string_size); } void StringMemory::nullify() { - auto data_ptr = m_manager->getStringPtr(); + auto data_ptr = m_memoryBlock->getStringPtr(); if (data_ptr != nullptr) { - std::fill(data_ptr, data_ptr + m_manager->getStrLen(), OvString()); + std::fill(data_ptr, data_ptr + m_memoryBlock->getStrLen(), OvString()); } } @@ -373,25 +330,25 @@ size_t StringMemory::getSize() const { // In bytes return size; } -MemoryMngrPtr StringMemory::getMemoryMngr() const { - OPENVINO_THROW("Unexpected call of 
StringMemory::getMemoryMngr()"); +MemoryBlockPtr StringMemory::getMemoryBlock() const { + OPENVINO_THROW("Unexpected call of StringMemory::getMemoryBlock()"); } dnnl::memory StringMemory::getPrimitive() const { OPENVINO_THROW("Unexpected call of StringMemory::getPrimitive()"); } -void StringMemory::StringMemoryMngr::setExtBuff(OvString* ptr, size_t size) { +void StringMemory::StringMemoryBlock::setExtBuff(OvString* ptr, size_t size) { m_use_external_storage = true; m_str_upper_bound = size; m_data = decltype(m_data)(ptr, release); } -StringMemory::OvString* StringMemory::StringMemoryMngr::getStringPtr() const noexcept { +StringMemory::OvString* StringMemory::StringMemoryBlock::getStringPtr() const noexcept { return m_data.get(); } -bool StringMemory::StringMemoryMngr::resize(size_t size) { +bool StringMemory::StringMemoryBlock::resize(size_t size) { bool sizeChanged = false; if (size > m_str_upper_bound) { if (size > PTRDIFF_MAX) { @@ -410,58 +367,58 @@ bool StringMemory::StringMemoryMngr::resize(size_t size) { return sizeChanged; } -bool StringMemory::StringMemoryMngr::hasExtBuffer() const noexcept { +bool StringMemory::StringMemoryBlock::hasExtBuffer() const noexcept { return m_use_external_storage; } -size_t StringMemory::StringMemoryMngr::getStrLen() const noexcept { +size_t StringMemory::StringMemoryBlock::getStrLen() const noexcept { return m_str_upper_bound; } -void StringMemory::StringMemoryMngr::destroy(OvString* ptr) { +void StringMemory::StringMemoryBlock::destroy(OvString* ptr) { delete[] ptr; } -void* StringMemory::StringMemoryMngr::getRawPtr() const noexcept { +void* StringMemory::StringMemoryBlock::getRawPtr() const noexcept { return reinterpret_cast(m_data.get()); } -/////////////// DnnlMemoryMngr /////////////// +/////////////// DnnlMemoryBlock /////////////// -void* DnnlMemoryMngr::getRawPtr() const noexcept { - return m_pMemMngr->getRawPtr(); +void* DnnlMemoryBlock::getRawPtr() const noexcept { + return m_pMemBlock->getRawPtr(); } -void 
DnnlMemoryMngr::setExtBuff(void *ptr, size_t size) { - m_pMemMngr->setExtBuff(ptr, size); +void DnnlMemoryBlock::setExtBuff(void *ptr, size_t size) { + m_pMemBlock->setExtBuff(ptr, size); notifyUpdate(); } -bool DnnlMemoryMngr::resize(size_t size) { - bool sizeChanged = m_pMemMngr->resize(size); +bool DnnlMemoryBlock::resize(size_t size) { + bool sizeChanged = m_pMemBlock->resize(size); if (sizeChanged) { notifyUpdate(); } return sizeChanged; } -bool DnnlMemoryMngr::hasExtBuffer() const noexcept { - return m_pMemMngr->hasExtBuffer(); +bool DnnlMemoryBlock::hasExtBuffer() const noexcept { + return m_pMemBlock->hasExtBuffer(); } -void DnnlMemoryMngr::registerMemory(Memory* memPtr) { +void DnnlMemoryBlock::registerMemory(Memory* memPtr) { if (memPtr) { m_setMemPtrs.insert(memPtr); } } -void DnnlMemoryMngr::unregisterMemory(Memory* memPtr) { +void DnnlMemoryBlock::unregisterMemory(Memory* memPtr) { if (memPtr) { m_setMemPtrs.erase(memPtr); } } -void DnnlMemoryMngr::notifyUpdate() { +void DnnlMemoryBlock::notifyUpdate() { for (auto& item : m_setMemPtrs) { if (item) { item->update(); @@ -481,9 +438,9 @@ StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const vo m_size = m_pMemDesc->getCurrentMemSize(); if (data) { - m_pMemMngr = std::make_shared(const_cast(data), m_size); + m_pMemBlock = std::make_shared(const_cast(data), m_size); } else { - m_pMemMngr = std::make_shared(m_size); + m_pMemBlock = std::make_shared(m_size); } try { @@ -494,7 +451,7 @@ StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const vo m_prim = dnnl::memory(dnnl_desc->getDnnlDesc(), m_eng, DNNL_MEMORY_NONE); // // ======================== - m_prim.set_data_handle(m_pMemMngr->getRawPtr()); + m_prim.set_data_handle(m_pMemBlock->getRawPtr()); } catch (const std::exception& exc) { dnnlErrorCtx = exc.what(); @@ -517,7 +474,7 @@ MemoryDescPtr StaticMemory::getDescPtr() const { } void* StaticMemory::getData() const { - return m_pMemMngr->getRawPtr(); + return 
m_pMemBlock->getRawPtr(); } size_t StaticMemory::getSize() const { @@ -543,8 +500,8 @@ void StaticMemory::load(const IMemory& src, bool ftz) const { transferData(src, *this, ftz); } -MemoryMngrPtr StaticMemory::getMemoryMngr() const { - return m_pMemMngr; +MemoryBlockPtr StaticMemory::getMemoryBlock() const { + return m_pMemBlock; } //oneDNN specifics for backward compatibility @@ -561,38 +518,38 @@ void StaticMemory::nullify() { memset(dataPtr, 0, getSize()); } -StaticMemory::StaticMemoryMngr::StaticMemoryMngr(size_t size) : m_size(size) { - memMngrImpl.resize(m_size); +StaticMemory::StaticMemoryBlock::StaticMemoryBlock(size_t size) : m_size(size) { + memBlockImpl.resize(m_size); } -StaticMemory::StaticMemoryMngr::StaticMemoryMngr(void* data, size_t size) : m_size(size) { - memMngrImpl.setExtBuff(data, m_size); +StaticMemory::StaticMemoryBlock::StaticMemoryBlock(void* data, size_t size) : m_size(size) { + memBlockImpl.setExtBuff(data, m_size); } -void* StaticMemory::StaticMemoryMngr::getRawPtr() const noexcept { - return memMngrImpl.getRawPtr(); +void* StaticMemory::StaticMemoryBlock::getRawPtr() const noexcept { + return memBlockImpl.getRawPtr(); } -void StaticMemory::StaticMemoryMngr::setExtBuff(void* ptr, size_t size) { - OPENVINO_THROW("Unexpected: StaticMemoryMngr may not be modified"); +void StaticMemory::StaticMemoryBlock::setExtBuff(void* ptr, size_t size) { + OPENVINO_THROW("Unexpected: StaticMemoryBlock may not be modified"); } -bool StaticMemory::StaticMemoryMngr::resize(size_t size) { +bool StaticMemory::StaticMemoryBlock::resize(size_t size) { if (size != m_size) { - OPENVINO_THROW("Unexpected: StaticMemoryMngr may not resize the memory"); + OPENVINO_THROW("Unexpected: StaticMemoryBlock may not resize the memory"); } return false; } -bool StaticMemory::StaticMemoryMngr::hasExtBuffer() const noexcept { - return memMngrImpl.hasExtBuffer(); +bool StaticMemory::StaticMemoryBlock::hasExtBuffer() const noexcept { + return memBlockImpl.hasExtBuffer(); } 
-void StaticMemory::StaticMemoryMngr::registerMemory(Memory* memPtr) { +void StaticMemory::StaticMemoryBlock::registerMemory(Memory* memPtr) { //do nothing } -void StaticMemory::StaticMemoryMngr::unregisterMemory(Memory* memPtr) { +void StaticMemory::StaticMemoryBlock::unregisterMemory(Memory* memPtr) { //do nothing } diff --git a/src/plugins/intel_cpu/src/cpu_memory.h b/src/plugins/intel_cpu/src/cpu_memory.h index 69b70cb6e583a3..65d61bc97a2693 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.h +++ b/src/plugins/intel_cpu/src/cpu_memory.h @@ -29,16 +29,16 @@ namespace ov { namespace intel_cpu { class Memory; -class ProxyMemoryMngr; +class ProxyMemoryBlock; /** - * @interface IMemoryMngr + * @interface IMemoryBlock * @brief An interface to memory control object */ -class IMemoryMngr { +class IMemoryBlock { public: - virtual ~IMemoryMngr() = default; + virtual ~IMemoryBlock() = default; /** * @brief Accessor to underlying memory buffer @@ -68,11 +68,11 @@ class IMemoryMngr { }; /** - * @brief An implementation of the mem manager where memory reallocation occurs only if a bigger buffer is requested. + * @brief An implementation of the mem block where memory reallocation occurs only if a bigger buffer is requested. 
*/ -class MemoryMngrWithReuse : public IMemoryMngr { +class MemoryBlockWithReuse : public IMemoryBlock { public: - MemoryMngrWithReuse(int numa_node = -1) : m_data(nullptr, release), numa_node(numa_node) {} + MemoryBlockWithReuse(int numa_node = -1) : m_data(nullptr, release), numa_node(numa_node) {} void* getRawPtr() const noexcept override; void setExtBuff(void* ptr, size_t size) override; bool resize(size_t size) override; @@ -88,24 +88,7 @@ class MemoryMngrWithReuse : public IMemoryMngr { static void destroy(void *ptr); }; -class MemoryMngrRealloc : public IMemoryMngr { -public: - MemoryMngrRealloc() : m_data(nullptr, release) {} - void* getRawPtr() const noexcept override; - void setExtBuff(void* ptr, size_t size) override; - bool resize(size_t size) override; - bool hasExtBuffer() const noexcept override; - -private: - bool m_useExternalStorage = false; - size_t m_memUpperBound = 0ul; - std::unique_ptr m_data; - - static void release(void *ptr); - static void destroy(void *ptr); -}; - -class IMemoryMngrObserver : public IMemoryMngr { +class IMemoryBlockObserver : public IMemoryBlock { public: virtual void registerMemory(Memory* memPtr) = 0; virtual void unregisterMemory(Memory* memPtr) = 0; @@ -114,9 +97,9 @@ class IMemoryMngrObserver : public IMemoryMngr { /** * @brief A proxy object that additionally implements observer pattern */ -class DnnlMemoryMngr : public IMemoryMngrObserver { +class DnnlMemoryBlock : public IMemoryBlockObserver { public: - explicit DnnlMemoryMngr(std::unique_ptr mngr) : m_pMemMngr(std::move(mngr)) {} + explicit DnnlMemoryBlock(std::unique_ptr memBlock) : m_pMemBlock(std::move(memBlock)) {} void* getRawPtr() const noexcept override; void setExtBuff(void* ptr, size_t size) override; bool resize(size_t size) override; @@ -129,49 +112,49 @@ class DnnlMemoryMngr : public IMemoryMngrObserver { private: std::unordered_set m_setMemPtrs; - std::unique_ptr m_pMemMngr; + std::unique_ptr m_pMemBlock; }; -using MemoryMngrPtr = std::shared_ptr; 
-using MemoryMngrCPtr = std::shared_ptr; +using MemoryBlockPtr = std::shared_ptr; +using MemoryBlockCPtr = std::shared_ptr; -class DnnlMemMngrHandle { +class DnnlMemBlockHandle { public: - DnnlMemMngrHandle(MemoryMngrPtr pMgr, Memory* pMem) : m_pMgr(pMgr), m_pMem(pMem) { - if (m_pMgr) { - m_pMgr->registerMemory(m_pMem); + DnnlMemBlockHandle(MemoryBlockPtr pBlock, Memory* pMem) : m_pMemBlock(pBlock), m_pMem(pMem) { + if (m_pMemBlock) { + m_pMemBlock->registerMemory(m_pMem); } } - DnnlMemMngrHandle(const DnnlMemMngrHandle&) = delete; - DnnlMemMngrHandle& operator= (const DnnlMemMngrHandle&) = delete; + DnnlMemBlockHandle(const DnnlMemBlockHandle&) = delete; + DnnlMemBlockHandle& operator= (const DnnlMemBlockHandle&) = delete; - DnnlMemMngrHandle(DnnlMemMngrHandle&& source) { - std::swap(m_pMgr, source.m_pMgr); + DnnlMemBlockHandle(DnnlMemBlockHandle&& source) { + std::swap(m_pMemBlock, source.m_pMemBlock); std::swap(m_pMem, source.m_pMem); } - DnnlMemMngrHandle& operator= (DnnlMemMngrHandle&& rhs) { - std::swap(m_pMgr, rhs.m_pMgr); + DnnlMemBlockHandle& operator= (DnnlMemBlockHandle&& rhs) { + std::swap(m_pMemBlock, rhs.m_pMemBlock); std::swap(m_pMem, rhs.m_pMem); return *this; } - ~DnnlMemMngrHandle() { - if (m_pMgr) { - m_pMgr->unregisterMemory(m_pMem); + ~DnnlMemBlockHandle() { + if (m_pMemBlock) { + m_pMemBlock->unregisterMemory(m_pMem); } } - MemoryMngrPtr get() const { - return m_pMgr; + MemoryBlockPtr get() const { + return m_pMemBlock; } - MemoryMngrPtr::element_type* operator->() const noexcept { - return m_pMgr.get(); + MemoryBlockPtr::element_type* operator->() const noexcept { + return m_pMemBlock.get(); } private: - MemoryMngrPtr m_pMgr = nullptr; + MemoryBlockPtr m_pMemBlock = nullptr; Memory* m_pMem = nullptr; }; @@ -200,13 +183,13 @@ class IMemory { virtual const VectorDims& getStaticDims() const = 0; // Redefines descriptor. The memory descriptor will be replaced with the new one. 
- // Memory will not be reallocated if the new tensor size is less or equal the upper bound. + // Memory will not be reallocated according to the dynamic memory block policy // Caution!!! This action invalidates the previous data layout. The old data may become unreachable. virtual void redefineDesc(MemoryDescPtr desc) = 0; virtual void load(const IMemory& src, bool ftz = true) const = 0; - virtual MemoryMngrPtr getMemoryMngr() const = 0; + virtual MemoryBlockPtr getMemoryBlock() const = 0; //oneDNN specifics for backward compatibility virtual dnnl::memory getPrimitive() const = 0; @@ -229,10 +212,10 @@ class IMemory { class StaticMemory final : public IMemory { public: - class StaticMemoryMngr : public IMemoryMngrObserver { + class StaticMemoryBlock : public IMemoryBlockObserver { public: - explicit StaticMemoryMngr(size_t size); - StaticMemoryMngr(void* data, size_t size); + explicit StaticMemoryBlock(size_t size); + StaticMemoryBlock(void* data, size_t size); void* getRawPtr() const noexcept override; void setExtBuff(void* ptr, size_t size) override; bool resize(size_t size) override; @@ -242,10 +225,10 @@ class StaticMemory final : public IMemory { private: size_t m_size = 0; - MemoryMngrWithReuse memMngrImpl; + MemoryBlockWithReuse memBlockImpl; }; - using MemMngrPtr = std::shared_ptr; + using MemBlockPtr = std::shared_ptr; public: StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = true); @@ -273,7 +256,7 @@ class StaticMemory final : public IMemory { void load(const IMemory& src, bool ftz = true) const override; - MemoryMngrPtr getMemoryMngr() const override; + MemoryBlockPtr getMemoryBlock() const override; //oneDNN specifics for backward compatibility dnnl::memory getPrimitive() const override; @@ -285,7 +268,7 @@ class StaticMemory final : public IMemory { MemoryDescPtr m_pMemDesc; size_t m_size; dnnl::memory m_prim; - MemMngrPtr m_pMemMngr; + MemBlockPtr m_pMemBlock; std::string dnnlErrorCtx; }; @@ 
-293,8 +276,8 @@ class Memory : public IMemory { public: Memory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = true); Memory(const dnnl::engine& eng, const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); - Memory(const dnnl::engine& eng, MemoryDescPtr desc, MemoryMngrPtr mngr); - Memory(const dnnl::engine& eng, const MemoryDesc& desc, MemoryMngrPtr mbgr); + Memory(const dnnl::engine& eng, MemoryDescPtr desc, MemoryBlockPtr block); + Memory(const dnnl::engine& eng, const MemoryDesc& desc, MemoryBlockPtr block); Memory(const Memory&) = delete; Memory& operator= (const Memory&) = delete; @@ -326,9 +309,6 @@ class Memory : public IMemory { return getDesc().getShape().getStaticDims(); } - // Redefines descriptor. The memory descriptor will be replaced with the new one. - // Memory will not be reallocated if the new tensor size is less or equal the upper bound. - // Caution!!! This action invalidates the previous data layout. The old data may become unreachable. 
void redefineDesc(MemoryDescPtr desc) override; void load(const IMemory& src, bool ftz = true) const override; @@ -338,13 +318,13 @@ class Memory : public IMemory { return m_eng; } - MemoryMngrPtr getMemoryMngr() const override { - return m_mgrHandle.get(); + MemoryBlockPtr getMemoryBlock() const override { + return m_blockHandle.get(); } private: - friend DnnlMemoryMngr; - friend ProxyMemoryMngr; + friend DnnlMemoryBlock; + friend ProxyMemoryBlock; private: void update(); @@ -355,7 +335,7 @@ class Memory : public IMemory { private: dnnl::engine m_eng; MemoryDescPtr m_pMemDesc; - DnnlMemMngrHandle m_mgrHandle; + DnnlMemBlockHandle m_blockHandle; bool m_padsZeroing = true; class DnnlMemPrimHandle { public: @@ -373,7 +353,7 @@ class Memory : public IMemory { } dnnlMemHandle; void* getDataNoThrow() const noexcept { - return m_mgrHandle->getRawPtr(); + return m_blockHandle->getRawPtr(); } }; @@ -381,9 +361,9 @@ class StringMemory : public IMemory { public: using OvString = ov::element_type_traits::value_type; - class StringMemoryMngr { + class StringMemoryBlock { public: - StringMemoryMngr() : m_data(nullptr, release) {} + StringMemoryBlock() : m_data(nullptr, release) {} OvString* getStringPtr() const noexcept; void setExtBuff(OvString* ptr, size_t size); size_t getStrLen() const noexcept; @@ -400,18 +380,18 @@ class StringMemory : public IMemory { static void destroy(OvString* ptr); }; - using StringMemoryMngrPtr = std::shared_ptr; + using StringMemoryBlockPtr = std::shared_ptr; StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc, const void* data = nullptr); StringMemory(const dnnl::engine& engine, const MemoryDesc& desc, const void* data = nullptr) : StringMemory(engine, desc.clone(), data) {} - StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc, const StringMemoryMngrPtr& manager) - : m_engine(engine), m_mem_desc(desc), m_manager(manager) {} + StringMemory(const dnnl::engine& engine, const MemoryDescPtr& desc, const 
StringMemoryBlockPtr& block) + : m_engine(engine), m_mem_desc(desc), m_memoryBlock(block) {} - StringMemory(const dnnl::engine& engine, const MemoryDesc& desc, const StringMemoryMngrPtr& manager) - : StringMemory(engine, desc.clone(), manager) {} + StringMemory(const dnnl::engine& engine, const MemoryDesc& desc, const StringMemoryBlockPtr& block) + : StringMemory(engine, desc.clone(), block) {} bool isAllocated() const noexcept override; @@ -439,10 +419,10 @@ class StringMemory : public IMemory { void load(const IMemory& src, bool ftz = false) const override; - MemoryMngrPtr getMemoryMngr() const override; + MemoryBlockPtr getMemoryBlock() const override; - StringMemoryMngrPtr getStringMemoryMngrPtr() const { - return m_manager; + StringMemoryBlockPtr getStringMemoryBlockPtr() const { + return m_memoryBlock; } dnnl::memory getPrimitive() const override; @@ -452,7 +432,7 @@ class StringMemory : public IMemory { private: dnnl::engine m_engine; MemoryDescPtr m_mem_desc; - StringMemoryMngrPtr m_manager; + StringMemoryBlockPtr m_memoryBlock; }; using MemoryPtr = std::shared_ptr; diff --git a/src/plugins/intel_cpu/src/dnnl_scratch_pad.h b/src/plugins/intel_cpu/src/dnnl_scratch_pad.h index a589b9dbb0cf71..6f356e58c4770b 100644 --- a/src/plugins/intel_cpu/src/dnnl_scratch_pad.h +++ b/src/plugins/intel_cpu/src/dnnl_scratch_pad.h @@ -13,16 +13,16 @@ namespace ov { namespace intel_cpu { class DnnlScratchPad { - MemoryMngrPtr mgrPtr; + MemoryBlockPtr blockPtr; dnnl::engine eng; public: DnnlScratchPad(const dnnl::engine& eng, int numa_node = -1) : eng(eng) { - mgrPtr = std::make_shared(make_unique(numa_node)); + blockPtr = std::make_shared(make_unique(numa_node)); } MemoryPtr createScratchPadMem(const MemoryDescPtr& md) { - return std::make_shared(eng, md, mgrPtr); + return std::make_shared(eng, md, blockPtr); } }; diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index c193cb1641285b..0a9bc4cae34ddf 100644 --- 
a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -273,14 +273,14 @@ void Edge::allocate(const void* mem_ptr) { allocateCommon(allocateFunc); } -void Edge::allocate(MemoryMngrPtr memMngr) { - if (!memMngr) { - OPENVINO_THROW("Unexpected: Memory manager ptr is NULL"); +void Edge::allocate(MemoryBlockPtr memBlock) { + if (!memBlock) { + OPENVINO_THROW("Unexpected: Memory block ptr is NULL"); } auto allocateFunc = [OV_CAPTURE_CPY_AND_THIS](const MemoryDesc& inputDesc) -> MemoryPtr { auto parentPtr = getParent(); - return std::make_shared(parentPtr->getEngine(), inputDesc, memMngr); + return std::make_shared(parentPtr->getEngine(), inputDesc, memBlock); }; allocateCommon(allocateFunc); diff --git a/src/plugins/intel_cpu/src/edge.h b/src/plugins/intel_cpu/src/edge.h index e9f26a2d8955b4..29cb8113943cd3 100644 --- a/src/plugins/intel_cpu/src/edge.h +++ b/src/plugins/intel_cpu/src/edge.h @@ -52,7 +52,7 @@ class Edge { void init(); void allocate(const void* mem_ptr = nullptr); - void allocate(MemoryMngrPtr memMngr); + void allocate(MemoryBlockPtr memBlock); void externalAllocate(WeightsSharing::Ptr weightsCache); void reuse(MemoryPtr ptr); void validate(); diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index c939eb15eb8555..fd8f211d25f10a 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -727,7 +727,7 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { // Special allocation for string tensors if (edge->getDesc().getPrecision() == element::string && edge->getStatus() == Edge::Status::NeedAllocation) { - StringMemory::StringMemoryMngrPtr mngr; + StringMemory::StringMemoryBlockPtr memBlcok; if (edge->getParent()->isConstant()) { if (edge->getParent()->getType() == Type::Input) { auto constNode = static_cast(edge->getParent().get()); @@ -738,11 +738,11 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { auto stringMemory = 
dynamic_cast(edge->getMemoryPtr().get()); OPENVINO_ASSERT(stringMemory, "[CPU] Edge between nodes '", edge->getParent()->getName(), "' and '", edge->getChild()->getName(), "' must have StringMemory."); - mngr = stringMemory->getStringMemoryMngrPtr(); + memBlcok = stringMemory->getStringMemoryBlockPtr(); } else { auto memory = std::make_shared(getEngine(), edge->getDesc()); edge->reuse(memory); - mngr = memory->getStringMemoryMngrPtr(); + memBlcok = memory->getStringMemoryBlockPtr(); } for (auto& edge_c : cluster) { if (edge_c == edge) { @@ -750,7 +750,7 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { } OPENVINO_ASSERT(edge_c->getDesc().getPrecision() == element::string, "All edges in the cluster must be string."); if (edge_c->getStatus() == Edge::Status::NotAllocated) { - auto memory = std::make_shared(getEngine(), edge_c->getDesc(), mngr); + auto memory = std::make_shared(getEngine(), edge_c->getDesc(), memBlcok); edge_c->reuse(memory); } else { OPENVINO_THROW("[CPU] String tensors allocation in the cluster. Edge between nodes '", edge_c->getParent()->getName(), "' and '", @@ -868,23 +868,23 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { //Process undefined boxes (dynamic shapes) if (!undefinedBoxes.empty()) { - // Use proxy memory manager for output edges + // Use proxy memory block for output edges for (const auto& box : undefinedBoxes) { for (auto& edge : edge_clusters[box.id]) { const auto child = edge->getChild(); if (child->getType() == Type::Output && edge->getStatus() == Edge::Status::NeedAllocation) { - auto proxyMemMngr = - std::make_shared(); - DEBUG_LOG("ProxyMemoryMngr ", proxyMemMngr, " ", this); - edge->allocate(proxyMemMngr); + auto proxyMemBlock = + std::make_shared(); + DEBUG_LOG("ProxyMemoryBlock ", proxyMemBlock, " ", this); + edge->allocate(proxyMemBlock); - // Store the output memory managers. + // Store the output memory blocks. // So that, the infer requests can be able to access them. 
int count = 0; for (auto &output : outputNodesMap) { if (output.second == child) { - outputNodesMemMngrMap[output.first] = proxyMemMngr; + outputNodesMemBlocksMap[output.first] = proxyMemBlock; count++; } } @@ -941,12 +941,12 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { } } for (auto& group : groups) { - auto grpMemMngr = - std::make_shared(make_unique()); + auto grpMemBlock = + std::make_shared(make_unique()); for (auto& box : group) { for (auto& edge : edge_clusters[box.id]) { if (edge->getStatus() == Edge::Status::NeedAllocation) { - edge->allocate(grpMemMngr); + edge->allocate(grpMemBlock); } } } @@ -975,7 +975,7 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { } else { auto sharedEdge = edge->getSharedEdge(); auto sharedEdgeParent = sharedEdge->getParent(); - edge->allocate(sharedEdge->getMemoryPtr()->getMemoryMngr()); + edge->allocate(sharedEdge->getMemoryPtr()->getMemoryBlock()); DEBUG_LOG(*edge, " sharedEdge with ", *sharedEdge); } } @@ -1021,7 +1021,7 @@ bool Graph::ProcessDynNodes() { return node->isDynamicNode(); }); // In case of dynamic shapes, tensors may be resized due to the shapes variations. - // If the input tensor is included to memory reuse, it means that its memory manager is shared with other tensors in the graph, which in turn may cause data + // If the input tensor is included to memory reuse, it means that its memory block is shared with other tensors in the graph, which in turn may cause data // loss when one of the tensors down the graph requests mem resize, while the input data have not been yet read by the consumers. To avoid such situations // we disable io mem reuse for the case of dynamic shapes. 
if (containsDynamicNodes) { diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index 4e6d6e6f3beca6..ffcc970af6a5b6 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -244,7 +244,7 @@ class Graph { std::map inputNodesMap; std::map outputNodesMap; - std::unordered_map outputNodesMemMngrMap; + std::unordered_map outputNodesMemBlocksMap; // these node pointers (from graphNodes) are to avoid regular checking for // constantness of nodes in Infer methods and calls of diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index 82b72137b6b561..c9ebddfbcbe018 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -154,13 +154,13 @@ static inline void change_edge_ptr(const EdgePtr& edge, ov::SoPtr& OPENVINO_ASSERT(mem != nullptr, "Edge with name '", edge->name(), "' doesn't have allocated memory object."); if (tensor->get_element_type() == element::string) { - auto memMngr = dynamic_cast(mem.get())->getStringMemoryMngrPtr(); - OPENVINO_ASSERT(memMngr); - memMngr->setExtBuff(tensor->data(), tensor->get_size()); + auto memBlock = dynamic_cast(mem.get())->getStringMemoryBlockPtr(); + OPENVINO_ASSERT(memBlock); + memBlock->setExtBuff(tensor->data(), tensor->get_size()); } else { - auto memMngr = mem->getMemoryMngr(); - OPENVINO_ASSERT(memMngr); - memMngr->setExtBuff(tensor->data(), tensor->get_byte_size()); + auto memBlock = mem->getMemoryBlock(); + OPENVINO_ASSERT(memBlock); + memBlock->setExtBuff(tensor->data(), tensor->get_byte_size()); } } @@ -271,13 +271,13 @@ void SyncInferRequest::change_default_ptr() { } if (Graph::Status::ReadyDynamic == m_graph->getStatus()) { - const auto &outMemMngrMap = m_graph->outputNodesMemMngrMap; - for (auto&& item : outMemMngrMap) { + const auto &outMemBlocksMap = m_graph->outputNodesMemBlocksMap; + for (auto&& item : outMemBlocksMap) { const auto& name = item.first; - 
// share intel_cpu::Tensor to Graph by injecting to corresponding ProxyMemoryMngr instance. - auto outputMemMngr = item.second; - OPENVINO_ASSERT(outputMemMngr, "proxy mem manager for output ", name, " is empty."); + // share intel_cpu::Tensor to Graph by injecting to corresponding ProxyMemoryBlock instance. + auto outputMemBlock = item.second; + OPENVINO_ASSERT(outputMemBlock, "proxy mem block for output ", name, " is empty."); auto controlBlockItr = m_outputControlBlocks.find(name); @@ -288,15 +288,15 @@ void SyncInferRequest::change_default_ptr() { //avoid cyclic memory use auto&& controlBlock = controlBlockItr->second; - std::shared_ptr memMngr = inputPtrs.count(controlBlock.rawPtr()) ? // same memory is used on the input and output - controlBlock.nextMemMngr() : // then swap internal buffer to avoid data corruption - controlBlock.currentMemMngr(); // else reuse the existing buffer + std::shared_ptr memBlock = inputPtrs.count(controlBlock.rawPtr()) ? // same memory is used on the input and output + controlBlock.nextMemBlock() : // then swap internal buffer to avoid data corruption + controlBlock.currentMemBlock(); // else reuse the existing buffer - outputMemMngr->setMemMngrResize(memMngr); - DEBUG_LOG("reset proxy ", outputMemMngr, ", actual ", controlBlock.currentMemMngr(), " graph ", m_graph, " inferrequest ", this); + outputMemBlock->setMemBlockResize(memBlock); + DEBUG_LOG("reset proxy ", outputMemBlock, ", actual ", controlBlock.currentMemBlock(), " graph ", m_graph, " inferrequest ", this); DEBUG_LOG(name, ", tensor ", controlBlock.tensor()); } else { - outputMemMngr->reset(); // switch to the internal memory since memory sharing is no longer possible + outputMemBlock->reset(); // switch to the internal memory since memory sharing is no longer possible } } } @@ -536,8 +536,8 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyn DEBUG_LOG(port_index, ", tensor ", control_block.tensor(), - ", memmngr ", - 
control_block.tensor()->get_memory()->getMemoryMngr(), + ", memBlock ", + control_block.tensor()->get_memory()->getMemoryBlock(), "memory object ", control_block.tensor()->get_memory().get()); @@ -581,8 +581,8 @@ void SyncInferRequest::push_input_data() { SyncInferRequest::OutputControlBlock::OutputControlBlock(const ov::element::Type& precision, const Shape& shape) { dnnl::engine eng(dnnl::engine::kind::cpu, 0); - m_buffers[m_buffIndx] = std::make_shared(); - m_proxyMemMngr = std::make_shared(m_buffers[m_buffIndx]); + m_buffers[m_buffIndx] = std::make_shared(); + m_proxyMemBlock = std::make_shared(m_buffers[m_buffIndx]); VectorDims memDims; if (shape.isDynamic()) { // this is a WA since the ITensor doesn't allow dyn shapes @@ -596,7 +596,7 @@ SyncInferRequest::OutputControlBlock::OutputControlBlock(const ov::element::Type CpuBlockedMemoryDescPtr desc = std::make_shared(precision, Shape{memDims}); - auto memory = std::make_shared(eng, desc, m_proxyMemMngr); + auto memory = std::make_shared(eng, desc, m_proxyMemBlock); m_tensor = std::make_shared(memory); } diff --git a/src/plugins/intel_cpu/src/infer_request.h b/src/plugins/intel_cpu/src/infer_request.h index e3839466f0ce0e..9344baf1879fe9 100644 --- a/src/plugins/intel_cpu/src/infer_request.h +++ b/src/plugins/intel_cpu/src/infer_request.h @@ -50,7 +50,7 @@ class SyncInferRequest : public ov::ISyncInferRequest { private: class OutputControlBlock { public: - using MemMngrPtr = std::shared_ptr; + using MemBlockPtr = std::shared_ptr; public: OutputControlBlock(const ov::element::Type& precision, const Shape& shape); @@ -69,26 +69,26 @@ class SyncInferRequest : public ov::ISyncInferRequest { return m_tensor->get_memory()->getData(); } - MemMngrPtr currentMemMngr() const { + MemBlockPtr currentMemBlock() const { return m_buffers[m_buffIndx]; } - MemMngrPtr nextMemMngr() { + MemBlockPtr nextMemBlock() { m_buffIndx ^= 0x1; if (!m_buffers[m_buffIndx]) { - m_buffers[m_buffIndx] = std::make_shared(); + m_buffers[m_buffIndx] 
= std::make_shared(); } return m_buffers[m_buffIndx]; } void update() { - m_proxyMemMngr->setMemMngrResize(currentMemMngr()); + m_proxyMemBlock->setMemBlockResize(currentMemBlock()); } private: std::shared_ptr m_tensor = nullptr; - ProxyMemoryMngrPtr m_proxyMemMngr = nullptr; - std::array m_buffers; + ProxyMemoryBlockPtr m_proxyMemBlock = nullptr; + std::array m_buffers; int m_buffIndx = 0; }; diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 326b3e907dcc8f..62c091c9985300 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -365,9 +365,9 @@ void Node::resolveInPlaceEdges(Edge::LOOK look) { " Could not find an allocated edge to resolve in-place for node: ", getName()); - auto baseMemMngr = (*itr)->getMemory().getMemoryMngr(); - auto memMngr = std::make_shared(baseMemMngr); - auto newMem = std::make_shared(getEngine(), selected_pd->getConfig().inConfs[i].getMemDesc(), memMngr); + auto baseMemBlock = (*itr)->getMemory().getMemoryBlock(); + auto memBlock = std::make_shared(baseMemBlock); + auto newMem = std::make_shared(getEngine(), selected_pd->getConfig().inConfs[i].getMemDesc(), memBlock); parentEdge->reuse(newMem); } } @@ -378,15 +378,15 @@ void Node::resolveInPlaceEdges(Edge::LOOK look) { if (inplaceInpIndx < 0) continue; - auto baseMemMngr = getParentEdgeAt(inplaceInpIndx)->getMemory().getMemoryMngr(); - auto memMngr = std::make_shared(baseMemMngr); + auto baseMemBlock = getParentEdgeAt(inplaceInpIndx)->getMemory().getMemoryBlock(); + auto memBlock = std::make_shared(baseMemBlock); const auto& childEdges = getChildEdgesAtPort(i); for (auto& childEdge : childEdges) { OPENVINO_ASSERT(childEdge->getStatus() == Edge::Status::NotAllocated, " Unexpected inplace resolve call to an allocated edge: ", childEdge->name()); - auto newMem = std::make_shared(getEngine(), selected_pd->getConfig().outConfs[i].getMemDesc(), memMngr); + auto newMem = std::make_shared(getEngine(), 
selected_pd->getConfig().outConfs[i].getMemDesc(), memBlock); childEdge->reuse(newMem); } } diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index f02863b6f707aa..216e2b135d6cff 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -694,8 +694,8 @@ void Concat::resolveInPlaceEdges(Edge::LOOK look) { auto itr = std::find_if(edges.begin(), edges.end(), [](const EdgePtr& edge) { return edge->getStatus() == Edge::Status::Allocated; }); OPENVINO_ASSERT(itr != edges.end(), " Could not find allocated child edge for concat node: " , getName()); - auto baseMemMngr = (*itr)->getMemory().getMemoryMngr(); - OPENVINO_ASSERT(baseMemMngr != nullptr, " NULL base memory manager in concat node: " , getName()); + auto baseMemBlock = (*itr)->getMemory().getMemoryBlock(); + OPENVINO_ASSERT(baseMemBlock != nullptr, " NULL base memory block in concat node: ", getName()); ptrdiff_t offset = 0; for (size_t i = 0; i < numberOfInputs; ++i) { @@ -714,8 +714,8 @@ void Concat::resolveInPlaceEdges(Edge::LOOK look) { auto memDesc = selected_pd->getConfig().inConfs[i].getMemDesc(); MemoryPtr newMem; if (partDim != 0) { - auto memMngr = std::make_shared(baseMemMngr, baseDim, offset, partDim); - newMem = std::make_shared(getEngine(), memDesc, memMngr); + auto memBlock = std::make_shared(baseMemBlock, baseDim, offset, partDim); + newMem = std::make_shared(getEngine(), memDesc, memBlock); } else { // empty tensor, no need to reference a part, default memory is enough newMem = std::make_shared(getEngine(), memDesc); diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 2422e2d3bb041c..60c59ea13708bb 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -1531,7 +1531,7 @@ void Convolution::executeDynamicImpl(dnnl::stream strm) { const auto& sumInpMem = getParentEdgeAt(sumPortNum)->getMemory(); auto 
inp1 = subgraph->getInput(1); auto inp1Mem = inp1->getDstMemoryAtPort(0); - inp1Mem->getMemoryMngr()->setExtBuff(sumInpMem.getData(), sumInpMem.getSize()); + inp1Mem->getMemoryBlock()->setExtBuff(sumInpMem.getData(), sumInpMem.getSize()); subgraph->infer(); diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp index d3f1ae0ba691a5..499f172eca3645 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.cpp +++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp @@ -270,9 +270,9 @@ void Deconvolution::createDnnlCompatibleWeights() { Shape(dnnlCompatibleWeiDims), blockedDims, order); - // Create the memory with the edge memory mgr. In the case of the weight memory changes when inference, + // Create the memory with the edge memory block. In the case of the weight memory changes when inference, // dnnlCompatibleWeights memory would be updated automatically via update inform mechanism. - dnnlCompatibleWeights = std::make_shared(getEngine(), desc, blob->getMemoryMngr()); + dnnlCompatibleWeights = std::make_shared(getEngine(), desc, blob->getMemoryBlock()); } bool Deconvolution::canBeExecutedInInt8() const { diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp index 434a32073aca4f..799ec3d480028b 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather.cpp @@ -945,7 +945,7 @@ void Gather::resolveInPlaceEdges(Edge::LOOK look) { "Gather node: ", getName(), " can not use inPlace memory with splitting on dynamic dimention"); - auto baseMemMngr = getParentEdgeAt(inplaceInpIndx)->getMemory().getMemoryMngr(); + auto baseMemBlock = getParentEdgeAt(inplaceInpIndx)->getMemory().getMemoryBlock(); const auto index = constIndices.front(); const ptrdiff_t offset = index < 0 ? 
baseDim + index : index; const auto& childEdges = getChildEdgesAtPort(outputPort); @@ -956,8 +956,8 @@ void Gather::resolveInPlaceEdges(Edge::LOOK look) { " with type ", getTypeStr()); - auto memMngr = std::make_shared(baseMemMngr, baseDim, offset); - auto newMem = std::make_shared(getEngine(), config.outConfs[outputPort].getMemDesc(), memMngr); + auto memBlock = std::make_shared(baseMemBlock, baseDim, offset); + auto newMem = std::make_shared(getEngine(), config.outConfs[outputPort].getMemDesc(), memBlock); childEdge->reuse(newMem); } diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp index f372259b783e50..9e70720e50146d 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory.cpp @@ -59,8 +59,8 @@ class MemoryStub : public IMemory { OPENVINO_THROW("Unexpected call MemoryStub::load()"); } - MemoryMngrPtr getMemoryMngr() const override { - OPENVINO_THROW("Unexpected call MemoryStub::getMemoryMngr()"); + MemoryBlockPtr getMemoryBlock() const override { + OPENVINO_THROW("Unexpected call MemoryStub::getMemoryBlock()"); } dnnl::memory getPrimitive() const override { @@ -233,8 +233,8 @@ void MemoryOutput::resolveInPlaceEdges(Edge::LOOK look) { " Unexpected inplace resolve call to an allocated edge: ", parentEdge->name()); auto memDesc = selected_pd->getConfig().inConfs.front().getMemDesc(); - memMngr = std::make_shared(); - auto edgeMem = std::make_shared(getEngine(), memDesc, memMngr); + memBlock = std::make_shared(); + auto edgeMem = std::make_shared(getEngine(), memDesc, memBlock); parentEdge->reuse(edgeMem); } @@ -251,13 +251,13 @@ void MemoryOutput::assignExtMemory(const MemoryPtr& mem, const MemoryDescPtr& me getName(), " assigned state has null base mem desc ptr"); - if (!memMngr) { return; } //nothing to do, edge memory isn't under control + if (!memBlock) { return; } //nothing to do, edge memory isn't under control auto inpDesc = getBaseMemDescAtInputPort(0); if 
(inpDesc->isCompatible(*extMemDesc)) { - memMngr->setMemMngrResize(assignedMem->getMemoryMngr()); + memBlock->setMemBlockResize(assignedMem->getMemoryBlock()); } else { - memMngr->reset(); + memBlock->reset(); } } @@ -569,20 +569,20 @@ void MemoryInput::runDynamic(dnnl::stream strm) { getName(), " assigned state has null memory ptr"); - // check whether we can share memory manager + // check whether we can share memory block const auto& stateDims = assignedMem->getStaticDims(); const bool hasZeroDims = std::count(std::begin(stateDims), std::end(stateDims), 0) > 0; auto internDesc = getBaseMemDescAtOutputPort(0)->cloneWithNewDims(stateDims, hasZeroDims); - OPENVINO_ASSERT(memMngr, + OPENVINO_ASSERT(memBlock, "MemoryInput ", getName(), - " has uninitialized memory manager."); + " has uninitialized memory block."); if (internDesc->isCompatible(assignedMem->getDesc())) { - memMngr->setMemMngr(assignedMem->getMemoryMngr()); + memBlock->setMemBlock(assignedMem->getMemoryBlock()); } else { - memMngr->reset(); + memBlock->reset(); } const bool processInitGraph = needInitGraphProcessing(); @@ -619,15 +619,15 @@ void MemoryInput::runStatic(dnnl::stream strm) { auto internDesc = getBaseMemDescAtOutputPort(0); - OPENVINO_ASSERT(memMngr, + OPENVINO_ASSERT(memBlock, "MemoryInput ", getName(), - " has uninitialized memory manager."); + " has uninitialized memory block."); if (internDesc->isCompatible(assignedMem->getDesc())) { - memMngr->setMemMngr(assignedMem->getMemoryMngr()); + memBlock->setMemBlock(assignedMem->getMemoryBlock()); } else { - memMngr->reset(); + memBlock->reset(); } const auto processInitGraph = needInitGraphProcessing(); @@ -653,13 +653,13 @@ void MemoryInput::resolveInPlaceEdges(Edge::LOOK look) { " failed getSelectedPrimitiveDescriptor() call, preferable primitive descriptor is not set"); auto memDesc = selected_pd->getConfig().outConfs.front().getMemDesc(); - memMngr = std::make_shared(); + memBlock = std::make_shared(); for (auto&& edge : 
getChildEdgesAtPort(0)) { // always only one child port OPENVINO_ASSERT(one_of(edge->getStatus(), Edge::Status::Uninitialized, Edge::Status::NotAllocated), " Unexpected inplace resolve call to an allocated edge: ", edge->name()); - auto edgeMem = std::make_shared(getEngine(), memDesc, memMngr); + auto edgeMem = std::make_shared(getEngine(), memDesc, memBlock); edge->reuse(edgeMem); } } diff --git a/src/plugins/intel_cpu/src/nodes/memory.hpp b/src/plugins/intel_cpu/src/nodes/memory.hpp index f804259b431402..88b6a3d1250f0f 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.hpp +++ b/src/plugins/intel_cpu/src/nodes/memory.hpp @@ -103,7 +103,7 @@ class MemoryOutput : public MemoryOutputBase { private: MemoryPtr assignedMem = nullptr; MemoryDescPtr extMemDesc = nullptr; // used for resize - ProxyMemoryMngrPtr memMngr = nullptr; + ProxyMemoryBlockPtr memBlock = nullptr; }; class MemoryOutputStub : public MemoryOutputBase { @@ -186,7 +186,7 @@ class MemoryInput : public MemoryInputBase { bool needInitGraphProcessing() const; private: - ProxyMemoryMngrPtr memMngr = nullptr; + ProxyMemoryBlockPtr memBlock = nullptr; }; class MemoryInputSDPA : public MemoryInputBase { diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp index 5eafc402621008..ad869bd40e03cb 100644 --- a/src/plugins/intel_cpu/src/nodes/split.cpp +++ b/src/plugins/intel_cpu/src/nodes/split.cpp @@ -541,7 +541,7 @@ void Split::resolveInPlaceEdges(Edge::LOOK look) { " Split node: ", getName(), " can not use inPlace memory with splitting on dynamic dimension"); - auto baseMemMngr = getParentEdgeAt(inplaceInpIndx)->getMemory().getMemoryMngr(); + auto baseMemBlock = getParentEdgeAt(inplaceInpIndx)->getMemory().getMemoryBlock(); ptrdiff_t offset = 0; for (size_t i = 0; i < numberOfOutputs; ++i) { auto partDim = outputShapes[i].getDims()[axis]; @@ -560,8 +560,8 @@ void Split::resolveInPlaceEdges(Edge::LOOK look) { auto memDesc = 
selected_pd->getConfig().outConfs[i].getMemDesc(); MemoryPtr newMem; if (partDim != 0) { - auto memMngr = std::make_shared(baseMemMngr, baseDim, offset, partDim); - newMem = std::make_shared(getEngine(), memDesc, memMngr); + auto memBlock = std::make_shared(baseMemBlock, baseDim, offset, partDim); + newMem = std::make_shared(getEngine(), memDesc, memBlock); } else { // empty tensor, no need to reference a part, default memory is enough newMem = std::make_shared(getEngine(), memDesc); diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp index d962546bf367f9..bd4376c8a2812e 100644 --- a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp @@ -6,28 +6,28 @@ using namespace ov::intel_cpu; -void* PartitionedMemoryMngr::getRawPtr() const noexcept { - return static_cast(m_pMngr->getRawPtr()) + m_offset_blocks * m_size / m_size_blocks; +void* PartitionedMemoryBlock::getRawPtr() const noexcept { + return static_cast(m_pBlock->getRawPtr()) + m_offset_chunks * m_size / m_size_chunks; } -void PartitionedMemoryMngr::setExtBuff(void* ptr, size_t size) { - m_pMngr->setExtBuff(ptr, size); +void PartitionedMemoryBlock::setExtBuff(void* ptr, size_t size) { + m_pBlock->setExtBuff(ptr, size); } -bool PartitionedMemoryMngr::resize(size_t size) { +bool PartitionedMemoryBlock::resize(size_t size) { m_size = size; - return m_pMngr->resize(m_size * m_total_blocks / m_size_blocks); + return m_pBlock->resize(m_size * m_total_chunks / m_size_chunks); } -bool PartitionedMemoryMngr::hasExtBuffer() const noexcept { - return m_pMngr->hasExtBuffer(); +bool PartitionedMemoryBlock::hasExtBuffer() const noexcept { + return m_pBlock->hasExtBuffer(); } -void PartitionedMemoryMngr::registerMemory(Memory* memPtr) { - m_pMngr->registerMemory(memPtr); +void PartitionedMemoryBlock::registerMemory(Memory* memPtr) { + m_pBlock->registerMemory(memPtr); } -void 
PartitionedMemoryMngr::unregisterMemory(Memory* memPtr) { - m_pMngr->unregisterMemory(memPtr); +void PartitionedMemoryBlock::unregisterMemory(Memory* memPtr) { + m_pBlock->unregisterMemory(memPtr); } diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h index 2b7b5568bbc93d..58179ce5b04d55 100644 --- a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h @@ -10,15 +10,14 @@ namespace ov { namespace intel_cpu { /** - * This is a memory manager that represents a view on a partition inside a continuous memory block controlled by - * another memory manager. + * This is a memory block that represents a view on a subblock inside another continuous dynamic memory block * */ -class PartitionedMemoryMngr : public IMemoryMngrObserver { +class PartitionedMemoryBlock : public IMemoryBlockObserver { public: - PartitionedMemoryMngr(MemoryMngrPtr pMngr, size_t total_blocks = 1, ptrdiff_t offset_blocks = 0, size_t size_blocks = 1) - : m_pMngr(pMngr), m_total_blocks(total_blocks), m_offset_blocks(offset_blocks), m_size_blocks(size_blocks) { - OPENVINO_ASSERT(m_pMngr, "Memory manager is uninitialized"); + PartitionedMemoryBlock(MemoryBlockPtr pBlock, size_t total_chunks = 1, ptrdiff_t offset_chunks = 0, size_t size_chunks = 1) + : m_pBlock(pBlock), m_total_chunks(total_chunks), m_offset_chunks(offset_chunks), m_size_chunks(size_chunks) { + OPENVINO_ASSERT(m_pBlock, "Memory block is uninitialized"); } void* getRawPtr() const noexcept override; @@ -29,10 +28,10 @@ class PartitionedMemoryMngr : public IMemoryMngrObserver { void unregisterMemory(Memory* memPtr) override; private: - MemoryMngrPtr m_pMngr; - size_t m_total_blocks = 1; // size of the parent memory in abstract blocks - ptrdiff_t m_offset_blocks = 0; // offset from the beginning of the external memory in abstract blocks - size_t m_size_blocks = 1; // size of the viewed partition in abstract blocks + MemoryBlockPtr 
m_pBlock; + size_t m_total_chunks = 1; // size of the parent memory in abstract chunks + ptrdiff_t m_offset_chunks = 0; // offset from the beginning of the external memory in abstract chunks + size_t m_size_chunks = 1; // size of the viewed partition in abstract chunks size_t m_size = 0; // size of the viewed partition in bytes }; diff --git a/src/plugins/intel_cpu/src/proxy_mem_mgr.cpp b/src/plugins/intel_cpu/src/proxy_mem_mgr.cpp index fee56e70560895..1ab2f639985e67 100644 --- a/src/plugins/intel_cpu/src/proxy_mem_mgr.cpp +++ b/src/plugins/intel_cpu/src/proxy_mem_mgr.cpp @@ -7,75 +7,75 @@ using namespace ov::intel_cpu; -void ProxyMemoryMngr::setMemMngr(std::shared_ptr pMngr) { - OPENVINO_ASSERT(pMngr, "Attempt to set null memory manager to a ProxyMemoryMngr object"); - if (m_pMngr == pMngr) { +void ProxyMemoryBlock::setMemBlock(std::shared_ptr pBlock) { + OPENVINO_ASSERT(pBlock, "Attempt to set null memory block to a ProxyMemoryBlock object"); + if (m_pMemBlock == pBlock) { return; } - m_pMngr = pMngr; + m_pMemBlock = pBlock; notifyUpdate(); } -void ProxyMemoryMngr::setMemMngrResize(std::shared_ptr pMngr) { - OPENVINO_ASSERT(pMngr, "Attempt to set null memory manager to a ProxyMemoryMngr object"); - if (m_pMngr == pMngr) { +void ProxyMemoryBlock::setMemBlockResize(std::shared_ptr pBlock) { + OPENVINO_ASSERT(pBlock, "Attempt to set null memory block to a ProxyMemoryBlock object"); + if (m_pMemBlock == pBlock) { return; } - m_pMngr = pMngr; - m_pMngr->resize(m_size); + m_pMemBlock = pBlock; + m_pMemBlock->resize(m_size); notifyUpdate(); } -void ProxyMemoryMngr::reset() { - if (!m_pOrigMngr) { - m_pOrigMngr = std::make_shared(); +void ProxyMemoryBlock::reset() { + if (!m_pOrigBlock) { + m_pOrigBlock = std::make_shared(); } - if (m_pMngr == m_pOrigMngr) { + if (m_pMemBlock == m_pOrigBlock) { return; } - m_pMngr = m_pOrigMngr; - m_pMngr->resize(m_size); + m_pMemBlock = m_pOrigBlock; + m_pMemBlock->resize(m_size); notifyUpdate(); } -void* ProxyMemoryMngr::getRawPtr() 
const noexcept { - return m_pMngr->getRawPtr(); +void* ProxyMemoryBlock::getRawPtr() const noexcept { + return m_pMemBlock->getRawPtr(); } -void ProxyMemoryMngr::setExtBuff(void* ptr, size_t size) { - m_pMngr->setExtBuff(ptr, size); +void ProxyMemoryBlock::setExtBuff(void* ptr, size_t size) { + m_pMemBlock->setExtBuff(ptr, size); notifyUpdate(); } -bool ProxyMemoryMngr::resize(size_t size) { - auto res = m_pMngr->resize(size); - DEBUG_LOG(this, ", ", m_pMngr, " size ", m_size, " -> ", size, " resized? ", res, " RawPtr ", getRawPtr()); +bool ProxyMemoryBlock::resize(size_t size) { + auto res = m_pMemBlock->resize(size); + DEBUG_LOG(this, ", ", m_pMemBlock, " size ", m_size, " -> ", size, " resized? ", res, " RawPtr ", getRawPtr()); m_size = size; notifyUpdate(); return res; } -bool ProxyMemoryMngr::hasExtBuffer() const noexcept { - return m_pMngr->hasExtBuffer(); +bool ProxyMemoryBlock::hasExtBuffer() const noexcept { + return m_pMemBlock->hasExtBuffer(); } -void ProxyMemoryMngr::registerMemory(Memory* memPtr) { +void ProxyMemoryBlock::registerMemory(Memory* memPtr) { if (memPtr) { m_setMemPtrs.insert(memPtr); } } -void ProxyMemoryMngr::unregisterMemory(Memory* memPtr) { +void ProxyMemoryBlock::unregisterMemory(Memory* memPtr) { if (memPtr) { m_setMemPtrs.erase(memPtr); } } -void ProxyMemoryMngr::notifyUpdate() { +void ProxyMemoryBlock::notifyUpdate() { for (auto& item : m_setMemPtrs) { if (item) { item->update(); diff --git a/src/plugins/intel_cpu/src/proxy_mem_mgr.h b/src/plugins/intel_cpu/src/proxy_mem_mgr.h index 0788cef280ec96..9ce35887ef250e 100644 --- a/src/plugins/intel_cpu/src/proxy_mem_mgr.h +++ b/src/plugins/intel_cpu/src/proxy_mem_mgr.h @@ -12,12 +12,12 @@ namespace intel_cpu { /** * @brief A proxy object that additionally implements observer pattern */ -class ProxyMemoryMngr : public IMemoryMngrObserver { +class ProxyMemoryBlock : public IMemoryBlockObserver { public: - ProxyMemoryMngr() : m_pOrigMngr(std::make_shared()), m_pMngr(m_pOrigMngr) {} - 
explicit ProxyMemoryMngr(std::shared_ptr pMngr) { - OPENVINO_ASSERT(pMngr, "Memory manager is uninitialized"); - m_pMngr = pMngr; + ProxyMemoryBlock() : m_pOrigBlock(std::make_shared()), m_pMemBlock(m_pOrigBlock) {} + explicit ProxyMemoryBlock(std::shared_ptr pBlock) { + OPENVINO_ASSERT(pBlock, "Memory block is uninitialized"); + m_pMemBlock = pBlock; } void* getRawPtr() const noexcept override; @@ -28,26 +28,26 @@ class ProxyMemoryMngr : public IMemoryMngrObserver { void registerMemory(Memory* memPtr) override; void unregisterMemory(Memory* memPtr) override; - void setMemMngr(std::shared_ptr pMngr); - void setMemMngrResize(std::shared_ptr pMngr); + void setMemBlock(std::shared_ptr pBlock); + void setMemBlockResize(std::shared_ptr pBlock); void reset(); private: void notifyUpdate(); - // We keep the original MemMngr as may fallback to copy output. - std::shared_ptr m_pOrigMngr = nullptr; - std::shared_ptr m_pMngr = nullptr; + // We keep the original MemBlock as may fallback to copy output. + std::shared_ptr m_pOrigBlock = nullptr; + std::shared_ptr m_pMemBlock = nullptr; std::unordered_set m_setMemPtrs; // WA: resize stage might not work because there is no shape change, - // but the underlying actual memory manager changes. + // but the underlying actual memory block changes. 
size_t m_size = 0ul; }; -using ProxyMemoryMngrPtr = std::shared_ptr; -using ProxyMemoryMngrCPtr = std::shared_ptr; +using ProxyMemoryBlockPtr = std::shared_ptr; +using ProxyMemoryBlockCPtr = std::shared_ptr; } // namespace intel_cpu } // namespace ov \ No newline at end of file From f723360e315b4ac83d46ce98ceda9b03df82e923 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 6 Aug 2024 19:13:59 +0200 Subject: [PATCH 02/36] Remove isAllocated check from the memory class --- src/plugins/intel_cpu/src/cpu_memory.cpp | 36 ------------------- src/plugins/intel_cpu/src/cpu_memory.h | 19 +++++----- src/plugins/intel_cpu/src/node.cpp | 4 +-- src/plugins/intel_cpu/src/nodes/bucketize.cpp | 12 +++---- src/plugins/intel_cpu/src/nodes/concat.cpp | 8 ++--- src/plugins/intel_cpu/src/nodes/conv.cpp | 16 ++++----- src/plugins/intel_cpu/src/nodes/deconv.cpp | 20 +++++------ src/plugins/intel_cpu/src/nodes/def_conv.cpp | 20 +++++------ .../intel_cpu/src/nodes/depth_to_space.cpp | 8 ++--- .../src/nodes/extract_image_patches.cpp | 8 ++--- src/plugins/intel_cpu/src/nodes/eye.cpp | 4 +-- src/plugins/intel_cpu/src/nodes/gather.cpp | 8 ++--- src/plugins/intel_cpu/src/nodes/gather_nd.cpp | 12 +++---- .../intel_cpu/src/nodes/gather_tree.cpp | 16 ++++----- .../intel_cpu/src/nodes/grid_sample.cpp | 12 +++---- src/plugins/intel_cpu/src/nodes/grn.cpp | 8 ++--- src/plugins/intel_cpu/src/nodes/input.cpp | 8 ++--- .../intel_cpu/src/nodes/interpolate.cpp | 28 +++++++-------- src/plugins/intel_cpu/src/nodes/lrn.cpp | 8 ++--- src/plugins/intel_cpu/src/nodes/matmul.cpp | 12 +++---- src/plugins/intel_cpu/src/nodes/memory.cpp | 4 --- src/plugins/intel_cpu/src/nodes/mvn.cpp | 8 ++--- src/plugins/intel_cpu/src/nodes/normalize.cpp | 4 +-- src/plugins/intel_cpu/src/nodes/pad.cpp | 8 ++--- src/plugins/intel_cpu/src/nodes/pooling.cpp | 8 ++--- src/plugins/intel_cpu/src/nodes/reduce.cpp | 8 ++--- src/plugins/intel_cpu/src/nodes/reorder.cpp | 16 ++++----- .../intel_cpu/src/nodes/reverse_sequence.cpp | 12 
+++---- src/plugins/intel_cpu/src/nodes/rnn.cpp | 2 +- src/plugins/intel_cpu/src/nodes/roi_align.cpp | 8 ++--- .../intel_cpu/src/nodes/roi_pooling.cpp | 12 +++---- src/plugins/intel_cpu/src/nodes/roll.cpp | 16 ++++----- .../intel_cpu/src/nodes/shuffle_channels.cpp | 8 ++--- .../intel_cpu/src/nodes/space_to_depth.cpp | 8 ++--- src/plugins/intel_cpu/src/nodes/split.cpp | 8 ++--- src/plugins/intel_cpu/src/nodes/topk.cpp | 8 ++--- src/plugins/intel_cpu/src/nodes/transpose.cpp | 8 ++--- src/plugins/intel_cpu/src/nodes/unique.cpp | 8 ++--- .../src/utils/debug_capabilities.cpp | 2 +- 39 files changed, 191 insertions(+), 232 deletions(-) diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index 8229c0b1605b42..2ae8da547c32f5 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -184,22 +184,6 @@ dnnl::memory Memory::DnnlMemPrimHandle::getPrim() const { return m_prim; } -bool Memory::isAllocated() const noexcept { - if (m_blockHandle->getRawPtr()) { - return true; - } - if (!m_pMemDesc) { - return false; - } - if (!(m_pMemDesc->isDefined())) { - return true; - } - if (m_pMemDesc->getCurrentMemSize() == 0) { - return true; - } - return false; -} - void* Memory::getData() const { void* data = getDataNoThrow(); if (data == nullptr && @@ -306,22 +290,6 @@ void StringMemory::nullify() { } } -bool StringMemory::isAllocated() const noexcept { - if (getData()) { - return true; - } - if (!m_mem_desc) { - return false; - } - if (!(m_mem_desc->isDefined())) { - return true; - } - if (m_mem_desc->getCurrentMemSize() == 0) { - return true; - } - return false; -} - size_t StringMemory::getSize() const { // In bytes auto size = getDesc().getCurrentMemSize(); if (size == MemoryDesc::UNDEFINED_SIZE) { @@ -461,10 +429,6 @@ StaticMemory::StaticMemory(const dnnl::engine& eng, MemoryDescPtr desc, const vo StaticMemory::StaticMemory(const dnnl::engine& eng, const MemoryDesc& desc, const void* data, 
bool pads_zeroing) : StaticMemory::StaticMemory(eng, desc.clone(), data, pads_zeroing) {} -bool StaticMemory::isAllocated() const noexcept { - return 0 == m_size || getData() != nullptr; -} - const MemoryDesc& StaticMemory::getDesc() const { return *m_pMemDesc; } diff --git a/src/plugins/intel_cpu/src/cpu_memory.h b/src/plugins/intel_cpu/src/cpu_memory.h index 65d61bc97a2693..4dfb3b700728fd 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.h +++ b/src/plugins/intel_cpu/src/cpu_memory.h @@ -162,8 +162,6 @@ class IMemory { public: virtual ~IMemory() = default; - virtual bool isAllocated() const noexcept = 0; - virtual const MemoryDesc& getDesc() const = 0; virtual MemoryDescPtr getDescPtr() const = 0; @@ -191,6 +189,15 @@ class IMemory { virtual MemoryBlockPtr getMemoryBlock() const = 0; + virtual void nullify() = 0; + + bool isDefined() const noexcept { + if (auto desc = getDescPtr()) { + return desc->isDefined(); + } + return false; + } + //oneDNN specifics for backward compatibility virtual dnnl::memory getPrimitive() const = 0; @@ -202,8 +209,6 @@ class IMemory { return DnnlExtensionUtils::ElementTypeToDataType(getDesc().getPrecision()); } - virtual void nullify() = 0; - template ::value && !std::is_reference::value, int>::type = 0, typename std::enable_if::value, int>::type = 0> @@ -240,8 +245,6 @@ class StaticMemory final : public IMemory { StaticMemory(Memory&&) = delete; StaticMemory& operator= (StaticMemory&&) = delete; - bool isAllocated() const noexcept override; - const MemoryDesc& getDesc() const override; MemoryDescPtr getDescPtr() const override; @@ -287,8 +290,6 @@ class Memory : public IMemory { dnnl::memory getPrimitive() const override; - bool isAllocated() const noexcept override; - const MemoryDesc& getDesc() const override { return *m_pMemDesc; } @@ -393,8 +394,6 @@ class StringMemory : public IMemory { StringMemory(const dnnl::engine& engine, const MemoryDesc& desc, const StringMemoryBlockPtr& block) : StringMemory(engine, desc.clone(), block) 
{} - bool isAllocated() const noexcept override; - const MemoryDesc& getDesc() const override { return *m_mem_desc; } diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 62c091c9985300..52f30e410a2942 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -1496,7 +1496,7 @@ bool Node::isInputTensorAtPortEmpty(size_t port) const { auto edge = getParentEdgeAt(port); if (one_of(edge->getStatus(), Edge::Status::Allocated, Edge::Status::Validated)) { auto&& mem = edge->getMemory(); - if (mem.isAllocated()) { + if (mem.isDefined()) { return mem.getShape().hasZeroDims(); } } @@ -1511,7 +1511,7 @@ bool Node::isOutputTensorAtPortEmpty(size_t port) const { return outputShapes[port].hasZeroDims(); } auto&& mem = getChildEdgeAt(port)->getMemory(); - if (mem.isAllocated()) { + if (mem.isDefined()) { return mem.getShape().hasZeroDims(); } return false; diff --git a/src/plugins/intel_cpu/src/nodes/bucketize.cpp b/src/plugins/intel_cpu/src/nodes/bucketize.cpp index 4d91bdbb8fac1d..a71255c0d531e4 100644 --- a/src/plugins/intel_cpu/src/nodes/bucketize.cpp +++ b/src/plugins/intel_cpu/src/nodes/bucketize.cpp @@ -189,12 +189,12 @@ void Bucketize::prepareParams() { auto inputTensorMemPtr = getSrcMemoryAtPort(INPUT_TENSOR_PORT); auto inputBinsMemPtr = getSrcMemoryAtPort(INPUT_BINS_PORT); auto dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory didn't allocate."); - if (!inputTensorMemPtr || !inputTensorMemPtr->isAllocated()) - OPENVINO_THROW("Input tensor didn't allocate."); - if (!inputBinsMemPtr || !inputBinsMemPtr->isAllocated()) - OPENVINO_THROW("Input bins didn't allocate."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination memory is undefined."); + if (!inputTensorMemPtr || !inputTensorMemPtr->isDefined()) + OPENVINO_THROW("Input tensor is undefined."); + if (!inputBinsMemPtr || !inputBinsMemPtr->isDefined()) + 
OPENVINO_THROW("Input bins is undefined."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW("Preferable primitive descriptor is not set."); diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index 216e2b135d6cff..6ca87ab31e6b37 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -328,8 +328,8 @@ void Concat::prepareParams() { return; const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory didn't allocate."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination memory is undefined."); auto dstMemDesc = dstMemPtr->getDescWithType(); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW("Preferable primitive descriptor is not set."); @@ -375,9 +375,9 @@ void Concat::prepareParams() { nelemTotal = 0; for (size_t i = 0; i < getParentEdges().size(); i++) { const auto& srcMemPtr = getSrcMemoryAtPort(i); - if (!srcMemPtr || !srcMemPtr->isAllocated()) { + if (!srcMemPtr || !srcMemPtr->isDefined()) { auto parent = getParentEdgeAt(i)->getParent(); - OPENVINO_THROW("Source memory from ", parent->getName(), " didn't allocate for node ", getName(), "."); + OPENVINO_THROW("Source memory from ", parent->getName(), " is undefined for node ", getName(), "."); } if (canExecRef) { diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 60c59ea13708bb..cbdb35db271622 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -1233,17 +1233,17 @@ void Convolution::prepareParams() { auto srcMemPtr = getSrcMemoryAtPort(0); auto wghMemPtr = getSrcMemoryAtPort(1); auto dstMemPtr = getOutputMemory(); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory was not allocated."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW("Input 
memory was not allocated."); - if (!wghMemPtr || !wghMemPtr->isAllocated()) - OPENVINO_THROW("Weight memory was not allocated."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination memory was undefined."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW("Input memory was undefined."); + if (!wghMemPtr || !wghMemPtr->isDefined()) + OPENVINO_THROW("Weight memory was undefined."); MemoryPtr biasMemPtr = nullptr; if (withBiases) { biasMemPtr = getSrcMemoryAtPort(2); - if (!biasMemPtr || !biasMemPtr->isAllocated()) - OPENVINO_THROW("Input memory didn't allocate."); + if (!biasMemPtr || !biasMemPtr->isDefined()) + OPENVINO_THROW("Input memory is undefined."); } const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp index 499f172eca3645..57046a0a06d55b 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.cpp +++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp @@ -812,12 +812,12 @@ void Deconvolution::prepareParams() { auto srcMemPtr = getSrcMemoryAtPort(0); auto wghMemPtr = getSrcMemoryAtPort(1); auto dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory has not been allocated."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW("Input memory has not been allocated."); - if (!wghMemPtr || !wghMemPtr->isAllocated()) - OPENVINO_THROW("Weight memory has not been allocated."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination memory is undefined."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW("Input memory is undefined."); + if (!wghMemPtr || !wghMemPtr->isDefined()) + OPENVINO_THROW("Weight memory is undefined."); auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) OPENVINO_THROW("Preferable primitive descriptor is not set for node ", getName(), "."); @@ -869,8 +869,8 @@ void 
Deconvolution::prepareParams() { if (withBiases) { biasMemPtr = getSrcMemoryAtPort(biasPort); - if (!biasMemPtr || !biasMemPtr->isAllocated()) - OPENVINO_THROW("Bias memory memory didn't allocate."); + if (!biasMemPtr || !biasMemPtr->isDefined()) + OPENVINO_THROW("Bias memory memory is undefined."); biasDesc = biasMemPtr->getDescWithType(); } bool is1x1PaddingAsymmetric = false; @@ -1094,8 +1094,8 @@ std::vector Deconvolution::readOutputSpatialDims() const { OPENVINO_THROW("Can't get output spatial dims. Inputs number = ", getParentEdges().size()); } const auto &shapeMemPtr = getSrcMemoryAtPort(2); - if (!shapeMemPtr || !shapeMemPtr->isAllocated()) { - OPENVINO_THROW("'output_shape' input memory is not allocated."); + if (!shapeMemPtr || !shapeMemPtr->isDefined()) { + OPENVINO_THROW("'output_shape' input memory is undefined."); } const auto spDimsNum = getInputShapeAtPort(0).getRank() - 2; if (shapeMemPtr->getStaticDims()[0] != spDimsNum) { diff --git a/src/plugins/intel_cpu/src/nodes/def_conv.cpp b/src/plugins/intel_cpu/src/nodes/def_conv.cpp index 97fe2a2b2da08b..eb56902c653e99 100644 --- a/src/plugins/intel_cpu/src/nodes/def_conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/def_conv.cpp @@ -1184,19 +1184,19 @@ void DeformableConvolution::prepareParams() { auto offMemPtr = getSrcMemoryAtPort(OFF_ID); auto weiMemPtr = getSrcMemoryAtPort(WEI_ID); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate destination memory"); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate input memory"); - if (!offMemPtr || !offMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate offsets shape memory"); - if (!weiMemPtr || !weiMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate weights memory"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined destination memory"); + if (!srcMemPtr || !srcMemPtr->isDefined()) + 
OPENVINO_THROW(errorPrefix, " has undefined input memory"); + if (!offMemPtr || !offMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined offsets shape memory"); + if (!weiMemPtr || !weiMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined weights memory"); if (getOriginalInputsNumber() > 3) { auto modMemPtr = getSrcMemoryAtPort(MOD_ID); - if (!modMemPtr || !modMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate modulations memory"); + if (!modMemPtr || !modMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined modulations memory"); } auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); diff --git a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp index bc374a236c78fd..5a0e321dfec7f5 100644 --- a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp +++ b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp @@ -162,10 +162,10 @@ void DepthToSpace::initSupportedPrimitiveDescriptors() { void DepthToSpace::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_ERROR("has not allocated destination memory"); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - THROW_ERROR("has not allocated input memory"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + THROW_ERROR("has undefined destination memory"); + if (!srcMemPtr || !srcMemPtr->isDefined()) + THROW_ERROR("has undefined input memory"); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR("has unidentified preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp index 185ff6bfd216d5..7c3dda49931451 100644 --- a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp +++ b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp @@ -372,10 +372,10 @@ 
ExtractImagePatches::ExtractImagePatches(const std::shared_ptr& op, co void ExtractImagePatches::prepareParams() { const auto& srcMemPtr0 = getSrcMemoryAtPort(0); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr0 || !srcMemPtr0->isAllocated()) - OPENVINO_THROW("Input memory has not been allocated."); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory has not been allocated."); + if (!srcMemPtr0 || !srcMemPtr0->isDefined()) + OPENVINO_THROW("Input memory is undefined."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination memory is undefined."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW("Preferable primitive descriptor is not set."); diff --git a/src/plugins/intel_cpu/src/nodes/eye.cpp b/src/plugins/intel_cpu/src/nodes/eye.cpp index f32f67279b75d0..f1e78b04510914 100644 --- a/src/plugins/intel_cpu/src/nodes/eye.cpp +++ b/src/plugins/intel_cpu/src/nodes/eye.cpp @@ -104,8 +104,8 @@ void Eye::executeSpecified() { const size_t colNum = getColNum(); const int64_t shift = getDiagIndex(); auto outPtr = getDstMemoryAtPort(0); - if (!outPtr || !outPtr ->isAllocated()) - THROW_ERROR(errorPrefix, "Destination memory didn't allocate."); + if (!outPtr || !outPtr ->isDefined()) + THROW_ERROR(errorPrefix, "Destination memory is undefined."); T *dst = outPtr->getDataAs(); const size_t batchVolume = getBatchVolume(getBatchShape()); diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp index 799ec3d480028b..94debfba1901ab 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather.cpp @@ -338,11 +338,11 @@ bool Gather::needPrepareParams() const { void Gather::prepareParams() { auto dataMemPtr = getSrcMemoryAtPort(GATHER_DATA); - if (!dataMemPtr || !dataMemPtr->isAllocated()) - THROW_ERROR(" has not allocated input data memory."); + if (!dataMemPtr || !dataMemPtr->isDefined()) + THROW_ERROR(" has undefined 
input data memory."); auto idxMemPtr = getSrcMemoryAtPort(GATHER_INDICES); - if (!idxMemPtr || !idxMemPtr->isAllocated()) - THROW_ERROR(" has not allocated input indices memory."); + if (!idxMemPtr || !idxMemPtr->isDefined()) + THROW_ERROR(" has undefined input indices memory."); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR(" has unidentified preferable primitive descriptor."); diff --git a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp index 86723215a35ad3..2dc91dd12559f2 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp @@ -84,12 +84,12 @@ void GatherND::prepareParams() { auto srcMemPtr = getSrcMemoryAtPort(GATHERND_DATA); auto idxMemPtr = getSrcMemoryAtPort(GATHERND_INDEXES); auto dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - THROW_ERROR(" has not allocated input memory of 'data'."); - if (!idxMemPtr || !idxMemPtr->isAllocated()) - THROW_ERROR(" has not allocated input memory of 'indices'."); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_ERROR(" has not allocated output memory."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + THROW_ERROR(" has undefined input memory of 'data'."); + if (!idxMemPtr || !idxMemPtr->isDefined()) + THROW_ERROR(" has undefined input memory of 'indices'."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + THROW_ERROR(" has undefined output memory."); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR(" has unidentified preferable primitive descriptor."); diff --git a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp index 8a14220b165f69..f318290defbf82 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp @@ -98,14 +98,14 @@ void GatherTree::prepareParams() { const auto& maxSeqLenMemPtr = getSrcMemoryAtPort(GATHER_TREE_MAX_SEQ_LEN); const auto& 
dstMemPtr = getDstMemoryAtPort(0); - if (!stepIdxMemPtr || !stepIdxMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated input memory of 'step_ids'."); - if (!parentIdxMemPtr || !parentIdxMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated input memory of 'parent_ids'."); - if (!maxSeqLenMemPtr || !maxSeqLenMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated input memory of 'max_seq_len'."); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated output memory."); + if (!stepIdxMemPtr || !stepIdxMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory of 'step_ids'."); + if (!parentIdxMemPtr || !parentIdxMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory of 'parent_ids'."); + if (!maxSeqLenMemPtr || !maxSeqLenMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory of 'max_seq_len'."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined output memory."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW(errorPrefix, " has unidentified preferable primitive descriptor."); diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp index eb143a1ad55199..618d6b39105689 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp @@ -182,14 +182,14 @@ void GridSample::createPrimitive() { void GridSample::prepareParams() { auto dataMemPtr = getSrcMemoryAtPort(IN_DATA); - if (!dataMemPtr || !dataMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has not allocated input data memory."); + if (!dataMemPtr || !dataMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined input data memory."); auto gridMemPtr = getSrcMemoryAtPort(IN_GRID); - if (!gridMemPtr || !gridMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has not allocated input grid memory."); + if 
(!gridMemPtr || !gridMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined input grid memory."); auto dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has not allocated output memory."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined output memory."); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_CPU_NODE_ERR("has unidentified preferable primitive descriptor."); diff --git a/src/plugins/intel_cpu/src/nodes/grn.cpp b/src/plugins/intel_cpu/src/nodes/grn.cpp index 83e554acf8b255..f20e7d6b90a012 100644 --- a/src/plugins/intel_cpu/src/nodes/grn.cpp +++ b/src/plugins/intel_cpu/src/nodes/grn.cpp @@ -61,10 +61,10 @@ void GRN::prepareParams() { const auto& dataMemPtr = getSrcMemoryAtPort(0); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!dataMemPtr || !dataMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated input memory"); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated output memory"); + if (!dataMemPtr || !dataMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined output memory"); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW(errorPrefix, " has unidentified preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index 72a22132aba175..c3521b8481f832 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -473,14 +473,14 @@ void Input::initSupportedPrimitiveDescriptors() { void Input::createPrimitive() { for (size_t i = 0; i < getChildEdges().size(); i++) { auto dstMemPtr = getDstMemoryAtPort(i); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has unallocated memory object at port ", i, + if (!dstMemPtr || 
!dstMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined memory object at port ", i, " to node ", getChildEdgeAt(i)->getChild()->getName(), "."); } for (size_t i = 0; i < getParentEdges().size(); i++) { auto srcMemPtr = getSrcMemoryAtPort(i); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has unallocated memory object at port ", i, + if (!srcMemPtr || !srcMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined memory object at port ", i, " from node ", getParentEdgeAt(i)->getParent()->getName(), "."); } diff --git a/src/plugins/intel_cpu/src/nodes/interpolate.cpp b/src/plugins/intel_cpu/src/nodes/interpolate.cpp index b883674606e362..50108288d6644a 100644 --- a/src/plugins/intel_cpu/src/nodes/interpolate.cpp +++ b/src/plugins/intel_cpu/src/nodes/interpolate.cpp @@ -2249,27 +2249,27 @@ void Interpolate::prepareParams() { } auto dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate destination memory"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined destination memory"); auto srcMemPtr = getSrcMemoryAtPort(DATA_ID); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate input memory"); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory"); if (shapeCalcMode == InterpolateShapeCalcMode::sizes) { auto tsMemPtr = getSrcMemoryAtPort(TARGET_SHAPE_ID); - if (!tsMemPtr || !tsMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate target shape memory"); + if (!tsMemPtr || !tsMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined target shape memory"); } else { auto scaleMemPtr = getSrcMemoryAtPort(get_scale_id()); - if (!scaleMemPtr || !scaleMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate scales memory"); + if (!scaleMemPtr || !scaleMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " 
has undefined scales memory"); } if (isAxesSpecified) { auto axesMemPtr = getSrcMemoryAtPort(get_axis_id()); - if (!axesMemPtr || !axesMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate axes memory"); + if (!axesMemPtr || !axesMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined axes memory"); } const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); @@ -2365,10 +2365,10 @@ void Interpolate::prepareParams() { void Interpolate::createPrimitive() { auto srcMemPtr = getSrcMemoryAtPort(DATA_ID); auto dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate input memory"); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate destination memory"); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined destination memory"); if (dstMemPtr->getDesc().hasLayoutType(LayoutType::ncsp)) { interpAttrs.layout = InterpolateLayoutType::planar; diff --git a/src/plugins/intel_cpu/src/nodes/lrn.cpp b/src/plugins/intel_cpu/src/nodes/lrn.cpp index 3f8a83d36e36fe..a26b58798b0dbd 100644 --- a/src/plugins/intel_cpu/src/nodes/lrn.cpp +++ b/src/plugins/intel_cpu/src/nodes/lrn.cpp @@ -163,10 +163,10 @@ std::shared_ptr Lrn::getSrcMemDesc(const dnnl::primitive_desc &prim_ void Lrn::prepareParams() { auto srcMemPtr = getSrcMemoryAtPort(0); auto dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " input memory did not allocate"); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, "destination memory did not allocate"); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " input memory is undefined"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, "destination 
memory is undefined"); const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) diff --git a/src/plugins/intel_cpu/src/nodes/matmul.cpp b/src/plugins/intel_cpu/src/nodes/matmul.cpp index 2841e6f100afb7..50cb3353612996 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.cpp +++ b/src/plugins/intel_cpu/src/nodes/matmul.cpp @@ -538,10 +538,10 @@ void MatMul::prepareParams() { auto dstMemPtr = getDstMemoryAtPort(0); auto src0MemPtr = getSrcMemoryAtPort(0); auto src1MemPtr = getSrcMemoryAtPort(1); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate destination memory"); - if (!src0MemPtr || !src0MemPtr->isAllocated() || !src1MemPtr || !src1MemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate input memory"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined destination memory"); + if (!src0MemPtr || !src0MemPtr->isDefined() || !src1MemPtr || !src1MemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory"); const NodeDesc* selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) @@ -576,8 +576,8 @@ void MatMul::prepareParams() { DnnlMemoryDescPtr dnnlBiasMemDesc = nullptr; if (withBiases) { auto biasMemory = getSrcMemoryAtPort(2); - if (!biasMemory || !biasMemory->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate bias memory"); + if (!biasMemory || !biasMemory->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined bias memory"); dnnlBiasMemDesc = biasMemory->getDescWithType(); } diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp index 9e70720e50146d..037221dc31a3ae 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory.cpp @@ -23,10 +23,6 @@ class MemoryStub : public IMemory { public: MemoryStub(const dnnl::engine& eng, const MemoryDescPtr& pMemDesc) : m_eng(eng), 
m_pMemDesc(pMemDesc) {} - bool isAllocated() const noexcept override { - return true; - } - const MemoryDesc& getDesc() const override { return *m_pMemDesc; } diff --git a/src/plugins/intel_cpu/src/nodes/mvn.cpp b/src/plugins/intel_cpu/src/nodes/mvn.cpp index e8d99c7947fcfc..cc6054a6e7717a 100644 --- a/src/plugins/intel_cpu/src/nodes/mvn.cpp +++ b/src/plugins/intel_cpu/src/nodes/mvn.cpp @@ -2017,10 +2017,10 @@ void MVN::MVNRefExecutor::exec(const uint8_t *src_data, uint8_t *dst_data, const void MVN::prepareParams() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory didn't allocate."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW("Input memory didn't allocate."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination memory is undefined."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW("Input memory is undefined."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW("Preferable primitive descriptor is not set."); diff --git a/src/plugins/intel_cpu/src/nodes/normalize.cpp b/src/plugins/intel_cpu/src/nodes/normalize.cpp index 0f61e7c717d4bc..65cfeb827b2986 100644 --- a/src/plugins/intel_cpu/src/nodes/normalize.cpp +++ b/src/plugins/intel_cpu/src/nodes/normalize.cpp @@ -884,9 +884,9 @@ void NormalizeL2::setPostOps(dnnl::primitive_attr& kernel_attrs, const VectorDim void NormalizeL2::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(DATA); auto srcMemPtr = getSrcMemoryAtPort(DATA); - if (!dstMemPtr || !dstMemPtr->isAllocated()) + if (!dstMemPtr || !dstMemPtr->isDefined()) THROW_ERROR("can't get destination memory"); - if (!srcMemPtr || !srcMemPtr->isAllocated()) + if (!srcMemPtr || !srcMemPtr->isDefined()) THROW_ERROR("can't get input memory"); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR("has nullable preferable primitive descriptor"); diff --git 
a/src/plugins/intel_cpu/src/nodes/pad.cpp b/src/plugins/intel_cpu/src/nodes/pad.cpp index dbc31f0f112738..bd38f521be5167 100644 --- a/src/plugins/intel_cpu/src/nodes/pad.cpp +++ b/src/plugins/intel_cpu/src/nodes/pad.cpp @@ -217,10 +217,10 @@ void Pad::PadExecutor::paramsInitialization(const PadAttrs& attrs, params.attrs = attrs; auto& srcMemPtr = srcMemory[DATA_ID]; auto& dstMemPtr = dstMemory[DATA_ID]; - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, "has not allocated source memory."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, "has not allocated destination memory."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, "has undefined source memory."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, "has undefined destination memory."); const auto srcBlockMemDesc = srcMemPtr->getDescWithType(); const auto dstBlockMemDesc = dstMemPtr->getDescWithType(); const auto& srcDims = srcBlockMemDesc->getBlockDims(); diff --git a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp index 299ba4d15f4b6a..71e5c38f0e0a79 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp @@ -394,10 +394,10 @@ void Pooling::prepareParams() { if (useACL) { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory didn't allocate."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW("Input memory didn't allocate."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination memory is undefined."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW("Input memory is undefined."); std::vector srcMemoryDescs; for (size_t i = 0; i < getOriginalInputsNumber(); i++) { diff --git a/src/plugins/intel_cpu/src/nodes/reduce.cpp 
b/src/plugins/intel_cpu/src/nodes/reduce.cpp index ca474073dd34f6..a77790d7a14954 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/reduce.cpp @@ -2172,10 +2172,10 @@ void Reduce::createPrimitive() { } auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(REDUCE_DATA); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated destination memory."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocate input memory."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined destination memory."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW(errorPrefix, " has nullable preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp index c868792574cc81..8a18b648a991dc 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp @@ -184,10 +184,10 @@ void Reorder::prepareParams() { auto srcMemPtr = getSrcMemoryAtPort(0); auto dstMemPtr = getDstMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has unallocated destination memory object."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has unallocated input memory object."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined destination memory object."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined input memory object."); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_CPU_NODE_ERR("does not have preferable primitive descriptor."); @@ -243,10 +243,10 @@ void Reorder::prepareParams() { } } if (!canUseNcsp2Nspc && !canUseNspc2Ncsp) { - if (!dstMemPtr || 
!dstMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has unallocated destination memory object."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - THROW_CPU_NODE_ERR("has unallocated input memory object."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined destination memory object."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + THROW_CPU_NODE_ERR("has undefined input memory object."); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_CPU_NODE_ERR("does not have preferable primitive descriptor."); diff --git a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp index 8c637a3896fd91..b51eab4bef393e 100644 --- a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp +++ b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp @@ -84,12 +84,12 @@ void ReverseSequence::prepareParams() { const auto& seqLengthsMemPtr = getSrcMemoryAtPort(REVERSESEQUENCE_LENGTHS); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!dataMemPtr || !dataMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated input memory of 'data'"); - if (!seqLengthsMemPtr || !seqLengthsMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated input memory of 'seq_lengths'"); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated output memory"); + if (!dataMemPtr || !dataMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory of 'data'"); + if (!seqLengthsMemPtr || !seqLengthsMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory of 'seq_lengths'"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined output memory"); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW(errorPrefix, " has unidentified preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp index 
7b0e46ce8e5ce7..a29df47968038a 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp @@ -1119,7 +1119,7 @@ Node::AttrPtr RNN::initPrimitiveAttr() { void RNN::prepareParams() { for (size_t i = 0; i < wIdx; i++) { auto memPtr = getSrcMemoryAtPort(i); - if (!memPtr || !memPtr->isAllocated()) + if (!memPtr || !memPtr->isDefined()) THROW_ERROR("has uninitialized memory at port ", i); } if ((is_cell && DC != getParentEdgeAt(0)->getMemory().getDesc().getShape().getStaticDims()[1]) || diff --git a/src/plugins/intel_cpu/src/nodes/roi_align.cpp b/src/plugins/intel_cpu/src/nodes/roi_align.cpp index 61bedf62811fa4..de28d5aff4e399 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_align.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_align.cpp @@ -816,10 +816,10 @@ void ROIAlign::initSupportedPrimitiveDescriptors() { void ROIAlign::createPrimitive() { auto srcMemPtr = getSrcMemoryAtPort(0); auto dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate input memory"); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " did not allocate destination memory"); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined destination memory"); if (!roi_align_kernel) { ROIAlignLayoutType selectedLayout = ROIAlignLayoutType::nspc; diff --git a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp index f1a4291f0803a3..6a0e8c6c4569d0 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp @@ -501,12 +501,12 @@ void ROIPooling::prepareParams() { const auto& srcMemPtr0 = getSrcMemoryAtPort(0); const auto& srcMemPtr1 = getSrcMemoryAtPort(0); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr0 || 
!srcMemPtr0->isAllocated()) - OPENVINO_THROW("Input memory has not been allocated."); - if (!srcMemPtr1 || !srcMemPtr1->isAllocated()) - OPENVINO_THROW("Input memory has not been allocated."); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination has not been allocated."); + if (!srcMemPtr0 || !srcMemPtr0->isDefined()) + OPENVINO_THROW("Input memory is undefined."); + if (!srcMemPtr1 || !srcMemPtr1->isDefined()) + OPENVINO_THROW("Input memory is undefined."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination is undefined."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW("Preferable primitive descriptor is not set."); diff --git a/src/plugins/intel_cpu/src/nodes/roll.cpp b/src/plugins/intel_cpu/src/nodes/roll.cpp index 6f6ad7edc20d65..6f75361c13c37f 100644 --- a/src/plugins/intel_cpu/src/nodes/roll.cpp +++ b/src/plugins/intel_cpu/src/nodes/roll.cpp @@ -102,14 +102,14 @@ void Roll::prepareParams() { const auto& axesMemPtr = getSrcMemoryAtPort(AXES_INDEX); const auto& dstMemPtr = getDstMemoryAtPort(0); - if (!dataMemPtr || !dataMemPtr->isAllocated()) - OPENVINO_THROW(layerErrorPrefix, " has not allocated input memory of 'data'"); - if (!shiftMemPtr || !shiftMemPtr->isAllocated()) - OPENVINO_THROW(layerErrorPrefix, " has not allocated input memory of 'shift'"); - if (!axesMemPtr || !axesMemPtr->isAllocated()) - OPENVINO_THROW(layerErrorPrefix, " has not allocated input memory of 'axes'"); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(layerErrorPrefix, " has not allocated output memory"); + if (!dataMemPtr || !dataMemPtr->isDefined()) + OPENVINO_THROW(layerErrorPrefix, " has undefined input memory of 'data'"); + if (!shiftMemPtr || !shiftMemPtr->isDefined()) + OPENVINO_THROW(layerErrorPrefix, " has undefined input memory of 'shift'"); + if (!axesMemPtr || !axesMemPtr->isDefined()) + OPENVINO_THROW(layerErrorPrefix, " has undefined input memory of 'axes'"); + if (!dstMemPtr || 
!dstMemPtr->isDefined()) + OPENVINO_THROW(layerErrorPrefix, " has undefined output memory"); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW(layerErrorPrefix, " has unidentified preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp index 72ecca666c30dd..ecbf8ceb0dc145 100644 --- a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp +++ b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp @@ -128,10 +128,10 @@ void ShuffleChannels::initSupportedPrimitiveDescriptors() { void ShuffleChannels::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_SHCH_ERROR("has not allocated destination memory"); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - THROW_SHCH_ERROR("has not allocated input memory"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + THROW_SHCH_ERROR("has undefined destination memory"); + if (!srcMemPtr || !srcMemPtr->isDefined()) + THROW_SHCH_ERROR("has undefined input memory"); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_SHCH_ERROR("has unidentified preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp index 40d344ec64cb07..adfdbf2f7d0f23 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp +++ b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp @@ -166,10 +166,10 @@ void SpaceToDepth::initSupportedPrimitiveDescriptors() { void SpaceToDepth::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - THROW_ERROR("has not allocated destination memory"); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - THROW_ERROR("has not allocated input memory"); + if (!dstMemPtr || !dstMemPtr->isDefined()) + THROW_ERROR("has undefined destination 
memory"); + if (!srcMemPtr || !srcMemPtr->isDefined()) + THROW_ERROR("has undefined input memory"); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR("has unidentified preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp index ad869bd40e03cb..c2ea305d9cdc4a 100644 --- a/src/plugins/intel_cpu/src/nodes/split.cpp +++ b/src/plugins/intel_cpu/src/nodes/split.cpp @@ -234,8 +234,8 @@ bool Split::needPrepareParams() const { void Split::prepareParams() { const auto &srcMemPtr = getSrcMemoryAtPort(0); - if (!srcMemPtr || !srcMemPtr->isAllocated()) { - THROW_ERROR("has not allocated input memory"); + if (!srcMemPtr || !srcMemPtr->isDefined()) { + THROW_ERROR("has undefined input memory"); } if (!constSplitLengths) { @@ -249,8 +249,8 @@ void Split::prepareParams() { std::vector outDescs; for (size_t port = 0; port < outputShapes.size(); ++port) { const auto &outMemPtr = this->getDstMemoryAtPort(port); - if (!outMemPtr || !outMemPtr->isAllocated()) { - THROW_ERROR("has not allocated destination memory"); + if (!outMemPtr || !outMemPtr->isDefined()) { + THROW_ERROR("has undefined destination memory"); } if (outMemPtr->getShape().hasZeroDims()) { diff --git a/src/plugins/intel_cpu/src/nodes/topk.cpp b/src/plugins/intel_cpu/src/nodes/topk.cpp index be8ac05d85b3e6..f01fded788f495 100644 --- a/src/plugins/intel_cpu/src/nodes/topk.cpp +++ b/src/plugins/intel_cpu/src/nodes/topk.cpp @@ -1980,10 +1980,10 @@ void TopK::preset_params() { void TopK::prepareParams() { auto dstMemPtr = getDstMemoryAtPort(TOPK_DATA); auto srcMemPtr = getSrcMemoryAtPort(TOPK_DATA); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocated destination memory."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW(errorPrefix, " has not allocate input memory."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined destination 
memory."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW(errorPrefix, " has undefined input memory."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW(errorPrefix, " has nullable preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/transpose.cpp b/src/plugins/intel_cpu/src/nodes/transpose.cpp index 9a958be2ead5e4..f8eebe8990528d 100644 --- a/src/plugins/intel_cpu/src/nodes/transpose.cpp +++ b/src/plugins/intel_cpu/src/nodes/transpose.cpp @@ -200,10 +200,10 @@ void Transpose::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(INPUT_DATA_IDX); - if (!dstMemPtr || !dstMemPtr->isAllocated()) - OPENVINO_THROW("Destination memory was not allocated."); - if (!srcMemPtr || !srcMemPtr->isAllocated()) - OPENVINO_THROW("Input memory was not allocated."); + if (!dstMemPtr || !dstMemPtr->isDefined()) + OPENVINO_THROW("Destination memory was undefined."); + if (!srcMemPtr || !srcMemPtr->isDefined()) + OPENVINO_THROW("Input memory was undefined."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW("Preferable primitive descriptor was not set."); diff --git a/src/plugins/intel_cpu/src/nodes/unique.cpp b/src/plugins/intel_cpu/src/nodes/unique.cpp index 130213dfcb8703..79c98d4b1d44ad 100644 --- a/src/plugins/intel_cpu/src/nodes/unique.cpp +++ b/src/plugins/intel_cpu/src/nodes/unique.cpp @@ -91,14 +91,14 @@ void Unique::createPrimitive() { void Unique::prepareParams() { auto dataMemPtr = getSrcMemoryAtPort(IN_DATA); - if (!dataMemPtr || !dataMemPtr->isAllocated()) { - THROW_ERROR(" has not allocated input data memory."); + if (!dataMemPtr || !dataMemPtr->isDefined()) { + THROW_ERROR(" has undefined input data memory."); } for (int i = 0; i < 4; i++) { if (definedOutputs[i]) { auto dstMemPtr = getDstMemoryAtPort(i); - if (!dstMemPtr || !dstMemPtr->isAllocated()) { - THROW_ERROR(" has not allocated output memory at port ", i); + if (!dstMemPtr || 
!dstMemPtr->isDefined()) { + THROW_ERROR(" has undefined output memory at port ", i); } } } diff --git a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp index 74b13244b54bff..0e96f7c95bc4fc 100644 --- a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp +++ b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp @@ -654,7 +654,7 @@ std::string to_string(const T* values, size_t N, size_t maxsize) { std::ostream& operator<<(std::ostream& os, const IMemory& mem) { const auto& desc = mem.getDesc(); os << desc; - if (mem.isAllocated()) { + if (mem.isDefined()) { os << " ["; if (desc.getPrecision() == ov::element::i32) { os << to_string(mem.getDataAs(), mem.getSize() / sizeof(int32_t), 256); From 5ced5f46810a10fcfa5fc1ba0b2be52aa08ef865 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Wed, 7 Aug 2024 14:59:07 +0200 Subject: [PATCH 03/36] Fix tests build --- src/plugins/intel_cpu/tests/unit/cpu_tensor_test.cpp | 3 +-- src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_cpu/tests/unit/cpu_tensor_test.cpp b/src/plugins/intel_cpu/tests/unit/cpu_tensor_test.cpp index 76794b56531bd8..9b766319569995 100644 --- a/src/plugins/intel_cpu/tests/unit/cpu_tensor_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/cpu_tensor_test.cpp @@ -63,7 +63,6 @@ class MockIMemory : public IMemory { MockIMemory(MemoryDescPtr desc) : m_pMemDesc(desc) {} MockIMemory(const MemoryDesc& desc) : m_pMemDesc(desc.clone()) {} - MOCK_METHOD(bool, isAllocated, (), (const, noexcept, override)); MOCK_METHOD(MemoryDesc&, getDesc, (), (const, override)); MOCK_METHOD(MemoryDescPtr, getDescPtr, (), (const, override)); @@ -73,7 +72,7 @@ class MockIMemory : public IMemory { MOCK_METHOD(void, redefineDesc, (MemoryDescPtr), (override)); MOCK_METHOD(void, load, (const IMemory&, bool), (const, override)); - MOCK_METHOD(MemoryMngrPtr, getMemoryMngr, (), (const, 
override)); + MOCK_METHOD(MemoryBlockPtr, getMemoryBlock, (), (const, override)); MOCK_METHOD(dnnl::memory, getPrimitive, (), (const, override)); MOCK_METHOD(void, nullify, (), (override)); diff --git a/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp b/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp index c73db6c8a28df8..9e0bce6d444f4f 100644 --- a/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp @@ -55,7 +55,7 @@ TEST(MemoryTest, ConcurrentResizeGetPrimitive) { dnnl::memory dnnl_mem; auto desc = std::make_shared(ov::element::f32, Shape{10, 2}); Memory cpu_mem1(eng, desc); - Memory cpu_mem2(eng, desc, cpu_mem1.getMemoryMngr()); + Memory cpu_mem2(eng, desc, cpu_mem1.getMemoryBlock()); auto desc2 = std::make_shared(ov::element::f32, Shape{10, 20}); std::atomic lock{true}; @@ -84,7 +84,7 @@ TEST(StaticMemoryTest, UnsupportedDnnlPrecision) { CpuBlockedMemoryDesc memDescSupportedPrc(ov::element::f32, {5, 4, 7, 10}); MemoryPtr testMemory; OV_ASSERT_NO_THROW(testMemory = std::make_shared(eng, memDescSupportedPrc)); - ASSERT_TRUE(testMemory->isAllocated()); + ASSERT_TRUE(testMemory->isDefined()); dnnl::memory dnnl_memory; void* raw_data_ptr = nullptr; OV_ASSERT_NO_THROW(raw_data_ptr = testMemory->getData()); @@ -94,7 +94,7 @@ TEST(StaticMemoryTest, UnsupportedDnnlPrecision) { CpuBlockedMemoryDesc memDescUnSupportedPrc(ov::element::i64, {5, 4, 7, 10}); OV_ASSERT_NO_THROW(testMemory = std::make_shared(eng, memDescUnSupportedPrc)); - ASSERT_TRUE(testMemory->isAllocated()); + ASSERT_TRUE(testMemory->isDefined()); raw_data_ptr = nullptr; OV_ASSERT_NO_THROW(raw_data_ptr = testMemory->getData()); ASSERT_FALSE(nullptr == raw_data_ptr); From e2440df282e2f98c2d7730aa1e440f24eb170b26 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Wed, 7 Aug 2024 16:05:43 +0200 Subject: [PATCH 04/36] Remove incorrect checks from Input --- src/plugins/intel_cpu/src/nodes/input.cpp | 8 ++++---- 1 file changed, 4
insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index c3521b8481f832..5a30fc6e61b1ad 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -473,14 +473,14 @@ void Input::initSupportedPrimitiveDescriptors() { void Input::createPrimitive() { for (size_t i = 0; i < getChildEdges().size(); i++) { auto dstMemPtr = getDstMemoryAtPort(i); - if (!dstMemPtr || !dstMemPtr->isDefined()) - THROW_CPU_NODE_ERR("has undefined memory object at port ", i, + if (!dstMemPtr) + THROW_CPU_NODE_ERR("has null memory object at port ", i, " to node ", getChildEdgeAt(i)->getChild()->getName(), "."); } for (size_t i = 0; i < getParentEdges().size(); i++) { auto srcMemPtr = getSrcMemoryAtPort(i); - if (!srcMemPtr || !srcMemPtr->isDefined()) - THROW_CPU_NODE_ERR("has undefined memory object at port ", i, + if (!srcMemPtr) + THROW_CPU_NODE_ERR("has null memory object at port ", i, " from node ", getParentEdgeAt(i)->getParent()->getName(), "."); } From d1b544e5cee38b6c05ad7540f8657c7cf2d3cc68 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Wed, 7 Aug 2024 17:57:15 +0200 Subject: [PATCH 05/36] Remove incorrect isDefined checks --- src/plugins/intel_cpu/src/nodes/depth_to_space.cpp | 8 ++++---- src/plugins/intel_cpu/src/nodes/interpolate.cpp | 8 ++++---- src/plugins/intel_cpu/src/nodes/normalize.cpp | 4 ++-- src/plugins/intel_cpu/src/nodes/reduce.cpp | 8 ++++---- src/plugins/intel_cpu/src/nodes/roi_align.cpp | 8 ++++---- src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp | 8 ++++---- src/plugins/intel_cpu/src/nodes/space_to_depth.cpp | 8 ++++---- src/plugins/intel_cpu/src/nodes/transpose.cpp | 8 ++++---- src/plugins/intel_cpu/src/nodes/unique.cpp | 8 ++++---- 9 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp index 5a0e321dfec7f5..15d521a423c7e7 
100644 --- a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp +++ b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp @@ -162,10 +162,10 @@ void DepthToSpace::initSupportedPrimitiveDescriptors() { void DepthToSpace::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isDefined()) - THROW_ERROR("has undefined destination memory"); - if (!srcMemPtr || !srcMemPtr->isDefined()) - THROW_ERROR("has undefined input memory"); + if (!dstMemPtr) + THROW_ERROR("has null destination memory"); + if (!srcMemPtr) + THROW_ERROR("has null input memory"); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR("has unidentified preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/interpolate.cpp b/src/plugins/intel_cpu/src/nodes/interpolate.cpp index 50108288d6644a..7d84c833ef44cc 100644 --- a/src/plugins/intel_cpu/src/nodes/interpolate.cpp +++ b/src/plugins/intel_cpu/src/nodes/interpolate.cpp @@ -2365,10 +2365,10 @@ void Interpolate::prepareParams() { void Interpolate::createPrimitive() { auto srcMemPtr = getSrcMemoryAtPort(DATA_ID); auto dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr || !srcMemPtr->isDefined()) - OPENVINO_THROW(errorPrefix, " has undefined input memory"); - if (!dstMemPtr || !dstMemPtr->isDefined()) - OPENVINO_THROW(errorPrefix, " has undefined destination memory"); + if (!srcMemPtr) + OPENVINO_THROW(errorPrefix, " has null input memory"); + if (!dstMemPtr) + OPENVINO_THROW(errorPrefix, " has null destination memory"); if (dstMemPtr->getDesc().hasLayoutType(LayoutType::ncsp)) { interpAttrs.layout = InterpolateLayoutType::planar; diff --git a/src/plugins/intel_cpu/src/nodes/normalize.cpp b/src/plugins/intel_cpu/src/nodes/normalize.cpp index 65cfeb827b2986..ca52e572b73ea8 100644 --- a/src/plugins/intel_cpu/src/nodes/normalize.cpp +++ b/src/plugins/intel_cpu/src/nodes/normalize.cpp @@ -884,9 +884,9 @@ void 
NormalizeL2::setPostOps(dnnl::primitive_attr& kernel_attrs, const VectorDim void NormalizeL2::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(DATA); auto srcMemPtr = getSrcMemoryAtPort(DATA); - if (!dstMemPtr || !dstMemPtr->isDefined()) + if (!dstMemPtr) THROW_ERROR("can't get destination memory"); - if (!srcMemPtr || !srcMemPtr->isDefined()) + if (!srcMemPtr) THROW_ERROR("can't get input memory"); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR("has nullable preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/reduce.cpp b/src/plugins/intel_cpu/src/nodes/reduce.cpp index a77790d7a14954..b40c50f957514f 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/reduce.cpp @@ -2172,10 +2172,10 @@ void Reduce::createPrimitive() { } auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(REDUCE_DATA); - if (!dstMemPtr || !dstMemPtr->isDefined()) - OPENVINO_THROW(errorPrefix, " has undefined destination memory."); - if (!srcMemPtr || !srcMemPtr->isDefined()) - OPENVINO_THROW(errorPrefix, " has undefined input memory."); + if (!dstMemPtr) + OPENVINO_THROW(errorPrefix, " has null destination memory."); + if (!srcMemPtr) + OPENVINO_THROW(errorPrefix, " has null input memory."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW(errorPrefix, " has nullable preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/roi_align.cpp b/src/plugins/intel_cpu/src/nodes/roi_align.cpp index de28d5aff4e399..e26719b65bad2e 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_align.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_align.cpp @@ -816,10 +816,10 @@ void ROIAlign::initSupportedPrimitiveDescriptors() { void ROIAlign::createPrimitive() { auto srcMemPtr = getSrcMemoryAtPort(0); auto dstMemPtr = getDstMemoryAtPort(0); - if (!srcMemPtr || !srcMemPtr->isDefined()) - OPENVINO_THROW(errorPrefix, " has undefined input memory"); - if (!dstMemPtr || 
!dstMemPtr->isDefined()) - OPENVINO_THROW(errorPrefix, " has undefined destination memory"); + if (!srcMemPtr) + OPENVINO_THROW(errorPrefix, " has null input memory"); + if (!dstMemPtr) + OPENVINO_THROW(errorPrefix, " has null destination memory"); if (!roi_align_kernel) { ROIAlignLayoutType selectedLayout = ROIAlignLayoutType::nspc; diff --git a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp index ecbf8ceb0dc145..dd45a639e98847 100644 --- a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp +++ b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp @@ -128,10 +128,10 @@ void ShuffleChannels::initSupportedPrimitiveDescriptors() { void ShuffleChannels::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isDefined()) - THROW_SHCH_ERROR("has undefined destination memory"); - if (!srcMemPtr || !srcMemPtr->isDefined()) - THROW_SHCH_ERROR("has undefined input memory"); + if (!dstMemPtr) + THROW_SHCH_ERROR("has null destination memory"); + if (!srcMemPtr) + THROW_SHCH_ERROR("has null input memory"); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_SHCH_ERROR("has unidentified preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp index adfdbf2f7d0f23..6753a9510bdc2f 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp +++ b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp @@ -166,10 +166,10 @@ void SpaceToDepth::initSupportedPrimitiveDescriptors() { void SpaceToDepth::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(0); - if (!dstMemPtr || !dstMemPtr->isDefined()) - THROW_ERROR("has undefined destination memory"); - if (!srcMemPtr || !srcMemPtr->isDefined()) - THROW_ERROR("has undefined input memory"); + if (!dstMemPtr) + THROW_ERROR("has null destination memory"); + if 
(!srcMemPtr) + THROW_ERROR("has null input memory"); if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR("has unidentified preferable primitive descriptor"); diff --git a/src/plugins/intel_cpu/src/nodes/transpose.cpp b/src/plugins/intel_cpu/src/nodes/transpose.cpp index f8eebe8990528d..38712e04c50719 100644 --- a/src/plugins/intel_cpu/src/nodes/transpose.cpp +++ b/src/plugins/intel_cpu/src/nodes/transpose.cpp @@ -200,10 +200,10 @@ void Transpose::createPrimitive() { auto dstMemPtr = getDstMemoryAtPort(0); auto srcMemPtr = getSrcMemoryAtPort(INPUT_DATA_IDX); - if (!dstMemPtr || !dstMemPtr->isDefined()) - OPENVINO_THROW("Destination memory was undefined."); - if (!srcMemPtr || !srcMemPtr->isDefined()) - OPENVINO_THROW("Input memory was undefined."); + if (!dstMemPtr) + OPENVINO_THROW("Destination memory is null."); + if (!srcMemPtr) + OPENVINO_THROW("Input memory is null."); if (getSelectedPrimitiveDescriptor() == nullptr) OPENVINO_THROW("Preferable primitive descriptor was not set."); diff --git a/src/plugins/intel_cpu/src/nodes/unique.cpp b/src/plugins/intel_cpu/src/nodes/unique.cpp index 79c98d4b1d44ad..a0a0cd95d000f3 100644 --- a/src/plugins/intel_cpu/src/nodes/unique.cpp +++ b/src/plugins/intel_cpu/src/nodes/unique.cpp @@ -91,14 +91,14 @@ void Unique::createPrimitive() { void Unique::prepareParams() { auto dataMemPtr = getSrcMemoryAtPort(IN_DATA); - if (!dataMemPtr || !dataMemPtr->isDefined()) { - THROW_ERROR(" has undefined input data memory."); + if (!dataMemPtr) { + THROW_ERROR(" has null input data memory."); } for (int i = 0; i < 4; i++) { if (definedOutputs[i]) { auto dstMemPtr = getDstMemoryAtPort(i); - if (!dstMemPtr || !dstMemPtr->isDefined()) { - THROW_ERROR(" has undefined output memory at port ", i); + if (!dstMemPtr) { + THROW_ERROR(" has null output memory at port ", i); } } } From 0bf53b89b483e8a3777a8e94a2aa1dcb73ed83c6 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Wed, 7 Aug 2024 18:58:43 +0200 Subject: [PATCH 06/36] Redefine 
Split createPrimitive method --- src/plugins/intel_cpu/src/nodes/split.cpp | 6 ++++++ src/plugins/intel_cpu/src/nodes/split.h | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp index c2ea305d9cdc4a..157dbfd84a7f6c 100644 --- a/src/plugins/intel_cpu/src/nodes/split.cpp +++ b/src/plugins/intel_cpu/src/nodes/split.cpp @@ -232,6 +232,12 @@ bool Split::needPrepareParams() const { return needShapeInfer(); } +void Split::createPrimitive() { + if (outputShapesDefined()) { + Node::createPrimitive(); + } +} + void Split::prepareParams() { const auto &srcMemPtr = getSrcMemoryAtPort(0); if (!srcMemPtr || !srcMemPtr->isDefined()) { diff --git a/src/plugins/intel_cpu/src/nodes/split.h b/src/plugins/intel_cpu/src/nodes/split.h index 02af229541f9bf..0782594bcf9989 100644 --- a/src/plugins/intel_cpu/src/nodes/split.h +++ b/src/plugins/intel_cpu/src/nodes/split.h @@ -28,7 +28,10 @@ class Split : public Node { bool needPrepareParams() const override; bool needShapeInfer() const override; void prepareParams() override; - void executeDynamicImpl(dnnl::stream strm) override { execute(strm); } + void createPrimitive() override; + void executeDynamicImpl(dnnl::stream strm) override { + execute(strm); + } void resolveInPlaceEdges(Edge::LOOK look) override; private: From e20e7f206b39ac892d33435425de32a89ef058d5 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Mon, 12 Aug 2024 18:22:56 +0200 Subject: [PATCH 07/36] Memory subsystem refactoring --- src/plugins/intel_cpu/src/graph.cpp | 270 +++++----------- src/plugins/intel_cpu/src/graph.h | 9 +- .../intel_cpu/src/memory_management.cpp | 295 ++++++++++++++++++ .../intel_cpu/src/memory_management.hpp | 47 +++ 4 files changed, 424 insertions(+), 197 deletions(-) create mode 100644 src/plugins/intel_cpu/src/memory_management.cpp create mode 100644 src/plugins/intel_cpu/src/memory_management.hpp diff --git
a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index fd8f211d25f10a..61586cd5565d5d 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -22,6 +22,7 @@ #include "itt.h" #include "memory_desc/cpu_memory_desc_utils.h" #include "memory_desc/dnnl_blocked_memory_desc.h" +#include "memory_management.hpp" #include "node.h" #include "nodes/common/cpu_convert.h" #include "nodes/common/cpu_memcpy.h" @@ -43,7 +44,7 @@ #include #include "common/primitive_desc_iface.hpp" -#include "openvino/runtime/memory_solver.hpp" +#include "openvino/runtime/memory_solver.hpp" //TODO: remove #include "openvino/runtime/threading/cpu_streams_executor.hpp" #include "openvino/core/parallel.hpp" @@ -56,9 +57,6 @@ using namespace dnnl; namespace ov { namespace intel_cpu { -typedef std::unordered_set edge_cluster_t; -typedef std::vector edge_clusters_t; - Graph::~Graph() { CPU_DEBUG_CAP_ENABLE(summary_perf(*this)); } @@ -91,7 +89,6 @@ void Graph::CreateGraph(const std::vector& graphNodes, m_stream = dnnl::stream(getEngine()); this->_name = std::move(name); - this->reuse_io_tensors = false; this->graphNodes = graphNodes; this->graphEdges = graphEdges; @@ -117,7 +114,6 @@ template void Graph::CreateGraph(const std::shared_ptr&, const void Graph::Replicate(const std::shared_ptr &model) { OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::intel_cpu_LT, "Graph::Replicate", "ov::Model"); this->_name = model->get_friendly_name(); - this->reuse_io_tensors = false; // Map data object onto producer node std::map, NodePtr> op2node; @@ -664,53 +660,8 @@ static inline bool isConstOutput(EdgePtr edge) { return edge->getParent()->isConstant() && !edge->getChild()->isConstant(); } -static edge_clusters_t findEdgeClusters(const std::vector & graphEdges) { - typedef std::unordered_map edge_cluster_idx_map_t; - - edge_clusters_t edge_clusters; - edge_cluster_idx_map_t edge_cluster_indices; - - for (auto &edge : graphEdges) { - auto 
edge_it = edge_cluster_indices.find(edge); - if (edge_it != edge_cluster_indices.end()) - continue; // edge is visited - - size_t cluster_idx = edge_clusters.size(); - EdgePtr last_shared_edge = nullptr; - - // find cluster index - for (auto shared_edge = edge->getSharedEdge(std::nothrow); - shared_edge; - shared_edge = shared_edge->getSharedEdge(std::nothrow)) { - auto shared_edge_it = edge_cluster_indices.find(shared_edge); - if (shared_edge_it != edge_cluster_indices.end()) { - cluster_idx = shared_edge_it->second; - last_shared_edge = shared_edge; - break; - } - } - - // add shared edges to cluster - edge_cluster_indices.emplace(edge, cluster_idx); - - if (cluster_idx == edge_clusters.size()) - edge_clusters.emplace_back(edge_cluster_t { edge }); - else - edge_clusters[cluster_idx].emplace(edge); - - for (auto shared_edge = edge->getSharedEdge(std::nothrow); - shared_edge != last_shared_edge; - shared_edge = shared_edge->getSharedEdge(std::nothrow)) { - edge_cluster_indices.emplace(shared_edge, cluster_idx); - edge_clusters[cluster_idx].emplace(shared_edge); - } - } - - return edge_clusters; -} - void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { - edge_clusters_t edge_clusters = findEdgeClusters(graphEdges); + edgeClusters edge_clusters = MemoryControl::findEdgeClusters(graphEdges); size_t remaining_edge_clusters_count = edge_clusters.size(); @@ -782,77 +733,104 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { } } - const int64_t alignment = 32; // 32 bytes + // Markup the memory regions + std::vector memoryRegions; + memoryRegions.reserve(remaining_edge_clusters_count); + + for (size_t i = 0; i < remaining_edge_clusters_count; ++i) { + MemoryRegion reg = {std::numeric_limits::max(), + 0, + 0, + static_cast(i), + MemoryRegion::RegionType::VARIABLE, + MemoryRegion::AllocType::UNKNOWN}; - // Markup the boxes - std::vector definedBoxes; - std::vector undefinedBoxes; - for (size_t i = 0; i < remaining_edge_clusters_count; i++) { 
- ov::MemorySolver::Box box = { std::numeric_limits::max(), 0, 0, static_cast(i) }; int64_t boxSize = 0; + bool isConst = false, isOutput = false, isInput = false; for (auto &edge : edge_clusters[i]) { - int e_start = edge->getParent()->execIndex; - int e_finish = edge->getChild()->execIndex; + int e_start = edge->getParent()->getExecIndex(); + int e_finish = edge->getChild()->getExecIndex(); - if (boxSize != -1 && edge->getDesc().isDefined()) { - int64_t e_size = edge->getDesc().getCurrentMemSize(); // size in bytes (from the beginning of data to the last element) + auto&& desc = edge->getDesc(); + + if (boxSize != -1 && desc.isDefined()) { + int64_t e_size = desc.getCurrentMemSize(); // size in bytes (from the beginning of data to the last element) boxSize = std::max(e_size, boxSize); } else { boxSize = -1; } - box.start = std::min(e_start, box.start); - box.finish = std::max(e_finish, box.finish); - } + reg.start = std::min(e_start, reg.start); + reg.finish = std::max(e_finish, reg.finish); + + auto allocType = + desc.getPrecision() == element::string ? MemoryRegion::AllocType::STRING : MemoryRegion::AllocType::POD; + + if (reg.alloc_type != allocType && MemoryRegion::AllocType::UNKNOWN != reg.alloc_type) { + OPENVINO_THROW("Different allocation types in the same memory region"); + } + reg.alloc_type = allocType; - // Constant data are filled once on load. - // So we need it untouchable during all execution time - // -1 is a place holder for a max timestamp. 
- bool isConst = false, isOutput = false, isInput = false; - for (auto &edge : edge_clusters[i]) { isConst |= isConstOutput(edge); - isOutput |= edge->getChild()->getType() == Type::Output; + isOutput |= edge->getParent()->getType() == Type::Output; isInput |= edge->getParent()->getType() == Type::Input; } - if (reuse_io_tensors) { - if (isInput | isConst) box.start = 0; - if (isOutput | isConst) box.finish = -1; - } else { - if (isInput | isOutput | isConst) { - box.start = 0; - box.finish = -1; + if (isConst) { + reg.type = MemoryRegion::RegionType::CONST; + } else if (isInput) { + if (isOutput) { + reg.type = MemoryRegion::RegionType::IO; + } else { + reg.type = MemoryRegion::RegionType::INPUT; } + } else if (isOutput) { + reg.type = MemoryRegion::RegionType::OUTPUT; } - if (boxSize != -1) { - box.size = div_up(boxSize, alignment); - definedBoxes.push_back(box); - } else { - box.size = boxSize; - undefinedBoxes.push_back(box); - } + memoryRegions.push_back(reg); } - // Process defined boxes (static shapes) - ov::MemorySolver staticMemSolver(definedBoxes); - size_t total_size = static_cast(staticMemSolver.solve()) * alignment; - - memWorkspace = std::make_shared(getEngine(), DnnlBlockedMemoryDesc(ov::element::i8, Shape(VectorDims{total_size}))); + // special processing of the dynamic output edges + auto it = std::remove_if(memoryRegions.begin(), memoryRegions.end(), [&](const MemoryRegion& region) { + if (region.size >= 0 || !one_of(region.type, MemoryRegion::RegionType::OUTPUT, MemoryRegion::RegionType::IO)) { + return false; + } + for (auto& edge : edge_clusters[region.id]) { + const auto child = edge->getChild(); + if (child->getType() == Type::Output && edge->getStatus() == Edge::Status::NeedAllocation) { + auto proxyMemBlock = std::make_shared(); + DEBUG_LOG("ProxyMemoryBlock ", proxyMemBlock, " ", this); + edge->allocate(proxyMemBlock); + + // Store the output memory blocks. + // So that, the infer requests can be able to access them. 
+ int count = 0; + for (auto& output : outputNodesMap) { + if (output.second == child) { + outputNodesMemBlocksMap[output.first] = proxyMemBlock; + count++; + } + } + // sometimes there are unused output ports. + OPENVINO_ASSERT(count <= 1, "CPU plugin cannot find output node. count ", count); + } + } + return true; + }); - if (edge_clusters.empty()) - return; + memoryRegions.erase(it, memoryRegions.end()); - auto* workspace_ptr = static_cast(memWorkspace->getData()); + //Set up the memory control subsystem. + this->m_pMemoryControl = make_unique(syncNodesInds); + auto memoryBlocks = m_pMemoryControl->insert(memoryRegions); - for (const auto& box : definedBoxes) { + // attach all the not yet allocated edges to the memory contol + for (auto&& item : memoryBlocks) { int count = 0; - for (auto& edge : edge_clusters[box.id]) { + for (auto&& edge : edge_clusters[item.first]) { if (edge->getStatus() == Edge::Status::NeedAllocation) { - int64_t offset = staticMemSolver.get_offset(box.id); - // !! Fallback to individual memory allocation !! - // if you like to check infer without reuse just call this function without arguments. - edge->allocate(workspace_ptr + offset * alignment); // alignment in byte + edge->allocate(item.second); // TODO: WA for some test (like strided_slice_test) which use tensors with // shapes {0}. And it is implicitly converted into {1} tensor. 
@@ -866,93 +844,6 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { OPENVINO_ASSERT(count == 1); } - //Process undefined boxes (dynamic shapes) - if (!undefinedBoxes.empty()) { - // Use proxy memory block for output edges - for (const auto& box : undefinedBoxes) { - for (auto& edge : edge_clusters[box.id]) { - const auto child = edge->getChild(); - if (child->getType() == Type::Output && - edge->getStatus() == Edge::Status::NeedAllocation) { - auto proxyMemBlock = - std::make_shared(); - DEBUG_LOG("ProxyMemoryBlock ", proxyMemBlock, " ", this); - edge->allocate(proxyMemBlock); - - // Store the output memory blocks. - // So that, the infer requests can be able to access them. - int count = 0; - for (auto &output : outputNodesMap) { - if (output.second == child) { - outputNodesMemBlocksMap[output.first] = proxyMemBlock; - count++; - } - } - // sometimes there are unused output ports. - OPENVINO_ASSERT(count <= 1, "CPU plugin cannot find output node. count ", count); - } - } - } - - if (!syncNodesInds.empty()) { - //We have to extend the lifespan of tensors that are crossing a sync point border in order to save - //the intermediate computation results from possible loss due to the tensor resize - for (auto& box : undefinedBoxes) { - if (-1 == box.finish) { - continue; - } - auto itr_upper = std::upper_bound(syncNodesInds.begin(), syncNodesInds.end(), box.finish, [](int y, int x) { return y <= x;}); - auto itr_lower = std::lower_bound(syncNodesInds.begin(), syncNodesInds.end(), box.start); - if (itr_lower != itr_upper) { // across sections - if (itr_upper == syncNodesInds.end()) { - box.finish = -1; - } else { - box.finish = *itr_upper; - } - } - } - } - - ov::MemorySolver::normalize_boxes(undefinedBoxes); - - std::vector> groups; //groups of nonoverlapping boxes - constexpr bool enableMemReuse = true; // set false to disable mem reuse for debug purposes - if (enableMemReuse) { - groups.push_back({undefinedBoxes.front()}); - for (size_t i = 1; i < 
undefinedBoxes.size(); ++i) { - const auto& box = undefinedBoxes[i]; - bool groupFound = false; - for (auto& group : groups) { - const auto& lastBox = group.back(); - if (lastBox.start > box.finish || lastBox.finish < box.start) { - group.push_back(box); - groupFound = true; - break; - } - } - - if (!groupFound) { - groups.push_back({box}); - } - } - } else { - for (auto& box : undefinedBoxes) { - groups.push_back({box}); - } - } - for (auto& group : groups) { - auto grpMemBlock = - std::make_shared(make_unique()); - for (auto& box : group) { - for (auto& edge : edge_clusters[box.id]) { - if (edge->getStatus() == Edge::Status::NeedAllocation) { - edge->allocate(grpMemBlock); - } - } - } - } - } - // Resolve all other edges with status NotAllocated and in-place for (auto& cluster : edge_clusters) { for (auto& edge : cluster) { @@ -1020,13 +911,6 @@ bool Graph::ProcessDynNodes() { const bool containsDynamicNodes = std::any_of(graphNodes.begin(), graphNodes.end(), [](const NodePtr& node) { return node->isDynamicNode(); }); - // In case of dynamic shapes, tensors may be resized due to the shapes variations. - // If the input tensor is included to memory reuse, it means that its memory block is shared with other tensors in the graph, which in turn may cause data - // loss when one of the tensors down the graph requests mem resize, while the input data have not been yet read by the consumers. To avoid such situations - // we disable io mem reuse for the case of dynamic shapes. 
- if (containsDynamicNodes) { - this->reuse_io_tensors = false; - } return containsDynamicNodes; } diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index ffcc970af6a5b6..728dff91b563eb 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -10,6 +10,7 @@ #include "node.h" #include "edge.h" #include "graph_context.h" +#include "memory_management.hpp" #include "openvino/runtime/profiling_info.hpp" #include @@ -40,6 +41,8 @@ class Graph { }; Graph() = default; + Graph(Graph&&) = default; + Graph& operator=(Graph&&) = default; ~Graph(); @@ -204,10 +207,6 @@ class Graph { // values mean increment it within each Infer() call int infer_count = -1; - bool reuse_io_tensors = true; - - MemoryPtr memWorkspace; - std::vector graphNodes; std::vector graphEdges; @@ -255,6 +254,8 @@ class Graph { GraphContext::CPtr context; dnnl::stream m_stream; + std::unique_ptr m_pMemoryControl; + void EnforceInferencePrecision(); void EnforceBF16(); void insertReorder(EdgePtr& edge, bool isOptimized, std::unordered_set& uniqueLayerNames); diff --git a/src/plugins/intel_cpu/src/memory_management.cpp b/src/plugins/intel_cpu/src/memory_management.cpp new file mode 100644 index 00000000000000..952eb27e0ca196 --- /dev/null +++ b/src/plugins/intel_cpu/src/memory_management.cpp @@ -0,0 +1,295 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "memory_management.hpp" + +#include + +#include "node.h" +#include "openvino/runtime/memory_solver.hpp" +#include "partitioned_mem_mgr.h" + +namespace ov { +namespace intel_cpu { + +namespace { + +class IMemoryManager { +public: + virtual ~IMemoryManager() = default; + virtual void insert(const MemoryRegion& reg) = 0; + virtual const MemoryControl::MemoryBlockMap& lastSolution() = 0; +}; + +using MemoryManagerPtr = std::shared_ptr; + +template +std::shared_ptr makeDnnlMemoryBlock(Args&&... 
args) { + return std::make_shared(make_unique(std::forward(args)...)); +} + +class MemoryManagerIndividualBlocks : public IMemoryManager { +public: + void insert(const MemoryRegion& reg) override { + m_blocks.insert({reg.id, makeDnnlMemoryBlock()}); + } + + const MemoryControl::MemoryBlockMap& lastSolution() override { + return m_blocks; + } + +private: + MemoryControl::MemoryBlockMap m_blocks; +}; + +class MemoryManagerStaticSolver : public IMemoryManager { +public: + void insert(const MemoryRegion& reg) override { + m_boxes.emplace_back(MemorySolver::Box{reg.start, reg.finish, reg.size, reg.id}); + } + + const MemoryControl::MemoryBlockMap& lastSolution() override { + if (!m_blocks.empty()) { + solve(); + } + return m_blocks; + } + +private: + void solve() { + constexpr size_t alignment = 32; + std::for_each(m_boxes.begin(), m_boxes.end(), [=](MemorySolver::Box& box) { + box.size = div_up(box.size, alignment); + }); + + ov::MemorySolver staticMemSolver(m_boxes); + size_t total_size = static_cast(staticMemSolver.solve()) * alignment; + + m_workspace = makeDnnlMemoryBlock(); + m_workspace->resize(total_size); + + for (const auto& box : m_boxes) { + int64_t offset = staticMemSolver.get_offset(box.id); + auto memoryBlock = std::make_shared(m_workspace, total_size, offset, box.size * alignment); + m_blocks[box.id] = std::move(memoryBlock); + } + // m_boxes.clear(); + } + +private: + MemoryControl::MemoryBlockMap m_blocks; + std::vector m_boxes; + MemoryBlockPtr m_workspace; +}; + +class MemoryManageNonOverlapingSets : public IMemoryManager { +public: + MemoryManageNonOverlapingSets(std::vector syncInds) : m_syncInds(std::move(syncInds)) {} + void insert(const MemoryRegion& reg) override { + MemorySolver::Box box = {reg.start, reg.finish, reg.size, reg.id}; + if (-1 != reg.finish) { + //We have to extend the lifespan of tensors that are crossing a sync point border in order to save + //the intermediate computation results from possible loss due to the tensor resize + 
auto itr_upper = + std::upper_bound(m_syncInds.begin(), m_syncInds.end(), box.finish, [](int y, int x) { + return y <= x; + }); + auto itr_lower = std::lower_bound(m_syncInds.begin(), m_syncInds.end(), box.start); + if (itr_lower != itr_upper) { // across sections + if (itr_upper == m_syncInds.end()) { + box.finish = -1; + } else { + box.finish = *itr_upper; + } + } + } + m_boxes.emplace_back(std::move(box)); + } + + const MemoryControl::MemoryBlockMap& lastSolution() override { + if (!m_blocks.empty()) { + solve(); + } + return m_blocks; + } + +private: + void solve() { + ov::MemorySolver::normalize_boxes(m_boxes); + + std::vector> groups; //groups of nonoverlapping boxes + groups.push_back({m_boxes.front()}); + for (size_t i = 1; i < m_boxes.size(); ++i) { + const auto& box = m_boxes[i]; + bool groupFound = false; + for (auto& group : groups) { + const auto& lastBox = group.back(); + if (lastBox.start > box.finish || lastBox.finish < box.start) { + group.push_back(box); + groupFound = true; + break; + } + } + + if (!groupFound) { + groups.push_back({box}); + } + } + for (auto& group : groups) { + auto grpMemBlock = makeDnnlMemoryBlock(); + for (auto& box : group) { + m_blocks[box.id] = grpMemBlock; + } + } + // m_boxes.clear(); + } + +private: + MemoryControl::MemoryBlockMap m_blocks; + std::vector m_boxes; + std::vector m_syncInds; +}; + +} // namespace + +class MemoryControl::RegionHandler { +public: + using Condition = std::function; + +public: + RegionHandler(Condition cond, MemoryManagerPtr memManager) + : m_cond(std::move(cond)), + m_memManager(std::move(memManager)) {} + + bool insert(const MemoryRegion& reg) { + if (!m_cond(reg)) { + return false; + } + + m_memManager->insert(reg); + return true; + } + + const MemoryControl::MemoryBlockMap& lastSolution() const { + return m_memManager->lastSolution(); + } + +private: + Condition m_cond; + MemoryManagerPtr m_memManager; +}; + +namespace { + +template +MemoryControl::RegionHandlerPtr buildHandler(F&& f, 
Args&&... args) { + return std::make_shared(std::forward(f), + std::make_shared(std::forward(args)...)); +} + +} // namespace + +MemoryControl::MemoryControl(std::vector syncInds) { + // init handlers + + // handler for dynamic tensors + m_handlers.emplace_back(buildHandler([](const MemoryRegion& reg) { + if (reg.size < 0 || MemoryRegion::RegionType::VARIABLE != reg.type || + MemoryRegion::AllocType::POD != reg.alloc_type) { + return false; + } + return true; + })); + + // handler for static tensors + m_handlers.emplace_back(buildHandler([](const MemoryRegion& reg) { + if (reg.size >= 0 || MemoryRegion::RegionType::VARIABLE != reg.type || + MemoryRegion::AllocType::POD != reg.alloc_type) { + return false; + } + return true; + }, std::move(syncInds))); + + //handler for I/O tensors, so far simply individual blocks + m_handlers.emplace_back(buildHandler([](const MemoryRegion& reg) { + if (MemoryRegion::RegionType::VARIABLE == reg.type || reg.alloc_type != MemoryRegion::AllocType::POD) { + return false; + } + return true; + })); +} + +void MemoryControl::insert(const MemoryRegion& region) { + for (auto&& handler : m_handlers) { + if (handler->insert(region)) { + return; + } + } + OPENVINO_THROW("No suitable hanlder was found for the given memory region"); +} + +MemoryControl::MemoryBlockMap MemoryControl::insert(const std::vector& regions) { + for (auto&& region : regions) { + insert(region); + } + + MemoryControl::MemoryBlockMap blocksMap; + blocksMap.reserve(regions.size()); + + for (auto&& handler : m_handlers) { + auto&& solution = handler->lastSolution(); + for (auto&& item : solution) { + auto res = blocksMap.insert(item); + OPENVINO_ASSERT(res.second, "Memory solutions has non unique entries"); + } + } + + return blocksMap; +} + +edgeClusters MemoryControl::findEdgeClusters(const std::vector& graphEdges) { + typedef std::unordered_map edge_cluster_idx_map_t; + + edgeClusters edge_clusters; + edge_cluster_idx_map_t edge_cluster_indices; + + for (auto& edge : 
graphEdges) { + auto edge_it = edge_cluster_indices.find(edge); + if (edge_it != edge_cluster_indices.end()) + continue; // edge is visited + + size_t cluster_idx = edge_clusters.size(); + EdgePtr last_shared_edge = nullptr; + + // find cluster index + for (auto shared_edge = edge->getSharedEdge(std::nothrow); shared_edge; + shared_edge = shared_edge->getSharedEdge(std::nothrow)) { + auto shared_edge_it = edge_cluster_indices.find(shared_edge); + if (shared_edge_it != edge_cluster_indices.end()) { + cluster_idx = shared_edge_it->second; + last_shared_edge = shared_edge; + break; + } + } + + // add shared edges to cluster + edge_cluster_indices.emplace(edge, cluster_idx); + + if (cluster_idx == edge_clusters.size()) + edge_clusters.emplace_back(edgeCluster{edge}); + else + edge_clusters[cluster_idx].emplace(edge); + + for (auto shared_edge = edge->getSharedEdge(std::nothrow); shared_edge != last_shared_edge; + shared_edge = shared_edge->getSharedEdge(std::nothrow)) { + edge_cluster_indices.emplace(shared_edge, cluster_idx); + edge_clusters[cluster_idx].emplace(shared_edge); + } + } + + return edge_clusters; +} + +} // namespace intel_cpu +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/memory_management.hpp b/src/plugins/intel_cpu/src/memory_management.hpp new file mode 100644 index 00000000000000..8b0da95b1a4756 --- /dev/null +++ b/src/plugins/intel_cpu/src/memory_management.hpp @@ -0,0 +1,47 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "edge.h" + +namespace ov { +namespace intel_cpu { + +using edgeCluster = std::unordered_set; +using edgeClusters = std::vector; + +struct MemoryRegion { + int start; // Execution order index of first use. + int finish; // Execution order index of last use. 
-1 means inf + int64_t size; // size in bytes + int64_t id; // ID unique for each region + + enum class RegionType : uint8_t { VARIABLE, CONST, INPUT, OUTPUT, IO } type; + enum class AllocType : uint8_t { POD, STRING, UNKNOWN } alloc_type; +}; + +class MemoryControl { +public: + class RegionHandler; + + using RegionHandlerPtr = std::shared_ptr; + using MemoryBlockMap = std::unordered_map; + +public: + explicit MemoryControl(std::vector syncInds); + + static edgeClusters findEdgeClusters(const std::vector& graphEdges); + + MemoryBlockMap insert(const std::vector& regions); + +private: + void insert(const MemoryRegion& region); + +private: + std::vector m_syncInds; + std::vector m_handlers; +}; +} // namespace intel_cpu +} // namespace ov \ No newline at end of file From bbbcdf44430e2b6b158eb8f0c43505d5aa3c4678 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 13 Aug 2024 11:44:37 +0200 Subject: [PATCH 08/36] Avoid using key word --- src/plugins/intel_cpu/src/graph.cpp | 2 +- src/plugins/intel_cpu/src/memory_management.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 7134536351768c..a49cb2d3bbaeba 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -777,7 +777,7 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { } if (isConst) { - reg.type = MemoryRegion::RegionType::CONST; + reg.type = MemoryRegion::RegionType::CONSTANT; } else if (isInput) { if (isOutput) { reg.type = MemoryRegion::RegionType::IO; diff --git a/src/plugins/intel_cpu/src/memory_management.hpp b/src/plugins/intel_cpu/src/memory_management.hpp index 8b0da95b1a4756..70136c30e42011 100644 --- a/src/plugins/intel_cpu/src/memory_management.hpp +++ b/src/plugins/intel_cpu/src/memory_management.hpp @@ -18,7 +18,7 @@ struct MemoryRegion { int64_t size; // size in bytes int64_t id; // ID unique for each region - enum class RegionType : 
uint8_t { VARIABLE, CONST, INPUT, OUTPUT, IO } type; + enum class RegionType : uint8_t { VARIABLE, CONSTANT, INPUT, OUTPUT, IO } type; enum class AllocType : uint8_t { POD, STRING, UNKNOWN } alloc_type; }; From 692c02faba2a461045eeeda13083226afb17ce86 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 13 Aug 2024 14:58:08 +0200 Subject: [PATCH 09/36] Bug fixes --- src/plugins/intel_cpu/src/graph.cpp | 4 +- .../intel_cpu/src/memory_management.cpp | 39 +++++++++++++++++-- 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index a49cb2d3bbaeba..6304634dcfd395 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -772,10 +772,12 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { reg.alloc_type = allocType; isConst |= isConstOutput(edge); - isOutput |= edge->getParent()->getType() == Type::Output; + isOutput |= edge->getChild()->getType() == Type::Output; isInput |= edge->getParent()->getType() == Type::Input; } + reg.size = boxSize; + if (isConst) { reg.type = MemoryRegion::RegionType::CONSTANT; } else if (isInput) { diff --git a/src/plugins/intel_cpu/src/memory_management.cpp b/src/plugins/intel_cpu/src/memory_management.cpp index 952eb27e0ca196..7d63ce15d801b8 100644 --- a/src/plugins/intel_cpu/src/memory_management.cpp +++ b/src/plugins/intel_cpu/src/memory_management.cpp @@ -8,13 +8,44 @@ #include "node.h" #include "openvino/runtime/memory_solver.hpp" -#include "partitioned_mem_mgr.h" namespace ov { namespace intel_cpu { namespace { +class StaticPartitionMemoryBlock : public IMemoryBlockObserver { +public: + StaticPartitionMemoryBlock(MemoryBlockPtr pBlock, ptrdiff_t offset) + : m_pBlock(pBlock), m_offset(offset) { + OPENVINO_ASSERT(m_pBlock, "Memory block is uninitialized"); + } + + void* getRawPtr() const noexcept override { + return static_cast(m_pBlock->getRawPtr()) + m_offset; + } + void setExtBuff(void* ptr, 
size_t size) override { + OPENVINO_THROW("Unexpected setExtBuff call to StaticPartitionMemoryBlock"); + } + bool resize(size_t size) override { + // don't pass over as it's static memory + return false; + } + bool hasExtBuffer() const noexcept override { + return m_pBlock->hasExtBuffer(); + } + void registerMemory(Memory* memPtr) override { + m_pBlock->registerMemory(memPtr); + } + void unregisterMemory(Memory* memPtr) override { + m_pBlock->unregisterMemory(memPtr); + } + +private: + MemoryBlockPtr m_pBlock; + ptrdiff_t m_offset = 0; +}; + class IMemoryManager { public: virtual ~IMemoryManager() = default; @@ -50,7 +81,7 @@ class MemoryManagerStaticSolver : public IMemoryManager { } const MemoryControl::MemoryBlockMap& lastSolution() override { - if (!m_blocks.empty()) { + if (!m_boxes.empty() && m_blocks.empty()) { solve(); } return m_blocks; @@ -71,7 +102,7 @@ class MemoryManagerStaticSolver : public IMemoryManager { for (const auto& box : m_boxes) { int64_t offset = staticMemSolver.get_offset(box.id); - auto memoryBlock = std::make_shared(m_workspace, total_size, offset, box.size * alignment); + auto memoryBlock = std::make_shared(m_workspace, offset); m_blocks[box.id] = std::move(memoryBlock); } // m_boxes.clear(); @@ -108,7 +139,7 @@ class MemoryManageNonOverlapingSets : public IMemoryManager { } const MemoryControl::MemoryBlockMap& lastSolution() override { - if (!m_blocks.empty()) { + if (!m_boxes.empty() && m_blocks.empty()) { solve(); } return m_blocks; From ece559e72343558cdf34fbb8c06dfab1adb7623f Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Wed, 14 Aug 2024 11:17:25 +0200 Subject: [PATCH 10/36] Fix linear offset calculation in static block --- src/plugins/intel_cpu/src/memory_management.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/memory_management.cpp b/src/plugins/intel_cpu/src/memory_management.cpp index 7d63ce15d801b8..e87f94857f8171 100644 --- 
a/src/plugins/intel_cpu/src/memory_management.cpp +++ b/src/plugins/intel_cpu/src/memory_management.cpp @@ -102,7 +102,7 @@ class MemoryManagerStaticSolver : public IMemoryManager { for (const auto& box : m_boxes) { int64_t offset = staticMemSolver.get_offset(box.id); - auto memoryBlock = std::make_shared(m_workspace, offset); + auto memoryBlock = std::make_shared(m_workspace, offset * alignment); m_blocks[box.id] = std::move(memoryBlock); } // m_boxes.clear(); From b9da47a8fdc3bec33aff1f8c03a30648f8c0bb62 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Wed, 14 Aug 2024 12:02:57 +0200 Subject: [PATCH 11/36] Fix output edges processing --- src/plugins/intel_cpu/src/graph.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 6304634dcfd395..23d61d68b33550 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -798,8 +798,9 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { if (region.size >= 0 || !one_of(region.type, MemoryRegion::RegionType::OUTPUT, MemoryRegion::RegionType::IO)) { return false; } + bool result = false; for (auto& edge : edge_clusters[region.id]) { - const auto child = edge->getChild(); + auto child = edge->getChild(); if (child->getType() == Type::Output && edge->getStatus() == Edge::Status::NeedAllocation) { auto proxyMemBlock = std::make_shared(); DEBUG_LOG("ProxyMemoryBlock ", proxyMemBlock, " ", this); @@ -816,9 +817,10 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { } // sometimes there are unused output ports. OPENVINO_ASSERT(count <= 1, "CPU plugin cannot find output node. 
count ", count); + result = true; } } - return true; + return result; }); memoryRegions.erase(it, memoryRegions.end()); From 8304f773b8ec3d7cb30e8a723a902f0652fe5ba8 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Wed, 14 Aug 2024 16:27:01 +0200 Subject: [PATCH 12/36] Add allocate and free actions --- src/plugins/intel_cpu/src/cpu_memory.cpp | 6 ++ src/plugins/intel_cpu/src/cpu_memory.h | 1 + src/plugins/intel_cpu/src/graph.cpp | 2 + .../intel_cpu/src/memory_management.cpp | 99 +++++++++++++++++-- .../intel_cpu/src/memory_management.hpp | 3 + 5 files changed, 103 insertions(+), 8 deletions(-) diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index 2ae8da547c32f5..f159a118004f93 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -229,6 +229,12 @@ bool MemoryBlockWithReuse::hasExtBuffer() const noexcept { return m_useExternalStorage; } +void MemoryBlockWithReuse::free() { + m_data = decltype(m_data)(nullptr, release ); + m_memUpperBound = 0ul; + m_useExternalStorage = false; +} + void MemoryBlockWithReuse::release(void *ptr) {} void MemoryBlockWithReuse::destroy(void *ptr) { diff --git a/src/plugins/intel_cpu/src/cpu_memory.h b/src/plugins/intel_cpu/src/cpu_memory.h index 4dfb3b700728fd..b50f3291820fd1 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.h +++ b/src/plugins/intel_cpu/src/cpu_memory.h @@ -77,6 +77,7 @@ class MemoryBlockWithReuse : public IMemoryBlock { void setExtBuff(void* ptr, size_t size) override; bool resize(size_t size) override; bool hasExtBuffer() const noexcept override; + void free(); private: bool m_useExternalStorage = false; diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 23d61d68b33550..9d0aca7f591f5c 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -877,6 +877,8 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { }); } } + + 
m_pMemoryControl->allocateMemory(); } void Graph::Allocate(const std::vector& syncNodesInds) { diff --git a/src/plugins/intel_cpu/src/memory_management.cpp b/src/plugins/intel_cpu/src/memory_management.cpp index e87f94857f8171..219c9775900931 100644 --- a/src/plugins/intel_cpu/src/memory_management.cpp +++ b/src/plugins/intel_cpu/src/memory_management.cpp @@ -46,11 +46,48 @@ class StaticPartitionMemoryBlock : public IMemoryBlockObserver { ptrdiff_t m_offset = 0; }; +class MemoryBlockWithRelease : public IMemoryBlockObserver { +public: + MemoryBlockWithRelease() { + auto pInternalMem = make_unique(); + m_pInternalMem = pInternalMem.get(); + m_pBlock = std::make_shared(std::move(pInternalMem)); + } + + void* getRawPtr() const noexcept override { + return m_pBlock->getRawPtr(); + } + void setExtBuff(void* ptr, size_t size) override { + m_pBlock->setExtBuff(ptr, size); + } + bool resize(size_t size) override { + return m_pBlock->resize(size); + } + bool hasExtBuffer() const noexcept override { + return m_pBlock->hasExtBuffer(); + } + void registerMemory(Memory* memPtr) override { + m_pBlock->registerMemory(memPtr); + } + void unregisterMemory(Memory* memPtr) override { + m_pBlock->unregisterMemory(memPtr); + } + void free() { + m_pInternalMem->free(); + } + +private: + MemoryBlockPtr m_pBlock; + MemoryBlockWithReuse* m_pInternalMem; +}; + class IMemoryManager { public: virtual ~IMemoryManager() = default; virtual void insert(const MemoryRegion& reg) = 0; virtual const MemoryControl::MemoryBlockMap& lastSolution() = 0; + virtual void allocate() = 0; + virtual void release() = 0; }; using MemoryManagerPtr = std::shared_ptr; @@ -60,7 +97,7 @@ std::shared_ptr makeDnnlMemoryBlock(Args&&... 
args) { return std::make_shared(make_unique(std::forward(args)...)); } -class MemoryManagerIndividualBlocks : public IMemoryManager { +class MemoryManagerIO : public IMemoryManager { public: void insert(const MemoryRegion& reg) override { m_blocks.insert({reg.id, makeDnnlMemoryBlock()}); @@ -70,6 +107,13 @@ class MemoryManagerIndividualBlocks : public IMemoryManager { return m_blocks; } + void allocate() override { + // nothing to do + } + void release() override { + // nothing to do + } + private: MemoryControl::MemoryBlockMap m_blocks; }; @@ -95,10 +139,9 @@ class MemoryManagerStaticSolver : public IMemoryManager { }); ov::MemorySolver staticMemSolver(m_boxes); - size_t total_size = static_cast(staticMemSolver.solve()) * alignment; + m_totalSize = static_cast(staticMemSolver.solve()) * alignment; - m_workspace = makeDnnlMemoryBlock(); - m_workspace->resize(total_size); + m_workspace = std::make_shared(); for (const auto& box : m_boxes) { int64_t offset = staticMemSolver.get_offset(box.id); @@ -108,10 +151,18 @@ class MemoryManagerStaticSolver : public IMemoryManager { // m_boxes.clear(); } + void allocate() override { + if(m_workspace) m_workspace->resize(m_totalSize); + } + void release() override { + if(m_workspace) m_workspace->free(); + } + private: MemoryControl::MemoryBlockMap m_blocks; std::vector m_boxes; - MemoryBlockPtr m_workspace; + std::shared_ptr m_workspace; + size_t m_totalSize = 0; }; class MemoryManageNonOverlapingSets : public IMemoryManager { @@ -141,6 +192,7 @@ class MemoryManageNonOverlapingSets : public IMemoryManager { const MemoryControl::MemoryBlockMap& lastSolution() override { if (!m_boxes.empty() && m_blocks.empty()) { solve(); + m_blocks = MemoryControl::MemoryBlockMap{m_internalBlocks.begin(), m_internalBlocks.end()}; } return m_blocks; } @@ -168,16 +220,27 @@ class MemoryManageNonOverlapingSets : public IMemoryManager { } } for (auto& group : groups) { - auto grpMemBlock = makeDnnlMemoryBlock(); + auto grpMemBlock = 
std::make_shared(); for (auto& box : group) { - m_blocks[box.id] = grpMemBlock; + m_internalBlocks[box.id] = grpMemBlock; } } // m_boxes.clear(); } + void allocate() override { + //nothing to do + } + void release() override { + for (auto&& item : m_internalBlocks) { + item.second->free(); + } + } + private: MemoryControl::MemoryBlockMap m_blocks; + std::unordered_map> + m_internalBlocks; std::vector m_boxes; std::vector m_syncInds; }; @@ -206,6 +269,14 @@ class MemoryControl::RegionHandler { return m_memManager->lastSolution(); } + void allocate() { + m_memManager->allocate(); + } + + void release() { + m_memManager->release(); + } + private: Condition m_cond; MemoryManagerPtr m_memManager; @@ -243,7 +314,7 @@ MemoryControl::MemoryControl(std::vector syncInds) { }, std::move(syncInds))); //handler for I/O tensors, so far simply individual blocks - m_handlers.emplace_back(buildHandler([](const MemoryRegion& reg) { + m_handlers.emplace_back(buildHandler([](const MemoryRegion& reg) { if (MemoryRegion::RegionType::VARIABLE == reg.type || reg.alloc_type != MemoryRegion::AllocType::POD) { return false; } @@ -279,6 +350,18 @@ MemoryControl::MemoryBlockMap MemoryControl::insert(const std::vectorallocate(); + } +} + +void MemoryControl::releaseMemory() { + for(auto&& handler : m_handlers) { + handler->release(); + } +} + edgeClusters MemoryControl::findEdgeClusters(const std::vector& graphEdges) { typedef std::unordered_map edge_cluster_idx_map_t; diff --git a/src/plugins/intel_cpu/src/memory_management.hpp b/src/plugins/intel_cpu/src/memory_management.hpp index 70136c30e42011..563f900db3458a 100644 --- a/src/plugins/intel_cpu/src/memory_management.hpp +++ b/src/plugins/intel_cpu/src/memory_management.hpp @@ -36,6 +36,9 @@ class MemoryControl { MemoryBlockMap insert(const std::vector& regions); + void allocateMemory(); + void releaseMemory(); + private: void insert(const MemoryRegion& region); From e58f01a348c2f8024d4bb9cfb1e29a0bb408fb58 Mon Sep 17 00:00:00 2001 From: 
Maksim Kutakov Date: Wed, 14 Aug 2024 18:16:59 +0200 Subject: [PATCH 13/36] Add flushing intermediate tensors --- src/plugins/intel_cpu/src/config.h | 1 + src/plugins/intel_cpu/src/graph.cpp | 30 +++++++++++++++++-- src/plugins/intel_cpu/src/graph.h | 15 ++++++++-- src/plugins/intel_cpu/src/node.cpp | 15 ++++++++++ .../graph/merge_transpose_reorder_test.cpp | 4 ++- .../graph/resolve_edge_conflicts_test.cpp | 2 ++ 6 files changed, 61 insertions(+), 6 deletions(-) diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 4ca6332c25c3cc..797521e03a7b1e 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -46,6 +46,7 @@ struct Config { bool collectPerfCounters = false; bool exclusiveAsyncRequests = false; + bool flushIntermediateTensors = true; //TODO: change to false by default SnippetsMode snippetsMode = SnippetsMode::Enable; std::string dumpToDot = {}; std::string device_id = {}; diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 9d0aca7f591f5c..34dc0709761c06 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -848,6 +848,14 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { OPENVINO_ASSERT(count == 1); } + if (getConfig().flushIntermediateTensors) { + m_preInferEvents.push_back(&Graph::allocateIntermediateTensors); + m_postInferEvents.push_back(&Graph::releaseIntermediateTensors); + } else { + //allocate mem right away + allocateIntermediateTensors(); + } + // Resolve all other edges with status NotAllocated and in-place for (auto& cluster : edge_clusters) { for (auto& edge : cluster) { @@ -877,8 +885,6 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { }); } } - - m_pMemoryControl->allocateMemory(); } void Graph::Allocate(const std::vector& syncNodesInds) { @@ -1352,6 +1358,10 @@ void Graph::ParalleMtNuma(size_t num_nodes, void Graph::Infer(SyncInferRequest* request) { 
DEBUG_LOG("Infer graph: ", GetName(), ". Status: ", static_cast(status)); + for(auto&& item : m_preInferEvents) { + (this->*item)(); + } + switch (status) { case Status::ReadyDynamic: InferDynamic(request, UpdateNodes(m_executableGraphNodes)); @@ -1366,6 +1376,10 @@ void Graph::Infer(SyncInferRequest* request) { OPENVINO_ASSERT(IsReady(), "Wrong state of the ov::intel_cpu::Graph. Topology is not ready: ", static_cast(status)); } + for(auto&& item : m_postInferEvents) { + (this->*item)(); + } + if (infer_count != -1) infer_count++; } @@ -1813,5 +1827,17 @@ const std::unordered_map& Graph::getInterna return context->getMemoryStatesRegister()->getMemoryStates(); } +void Graph::allocateIntermediateTensors() { + if (m_pMemoryControl) { + m_pMemoryControl->allocateMemory(); + } +} + +void Graph::releaseIntermediateTensors() { + if (m_pMemoryControl) { + m_pMemoryControl->releaseMemory(); + } +} + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index 728dff91b563eb..4f34870bf191f7 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -238,6 +238,16 @@ class Graph { friend class intel_cpu::SyncInferRequest; friend std::shared_ptr dump_graph_as_ie_ngraph_net(const Graph &graph); +private: + using event_t = void (Graph::*)(void); + +private: + void EnforceInferencePrecision(); + void EnforceBF16(); + void insertReorder(EdgePtr& edge, bool isOptimized, std::unordered_set& uniqueLayerNames); + void allocateIntermediateTensors(); + void releaseIntermediateTensors(); + private: // TODO: change std::map to std::unordered_map std::map inputNodesMap; @@ -256,9 +266,8 @@ class Graph { std::unique_ptr m_pMemoryControl; - void EnforceInferencePrecision(); - void EnforceBF16(); - void insertReorder(EdgePtr& edge, bool isOptimized, std::unordered_set& uniqueLayerNames); + std::vector m_preInferEvents; + std::vector m_postInferEvents; }; using GraphPtr = std::shared_ptr; diff 
--git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 52f30e410a2942..bc723fcff9b02b 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -555,6 +555,21 @@ void Node::updateShapes() { if (ShapeInferStatus::success == result.status) { redefineOutputMemory(result.dims); } + } else { + //check the memory is allocated and try to reallocate + for (auto&& edge : getChildEdges()) { + auto edge_ptr = edge.lock(); + CPU_NODE_ASSERT(edge_ptr, " has null edge"); + auto mem = edge_ptr->getMemoryPtr(); + CPU_NODE_ASSERT(mem, " has null output memory"); + + if (mem->getShape().hasZeroDims()) { + continue; + } + if (nullptr == mem->getData()) { + mem->getMemoryBlock()->resize(mem->getSize()); // TODO: conceptually this is a very bad solution + } + } } } catch (const std::exception& exp) { THROW_CPU_NODE_ERR(exp.what()); diff --git a/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp b/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp index 09fb028a4fa5bf..a39434882e35ba 100644 --- a/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp @@ -74,7 +74,9 @@ class MergeTransposeReorderCPUTest : public testing::WithParamInterface(Config(), nullptr, false); + Config conf; + conf.flushIntermediateTensors = false; // deferred allocation + m_context = std::make_shared(conf, nullptr, false); const auto replication_result = CreateModelAndReplicate(shape, params.firstNodeLayout, params.firstNodeInplaceDirection, diff --git a/src/plugins/intel_cpu/tests/unit/graph/resolve_edge_conflicts_test.cpp b/src/plugins/intel_cpu/tests/unit/graph/resolve_edge_conflicts_test.cpp index b44194a3d5806c..593be8ac950227 100644 --- a/src/plugins/intel_cpu/tests/unit/graph/resolve_edge_conflicts_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/graph/resolve_edge_conflicts_test.cpp @@ -43,6 +43,7 @@ 
TEST(ResolveEdgeConflictsCPUTest, smoke_Run_ResolveEdgeConflicts) { */ Config conf; conf.rtCacheCapacity = 100; + conf.flushIntermediateTensors = false; // allocate memory at initialization auto context = std::make_shared(conf, nullptr, false); const dnnl::engine cpuEngine = context->getEngine(); @@ -104,6 +105,7 @@ TEST(ResolveEdgeConflictsCPUTest2, smoke_Run_ResolveEdgeConflicts2) { */ Config conf; conf.rtCacheCapacity = 100; + conf.flushIntermediateTensors = false; // allocate memory at initialization auto context = std::make_shared(conf, nullptr, false); std::unique_ptr graph = std::unique_ptr(new Graph()); From 334b7577ac0d2643a922d0fe5171391da40fb046 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Fri, 16 Aug 2024 12:35:09 +0200 Subject: [PATCH 14/36] Linter fixes --- src/plugins/intel_cpu/src/cpu_memory.cpp | 2 +- src/plugins/intel_cpu/src/graph.cpp | 4 ++-- src/plugins/intel_cpu/src/memory_management.cpp | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index f159a118004f93..ec1bbd55563210 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -230,7 +230,7 @@ bool MemoryBlockWithReuse::hasExtBuffer() const noexcept { } void MemoryBlockWithReuse::free() { - m_data = decltype(m_data)(nullptr, release ); + m_data = decltype(m_data)(nullptr, release); m_memUpperBound = 0ul; m_useExternalStorage = false; } diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 34dc0709761c06..d35b7c37b305c6 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -1358,7 +1358,7 @@ void Graph::ParalleMtNuma(size_t num_nodes, void Graph::Infer(SyncInferRequest* request) { DEBUG_LOG("Infer graph: ", GetName(), ". 
Status: ", static_cast(status)); - for(auto&& item : m_preInferEvents) { + for (auto&& item : m_preInferEvents) { (this->*item)(); } @@ -1376,7 +1376,7 @@ void Graph::Infer(SyncInferRequest* request) { OPENVINO_ASSERT(IsReady(), "Wrong state of the ov::intel_cpu::Graph. Topology is not ready: ", static_cast(status)); } - for(auto&& item : m_postInferEvents) { + for (auto&& item : m_postInferEvents) { (this->*item)(); } diff --git a/src/plugins/intel_cpu/src/memory_management.cpp b/src/plugins/intel_cpu/src/memory_management.cpp index 219c9775900931..f8a74f587ee650 100644 --- a/src/plugins/intel_cpu/src/memory_management.cpp +++ b/src/plugins/intel_cpu/src/memory_management.cpp @@ -152,10 +152,10 @@ class MemoryManagerStaticSolver : public IMemoryManager { } void allocate() override { - if(m_workspace) m_workspace->resize(m_totalSize); + if (m_workspace) m_workspace->resize(m_totalSize); } void release() override { - if(m_workspace) m_workspace->free(); + if (m_workspace) m_workspace->free(); } private: @@ -351,13 +351,13 @@ MemoryControl::MemoryBlockMap MemoryControl::insert(const std::vectorallocate(); } } void MemoryControl::releaseMemory() { - for(auto&& handler : m_handlers) { + for (auto&& handler : m_handlers) { handler->release(); } } From b0d0964029f9c977fe001a8e6a9c8a512b3d10af Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Fri, 16 Aug 2024 12:35:40 +0200 Subject: [PATCH 15/36] Avoid calling getData to not allocated memory --- src/plugins/intel_cpu/src/node.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index bc723fcff9b02b..f89e7df85d7d62 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -566,8 +566,10 @@ void Node::updateShapes() { if (mem->getShape().hasZeroDims()) { continue; } - if (nullptr == mem->getData()) { - mem->getMemoryBlock()->resize(mem->getSize()); // TODO: conceptually this is a very bad 
solution + // TODO: conceptually this is a very bad solution + auto block = mem->getMemoryBlock(); + if (nullptr == block->getRawPtr()) { + block->resize(mem->getSize()); } } } From f6a8deefee125ac712a5a8a517daec1c96edec5c Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Fri, 16 Aug 2024 14:50:57 +0200 Subject: [PATCH 16/36] Reallocate only defined mem --- src/plugins/intel_cpu/src/node.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index f89e7df85d7d62..225a03ddd93680 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -568,7 +568,7 @@ void Node::updateShapes() { } // TODO: conceptually this is a very bad solution auto block = mem->getMemoryBlock(); - if (nullptr == block->getRawPtr()) { + if (nullptr == block->getRawPtr() && mem->isDefined()) { block->resize(mem->getSize()); } } From 01bb93ca51fe2ab86ba9b5f6b1aa925819fcbbae Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Fri, 16 Aug 2024 16:13:22 +0200 Subject: [PATCH 17/36] Refactor the Reorder node --- src/plugins/intel_cpu/src/nodes/reorder.cpp | 29 +++++++-------------- src/plugins/intel_cpu/src/nodes/reorder.h | 5 +--- 2 files changed, 11 insertions(+), 23 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp index abfaf0db4bcb53..9b521cdb3b57c7 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp @@ -250,23 +250,18 @@ void Reorder::prepareParams() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_CPU_NODE_ERR("does not have preferable primitive descriptor."); - createReorderPrimitive(srcMemPtr->getDescWithType()->getDnnlDesc(), srcMemPtr->getData(), - dstMemPtr->getDescWithType()->getDnnlDesc(), dstMemPtr->getData()); + createReorderPrimitive(srcMemPtr->getDescWithType(), + dstMemPtr->getDescWithType()); } } -void 
Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc, - void* srcPtr, - const dnnl::memory::desc& dstDesc, - void* dstPtr) { +void Reorder::createReorderPrimitive(const DnnlMemoryDescPtr& srcDesc, const DnnlMemoryDescPtr& dstDesc) { auto selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) THROW_CPU_NODE_ERR("does not have preferable primitive descriptor."); const auto engine = getEngine(); - src_blocked = std::make_shared(engine, DnnlExtensionUtils::makeDescriptor(srcDesc), srcPtr, false); - dst_blocked = std::make_shared(engine, DnnlExtensionUtils::makeDescriptor(dstDesc), dstPtr, false); - auto src_desc = src_blocked->getPrimitive().get_desc(); + auto src_desc = srcDesc->getDnnlDesc(); if (!src_permutation.empty()) { CPU_NODE_ASSERT(src_permutation.size() == static_cast(src_desc.get_ndims()), "src_permutation size (", @@ -282,7 +277,7 @@ void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc, src_desc = src_desc.permute_axes(src_permutation); } - auto dst_desc = dst_blocked->getPrimitive().get_desc(); + auto dst_desc = dstDesc->getDnnlDesc(); // TODO: We should keep shape consistency for const and expected shape for node. // If it requires reshape operation it should explicitly injected into graph. @@ -295,17 +290,13 @@ void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc, // perform such conversion if the source tensor can be reshaped to the destination rank. This is // useful in situations when rank in IR does not much rank that is required by the oneDNN primitive, // but the input tensor can be reshaped (e.g. weights for grouped convolutions, biases etc.) 
- if (src_blocked->getDesc().hasLayoutType(LayoutType::ncsp) && - src_blocked->getShape().getRank() != dst_blocked->getShape().getRank()) { - const auto newDims = dst_blocked->getStaticDims(); + if (srcDesc->hasLayoutType(LayoutType::ncsp) && srcDesc->getShape().getRank() != dstDesc->getShape().getRank()) { + const auto newDims = dstDesc->getShape().getStaticDims(); const auto newFormat = DnnlExtensionUtils::GetPlainFormatByRank(newDims.size()); - auto newDesc = dnnl::memory::desc(DnnlExtensionUtils::convertToDnnlDims(newDims), - src_blocked->getDataType(), - newFormat); - src_blocked = std::make_shared(getEngine(), DnnlExtensionUtils::makeDescriptor(newDesc), srcPtr, false); - - src_desc = src_blocked->getPrimitive().get_desc(); + src_desc = dnnl::memory::desc(DnnlExtensionUtils::convertToDnnlDims(newDims), + DnnlExtensionUtils::ElementTypeToDataType(srcDesc->getPrecision()), + newFormat); } DEBUG_LOG("CreateReorderPrimitive is called for node", getName(), " src desc: ", src_desc, " dst_desc: ", dst_desc); diff --git a/src/plugins/intel_cpu/src/nodes/reorder.h b/src/plugins/intel_cpu/src/nodes/reorder.h index cb99caa07bdfa6..ab94b60b6a4a18 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.h +++ b/src/plugins/intel_cpu/src/nodes/reorder.h @@ -61,9 +61,6 @@ class Reorder : public Node { std::vector src_permutation; - MemoryPtr dst_blocked; - MemoryPtr src_blocked; - bool isOptimized = false; bool isNspc2NcspCase = false; @@ -73,7 +70,7 @@ class Reorder : public Node { void optimizedNspc2Ncsp(); void optimizedNcsp2Nspc(); - void createReorderPrimitive(const dnnl::memory::desc &srcDesc, void* srcPtr, const dnnl::memory::desc &dstDesc, void* dstPtr); + void createReorderPrimitive(const DnnlMemoryDescPtr& srcDesc, const DnnlMemoryDescPtr& dstDesc); void prepareReorderAsTranspose(MemoryDescPtr parentDesc, MemoryDescPtr childDesc); TransposeExecutorPtr transposeExecutor; From 1adc24c96662f446e10c9fd24a591d25a6e0fa06 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: 
Fri, 16 Aug 2024 17:06:52 +0200 Subject: [PATCH 18/36] Refactor set output default ptr --- src/plugins/intel_cpu/src/infer_request.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index c9ebddfbcbe018..edaaae3ca4d9ba 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -233,11 +233,11 @@ void SyncInferRequest::change_default_ptr() { auto output = outputNodesMap.find(it.first); OPENVINO_ASSERT(outputNodesMap.end() != output, "Cannot find output tensor with index: ", it.first); auto parentEdge = output->second->getParentEdgeAt(0); - if (parentEdge->getMemory().getData() == static_cast(it.second->data())) + void* const outputRawPtr = parentEdge->getMemory().getData(); + if (outputRawPtr == static_cast(it.second->data())) continue; bool canBeInPlace = true; - void* defaultPtr = parentEdge->getMemory().getData(); // Cannot be in-place after concat because concat is using different ptrs without offsets auto parent = parentEdge->getParent(); NodePtr previousParent; @@ -259,7 +259,7 @@ void SyncInferRequest::change_default_ptr() { if (!e) OPENVINO_THROW("Node ", parent->getName(), " contains empty parent edge"); - if (e->getMemory().getData() == defaultPtr) { + if (parent_port == parent->inPlaceInputPort(e->getOutputNum())) { parent = e->getParent(); parent_port = e->getInputNum(); break; From fd3b99a004ad528581390a3c8ff060cae9924cbf Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Mon, 19 Aug 2024 11:27:59 +0200 Subject: [PATCH 19/36] Adapt FC executor --- .../src/nodes/executors/dnnl/dnnl_fullyconnected.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp index 95ba35cb8f3c47..266e78b3d46c77 100644 --- 
a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp @@ -99,7 +99,7 @@ class DnnlFCExecutor : public Executor { resetSrcMemoryDataHandle = true; // create 2D memory without underlying buffer and reset to the actual memory in scope of 'execute' call m_primArgs[DNNL_ARG_SRC] = - dnnl::memory(primMemDesc->getDnnlDesc(), m_context->getEngine(), memory->getData()); + dnnl::memory(primMemDesc->getDnnlDesc(), m_context->getEngine(), DNNL_MEMORY_NONE); } } @@ -111,7 +111,7 @@ class DnnlFCExecutor : public Executor { resetDstMemoryDataHandle = true; // create 2D memory without underlying buffer and reset to the actual memory in scope of 'execute' call m_primArgs[DNNL_ARG_DST] = - dnnl::memory(primMemDesc->getDnnlDesc(), m_context->getEngine(), memory->getData()); + dnnl::memory(primMemDesc->getDnnlDesc(), m_context->getEngine(), DNNL_MEMORY_NONE); } } From 548e2d77c5856505f3980d62792bade4f4e09af0 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Mon, 19 Aug 2024 18:36:15 +0200 Subject: [PATCH 20/36] Fix dynamic memory allocation --- src/plugins/intel_cpu/src/node.cpp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 225a03ddd93680..c4b2d1961dc53d 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -543,6 +543,14 @@ std::vector Node::getAvailableFormatsForDims(const Shape &di return {memory::format_tag::any}; } +static void fetchRawMemory(const MemoryPtr& mem) { + // TODO: conceptually this is a very bad solution + auto block = mem->getMemoryBlock(); + if (mem->isDefined()) { + block->resize(mem->getSize()); + } +} + void Node::updateShapes() { OPENVINO_ASSERT(isDynamicNode(), "Node::updateShapes() is called to a static shape node of type: ", @@ -556,7 +564,10 @@ void Node::updateShapes() { 
redefineOutputMemory(result.dims); } } else { - //check the memory is allocated and try to reallocate + //guard check for internal dynamic nodes to avoid possible overestimation of the required memory size + if (shapeInference && FULL_PORT_MASK == shapeInference->get_port_mask()) + return; + for (auto&& edge : getChildEdges()) { auto edge_ptr = edge.lock(); CPU_NODE_ASSERT(edge_ptr, " has null edge"); @@ -566,11 +577,7 @@ void Node::updateShapes() { if (mem->getShape().hasZeroDims()) { continue; } - // TODO: conceptually this is a very bad solution - auto block = mem->getMemoryBlock(); - if (nullptr == block->getRawPtr() && mem->isDefined()) { - block->resize(mem->getSize()); - } + fetchRawMemory(mem); } } } catch (const std::exception& exp) { @@ -646,6 +653,9 @@ void Node::redefineOutputMemory(const size_t port, const VectorDims& new_output_ const auto& curr_desc = edges[0]->getMemory().getDesc(); if (curr_desc.getShape().isStatic() && curr_desc.getShape().getStaticDims() == new_shape) { + for (auto&& edge : edges) { + fetchRawMemory(edge->getMemoryPtr()); + } return; } From 2722aeb630693fe2422e59403509689caca9bc26 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 20 Aug 2024 11:27:57 +0200 Subject: [PATCH 21/36] Adapt Multimodal node --- src/plugins/intel_cpu/src/nodes/multinomial.cpp | 8 ++++++++ src/plugins/intel_cpu/src/nodes/multinomial.hpp | 2 ++ 2 files changed, 10 insertions(+) diff --git a/src/plugins/intel_cpu/src/nodes/multinomial.cpp b/src/plugins/intel_cpu/src/nodes/multinomial.cpp index 38413c145f1cd9..24958b4e2b980d 100644 --- a/src/plugins/intel_cpu/src/nodes/multinomial.cpp +++ b/src/plugins/intel_cpu/src/nodes/multinomial.cpp @@ -77,6 +77,14 @@ bool Multinomial::needPrepareParams() const { return true; } +void Multinomial::createPrimitive() { + if (!m_const_inputs[NUM_SAMPLES_PORT]) { + CPU_NODE_ASSERT(isDynamicNode(), "is static while the samples input is a variable"); + return; // avoid reading non initialized data from the 
NUM_SAMPLES_PORT input + } + Node::createPrimitive(); +} + void Multinomial::prepareParams() { const auto& probs_shape = getParentEdgeAt(PROBS_PORT)->getMemory().getStaticDims(); const auto& num_samples_shape = getParentEdgeAt(NUM_SAMPLES_PORT)->getMemory().getStaticDims(); diff --git a/src/plugins/intel_cpu/src/nodes/multinomial.hpp b/src/plugins/intel_cpu/src/nodes/multinomial.hpp index d4e1562a34add2..611b70503f5dba 100644 --- a/src/plugins/intel_cpu/src/nodes/multinomial.hpp +++ b/src/plugins/intel_cpu/src/nodes/multinomial.hpp @@ -28,6 +28,8 @@ class Multinomial : public Node { bool needPrepareParams() const override; void prepareParams() override; + void createPrimitive() override; + bool isExecutable() const override; void execute(dnnl::stream strm) override; void executeDynamicImpl(dnnl::stream strm) override; From 906d6496ebdd750916c5f0682a6b56d790614605 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 20 Aug 2024 15:07:31 +0200 Subject: [PATCH 22/36] Skip memory refresh for inPlace up --- src/plugins/intel_cpu/src/edge.cpp | 3 ++- src/plugins/intel_cpu/src/node.cpp | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index 0a9bc4cae34ddf..c314718bb82416 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -533,11 +533,12 @@ EdgePtr Edge::getBaseEdge(int look) { bool Edge::inPlace(LOOK look) const { int inputNum = getInputNum(); - int outputNum = getOutputNum(); if (look & LOOK_UP) { if (getParent()->inPlaceOutPort(inputNum) >= 0) return true; } + + int outputNum = getOutputNum(); if (look & LOOK_DOWN) { if (getChild()->inPlaceInputPort(outputNum) >= 0) return true; diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index c4b2d1961dc53d..83e30eb5167d91 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -571,6 +571,10 @@ void Node::updateShapes() { 
for (auto&& edge : getChildEdges()) { auto edge_ptr = edge.lock(); CPU_NODE_ASSERT(edge_ptr, " has null edge"); + if (edge_ptr->inPlace(Edge::LOOK_UP)) { + continue; + } + auto mem = edge_ptr->getMemoryPtr(); CPU_NODE_ASSERT(mem, " has null output memory"); From 5c41e7ccdc69b0c60e8f2fc33c394fe5844aea10 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 20 Aug 2024 15:39:14 +0200 Subject: [PATCH 23/36] Skip string tensors in memory refresh --- src/plugins/intel_cpu/src/node.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 83e30eb5167d91..e5df93410191cd 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -544,7 +544,10 @@ std::vector Node::getAvailableFormatsForDims(const Shape &di } static void fetchRawMemory(const MemoryPtr& mem) { - // TODO: conceptually this is a very bad solution + // TODO: conceptually fetchRawMemory is a very bad solution + if (mem->getDesc().getPrecision() == element::string) { + return; + } auto block = mem->getMemoryBlock(); if (mem->isDefined()) { block->resize(mem->getSize()); From f570550156fa9e4837100619f7f3fa912643310c Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 20 Aug 2024 16:51:13 +0200 Subject: [PATCH 24/36] Avoid reading uninit data in Loop initialization --- src/plugins/intel_cpu/src/nodes/tensoriterator.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp index 3d9f7a2217dd97..14db1e9f123bdb 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp @@ -517,7 +517,10 @@ void TensorIterator::createPrimitive() { if (runAsDynamic()) prepareDynamicBuffers(); - Node::createPrimitive(); + if (inputShapesDefined() && (getAlgorithm() == Algorithm::TensorIteratorLoop || needPrepareParams())) 
{ + prepareParams(); + updateLastInputDims(); + } } bool TensorIterator::needPrepareParams() const { @@ -716,9 +719,11 @@ void TensorIterator::prepareContinueCond() { void TensorIterator::prepareInitialCond() { if (loopExecutionConditionIdx != -1 || !initial_cond_check) { + const bool first_call = !(static_cast(initial_cond_check)); auto mem = getSrcMemoryAtPort(loopExecutionConditionIdx); initial_cond_check.reset(new asBoolCheck(mem)); - lastUsedCond = initial_cond_check->getStatus(); + if (IMPLICATION(first_call, getParentEdgeAt(loopExecutionConditionIdx)->getParent()->isConstant())) + lastUsedCond = initial_cond_check->getStatus(); } } From da1f1848375e598f7b68c9fa52db5d96a1452fb2 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Wed, 21 Aug 2024 13:17:17 +0200 Subject: [PATCH 25/36] Fix loop trip count reading in Loop --- .../intel_cpu/src/nodes/tensoriterator.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp index 14db1e9f123bdb..e529fa4cad6ef3 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp @@ -720,21 +720,29 @@ void TensorIterator::prepareContinueCond() { void TensorIterator::prepareInitialCond() { if (loopExecutionConditionIdx != -1 || !initial_cond_check) { const bool first_call = !(static_cast(initial_cond_check)); - auto mem = getSrcMemoryAtPort(loopExecutionConditionIdx); + auto edge = getParentEdgeAt(loopExecutionConditionIdx); + auto mem = edge->getMemoryPtr(); initial_cond_check.reset(new asBoolCheck(mem)); - if (IMPLICATION(first_call, getParentEdgeAt(loopExecutionConditionIdx)->getParent()->isConstant())) + if (IMPLICATION(first_call, edge->getParent()->isConstant())) lastUsedCond = initial_cond_check->getStatus(); } } void TensorIterator::prepareTripCount() { + bool read_data = false; if (loopTripCountIdx == -1) { 
trip_count_check.reset(new staticValueCheck(getNumIteration(inputPortMap, outputPortMap))); + read_data = true; } else { - auto mem = getSrcMemoryAtPort(loopTripCountIdx); + const bool first_call = !(static_cast(initial_cond_check)); + auto edge = getParentEdgeAt(loopTripCountIdx); + auto mem = edge->getMemoryPtr(); trip_count_check.reset(new asIntCheck(mem)); + read_data = IMPLICATION(first_call, edge->getParent()->isConstant()); + } + if (read_data) { + lastUsedTripCount = trip_count_check->getStatus(); } - lastUsedTripCount = trip_count_check->getStatus(); } /* *==============* *==============* *==============* *==============* *==============* */ From adeeb0284a0a98e9dad4a8f67d7918485bf63d2d Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Wed, 21 Aug 2024 14:54:48 +0200 Subject: [PATCH 26/36] Introduce memory block stub --- src/plugins/intel_cpu/src/nodes/memory.cpp | 32 ++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp index 037221dc31a3ae..e66b148c6f99ee 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory.cpp @@ -21,7 +21,34 @@ namespace node { namespace { class MemoryStub : public IMemory { public: - MemoryStub(const dnnl::engine& eng, const MemoryDescPtr& pMemDesc) : m_eng(eng), m_pMemDesc(pMemDesc) {} + class MemoryBlockStub : public IMemoryBlockObserver { + void* getRawPtr() const noexcept override { + return nullptr; + } + void setExtBuff(void* ptr, size_t size) override { + // pass + } + bool resize(size_t size) override { + // pass + return false; + } + bool hasExtBuffer() const noexcept override { + // pass + return false; + } + void registerMemory(Memory* memPtr) override { + // pass + } + void unregisterMemory(Memory* memPtr) override { + // pass + } + }; + +public: + MemoryStub(const dnnl::engine& eng, const MemoryDescPtr& pMemDesc) + : m_eng(eng), + m_pMemDesc(pMemDesc), + 
m_pMemoryBlock(std::make_shared()) {} const MemoryDesc& getDesc() const override { return *m_pMemDesc; @@ -56,7 +83,7 @@ class MemoryStub : public IMemory { } MemoryBlockPtr getMemoryBlock() const override { - OPENVINO_THROW("Unexpected call MemoryStub::getMemoryBlock()"); + return m_pMemoryBlock; } dnnl::memory getPrimitive() const override { @@ -70,6 +97,7 @@ class MemoryStub : public IMemory { private: dnnl::engine m_eng; MemoryDescPtr m_pMemDesc; + std::shared_ptr m_pMemoryBlock; }; } // namespace From e4dbf00eb5e28cd543aeffb3a0415ac7347d3392 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Thu, 22 Aug 2024 14:21:27 +0200 Subject: [PATCH 27/36] WA in the Pad node to prevent reading uninit data --- src/plugins/intel_cpu/src/nodes/pad.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/plugins/intel_cpu/src/nodes/pad.cpp b/src/plugins/intel_cpu/src/nodes/pad.cpp index bd38f521be5167..10cdb2a19b771f 100644 --- a/src/plugins/intel_cpu/src/nodes/pad.cpp +++ b/src/plugins/intel_cpu/src/nodes/pad.cpp @@ -183,7 +183,20 @@ void Pad::createPrimitive() { dstMemory.push_back(getDstMemoryAtPort(0)); } if (inputShapesDefined() && isExecutable() && !shapeHasDataDependency) { + // WA to prevent reading uninitialized data in case of the pad value is a parameter + MemoryCPtr padValue = srcMemory.size() > PAD_VALUE_ID ? 
srcMemory[PAD_VALUE_ID] : nullptr; + if (padValue && !getParentEdgeAt(PAD_VALUE_ID)->getParent()->isConstant()) { + //set artificial zero memory just to avoid reading garbage from the uninitilized input + auto tmpPadValue = std::make_shared(getEngine(), padValue->getDescPtr()); + tmpPadValue->nullify(); + srcMemory[PAD_VALUE_ID] = tmpPadValue; + } prepareParams(); + if (padValue) { + // restore original memory object + srcMemory[PAD_VALUE_ID] = padValue; + } + updateLastInputDims(); } } From 8ad88fba55af9823252d1b2145e20936fb64c657 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Thu, 22 Aug 2024 18:05:01 +0200 Subject: [PATCH 28/36] Introduce network level memory control unit --- src/plugins/intel_cpu/src/graph.cpp | 4 +-- src/plugins/intel_cpu/src/graph.h | 4 +-- src/plugins/intel_cpu/src/graph_context.cpp | 4 ++- src/plugins/intel_cpu/src/graph_context.h | 7 +++++ ...mory_management.cpp => memory_control.cpp} | 21 ++++++++++++++- ...mory_management.hpp => memory_control.hpp} | 26 +++++++++++++++++-- 6 files changed, 58 insertions(+), 8 deletions(-) rename src/plugins/intel_cpu/src/{memory_management.cpp => memory_control.cpp} (95%) rename src/plugins/intel_cpu/src/{memory_management.hpp => memory_control.hpp} (74%) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index bec78b9cf47107..1c7cc8b9b0bda8 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -23,7 +23,7 @@ #include "itt.h" #include "memory_desc/cpu_memory_desc_utils.h" #include "memory_desc/dnnl_blocked_memory_desc.h" -#include "memory_management.hpp" +#include "memory_control.hpp" #include "node.h" #include "nodes/common/cpu_convert.h" #include "nodes/common/cpu_memcpy.h" @@ -806,7 +806,7 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { memoryRegions.erase(it, memoryRegions.end()); //Set up the memory control subsystem. 
- this->m_pMemoryControl = make_unique(syncNodesInds); + this->m_pMemoryControl = &(getGraphContext()->getNetworkMemoryControl()->createMemoryControlUnit(syncNodesInds)); auto memoryBlocks = m_pMemoryControl->insert(memoryRegions); // attach all the not yet allocated edges to the memory contol diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index dd62e8b947e296..57410e480599d8 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -10,7 +10,7 @@ #include "node.h" #include "edge.h" #include "graph_context.h" -#include "memory_management.hpp" +#include "memory_control.hpp" #include "openvino/runtime/profiling_info.hpp" #include @@ -262,7 +262,7 @@ class Graph { GraphContext::CPtr context; dnnl::stream m_stream; - std::unique_ptr m_pMemoryControl; + MemoryControl* m_pMemoryControl = nullptr; std::vector m_preInferEvents; std::vector m_postInferEvents; diff --git a/src/plugins/intel_cpu/src/graph_context.cpp b/src/plugins/intel_cpu/src/graph_context.cpp index 2699a8854afb80..e200766fa4791c 100644 --- a/src/plugins/intel_cpu/src/graph_context.cpp +++ b/src/plugins/intel_cpu/src/graph_context.cpp @@ -4,6 +4,7 @@ #include "dnnl_types.h" #include "graph_context.h" #include "nodes/memory.hpp" +#include "memory_control.hpp" namespace ov { namespace intel_cpu { @@ -18,7 +19,8 @@ GraphContext::GraphContext(const Config& config, isGraphQuantizedFlag(isGraphQuantized), streamExecutor(streamExecutor), subMemoryManager(sub_memory_manager), - memoryStatesRegister(std::make_shared()) { + memoryStatesRegister(std::make_shared()), + networkMemoryControl(std::make_shared()) { rtParamsCache = std::make_shared(config.rtCacheCapacity); // primitive/executors can be shared across sub-stream // but scratch pad cannot be shared. 
diff --git a/src/plugins/intel_cpu/src/graph_context.h b/src/plugins/intel_cpu/src/graph_context.h index 138ccebe0f9a40..db2b126213978c 100644 --- a/src/plugins/intel_cpu/src/graph_context.h +++ b/src/plugins/intel_cpu/src/graph_context.h @@ -18,6 +18,8 @@ namespace node { class MemoryStatesRegister; } // namespace node +class NetworkMemoryControl; + class GraphContext { public: typedef std::shared_ptr Ptr; @@ -76,6 +78,10 @@ class GraphContext { return memoryStatesRegister; } + const std::shared_ptr& getNetworkMemoryControl() const { + return networkMemoryControl; + } + private: Config config; // network-level config @@ -97,6 +103,7 @@ class GraphContext { int numNumaNodes = 1; std::shared_ptr memoryStatesRegister; + std::shared_ptr networkMemoryControl; }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/memory_management.cpp b/src/plugins/intel_cpu/src/memory_control.cpp similarity index 95% rename from src/plugins/intel_cpu/src/memory_management.cpp rename to src/plugins/intel_cpu/src/memory_control.cpp index f8a74f587ee650..3802188b01b7c6 100644 --- a/src/plugins/intel_cpu/src/memory_management.cpp +++ b/src/plugins/intel_cpu/src/memory_control.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "memory_management.hpp" +#include "memory_control.hpp" #include @@ -354,12 +354,14 @@ void MemoryControl::allocateMemory() { for (auto&& handler : m_handlers) { handler->allocate(); } + m_allocated = true; } void MemoryControl::releaseMemory() { for (auto&& handler : m_handlers) { handler->release(); } + m_allocated = false; } edgeClusters MemoryControl::findEdgeClusters(const std::vector& graphEdges) { @@ -405,5 +407,22 @@ edgeClusters MemoryControl::findEdgeClusters(const std::vector& graphEd return edge_clusters; } +MemoryControl& NetworkMemoryControl::createMemoryControlUnit(std::vector syncInds) { + m_storage.emplace_back(std::unique_ptr(new MemoryControl(syncInds))); + return *(m_storage.back()); +} + +void 
NetworkMemoryControl::allocateMemory() { + for (auto&& item : m_storage) { + item->allocateMemory(); + } +} + +void NetworkMemoryControl::releaseMemory() { + for (auto&& item : m_storage) { + item->releaseMemory(); + } +} + } // namespace intel_cpu } // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/memory_management.hpp b/src/plugins/intel_cpu/src/memory_control.hpp similarity index 74% rename from src/plugins/intel_cpu/src/memory_management.hpp rename to src/plugins/intel_cpu/src/memory_control.hpp index 563f900db3458a..3157291dfa79f9 100644 --- a/src/plugins/intel_cpu/src/memory_management.hpp +++ b/src/plugins/intel_cpu/src/memory_control.hpp @@ -30,21 +30,43 @@ class MemoryControl { using MemoryBlockMap = std::unordered_map; public: - explicit MemoryControl(std::vector syncInds); - static edgeClusters findEdgeClusters(const std::vector& graphEdges); MemoryBlockMap insert(const std::vector& regions); + bool allocated() const { + return m_allocated; + } + void allocateMemory(); void releaseMemory(); private: + explicit MemoryControl(std::vector syncInds); void insert(const MemoryRegion& region); + friend class NetworkMemoryControl; + private: std::vector m_syncInds; std::vector m_handlers; + bool m_allocated = false; +}; + +class NetworkMemoryControl { +public: + NetworkMemoryControl() = default; + MemoryControl& createMemoryControlUnit(std::vector syncInds); + + void allocateMemory(); + void releaseMemory(); + +private: + using value_type = std::unique_ptr; + +private: + std::vector m_storage; }; + } // namespace intel_cpu } // namespace ov \ No newline at end of file From 4f38db608d536e4ed49404c23b72b35177d88682 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Mon, 26 Aug 2024 20:12:39 +0200 Subject: [PATCH 29/36] Fix Loop for dynamic shape applications --- .../intel_cpu/src/nodes/tensoriterator.cpp | 33 ++++++++++--------- .../intel_cpu/src/nodes/tensoriterator.h | 5 +-- 2 files changed, 20 insertions(+), 18 deletions(-) 
diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp index e529fa4cad6ef3..9a3b9788b838d2 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp @@ -54,12 +54,9 @@ static NodeConfig make_plain_config(const std::shared_ptr& op) { } static void redefineToMemories(const std::vector& to_mems, MemoryDescPtr new_desc) { - const auto &currDesc = to_mems.front()->getDesc(); - if (currDesc.getShape().isDynamic() || currDesc.getShape().getStaticDims() != new_desc->getShape().getStaticDims()) { - // TODO : check the entire dstMemPtrs usage considering the proper memory sharing - for (size_t j = 0; j < to_mems.size(); j++) { - to_mems[j]->redefineDesc(new_desc); - } + // TODO : check the entire dstMemPtrs usage considering the proper memory sharing + for (size_t j = 0; j < to_mems.size(); j++) { + to_mems[j]->redefineDesc(new_desc); } } @@ -518,7 +515,8 @@ void TensorIterator::createPrimitive() { prepareDynamicBuffers(); if (inputShapesDefined() && (getAlgorithm() == Algorithm::TensorIteratorLoop || needPrepareParams())) { - prepareParams(); + constexpr bool compileStage = true; + prepareParamsImpl(compileStage); updateLastInputDims(); } } @@ -544,10 +542,15 @@ bool TensorIterator::needPrepareParams() const { // Thus, sliced input shapes and body input shapes are equal but iteration counts are different. 
So we should update trip count return Node::needPrepareParams(); } - void TensorIterator::prepareParams() { - prepareTripCount(); - prepareInitialCond(); + // due to specific createPrimitive implementation this method is called only during inference + constexpr bool compileStage = false; + prepareParamsImpl(compileStage); +} + +void TensorIterator::prepareParamsImpl(const bool compileStage) { + prepareTripCount(compileStage); + prepareInitialCond(compileStage); first_mappers.clear(); before_mappers.clear(); @@ -717,28 +720,26 @@ void TensorIterator::prepareContinueCond() { } } -void TensorIterator::prepareInitialCond() { +void TensorIterator::prepareInitialCond(const bool compileStage) { if (loopExecutionConditionIdx != -1 || !initial_cond_check) { - const bool first_call = !(static_cast(initial_cond_check)); auto edge = getParentEdgeAt(loopExecutionConditionIdx); auto mem = edge->getMemoryPtr(); initial_cond_check.reset(new asBoolCheck(mem)); - if (IMPLICATION(first_call, edge->getParent()->isConstant())) + if (IMPLICATION(compileStage, edge->getParent()->isConstant())) lastUsedCond = initial_cond_check->getStatus(); } } -void TensorIterator::prepareTripCount() { +void TensorIterator::prepareTripCount(const bool compileStage) { bool read_data = false; if (loopTripCountIdx == -1) { trip_count_check.reset(new staticValueCheck(getNumIteration(inputPortMap, outputPortMap))); read_data = true; } else { - const bool first_call = !(static_cast(initial_cond_check)); auto edge = getParentEdgeAt(loopTripCountIdx); auto mem = edge->getMemoryPtr(); trip_count_check.reset(new asIntCheck(mem)); - read_data = IMPLICATION(first_call, edge->getParent()->isConstant()); + read_data = IMPLICATION(compileStage, edge->getParent()->isConstant()); } if (read_data) { lastUsedTripCount = trip_count_check->getStatus(); diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.h b/src/plugins/intel_cpu/src/nodes/tensoriterator.h index 4a8229605e2103..f8a8110c3fae48 100644 --- 
a/src/plugins/intel_cpu/src/nodes/tensoriterator.h +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.h @@ -130,14 +130,15 @@ class TensorIterator : public Node { void prepareDynamicBuffers(); void prepareLoopBodyCurrentIteration(); void prepareContinueCond(); - void prepareInitialCond(); - void prepareTripCount(); + void prepareInitialCond(const bool compileStage); + void prepareTripCount(const bool compileStage); /* Dynamic support */ void reshapeSubgraphInput(); void reshapeAndFillOutput(dnnl::stream strm); bool checkForInputAndBodyShapesInequality() const; int getNumIteration(const std::vector& inputPortMap, const std::vector& outputPortMap) const; + void prepareParamsImpl(const bool compileStage); /* run dynamic subgraph inside a static node */ bool runAsDynamic() const; From 8704e614efeea8b509bc91d3f78a32b83ea7a6e8 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Thu, 22 Aug 2024 19:33:16 +0200 Subject: [PATCH 30/36] Add an interface call releasing intermediate memory --- .../openvino/runtime/icompiled_model.hpp | 6 ++++ .../openvino/runtime/compiled_model.hpp | 9 ++++++ src/inference/src/cpp/compiled_model.cpp | 4 +++ src/inference/src/dev/icompiled_model.cpp | 4 +++ src/plugins/intel_cpu/src/compiled_model.cpp | 8 +++++ src/plugins/intel_cpu/src/compiled_model.h | 2 ++ src/plugins/intel_cpu/src/graph.cpp | 31 +++++-------------- src/plugins/intel_cpu/src/graph.h | 5 --- .../intel_cpu/src/utils/general_utils.h | 12 +++++++ 9 files changed, 53 insertions(+), 28 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/icompiled_model.hpp b/src/inference/dev_api/openvino/runtime/icompiled_model.hpp index eca22b3b0036f3..6819492e2534cc 100644 --- a/src/inference/dev_api/openvino/runtime/icompiled_model.hpp +++ b/src/inference/dev_api/openvino/runtime/icompiled_model.hpp @@ -134,6 +134,12 @@ class OPENVINO_RUNTIME_API ICompiledModel : public std::enable_shared_from_this< */ ov::SoPtr get_context() const; + /** + * @brief Release intermediate memory + 
* + */ + virtual void release_buffers(); + virtual ~ICompiledModel() = default; private: diff --git a/src/inference/include/openvino/runtime/compiled_model.hpp b/src/inference/include/openvino/runtime/compiled_model.hpp index 7fb005ee999f84..8e447efb85bc10 100644 --- a/src/inference/include/openvino/runtime/compiled_model.hpp +++ b/src/inference/include/openvino/runtime/compiled_model.hpp @@ -200,6 +200,15 @@ class OPENVINO_RUNTIME_API CompiledModel { return get_property(property.name()).template as(); } + /** + * @brief Release intermediate memory. + * + * This methods forces the Compiled model to release memory allocated for intermediate structures, e.g. caches, + * tensors, temporal buffers etc. + * + */ + void release_buffers(); + /** * @brief Returns pointer to device-specific shared context * on a remote accelerator device that was used to create this CompiledModel. diff --git a/src/inference/src/cpp/compiled_model.cpp b/src/inference/src/cpp/compiled_model.cpp index 14ae5b98d1826a..3106a0ef2b8da0 100644 --- a/src/inference/src/cpp/compiled_model.cpp +++ b/src/inference/src/cpp/compiled_model.cpp @@ -145,6 +145,10 @@ Any CompiledModel::get_property(const std::string& name) const { }); } +void CompiledModel::release_buffers() { + OV_COMPILED_MODEL_CALL_STATEMENT(_impl->release_buffers()); +} + RemoteContext CompiledModel::get_context() const { OV_COMPILED_MODEL_CALL_STATEMENT({ auto ctx = _impl->get_context(); diff --git a/src/inference/src/dev/icompiled_model.cpp b/src/inference/src/dev/icompiled_model.cpp index 0079826cdeb1b5..5fdd99470e0cc7 100644 --- a/src/inference/src/dev/icompiled_model.cpp +++ b/src/inference/src/dev/icompiled_model.cpp @@ -147,3 +147,7 @@ ov::SoPtr ov::ICompiledModel::get_context() const { void ov::ICompiledModel::set_model_shared_object(ov::Model& model, const std::shared_ptr& shared_object) { model.m_shared_object = shared_object; } + +void ov::ICompiledModel::release_buffers() { + 
OPENVINO_THROW("ov::ICompiledModel::release_buffers() is not implemented"); +} diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index af8df9657ad34b..4301e80fbb2b32 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -342,5 +342,13 @@ void CompiledModel::export_model(std::ostream& modelStream) const { serializer << m_model; } +void CompiledModel::release_buffers() { + for (auto&& graph : m_graphs) { + GraphGuard::Lock graph_lock{graph}; + auto ctx = graph_lock._graph.getGraphContext(); + ctx->getNetworkMemoryControl()->releaseMemory(); + } +} + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/compiled_model.h b/src/plugins/intel_cpu/src/compiled_model.h index facd9ef3698ca7..7782afa09e703a 100644 --- a/src/plugins/intel_cpu/src/compiled_model.h +++ b/src/plugins/intel_cpu/src/compiled_model.h @@ -49,6 +49,8 @@ class CompiledModel : public ov::ICompiledModel { "Set property to Core::compile_model during compilation"); }; + void release_buffers() override; + private: std::shared_ptr create_sync_infer_request() const override; friend class SyncInferRequest; diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 1c7cc8b9b0bda8..2e7b90f11a61a0 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -828,12 +828,9 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { OPENVINO_ASSERT(count == 1); } - if (getConfig().flushIntermediateTensors) { - m_preInferEvents.push_back(&Graph::allocateIntermediateTensors); - m_postInferEvents.push_back(&Graph::releaseIntermediateTensors); - } else { + if (!getConfig().flushIntermediateTensors) { //allocate mem right away - allocateIntermediateTensors(); + m_pMemoryControl->allocateMemory(); } // Resolve all other edges with status NotAllocated and in-place @@ -1278,8 +1275,12 @@ int 
Graph::GetNumaNodeId() const { void Graph::Infer(SyncInferRequest* request) { DEBUG_LOG("Infer graph: ", GetName(), ". Status: ", static_cast(status)); - for (auto&& item : m_preInferEvents) { - (this->*item)(); + if ov_unlikely(!m_pMemoryControl) { + OPENVINO_THROW("Memory control unit is not initilized in graph: ", GetName()); + } + + if ov_unlikely(!m_pMemoryControl->allocated()) { + m_pMemoryControl->allocateMemory(); } switch (status) { @@ -1296,10 +1297,6 @@ void Graph::Infer(SyncInferRequest* request) { OPENVINO_ASSERT(IsReady(), "Wrong state of the ov::intel_cpu::Graph. Topology is not ready: ", static_cast(status)); } - for (auto&& item : m_postInferEvents) { - (this->*item)(); - } - if (infer_count != -1) infer_count++; } @@ -1730,17 +1727,5 @@ const std::unordered_map& Graph::getInterna return context->getMemoryStatesRegister()->getMemoryStates(); } -void Graph::allocateIntermediateTensors() { - if (m_pMemoryControl) { - m_pMemoryControl->allocateMemory(); - } -} - -void Graph::releaseIntermediateTensors() { - if (m_pMemoryControl) { - m_pMemoryControl->releaseMemory(); - } -} - } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index 57410e480599d8..e7c5a25917fe8e 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -243,8 +243,6 @@ class Graph { void insertReorder(EdgePtr& edge, bool isOptimized, std::unordered_set& uniqueLayerNames); void insertConvert(EdgePtr& edge); int GetNumaNodeId() const; - void allocateIntermediateTensors(); - void releaseIntermediateTensors(); private: // TODO: change std::map to std::unordered_map @@ -263,9 +261,6 @@ class Graph { dnnl::stream m_stream; MemoryControl* m_pMemoryControl = nullptr; - - std::vector m_preInferEvents; - std::vector m_postInferEvents; }; using GraphPtr = std::shared_ptr; diff --git a/src/plugins/intel_cpu/src/utils/general_utils.h b/src/plugins/intel_cpu/src/utils/general_utils.h index 
836868eff41d3a..983f631fa34e11 100644 --- a/src/plugins/intel_cpu/src/utils/general_utils.h +++ b/src/plugins/intel_cpu/src/utils/general_utils.h @@ -11,6 +11,18 @@ #include "openvino/core/type/element_type.hpp" + +#if defined(__GNUC__) || defined(__clang__) +#define ov_unlikely(x) (__builtin_expect(!!(x), false)) +#define ov_likely(x) (__builtin_expect(!!(x), true)) +#elif(defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) +#define ov_unlikely(x) (x) [[unlikely]] +#define ov_likely(x) (x) [[likely]] +#else +#define ov_unlikely(x) (x) +#define ov_likely(x) (x) +#endif + namespace ov { namespace intel_cpu { From 5f9e4d37bb3a673dd8b9e5980c65ee3e4a8c2099 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Mon, 2 Sep 2024 15:42:00 +0200 Subject: [PATCH 31/36] Code cleanup --- src/plugins/intel_cpu/src/config.h | 1 - src/plugins/intel_cpu/src/graph.cpp | 12 +++--------- src/plugins/intel_cpu/src/memory_control.cpp | 16 ++++++++-------- src/plugins/intel_cpu/src/memory_control.hpp | 2 +- src/plugins/intel_cpu/src/utils/general_utils.h | 12 ------------ .../unit/graph/merge_transpose_reorder_test.cpp | 1 - .../unit/graph/resolve_edge_conflicts_test.cpp | 2 -- 7 files changed, 12 insertions(+), 34 deletions(-) diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 12c4d11029cae5..eeb8e78f5fa91a 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -46,7 +46,6 @@ struct Config { bool collectPerfCounters = false; bool exclusiveAsyncRequests = false; - bool flushIntermediateTensors = true; //TODO: change to false by default SnippetsMode snippetsMode = SnippetsMode::Enable; std::string dumpToDot = {}; std::string device_id = {}; diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 942e68c21cf0dc..bab063b15b3136 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -23,7 +23,6 
@@ #include "itt.h" #include "memory_desc/cpu_memory_desc_utils.h" #include "memory_desc/dnnl_blocked_memory_desc.h" -#include "memory_control.hpp" #include "node.h" #include "nodes/common/cpu_convert.h" #include "nodes/common/cpu_memcpy.h" @@ -45,8 +44,6 @@ #include #include "common/primitive_desc_iface.hpp" -#include "openvino/runtime/memory_solver.hpp" //TODO: remove - #include "openvino/runtime/threading/cpu_streams_executor.hpp" #include "openvino/core/parallel.hpp" @@ -832,10 +829,7 @@ void Graph::AllocateWithReuse(const std::vector& syncNodesInds) { OPENVINO_ASSERT(count == 1); } - if (!getConfig().flushIntermediateTensors) { - //allocate mem right away - m_pMemoryControl->allocateMemory(); - } + m_pMemoryControl->allocateMemory(); // Resolve all other edges with status NotAllocated and in-place for (auto& cluster : edge_clusters) { @@ -1279,11 +1273,11 @@ int Graph::GetNumaNodeId() const { void Graph::Infer(SyncInferRequest* request) { DEBUG_LOG("Infer graph: ", GetName(), ". Status: ", static_cast(status)); - if ov_unlikely(!m_pMemoryControl) { + if (!m_pMemoryControl) { OPENVINO_THROW("Memory control unit is not initilized in graph: ", GetName()); } - if ov_unlikely(!m_pMemoryControl->allocated()) { + if (!m_pMemoryControl->allocated()) { m_pMemoryControl->allocateMemory(); } diff --git a/src/plugins/intel_cpu/src/memory_control.cpp b/src/plugins/intel_cpu/src/memory_control.cpp index 3802188b01b7c6..0f202c296891c1 100644 --- a/src/plugins/intel_cpu/src/memory_control.cpp +++ b/src/plugins/intel_cpu/src/memory_control.cpp @@ -118,7 +118,7 @@ class MemoryManagerIO : public IMemoryManager { MemoryControl::MemoryBlockMap m_blocks; }; -class MemoryManagerStaticSolver : public IMemoryManager { +class MemoryManagerStatic : public IMemoryManager { public: void insert(const MemoryRegion& reg) override { m_boxes.emplace_back(MemorySolver::Box{reg.start, reg.finish, reg.size, reg.id}); @@ -148,7 +148,7 @@ class MemoryManagerStaticSolver : public IMemoryManager { 
auto memoryBlock = std::make_shared(m_workspace, offset * alignment); m_blocks[box.id] = std::move(memoryBlock); } - // m_boxes.clear(); + m_boxes.clear(); } void allocate() override { @@ -225,7 +225,7 @@ class MemoryManageNonOverlapingSets : public IMemoryManager { m_internalBlocks[box.id] = grpMemBlock; } } - // m_boxes.clear(); + m_boxes.clear(); } void allocate() override { @@ -296,7 +296,7 @@ MemoryControl::MemoryControl(std::vector syncInds) { // init handlers // handler for dynamic tensors - m_handlers.emplace_back(buildHandler([](const MemoryRegion& reg) { + m_handlers.emplace_back(buildHandler([](const MemoryRegion& reg) { if (reg.size < 0 || MemoryRegion::RegionType::VARIABLE != reg.type || MemoryRegion::AllocType::POD != reg.alloc_type) { return false; @@ -408,18 +408,18 @@ edgeClusters MemoryControl::findEdgeClusters(const std::vector& graphEd } MemoryControl& NetworkMemoryControl::createMemoryControlUnit(std::vector syncInds) { - m_storage.emplace_back(std::unique_ptr(new MemoryControl(syncInds))); - return *(m_storage.back()); + m_controlUnits.emplace_back(std::unique_ptr(new MemoryControl(syncInds))); + return *(m_controlUnits.back()); } void NetworkMemoryControl::allocateMemory() { - for (auto&& item : m_storage) { + for (auto&& item : m_controlUnits) { item->allocateMemory(); } } void NetworkMemoryControl::releaseMemory() { - for (auto&& item : m_storage) { + for (auto&& item : m_controlUnits) { item->releaseMemory(); } } diff --git a/src/plugins/intel_cpu/src/memory_control.hpp b/src/plugins/intel_cpu/src/memory_control.hpp index 3157291dfa79f9..ce4dc90890f3fa 100644 --- a/src/plugins/intel_cpu/src/memory_control.hpp +++ b/src/plugins/intel_cpu/src/memory_control.hpp @@ -65,7 +65,7 @@ class NetworkMemoryControl { using value_type = std::unique_ptr; private: - std::vector m_storage; + std::vector m_controlUnits; }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/utils/general_utils.h b/src/plugins/intel_cpu/src/utils/general_utils.h 
index 983f631fa34e11..836868eff41d3a 100644 --- a/src/plugins/intel_cpu/src/utils/general_utils.h +++ b/src/plugins/intel_cpu/src/utils/general_utils.h @@ -11,18 +11,6 @@ #include "openvino/core/type/element_type.hpp" - -#if defined(__GNUC__) || defined(__clang__) -#define ov_unlikely(x) (__builtin_expect(!!(x), false)) -#define ov_likely(x) (__builtin_expect(!!(x), true)) -#elif(defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) -#define ov_unlikely(x) (x) [[unlikely]] -#define ov_likely(x) (x) [[likely]] -#else -#define ov_unlikely(x) (x) -#define ov_likely(x) (x) -#endif - namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp b/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp index 85929c0a9d33a0..003aca979398fb 100644 --- a/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/graph/merge_transpose_reorder_test.cpp @@ -76,7 +76,6 @@ class MergeTransposeReorderCPUTest : public testing::WithParamInterface(conf, nullptr, false); const auto replication_result = CreateModelAndReplicate(shape, params.firstNodeLayout, diff --git a/src/plugins/intel_cpu/tests/unit/graph/resolve_edge_conflicts_test.cpp b/src/plugins/intel_cpu/tests/unit/graph/resolve_edge_conflicts_test.cpp index 593be8ac950227..b44194a3d5806c 100644 --- a/src/plugins/intel_cpu/tests/unit/graph/resolve_edge_conflicts_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/graph/resolve_edge_conflicts_test.cpp @@ -43,7 +43,6 @@ TEST(ResolveEdgeConflictsCPUTest, smoke_Run_ResolveEdgeConflicts) { */ Config conf; conf.rtCacheCapacity = 100; - conf.flushIntermediateTensors = false; // allocate memory at initialization auto context = std::make_shared(conf, nullptr, false); const dnnl::engine cpuEngine = context->getEngine(); @@ -105,7 +104,6 @@ TEST(ResolveEdgeConflictsCPUTest2, smoke_Run_ResolveEdgeConflicts2) { */ Config 
conf; conf.rtCacheCapacity = 100; - conf.flushIntermediateTensors = false; // allocate memory at initialization auto context = std::make_shared(conf, nullptr, false); std::unique_ptr graph = std::unique_ptr(new Graph()); From f2bea35332bc027be132e030c6d2e68d45037ff9 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 3 Sep 2024 14:19:32 +0200 Subject: [PATCH 32/36] Rename release_buffers to release_memory --- src/inference/dev_api/openvino/runtime/icompiled_model.hpp | 2 +- src/inference/include/openvino/runtime/compiled_model.hpp | 4 ++-- src/inference/src/cpp/compiled_model.cpp | 4 ++-- src/inference/src/dev/icompiled_model.cpp | 4 ++-- src/plugins/intel_cpu/src/compiled_model.cpp | 2 +- src/plugins/intel_cpu/src/compiled_model.h | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/icompiled_model.hpp b/src/inference/dev_api/openvino/runtime/icompiled_model.hpp index 6819492e2534cc..01f7b556da909f 100644 --- a/src/inference/dev_api/openvino/runtime/icompiled_model.hpp +++ b/src/inference/dev_api/openvino/runtime/icompiled_model.hpp @@ -138,7 +138,7 @@ class OPENVINO_RUNTIME_API ICompiledModel : public std::enable_shared_from_this< * @brief Release intermediate memory * */ - virtual void release_buffers(); + virtual void release_memory(); virtual ~ICompiledModel() = default; diff --git a/src/inference/include/openvino/runtime/compiled_model.hpp b/src/inference/include/openvino/runtime/compiled_model.hpp index 8e447efb85bc10..bb2bebb1686f22 100644 --- a/src/inference/include/openvino/runtime/compiled_model.hpp +++ b/src/inference/include/openvino/runtime/compiled_model.hpp @@ -204,10 +204,10 @@ class OPENVINO_RUNTIME_API CompiledModel { * @brief Release intermediate memory. * * This methods forces the Compiled model to release memory allocated for intermediate structures, e.g. caches, - * tensors, temporal buffers etc. 
+ * tensors, temporal buffers etc., when possible * */ - void release_buffers(); + void release_memory(); /** * @brief Returns pointer to device-specific shared context diff --git a/src/inference/src/cpp/compiled_model.cpp b/src/inference/src/cpp/compiled_model.cpp index 3106a0ef2b8da0..c780bbee1e991d 100644 --- a/src/inference/src/cpp/compiled_model.cpp +++ b/src/inference/src/cpp/compiled_model.cpp @@ -145,8 +145,8 @@ Any CompiledModel::get_property(const std::string& name) const { }); } -void CompiledModel::release_buffers() { - OV_COMPILED_MODEL_CALL_STATEMENT(_impl->release_buffers()); +void CompiledModel::release_memory() { + OV_COMPILED_MODEL_CALL_STATEMENT(_impl->release_memory()); } RemoteContext CompiledModel::get_context() const { diff --git a/src/inference/src/dev/icompiled_model.cpp b/src/inference/src/dev/icompiled_model.cpp index 5fdd99470e0cc7..b1cbedac1632ab 100644 --- a/src/inference/src/dev/icompiled_model.cpp +++ b/src/inference/src/dev/icompiled_model.cpp @@ -148,6 +148,6 @@ void ov::ICompiledModel::set_model_shared_object(ov::Model& model, const std::sh model.m_shared_object = shared_object; } -void ov::ICompiledModel::release_buffers() { - OPENVINO_THROW("ov::ICompiledModel::release_buffers() is not implemented"); +void ov::ICompiledModel::release_memory() { + // nothing to do } diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp index 46b50ed96cbd85..72943b837f1f3b 100644 --- a/src/plugins/intel_cpu/src/compiled_model.cpp +++ b/src/plugins/intel_cpu/src/compiled_model.cpp @@ -342,7 +342,7 @@ void CompiledModel::export_model(std::ostream& modelStream) const { serializer << m_model; } -void CompiledModel::release_buffers() { +void CompiledModel::release_memory() { for (auto&& graph : m_graphs) { GraphGuard::Lock graph_lock{graph}; auto ctx = graph_lock._graph.getGraphContext(); diff --git a/src/plugins/intel_cpu/src/compiled_model.h b/src/plugins/intel_cpu/src/compiled_model.h index 
7782afa09e703a..faedf1ae5a744c 100644 --- a/src/plugins/intel_cpu/src/compiled_model.h +++ b/src/plugins/intel_cpu/src/compiled_model.h @@ -49,7 +49,7 @@ class CompiledModel : public ov::ICompiledModel { "Set property to Core::compile_model during compilation"); }; - void release_buffers() override; + void release_memory() override; private: std::shared_ptr create_sync_infer_request() const override; From 297e65c2b414d491249a7412fac5ab1d7a6f7009 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 3 Sep 2024 19:18:09 +0200 Subject: [PATCH 33/36] Trivial behavior test --- .../ov_executable_network/release_memory.cpp | 121 ++++++++++++++++++ .../src/node_builders/convolution.cpp | 14 +- 2 files changed, 131 insertions(+), 4 deletions(-) create mode 100644 src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/release_memory.cpp diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/release_memory.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/release_memory.cpp new file mode 100644 index 00000000000000..fd2d90f6aa46da --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/release_memory.cpp @@ -0,0 +1,121 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "openvino/runtime/core.hpp" +#include "openvino/runtime/compiled_model.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "common_test_utils/node_builders/convolution.hpp" +#include "common_test_utils/node_builders/constant.hpp" + +using namespace ov::test; + +namespace { +class MemoryReleaseTest : public testing::WithParamInterface, public SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + bool dyn_shapes = obj.param; + return dyn_shapes ? 
"dyn_shapes" : "static_shapes"; + } + +public: + void SetUp() override { + auto net_prc = ov::element::f32; + targetDevice = utils::DEVICE_CPU; + + bool dyn_shapes = this->GetParam(); + + InputShape input_shape; + + if (dyn_shapes) { + input_shape = {{1, 2048, -1}, {{1, 2048, 7}, {1, 2048, 10}}}; + } else { + input_shape = {{}, {{1, 2048, 7}}}; + } + + init_input_shapes({input_shape}); + + auto param = std::make_shared(net_prc, inputDynamicShapes.front()); + + //convolution params + static const ov::Shape kernel_1x1 = {1}; + static const ov::Shape kernel_3x3 = {3}; + static const ov::Shape dilations_1x1 = {1}; + static const ov::Shape strides_1x1 = {1}; + + static const ov::op::PadType pad_type = ov::op::PadType::EXPLICIT; + + static const std::vector zero_pads_begin = {0}; + static const std::vector zero_pads_end = {0}; + + static const std::vector unit_pads_begin = {1}; + static const std::vector unit_pads_end = {1}; + + auto relu0 = std::make_shared(param); + + auto conv1 = utils::make_convolution(relu0, + net_prc, + kernel_1x1, + strides_1x1, + zero_pads_begin, + zero_pads_end, + dilations_1x1, + pad_type, + 512, + true); + + auto relu1 = std::make_shared(conv1); + + auto conv2 = utils::make_convolution(relu1, + net_prc, + kernel_3x3, + strides_1x1, + unit_pads_begin, + unit_pads_end, + dilations_1x1, + pad_type, + 512, + true); + + auto relu2 = std::make_shared(conv2); + + auto conv3 = utils::make_convolution(relu2, + net_prc, + kernel_1x1, + strides_1x1, + zero_pads_begin, + zero_pads_end, + dilations_1x1, + pad_type, + 2048, + true); + + auto add = std::make_shared(conv3, relu0); + + auto axis = utils::make_constant(ov::element::i32, {1}, std::vector({2})); + + auto reduce = std::make_shared(add, axis, true); + + function = std::make_shared(ov::OutputVector{reduce}, ov::ParameterVector{param}); + } +}; + +TEST_P(MemoryReleaseTest, ConsequitiveRelease) { + run(); + compiledModel.release_memory(); + run(); +} + +INSTANTIATE_TEST_SUITE_P(smoke_release_memory, + 
MemoryReleaseTest, + ::testing::Values(true, false), + MemoryReleaseTest::getTestCaseName); + +} // namespace + +// TBD: +// a few infer requests one graph +// a few infer request a few graphs +// a few infer request parallel release calls \ No newline at end of file diff --git a/src/tests/test_utils/common_test_utils/src/node_builders/convolution.cpp b/src/tests/test_utils/common_test_utils/src/node_builders/convolution.cpp index 23ce1f80f30bde..9b9af63a5a3c2e 100644 --- a/src/tests/test_utils/common_test_utils/src/node_builders/convolution.cpp +++ b/src/tests/test_utils/common_test_utils/src/node_builders/convolution.cpp @@ -45,11 +45,14 @@ std::shared_ptr make_convolution(const ov::Output& in, auto_pad); if (add_biases) { std::shared_ptr biases_weights_node; + const size_t rank = in.get_partial_shape().rank().get_length(); + ov::Shape bias_shape(rank, 1); + bias_shape[1] = num_out_channels; if (!biases_weights.empty()) { biases_weights_node = - std::make_shared(type, ov::Shape{1, num_out_channels, 1, 1}, biases_weights); + std::make_shared(type, bias_shape, biases_weights); } else { - auto tensor = create_and_fill_tensor(type, ov::Shape{1, num_out_channels, 1, 1}, 9, 1); + auto tensor = create_and_fill_tensor(type, bias_shape, 9, 1); biases_weights_node = std::make_shared(tensor); } @@ -82,11 +85,14 @@ std::shared_ptr make_convolution(const ov::Output& in_data, auto_pad); if (add_biases) { std::shared_ptr biases_weights_node; + const size_t rank = in_data.get_partial_shape().rank().get_length(); + ov::Shape bias_shape(rank, 1); + bias_shape[1] = num_out_channels; if (!biases_weights.empty()) { biases_weights_node = - std::make_shared(type, ov::Shape{1, num_out_channels, 1, 1}, biases_weights); + std::make_shared(type, bias_shape, biases_weights); } else { - auto tensor = create_and_fill_tensor(type, ov::Shape{1, num_out_channels, 1, 1}, 9, 1); + auto tensor = create_and_fill_tensor(type, bias_shape, 9, 1); biases_weights_node = std::make_shared(tensor); } From 
735b90c66248fb2d8bb2b4135873c118d8e2a64d Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 3 Sep 2024 19:23:58 +0200 Subject: [PATCH 34/36] Fix clang format --- .../common_test_utils/src/node_builders/convolution.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/tests/test_utils/common_test_utils/src/node_builders/convolution.cpp b/src/tests/test_utils/common_test_utils/src/node_builders/convolution.cpp index 9b9af63a5a3c2e..a0e79f6f9a8e11 100644 --- a/src/tests/test_utils/common_test_utils/src/node_builders/convolution.cpp +++ b/src/tests/test_utils/common_test_utils/src/node_builders/convolution.cpp @@ -49,8 +49,7 @@ std::shared_ptr make_convolution(const ov::Output& in, ov::Shape bias_shape(rank, 1); bias_shape[1] = num_out_channels; if (!biases_weights.empty()) { - biases_weights_node = - std::make_shared(type, bias_shape, biases_weights); + biases_weights_node = std::make_shared(type, bias_shape, biases_weights); } else { auto tensor = create_and_fill_tensor(type, bias_shape, 9, 1); biases_weights_node = std::make_shared(tensor); @@ -89,8 +88,7 @@ std::shared_ptr make_convolution(const ov::Output& in_data, ov::Shape bias_shape(rank, 1); bias_shape[1] = num_out_channels; if (!biases_weights.empty()) { - biases_weights_node = - std::make_shared(type, bias_shape, biases_weights); + biases_weights_node = std::make_shared(type, bias_shape, biases_weights); } else { auto tensor = create_and_fill_tensor(type, bias_shape, 9, 1); biases_weights_node = std::make_shared(tensor); From e961b25c4b07be06da6a797ba2190e4e39b1eb61 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Wed, 4 Sep 2024 10:40:50 +0200 Subject: [PATCH 35/36] Modify behavior test to avoid recompilation --- .../include/openvino/runtime/compiled_model.hpp | 2 +- .../behavior/ov_executable_network/release_memory.cpp | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/inference/include/openvino/runtime/compiled_model.hpp 
b/src/inference/include/openvino/runtime/compiled_model.hpp index bb2bebb1686f22..33aae94a2bd2cd 100644 --- a/src/inference/include/openvino/runtime/compiled_model.hpp +++ b/src/inference/include/openvino/runtime/compiled_model.hpp @@ -203,7 +203,7 @@ class OPENVINO_RUNTIME_API CompiledModel { /** * @brief Release intermediate memory. * - * This methods forces the Compiled model to release memory allocated for intermediate structures, e.g. caches, + * This method forces the Compiled model to release memory allocated for intermediate structures, e.g. caches, * tensors, temporal buffers etc., when possible * */ diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/release_memory.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/release_memory.cpp index fd2d90f6aa46da..b5b2b38c34098e 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/release_memory.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/release_memory.cpp @@ -103,9 +103,16 @@ class MemoryReleaseTest : public testing::WithParamInterface, public Subgr }; TEST_P(MemoryReleaseTest, ConsequitiveRelease) { - run(); + compile_model(); + for (const auto& targetStaticShapeVec : targetStaticShapes) { + generate_inputs(targetStaticShapeVec); + validate(); + } compiledModel.release_memory(); - run(); + for (const auto& targetStaticShapeVec : targetStaticShapes) { + generate_inputs(targetStaticShapeVec); + validate(); + } } INSTANTIATE_TEST_SUITE_P(smoke_release_memory, From 14df043e06467f4e5ea2db4407fc9ae6b230dd4a Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Wed, 4 Sep 2024 16:32:50 +0200 Subject: [PATCH 36/36] Fix mem size calculation for half byte types --- .../intel_cpu/src/dnnl_extension_utils.cpp | 15 ++++++++++++- .../memory_desc/cpu_blocked_memory_desc.cpp | 21 +++++++++++++++++-- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git 
a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp index ca5e6ab6c1438a..d60ede1decf6c1 100644 --- a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp @@ -171,6 +171,18 @@ DnnlMemoryDescPtr DnnlExtensionUtils::makeDescriptor(const_dnnl_memory_desc_t de } } +static size_t sub_byte_data_type_multiplier(dnnl::memory::data_type dataType) { + switch (dataType) { + case dnnl::memory::data_type::nf4: + case dnnl::memory::data_type::s4: + case dnnl::memory::data_type::u4: + case dnnl::memory::data_type::f4_e2m1: + return 2; + default: + return 1; + } +} + size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const dnnl::memory::desc& desc) { auto tmpDesc = desc; @@ -181,7 +193,8 @@ size_t DnnlExtensionUtils::getMemSizeForDnnlDesc(const dnnl::memory::desc& desc) if (size == DNNL_RUNTIME_SIZE_VAL) return MemoryDesc::UNDEFINED_SIZE; - size += offset0 * sizeOfDataType(tmpDesc.get_data_type()); + size += div_up(offset0 * sizeOfDataType(tmpDesc.get_data_type()), + sub_byte_data_type_multiplier(tmpDesc.get_data_type())); return size; } diff --git a/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp b/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp index cb458b09c06eae..d1c50d0048c57d 100644 --- a/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp +++ b/src/plugins/intel_cpu/src/memory_desc/cpu_blocked_memory_desc.cpp @@ -121,9 +121,26 @@ size_t CpuBlockedMemoryDesc::getCurrentMemSizeImp() const { e_size += (getBlockDims()[j] - 1) * getStrides()[j]; } - e_size *= getPrecision() == ov::element::u1 ? 
1 : getPrecision().size(); + const auto prc = getPrecision(); - return e_size; + if (prc == ov::element::u1) { + return e_size; + } + + auto byte_size = e_size * prc.bitwidth(); + + if (one_of(prc, ov::element::u3, ov::element::u6)) { + constexpr size_t storage_unit_size = 24; + byte_size += storage_unit_size - 1; + byte_size /= storage_unit_size; + byte_size *= 3; + } else { + constexpr size_t storage_unit_size = 8; + byte_size += storage_unit_size - 1; + byte_size /= storage_unit_size; + } + + return byte_size; } size_t CpuBlockedMemoryDesc::getMaxMemSize() const {