From 60a36c4ab0d2c322a5e3b41c930c3c9ec292c487 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Wed, 11 Dec 2024 10:37:13 +0200 Subject: [PATCH] Use alternative from `PR #27981` instead for memory mapped buffers --- .../openvino/runtime/aligned_buffer.hpp | 8 +- .../openvino/runtime/shared_buffer.hpp | 21 ----- .../openvino/runtime/compilation_context.hpp | 7 +- .../dev_api/openvino/runtime/iplugin.hpp | 27 ------ src/inference/src/cache_manager.hpp | 1 - src/inference/src/dev/compilation_context.cpp | 7 +- src/inference/src/dev/core_impl.cpp | 2 +- src/inference/src/dev/iplugin.cpp | 13 --- src/inference/src/dev/plugin.cpp | 13 --- src/inference/src/dev/plugin.hpp | 10 +-- src/plugins/intel_cpu/src/plugin.cpp | 28 +++--- src/plugins/intel_cpu/src/plugin.h | 16 +--- src/plugins/intel_cpu/src/utils/serialize.cpp | 70 ++++++--------- src/plugins/intel_cpu/src/utils/serialize.hpp | 8 +- .../intel_npu/common/icompiler_adapter.hpp | 2 +- .../include/intel_npu/common/igraph.hpp | 9 +- .../common/include/intel_npu/common/npu.hpp | 10 --- .../intel_npu/src/common/src/igraph.cpp | 6 +- .../src/compiler_adapter/src/driver_graph.cpp | 4 +- .../src/plugin_compiler_adapter.cpp | 1 + .../src/compiler_adapter/src/plugin_graph.cpp | 14 +-- .../intel_npu/src/plugin/include/plugin.hpp | 9 -- .../intel_npu/src/plugin/src/plugin.cpp | 90 +++++-------------- 23 files changed, 95 insertions(+), 281 deletions(-) diff --git a/src/core/dev_api/openvino/runtime/aligned_buffer.hpp b/src/core/dev_api/openvino/runtime/aligned_buffer.hpp index a7cf78ae9ee658..904e30999d10df 100644 --- a/src/core/dev_api/openvino/runtime/aligned_buffer.hpp +++ b/src/core/dev_api/openvino/runtime/aligned_buffer.hpp @@ -30,17 +30,14 @@ class OPENVINO_API AlignedBuffer { size_t size() const { return m_byte_size; } - void updateOffset(size_t offset) { - m_offset = offset; - } void* get_ptr(size_t offset) const { return m_aligned_buffer + offset; } void* get_ptr() { - return m_aligned_buffer + m_offset; + return m_aligned_buffer; } const void* get_ptr() const { - return m_aligned_buffer + m_offset; + return m_aligned_buffer; } template T* get_ptr() { @@ -64,7 +61,6 @@ class OPENVINO_API AlignedBuffer { char* m_allocated_buffer; char* m_aligned_buffer; size_t m_byte_size; - size_t m_offset = 0; }; template <> diff --git a/src/core/dev_api/openvino/runtime/shared_buffer.hpp b/src/core/dev_api/openvino/runtime/shared_buffer.hpp index cdfe58f0741e1e..2c784ef6081c35 100644 --- a/src/core/dev_api/openvino/runtime/shared_buffer.hpp +++ b/src/core/dev_api/openvino/runtime/shared_buffer.hpp @@ -16,7 +16,6 @@ class SharedBuffer : public ov::AlignedBuffer { m_allocated_buffer = data; m_aligned_buffer = data; m_byte_size = size; - m_offset = 0; } virtual ~SharedBuffer() { @@ -82,26 +81,6 @@ class OwningSharedStreamBuffer : public SharedStreamBuffer { return m_shared_obj; } - std::streamsize xsgetn(char* s, std::streamsize count) override { - auto streamSize = SharedStreamBuffer::xsgetn(s, count); - m_shared_obj->updateOffset(m_offset); - return streamSize; - } - - int_type uflow() override { - auto val = SharedStreamBuffer::uflow(); - m_shared_obj->updateOffset(m_offset); - return val; - } - - pos_type seekoff(off_type off, - std::ios_base::seekdir dir, - std::ios_base::openmode which = std::ios_base::in) override { - auto pos = SharedStreamBuffer::seekoff(off, dir, which); - m_shared_obj->updateOffset(m_offset); - return pos; - } - protected: std::shared_ptr m_shared_obj; }; diff --git a/src/inference/dev_api/openvino/runtime/compilation_context.hpp b/src/inference/dev_api/openvino/runtime/compilation_context.hpp index 033797c9d0d811..ba3a2aa8d64ded 100644 --- a/src/inference/dev_api/openvino/runtime/compilation_context.hpp +++ b/src/inference/dev_api/openvino/runtime/compilation_context.hpp @@ -32,10 +32,9 @@ class CompiledBlobHeader final { std::string m_ieVersion; std::string m_fileInfo; std::string m_runtimeInfo; - std::shared_ptr m_model_buffer; public: - CompiledBlobHeader(std::shared_ptr model_buffer); + CompiledBlobHeader(); CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo, const std::string& runtimeInfo); const std::string& get_openvino_version() const { @@ -50,10 +49,6 @@ class CompiledBlobHeader final { return m_runtimeInfo; } - const std::shared_ptr get_model_buffer() const { - return m_model_buffer; - } - friend std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header); friend std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header); diff --git a/src/inference/dev_api/openvino/runtime/iplugin.hpp b/src/inference/dev_api/openvino/runtime/iplugin.hpp index e88c3e4a539d15..8165e658c206f0 100644 --- a/src/inference/dev_api/openvino/runtime/iplugin.hpp +++ b/src/inference/dev_api/openvino/runtime/iplugin.hpp @@ -185,33 +185,6 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this& context, const ov::AnyMap& properties) const = 0; - /** - * @brief Creates an compiled model from an previously exported model using plugin implementation - * and removes OpenVINO Runtime magic and plugin name - * @param model Reference to model output stream - * @param weights_buffer AlignedBuffer with cached model - * @param properties A ov::AnyMap of properties - * @return An Compiled model - */ - virtual std::shared_ptr import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::AnyMap& properties) const; - - /** - * @brief Creates an compiled model from an previously exported model using plugin implementation - * and removes OpenVINO Runtime magic and plugin name - * @param model Reference to model output stream - * @param weights_buffer AlignedBuffer with cached model - * @param context A pointer to plugin context derived from RemoteContext class used to - * execute the network - * @param properties A ov::AnyMap of properties - * @return An Compiled model - */ - virtual std::shared_ptr import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::SoPtr& context, - const ov::AnyMap& properties) const; - /** * @brief Queries a plugin about supported layers in model * @param model Model object to query. diff --git a/src/inference/src/cache_manager.hpp b/src/inference/src/cache_manager.hpp index 2cf0bca01b17e7..82813e5dd4788f 100644 --- a/src/inference/src/cache_manager.hpp +++ b/src/inference/src/cache_manager.hpp @@ -141,7 +141,6 @@ class FileStorageCacheManager final : public ICacheManager { auto mmap = ov::load_mmap_object(blob_file_name); auto shared_buffer = std::make_shared>>(mmap->data(), mmap->size(), mmap); -#if 0 OwningSharedStreamBuffer buf(shared_buffer); std::istream stream(&buf); reader(stream, shared_buffer); diff --git a/src/inference/src/dev/compilation_context.cpp b/src/inference/src/dev/compilation_context.cpp index 34f7156190f231..c8eac0d22af35b 100644 --- a/src/inference/src/dev/compilation_context.cpp +++ b/src/inference/src/dev/compilation_context.cpp @@ -156,8 +156,7 @@ std::string ModelCache::compute_hash(const std::string& modelStr, ////////////////////////////////////////////////// -CompiledBlobHeader::CompiledBlobHeader(std::shared_ptr model_buffer) - : m_model_buffer(model_buffer) {} +CompiledBlobHeader::CompiledBlobHeader() {} CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo, @@ -169,10 +168,6 @@ CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) { std::string xmlStr; std::getline(stream, xmlStr); - auto model_buffer = header.get_model_buffer(); - if (model_buffer != nullptr) { - model_buffer->updateOffset(stream.tellg()); - } pugi::xml_document document; pugi::xml_parse_result res = document.load_string(xmlStr.c_str()); diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 02f48cdbcb0e09..673f6fd569a11e 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -1418,7 +1418,7 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( ov::itt::domains::LoadTime, "Core::load_model_from_cache::ReadStreamAndImport"); try { - ov::CompiledBlobHeader header(model_buffer); + ov::CompiledBlobHeader header; networkStream >> header; if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) { // Original file is changed, don't use cache diff --git a/src/inference/src/dev/iplugin.cpp b/src/inference/src/dev/iplugin.cpp index 1e1b70af861b58..1049e39bee6f49 100644 --- a/src/inference/src/dev/iplugin.cpp +++ b/src/inference/src/dev/iplugin.cpp @@ -57,19 +57,6 @@ const std::string& ov::IPlugin::get_device_name() const { return m_plugin_name; } -std::shared_ptr ov::IPlugin::import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::AnyMap& properties) const { - OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); -} - -std::shared_ptr ov::IPlugin::import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::SoPtr& context, - const ov::AnyMap& properties) const { - OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); -} - void ov::IPlugin::set_core(const std::weak_ptr& core) { OPENVINO_ASSERT(!core.expired()); m_core = core; diff --git a/src/inference/src/dev/plugin.cpp b/src/inference/src/dev/plugin.cpp index 605dc94e0ef487..40207bac9087fa 100644 --- a/src/inference/src/dev/plugin.cpp +++ b/src/inference/src/dev/plugin.cpp @@ -79,19 +79,6 @@ ov::SoPtr ov::Plugin::import_model(std::istream& model, OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, context, config), m_so}); } -ov::SoPtr ov::Plugin::import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::AnyMap& properties) const { - OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, properties), m_so}); -} - -ov::SoPtr ov::Plugin::import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::SoPtr& context, - const ov::AnyMap& config) const { - OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, context, config), m_so}); -} - ov::SoPtr ov::Plugin::create_context(const AnyMap& params) const { OV_PLUGIN_CALL_STATEMENT({ auto remote = m_ptr->create_context(params); diff --git a/src/inference/src/dev/plugin.hpp b/src/inference/src/dev/plugin.hpp index bdc84737456aec..14a5adebbab3a4 100644 --- a/src/inference/src/dev/plugin.hpp +++ b/src/inference/src/dev/plugin.hpp @@ -59,15 +59,6 @@ class Plugin { const ov::SoPtr& context, const ov::AnyMap& config) const; - SoPtr import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::AnyMap& properties) const; - - SoPtr import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::SoPtr& context, - const ov::AnyMap& config) const; - ov::SoPtr create_context(const AnyMap& params) const; ov::SoPtr get_default_context(const AnyMap& params) const; @@ -87,3 +78,4 @@ class Plugin { }; } // namespace ov + diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index b30f130c2a2a1f..33cb87b337bfef 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -7,7 +7,6 @@ #include "cpu_streams_calculation.hpp" #include "internal_properties.hpp" #include "itt.h" -#include "openvino/op/paged_attention.hpp" #include "openvino/runtime/intel_cpu/properties.hpp" #include "openvino/runtime/internal_properties.hpp" #include "openvino/runtime/properties.hpp" @@ -20,6 +19,7 @@ #include "utils/precision_support.h" #include "utils/serialize.hpp" #include "weights_cache.hpp" +#include "openvino/op/paged_attention.hpp" #if defined(__linux__) # include @@ -200,7 +200,7 @@ static Config::ModelType getModelType(const std::shared_ptr& model) return Config::ModelType::CNN; if ((op::util::has_op_with_type(model) && model->get_variables().size() > 0) || - op::util::has_op_with_type(model)) + op::util::has_op_with_type(model)) return Config::ModelType::LLM; return Config::ModelType::Unknown; @@ -446,17 +446,15 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio return decltype(ov::supported_properties)::value_type(std::move(supportedProperties)); } else if (ov::internal::supported_properties == name) { - return decltype(ov::internal::supported_properties)::value_type { + return decltype(ov::internal::supported_properties)::value_type{ ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, #if !defined(OPENVINO_ARCH_ARM) && !(defined(__APPLE__) || defined(__MACOSX)) - ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO}, + ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO}, #endif - ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, - ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, - ov::PropertyName { - ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO - } - }; + ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, + ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), + ov::PropertyMutability::RO}}; } else if (name == ov::device::full_name) { return decltype(ov::device::full_name)::value_type(deviceFullName); } else if (name == ov::available_devices) { @@ -555,16 +553,11 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& return res; } -std::shared_ptr Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const { - return import_model(model_stream, nullptr, config); -} - std::shared_ptr Plugin::import_model(std::istream& model_stream, - std::shared_ptr model_buffer, const ov::AnyMap& config) const { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); - CacheDecrypt decrypt{codec_xor}; + CacheDecrypt decrypt{ codec_xor }; bool decript_from_string = false; if (config.count(ov::cache_encryption_callbacks.name())) { auto encryption_callbacks = config.at(ov::cache_encryption_callbacks.name()).as(); @@ -585,8 +578,7 @@ std::shared_ptr Plugin::import_model(std::istream& model_str [this](const std::shared_ptr& model, const std::shared_ptr& weights) { return get_core()->read_model(model, weights); }, - decrypt, - decript_from_string); + decrypt, decript_from_string); std::shared_ptr model; deserializer >> model; diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index c7f1dee9fb52c6..8973478d30403f 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -20,7 +20,8 @@ class Plugin : public ov::IPlugin { std::shared_ptr compile_model(const std::shared_ptr& model, const ov::AnyMap& properties, const ov::SoPtr& context) const override { - OPENVINO_THROW_NOT_IMPLEMENTED("compile_model with RemoteContext is not supported by CPU plugin!"); + OPENVINO_THROW_NOT_IMPLEMENTED( + "compile_model with RemoteContext is not supported by CPU plugin!"); }; void set_property(const ov::AnyMap& properties) override; @@ -29,17 +30,8 @@ class Plugin : public ov::IPlugin { std::shared_ptr import_model(std::istream& model, const ov::SoPtr& context, const ov::AnyMap& properties) const override { - OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!"); - }; - - std::shared_ptr import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::istream& model, - std::shared_ptr model_buffer, - const ov::SoPtr& context, - const ov::AnyMap& properties) const override { - OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!"); + OPENVINO_THROW_NOT_IMPLEMENTED( + "import_model with RemoteContext is not supported by CPU plugin!"); }; ov::SupportedOpsMap query_model(const std::shared_ptr& model, diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/serialize.cpp index 55b53116e4ac01..33d8140fbe4a84 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.cpp +++ b/src/plugins/intel_cpu/src/utils/serialize.cpp @@ -14,8 +14,7 @@ namespace intel_cpu { ////////// ModelSerializer ////////// ModelSerializer::ModelSerializer(std::ostream& ostream, CacheEncrypt encrypt_fn) - : m_ostream(ostream), - m_cache_encrypt(std::move(encrypt_fn)) {} + : m_ostream(ostream), m_cache_encrypt(std::move(encrypt_fn)) {} void ModelSerializer::operator<<(const std::shared_ptr& model) { auto serialize_info = [&](std::ostream& stream) { @@ -36,25 +35,22 @@ ModelDeserializer::ModelDeserializer(std::istream& model_stream, ModelBuilder fn, const CacheDecrypt& decrypt_fn, bool decript_from_string) - : m_istream(model_stream), - m_model_builder(std::move(fn)), - m_decript_from_string(decript_from_string), - m_model_buffer(model_buffer) { - if (m_decript_from_string) { - m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str; - } else { - m_cache_decrypt.m_decrypt_char = decrypt_fn.m_decrypt_char; + : m_istream(model_stream), m_model_builder(std::move(fn)), m_decript_from_string(decript_from_string), m_model_buffer(model_buffer) { + if (m_decript_from_string) { + m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str; + } else { + m_cache_decrypt.m_decrypt_char = decrypt_fn.m_decrypt_char; + } } -} -void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr& model) {} + void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr& model) {} -void ModelDeserializer::operator>>(std::shared_ptr& model) { - if (m_model_buffer) { - process_mmap(model, m_model_buffer); - } else { - process_stream(model); - } + void ModelDeserializer::operator>>(std::shared_ptr& model) { + if (m_model_buffer) { + process_mmap(model, m_model_buffer); + } else { + process_stream(model); + } } void ModelDeserializer::process_mmap(std::shared_ptr& model, @@ -81,10 +77,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr& model, // Read model input/output precisions. pugi::xml_document xml_in_out_doc; if (hdr.custom_data_size > 0lu) { - auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset, - hdr.custom_data_size, - pugi::parse_default, - pugi::encoding_utf8); + auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset, hdr.custom_data_size, pugi::parse_default, pugi::encoding_utf8); if (res.status != pugi::status_ok) { OPENVINO_THROW("[CPU] Could to deserialize custom data."); } @@ -93,10 +86,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr& model, // Map blob content std::shared_ptr weights_buf; if (hdr.consts_size) { - weights_buf = - std::make_shared>>(buffer_base + hdr.consts_offset, - hdr.consts_size, - mmemory); + weights_buf = std::make_shared>>(buffer_base + hdr.consts_offset, hdr.consts_size, mmemory); } // XML content @@ -113,7 +103,9 @@ void ModelDeserializer::process_mmap(std::shared_ptr& model, xml_buff->assign(buffer_base + hdr.model_offset, hdr.model_size); } std::shared_ptr model_buf = - std::make_shared>>(&((*xml_buff)[0]), hdr.model_size, xml_buff); + std::make_shared>>(&((*xml_buff)[0]), + hdr.model_size, + xml_buff); model = m_model_builder(model_buf, weights_buf); @@ -158,7 +150,7 @@ void ModelDeserializer::process_stream(std::shared_ptr& model) { auto data_blob = std::make_shared(ov::element::u8, ov::Shape({hdr.consts_size})); m_istream.seekg(hdr.consts_offset); if (hdr.consts_size) { - m_istream.read(static_cast(data_blob->data(ov::element::u8)), hdr.consts_size); + m_istream.read(static_cast(data_blob->data(ov::element::u8)), hdr.consts_size); } // read XML content @@ -170,20 +162,16 @@ void ModelDeserializer::process_stream(std::shared_ptr& model) { if (m_decript_from_string) { *xml_string = m_cache_decrypt.m_decrypt_str(*xml_string); } else { - m_cache_decrypt.m_decrypt_char(const_cast(xml_string->data()), - xml_string->data(), - xml_string->size()); + m_cache_decrypt.m_decrypt_char(const_cast(xml_string->data()), xml_string->data(), xml_string->size()); } } - auto model_buf = - std::make_shared>>(const_cast(xml_string->data()), - xml_string->size(), - xml_string); - auto weights_buf = std::make_shared>>( - reinterpret_cast(data_blob->data(ov::element::u8)), - hdr.consts_size, - data_blob); + auto model_buf = std::make_shared>>(const_cast(xml_string->data()), + xml_string->size(), + xml_string); + auto weights_buf = std::make_shared>>(reinterpret_cast(data_blob->data(ov::element::u8)), + hdr.consts_size, + data_blob); model = m_model_builder(model_buf, weights_buf); @@ -192,5 +180,5 @@ void ModelDeserializer::process_stream(std::shared_ptr& model) { set_info(root, model); } -} // namespace intel_cpu -} // namespace ov +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/serialize.hpp b/src/plugins/intel_cpu/src/utils/serialize.hpp index 0821b1160c38d7..4dfdd6b22afbd4 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.hpp +++ b/src/plugins/intel_cpu/src/utils/serialize.hpp @@ -29,9 +29,7 @@ class ModelSerializer { class ModelDeserializer { public: - typedef std::function(const std::shared_ptr&, - const std::shared_ptr&)> - ModelBuilder; + typedef std::function(const std::shared_ptr&, const std::shared_ptr&)> ModelBuilder; ModelDeserializer(std::istream& model, std::shared_ptr model_buffer, @@ -57,5 +55,5 @@ class ModelDeserializer { std::shared_ptr m_model_buffer; }; -} // namespace intel_cpu -} // namespace ov +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp index 6e585299d68a1d..375ab305db57fc 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp @@ -12,7 +12,7 @@ class ICompilerAdapter { public: virtual std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const = 0; - virtual std::shared_ptr parse(std::vector network, const Config& config) const = 0; + virtual std::shared_ptr parse(std::unique_ptr blobPtr, const Config& config) const = 0; virtual ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const = 0; virtual ~ICompilerAdapter() = default; diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index 365cc35727cf4c..e64380021544fd 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -58,10 +58,12 @@ class BlobContainerVector : public BlobContainer { class BlobContainerAlignedBuffer : public BlobContainer { public: - BlobContainerAlignedBuffer(const std::shared_ptr& blobSO) : _ownershipBlob(blobSO) {} + BlobContainerAlignedBuffer(const std::shared_ptr& blobSO, size_t offset) + : _ownershipBlob(blobSO), + _offset(offset) {} void* get_ptr() override { - return _ownershipBlob->get_ptr(); + return _ownershipBlob->get_ptr(_offset); } size_t size() const override { @@ -74,6 +76,7 @@ class BlobContainerAlignedBuffer : public BlobContainer { private: std::shared_ptr _ownershipBlob; + size_t _offset; }; class IGraph : public std::enable_shared_from_this { @@ -149,7 +152,7 @@ class IGraph : public std::enable_shared_from_this { // first inference starts running std::mutex _mutex; - std::unique_ptr _blob; + std::unique_ptr _blobPtr; uint32_t _unique_id = 0; uint32_t _last_submitted_id; diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp index 168b57e30945ee..9e4c59852151ce 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp @@ -57,16 +57,6 @@ class IEngineBackend : public std::enable_shared_from_this { //------------------------------------------------------------------------------ -class ICompilerAdapter { -public: - virtual std::shared_ptr compile(const std::shared_ptr& model, - const Config& config) const = 0; - virtual std::shared_ptr parse(std::unique_ptr blobPtr, const Config& config) const = 0; - virtual ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const = 0; - - virtual ~ICompilerAdapter() = default; -}; - //------------------------------------------------------------------------------ class IDevice : public std::enable_shared_from_this { diff --git a/src/plugins/intel_npu/src/common/src/igraph.cpp b/src/plugins/intel_npu/src/common/src/igraph.cpp index 8bd26367cd0f1f..3f04bb0ce8e5ff 100644 --- a/src/plugins/intel_npu/src/common/src/igraph.cpp +++ b/src/plugins/intel_npu/src/common/src/igraph.cpp @@ -17,12 +17,12 @@ namespace intel_npu { IGraph::IGraph(ze_graph_handle_t handle, NetworkMetadata metadata, const Config& config, - std::optional> blob) + std::optional> blobPtr) : _handle(handle), _metadata(std::move(metadata)), _logger("IGraph", config.get()) { - if (blob.has_value()) { - _blob = std::move(*blob); + if (blobPtr.has_value()) { + _blobPtr = std::move(*blobPtr); } } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index 0019eb1bdf17d4..3ce216c255f0e4 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -139,7 +139,7 @@ void DriverGraph::initialize(const Config& config) { } bool DriverGraph::release_blob(const Config& config) { - if (_blob == nullptr || _zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8 || + if (_blobPtr == nullptr || _zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8 || config.get()) { return false; } @@ -152,7 +152,7 @@ bool DriverGraph::release_blob(const Config& config) { return false; } - if (!_blob->release_from_memory()) { + if (!_blobPtr->release_from_memory()) { return false; } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index fc4eaf980dec8f..719950533289ff 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -95,6 +95,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr(_zeGraphExt, _compiler, _zeroInitStruct, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index ce02e0caad8edd..87d530a4086817 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -34,7 +34,7 @@ PluginGraph::PluginGraph(const std::shared_ptr& zeGraphExt, } void PluginGraph::export_blob(std::ostream& stream) const { - stream.write(reinterpret_cast(_blob->get_ptr()), _blob->size()); + stream.write(reinterpret_cast(_blobPtr->get_ptr()), _blobPtr->size()); if (!stream) { _logger.error("Write blob to stream failed. Blob is broken!"); @@ -43,14 +43,14 @@ void PluginGraph::export_blob(std::ostream& stream) const { if (_logger.level() >= ov::log::Level::INFO) { std::uint32_t result = 1171117u; - for (const uint8_t* it = reinterpret_cast(_blob->get_ptr()); - it != reinterpret_cast(_blob->get_ptr()) + _blob->size(); + for (const uint8_t* it = reinterpret_cast(_blobPtr->get_ptr()); + it != reinterpret_cast(_blobPtr->get_ptr()) + _blobPtr->size(); ++it) { result = ((result << 7) + result) + static_cast(*it); } std::stringstream str; - str << "Blob size: " << _blob->size() << ", hash: " << std::hex << result; + str << "Blob size: " << _blobPtr->size() << ", hash: " << std::hex << result; _logger.info(str.str().c_str()); } _logger.info("Write blob to stream successfully."); @@ -58,9 +58,9 @@ void PluginGraph::export_blob(std::ostream& stream) const { std::vector PluginGraph::process_profiling_output(const std::vector& profData, const Config& config) const { - std::vector blob(_blob->size()); - blob.assign(reinterpret_cast(_blob->get_ptr()), - reinterpret_cast(_blob->get_ptr()) + _blob->size()); + std::vector blob(_blobPtr->size()); + blob.assign(reinterpret_cast(_blobPtr->get_ptr()), + reinterpret_cast(_blobPtr->get_ptr()) + _blobPtr->size()); return _compiler->process_profiling_output(profData, blob, config); } diff --git a/src/plugins/intel_npu/src/plugin/include/plugin.hpp b/src/plugins/intel_npu/src/plugin/include/plugin.hpp index c91af51c5443ce..6b1b46872788e3 100644 --- a/src/plugins/intel_npu/src/plugin/include/plugin.hpp +++ b/src/plugins/intel_npu/src/plugin/include/plugin.hpp @@ -44,16 +44,7 @@ class Plugin : public ov::IPlugin { std::shared_ptr import_model(std::istream& stream, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::istream& /* unusedStream */, - std::shared_ptr model_buffer, - const ov::AnyMap& properties) const override; - - std::shared_ptr import_model(std::istream& stream, - const ov::SoPtr& context, - const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::istream& stream, - std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const override; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index dd322c0ed47962..798164403bbbbb 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -752,7 +752,14 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs"); - const std::map propertiesMap = any_copy(properties); + auto _properties = properties; + std::shared_ptr modelBuffer; + if (_properties.count(ov::internal::cached_model_buffer.name())) { + modelBuffer = _properties.at(ov::internal::cached_model_buffer.name()).as>(); + _properties.erase(ov::internal::cached_model_buffer.name()); + } + + const std::map propertiesMap = any_copy(_properties); auto localConfig = merge_configs(_globalConfig, propertiesMap, OptionMode::RunTime); _logger.setLevel(localConfig.get()); const auto platform = _backends->getCompilationPlatform(localConfig.get(), localConfig.get()); @@ -775,63 +782,24 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c CompilerAdapterFactory compilerAdapterFactory; auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig); - auto graphSize = getFileSize(stream); - - std::vector blob(graphSize); - stream.read(reinterpret_cast(blob.data()), graphSize); - if (!stream) { - OPENVINO_THROW("Failed to read data from stream!"); - } - _logger.debug("Successfully read %zu bytes into blob.", graphSize); - - auto blobContainerPtr = std::make_unique(std::move(blob)); - auto graph = compiler->parse(std::move(blobContainerPtr), localConfig); - graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); - - const std::shared_ptr modelDummy = - create_dummy_model(graph->get_metadata().inputs, graph->get_metadata().outputs); - - compiledModel = std::make_shared(modelDummy, shared_from_this(), device, graph, localConfig); - } catch (const std::exception& ex) { - OPENVINO_THROW("Can't import network: ", ex.what()); - } catch (...) { - OPENVINO_THROW("NPU import_model got unexpected exception from CompiledModel"); - } - - OV_ITT_TASK_SKIP(PLUGIN_IMPORT_MODEL); - - return compiledModel; -} - -std::shared_ptr Plugin::import_model(std::istream& /* unusedStream */, - std::shared_ptr model_buffer, - const ov::AnyMap& properties) const { - OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); - OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs"); - - const std::map propertiesMap = any_copy(properties); - auto localConfig = merge_configs(_globalConfig, propertiesMap, OptionMode::RunTime); - _logger.setLevel(localConfig.get()); - const auto platform = _backends->getCompilationPlatform(localConfig.get(), localConfig.get()); - localConfig.update({{ov::intel_npu::platform.name(), platform}}); - auto device = _backends->getDevice(localConfig.get()); - - set_batch_config(_backends->isBatchingSupported(), localConfig); + std::unique_ptr blobPtr; - const auto loadedFromCache = localConfig.get(); - if (!loadedFromCache) { - _logger.warning( - "The usage of a compiled model can lead to undefined behavior. Please use OpenVINO IR instead!"); - } + if (modelBuffer == nullptr) { + auto graphSize = getFileSize(stream); - OV_ITT_TASK_NEXT(PLUGIN_IMPORT_MODEL, "parse"); + std::vector blob(graphSize); + stream.read(reinterpret_cast(blob.data()), graphSize); + if (!stream) { + OPENVINO_THROW("Failed to read data from stream!"); + } + _logger.debug("Successfully read %zu bytes into blob.", graphSize); - std::shared_ptr compiledModel; + blobPtr = std::make_unique(std::move(blob)); + } else { + blobPtr = std::make_unique(modelBuffer, stream.tellg()); + } - try { - auto compiler = getCompiler(localConfig); - auto blobContainerPtr = std::make_unique(model_buffer); - auto graph = compiler->parse(std::move(blobContainerPtr), localConfig); + auto graph = compiler->parse(std::move(blobPtr), localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = @@ -857,19 +825,7 @@ std::shared_ptr Plugin::import_model(std::istream& stream, OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type"); } - return import_model(stream, context, properties); -} - -std::shared_ptr Plugin::import_model(std::istream& stream, - std::shared_ptr model_buffer, - const ov::SoPtr& context, - const ov::AnyMap& properties) const { - auto casted = std::dynamic_pointer_cast(context._ptr); - if (casted == nullptr) { - OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type"); - } - - return import_model(stream, model_buffer, properties); + return import_model(stream, properties); } ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& model,