diff --git a/src/inference/dev_api/openvino/runtime/internal_properties.hpp b/src/inference/dev_api/openvino/runtime/internal_properties.hpp index 47feb69322c087..5f61ac516707d4 100644 --- a/src/inference/dev_api/openvino/runtime/internal_properties.hpp +++ b/src/inference/dev_api/openvino/runtime/internal_properties.hpp @@ -67,6 +67,25 @@ static constexpr Property threads_per_stream{"THREADS_PER_STREAM"}; +/** + * @brief It contains compiled_model_format information so that the plugin runtime can check whether it is compatible + * with the cached compiled model, and it is returned by get_property. + * + * The information details are defined by the plugin itself, and each plugin can have different contents. + * For example, CPU plugin will contain OV version, while GPU plugin will contain OV and GPU driver version, etc. + * Core doesn't understand its content and only reads it from the plugin and writes it into the blob header. + * + * @ingroup ov_dev_api_plugin_api + */ +static constexpr Property compiled_model_format{"COMPILED_MODEL_FORMAT"}; + +/** + * @brief Check whether the attached compiled_model_format is supported by this plugin. + * @ingroup ov_dev_api_plugin_api + */ +static constexpr Property compiled_model_format_supported{ + "COMPILED_MODEL_FORMAT_SUPPORTED"}; + } // namespace internal OPENVINO_DEPRECATED( "This property is deprecated and will be removed soon. 
Use ov::internal::caching_properties instead of it.") diff --git a/src/inference/src/compilation_context.cpp b/src/inference/src/compilation_context.cpp index c71b83c6df9fee..5c9b789b883518 100644 --- a/src/inference/src/compilation_context.cpp +++ b/src/inference/src/compilation_context.cpp @@ -190,9 +190,12 @@ std::string ModelCache::compute_hash(const std::string& modelStr, CompiledBlobHeader::CompiledBlobHeader() {} -CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo) +CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, + const std::string& fileInfo, + const std::string& runtimeInfo) : m_ieVersion(ieVersion), - m_fileInfo(fileInfo) {} + m_fileInfo(fileInfo), + m_runtimeInfo(runtimeInfo) {} std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) { std::string xmlStr; @@ -208,6 +211,7 @@ std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) { pugi::xml_node compiledBlobNode = document.document_element(); header.m_ieVersion = pugixml::utils::GetStrAttr(compiledBlobNode, "ie_version"); header.m_fileInfo = pugixml::utils::GetStrAttr(compiledBlobNode, "file_info"); + header.m_runtimeInfo = pugixml::utils::GetStrAttr(compiledBlobNode, "runtime_info"); return stream; } @@ -217,6 +221,7 @@ std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header) auto compiledBlobNode = document.append_child("compiled_blob"); compiledBlobNode.append_attribute("ie_version").set_value(header.m_ieVersion.c_str()); compiledBlobNode.append_attribute("file_info").set_value(header.m_fileInfo.c_str()); + compiledBlobNode.append_attribute("runtime_info").set_value(header.m_runtimeInfo.c_str()); document.save(stream, nullptr, pugi::format_raw); document.reset(); diff --git a/src/inference/src/compilation_context.hpp b/src/inference/src/compilation_context.hpp index cfaacba242c0ee..9b5575d056d684 100644 --- a/src/inference/src/compilation_context.hpp +++ 
b/src/inference/src/compilation_context.hpp @@ -31,10 +31,11 @@ struct ModelCache final { class CompiledBlobHeader final { std::string m_ieVersion; std::string m_fileInfo; + std::string m_runtimeInfo; public: CompiledBlobHeader(); - CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo); + CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo, const std::string& runtimeInfo); const std::string& getIeVersion() const { return m_ieVersion; @@ -44,6 +45,10 @@ class CompiledBlobHeader final { return m_fileInfo; } + const std::string& getRuntimeInfo() const { + return m_runtimeInfo; + } + friend std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header); friend std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header); diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 458563633ed0b3..40052cf6712b29 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -1435,9 +1435,15 @@ ov::SoPtr ov::CoreImpl::compile_model_and_cache(const std::s try { // need to export network for further import from "cache" OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::compile_model::Export"); + std::string compiled_model_format(""); + if (device_supports_internal_property(plugin, ov::internal::compiled_model_format.name())) { + compiled_model_format = + plugin.get_property(ov::internal::compiled_model_format.name(), {}).as(); + } cacheContent.cacheManager->write_cache_entry(cacheContent.blobId, [&](std::ostream& networkStream) { networkStream << ov::CompiledBlobHeader(InferenceEngine::GetInferenceEngineVersion()->buildNumber, - ov::ModelCache::calculate_file_info(cacheContent.modelPath)); + ov::ModelCache::calculate_file_info(cacheContent.modelPath), + compiled_model_format); execNetwork->export_model(networkStream); }); } catch (...) 
{ @@ -1466,14 +1472,25 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( try { ov::CompiledBlobHeader header; networkStream >> header; - if (header.getIeVersion() != ov::get_openvino_version().buildNumber) { - // Build number mismatch, don't use this cache - OPENVINO_THROW("Version does not match"); - } if (header.getFileInfo() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) { // Original file is changed, don't use cache OPENVINO_THROW("Original model file is changed"); } + if (util::contains(plugin.get_property(ov::internal::supported_properties), + ov::internal::compiled_model_format_supported.name())) { + ov::AnyMap compiled_model_format = { + {ov::internal::compiled_model_format_supported.name(), std::string(header.getRuntimeInfo())}}; + auto res = plugin.get_property(ov::internal::compiled_model_format_supported.name(), + compiled_model_format); + if (!res.as()) { + OPENVINO_THROW("Original model format has been changed, not supported anymore!"); + } + } else { + if (header.getIeVersion() != ov::get_openvino_version().buildNumber) { + // Build number mismatch, don't use this cache + OPENVINO_THROW("Version does not match"); + } + } } catch (...) 
{ throw HeaderException(); } diff --git a/src/inference/tests/functional/caching_test.cpp b/src/inference/tests/functional/caching_test.cpp index 2668eafc44a34e..c50eb0b3976c68 100644 --- a/src/inference/tests/functional/caching_test.cpp +++ b/src/inference/tests/functional/caching_test.cpp @@ -1700,6 +1700,99 @@ TEST_P(CachingTest, TestCacheFileOldVersion) { } } +TEST_P(CachingTest, TestCacheFileWithCompiledModelFormat) { + EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::device::capability::EXPORT_IMPORT, _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::device::architecture.name(), _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::internal::caching_properties.name(), _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::device::capabilities.name(), _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _)) + .Times(AnyNumber()) + .WillRepeatedly(Invoke([&](const std::string&, const ov::AnyMap&) { + return std::vector{ov::internal::caching_properties.name(), + ov::internal::compiled_model_format.name(), + ov::internal::compiled_model_format_supported.name()}; + })); + const std::string compiled_model_format("Mock compiled model format segment."); + EXPECT_CALL(*mockPlugin, get_property(ov::internal::compiled_model_format.name(), _)) + .Times(AtLeast(1)) + .WillRepeatedly(Return(compiled_model_format)); + EXPECT_CALL(*mockPlugin, get_property(ov::internal::compiled_model_format_supported.name(), _)) + .Times(AtLeast(1)) + .WillRepeatedly(Invoke([&](const std::string&, const ov::AnyMap& options) { + auto it = options.find(ov::internal::compiled_model_format_supported.name()); + ov::Any ret = true; + if (it == options.end() || it->second.as() != compiled_model_format) + 
ret = false; + return ret; + })); + { + EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)) + .Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(0); + m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) { + EXPECT_CALL(net, export_model(_)).Times(1); + }); + testLoad([&](ov::Core& core) { + EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir))); + EXPECT_NO_THROW(m_testFunction(core)); + }); + } + { + auto blobs = ov::test::utils::listFilesWithExt(m_cacheDir, "blob"); + for (const auto& fileName : blobs) { + std::string content; + { + std::ifstream inp(fileName, std::ios_base::binary); + std::ostringstream ostr; + ostr << inp.rdbuf(); + content = ostr.str(); + } + auto index = content.find(compiled_model_format.c_str()); + std::string new_compiled_model_format(compiled_model_format.size(), '0'); + if (index != std::string::npos) { + content.replace(index, compiled_model_format.size(), new_compiled_model_format); + } else { + return; // skip test + } + std::ofstream out(fileName, std::ios_base::binary); + out.write(content.c_str(), static_cast(content.size())); + } + } + m_post_mock_net_callbacks.pop_back(); + { // Step 2. compiled_model_format mismatch, cache will be silently removed + EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)) + .Times(!m_remoteContext ? 
1 : 0); + EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(0); + m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) { + EXPECT_CALL(net, export_model(_)).Times(1); + }); + testLoad([&](ov::Core& core) { + EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir))); + EXPECT_NO_THROW(m_testFunction(core)); + }); + } + m_post_mock_net_callbacks.pop_back(); + { // Step 3: same load, should be ok now due to re-creation of cache + EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(!m_remoteContext ? 1 : 0); + for (auto& net : comp_models) { + EXPECT_CALL(*net, export_model(_)).Times(0); + } + testLoad([&](ov::Core& core) { + EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir))); + EXPECT_NO_THROW(m_testFunction(core)); + }); + } +} + TEST_P(CachingTest, LoadHetero_NoCacheMetric) { EXPECT_CALL(*mockPlugin, query_model(_, _)).Times(AnyNumber()); EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _)) diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 03bd79e28c85dd..ceda2769f05deb 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -174,6 +174,8 @@ Engine::Engine() : #if defined(OV_CPU_WITH_ACL) scheduler_guard = SchedulerGuard::instance(); #endif + auto& ov_version = ov::get_openvino_version(); + compiled_model_format_info = std::string(ov_version.buildNumber); } Engine::~Engine() { @@ -688,6 +690,15 @@ ov::Any Engine::get_property(const std::string& name, const ov::AnyMap& options) return decltype(ov::hint::num_requests)::value_type(engConfig.hintNumRequests); } else if (name == ov::hint::execution_mode) { return engConfig.executionMode; + } else if (name == 
ov::internal::compiled_model_format.name()) { + return decltype(ov::internal::compiled_model_format)::value_type(compiled_model_format_info); + } else if (name == ov::internal::compiled_model_format_supported.name()) { + ov::Any res = false; + auto it = options.find(ov::internal::compiled_model_format_supported.name()); + if (it != options.end() && it->second.as() == compiled_model_format_info) { + res = true; + } + return res; } return get_ro_property(name, options); } @@ -738,7 +749,9 @@ ov::Any Engine::get_metric_legacy(const std::string& name, const ov::AnyMap& opt } else if (ov::internal::supported_properties.name() == name) { return decltype(ov::internal::supported_properties)::value_type{ ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, - ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}}; + ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::internal::compiled_model_format.name(), ov::PropertyMutability::RO}, + ov::PropertyName{ov::internal::compiled_model_format_supported.name(), ov::PropertyMutability::RO}}; } else if (name == ov::internal::caching_properties) { std::vector cachingProperties = {ov::device::full_name.name()}; return decltype(ov::internal::caching_properties)::value_type(std::move(cachingProperties)); @@ -796,7 +809,9 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio } else if (ov::internal::supported_properties == name) { return decltype(ov::internal::supported_properties)::value_type{ ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, - ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}}; + ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::internal::compiled_model_format.name(), ov::PropertyMutability::RO}, + 
ov::PropertyName{ov::internal::compiled_model_format_supported.name(), ov::PropertyMutability::RO}}; } else if (name == ov::device::full_name) { return decltype(ov::device::full_name)::value_type(deviceFullName); } else if (name == ov::available_devices) { diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index 7f04a3c3708d6b..fee633dcef95b6 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -67,6 +67,7 @@ class Engine : public ov::IPlugin { So track if streams is set explicitly (not auto-configured) */ bool streamsExplicitlySetForEngine = false; const std::string deviceFullName; + std::string compiled_model_format_info; std::shared_ptr specialSetup; diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp index d92cd695f4b569..881a8b01d0fe79 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp @@ -19,6 +19,7 @@ class Plugin : public ov::IPlugin { std::string m_default_device_id = "0"; std::map m_device_map; std::map m_configs_map; + ov::AnyMap m_compiled_model_format; mutable std::map> m_default_contexts; mutable std::once_flag m_default_contexts_once; @@ -38,6 +39,7 @@ class Plugin : public ov::IPlugin { std::vector get_device_capabilities(const cldnn::device_info& info) const; uint32_t get_optimal_batch_size(const ov::AnyMap& options) const; uint32_t get_max_batch_size(const ov::AnyMap& options) const; + ov::AnyMap parse_compiled_model_format(const std::string& input) const; ov::AnyMap preprocess_config(const ov::AnyMap& orig_config) const; bool is_metric(const std::string& name) const; diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 5ed0e899364dfa..c43932dbd54bf2 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ 
-104,6 +104,27 @@ std::string Plugin::get_device_id(const ov::AnyMap& config) const { return id; } +/** Parse compiled model format to be ov::AnyMap + * input:"aaa:1234;ccc:xyzw;" + * output: + * out["aaa"] = "1234" + * out["ccc"] = "xyzw" + */ +ov::AnyMap Plugin::parse_compiled_model_format(const std::string& input) const { + ov::AnyMap res = {}; + auto in = input; + while (!in.empty()) { + auto pos_1 = in.find_first_of(':'); + auto pos_2 = in.find_first_of(';'); + if (pos_1 == std::string::npos || pos_2 == std::string::npos) { + break; + } + res[in.substr(0, pos_1)] = in.substr(pos_1 + 1, pos_2 - pos_1 - 1); + in = in.substr(pos_2 + 1); + } + return res; +} + void Plugin::transform_model(std::shared_ptr& model, const ExecutionConfig& config) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::transform_model"); auto deviceInfo = m_device_map.at(config.get_property(ov::device::id))->get_info(); @@ -174,6 +195,13 @@ Plugin::Plugin() { for (const auto& device : m_device_map) { m_configs_map.insert({device.first, ExecutionConfig(ov::device::id(device.first))}); } + + // Set runtime info + auto& ov_version = ov::get_openvino_version(); + m_compiled_model_format["OV_VERSION"] = ov_version.buildNumber; + for (const auto& device : m_device_map) { + m_compiled_model_format[device.first] = device.second->get_info().driver_version; + } } std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, const ov::AnyMap& orig_config) const { @@ -331,6 +359,29 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) return decltype(ov::available_devices)::value_type {available_devices}; } else if (name == ov::internal::caching_properties) { return decltype(ov::internal::caching_properties)::value_type(get_caching_properties()); + } else if (name == ov::internal::compiled_model_format.name()) { + std::string format_info; + for (auto& it : m_compiled_model_format) { + format_info += it.first + ":" + it.second.as() + ";"; + } + return 
decltype(ov::internal::compiled_model_format)::value_type(format_info); + } else if (name == ov::internal::compiled_model_format_supported.name()) { + ov::Any res = true; + auto it = options.find(ov::internal::compiled_model_format_supported.name()); + if (it == options.end()) { + res = false; + } else { + const auto data = it->second.as(); + auto input = parse_compiled_model_format(data); + for (auto& item : m_compiled_model_format) { + auto it = input.find(item.first); + if (it == input.end() || it->second.as() != item.second.as()) { + res = false; + break; + } + } + } + return res; } OPENVINO_SUPPRESS_DEPRECATED_START @@ -572,7 +623,9 @@ std::vector Plugin::get_supported_internal_properties() const static const std::vector supported_internal_properties = { ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, ov::PropertyName{ov::internal::config_device_id.name(), ov::PropertyMutability::WO}, - ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}}; + ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::internal::compiled_model_format.name(), ov::PropertyMutability::RO}, + ov::PropertyName{ov::internal::compiled_model_format_supported.name(), ov::PropertyMutability::RO}}; return supported_internal_properties; }