Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generalize model cache reusing #21492

Merged
20 changes: 20 additions & 0 deletions src/inference/dev_api/openvino/runtime/internal_properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,26 @@ static constexpr Property<ov::threading::IStreamsExecutor::ThreadBindingType, Pr
*/
static constexpr Property<size_t, PropertyMutability::RW> threads_per_stream{"THREADS_PER_STREAM"};

/**
* @brief Holds the compiled_model_runtime_properties information that lets the plugin runtime check whether it is
* compatible with a cached compiled model; the value is returned by a get_property() call.
*
* The details of this information are defined by the plugin itself; each plugin may require different runtime contents.
* For example, the CPU plugin will contain the OV version, while the GPU plugin will contain the OV and GPU driver
* versions, etc. Core does not interpret its content; it only reads it from the plugin and writes it into the blob header.
*
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<std::string, PropertyMutability::RO> compiled_model_runtime_properties{
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it can contain a vector<string> and actual conversion to string can be done on Core level

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there any benefits to choosing std::vector<std::string> rather than std::string?
From my understanding, the Core level doesn't need to understand its contents; it only reads them from the blob file's header or sends them to the plugin side, so std::string is enough, right?

Copy link
Contributor

@ilya-lavrenov ilya-lavrenov Dec 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, it will simplify plugin code, because currently they have to parse / convert from / to string

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the plugin side, we use ov::AnyMap and convert it to or from std::string; that doesn't seem too complex?

"COMPILED_MODEL_RUNTIME_PROPERTIES"};

/**
* @brief Read-only boolean property that tells whether the compiled_model_runtime_properties value supplied with
* the query is supported by this device runtime.
*
* The value to check is passed in the get_property() options map under the
* ov::internal::compiled_model_runtime_properties key; the returned bool is true when the plugin accepts it.
*
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<bool, PropertyMutability::RO> compiled_model_runtime_properties_supported{
"COMPILED_MODEL_RUNTIME_PROPERTIES_SUPPORTED"};

} // namespace internal
OPENVINO_DEPRECATED(
"This property is deprecated and will be removed soon. Use ov::internal::caching_properties instead of it.")
Expand Down
9 changes: 7 additions & 2 deletions src/inference/src/compilation_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,12 @@ std::string ModelCache::compute_hash(const std::string& modelStr,

// Default constructor: every string field of the header starts out empty.
CompiledBlobHeader::CompiledBlobHeader() = default;

CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo)
CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion,
const std::string& fileInfo,
const std::string& runtimeInfo)
: m_ieVersion(ieVersion),
m_fileInfo(fileInfo) {}
m_fileInfo(fileInfo),
m_runtimeInfo(runtimeInfo) {}

std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) {
std::string xmlStr;
Expand All @@ -208,6 +211,7 @@ std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) {
pugi::xml_node compiledBlobNode = document.document_element();
header.m_ieVersion = pugixml::utils::GetStrAttr(compiledBlobNode, "ie_version");
header.m_fileInfo = pugixml::utils::GetStrAttr(compiledBlobNode, "file_info");
header.m_runtimeInfo = pugixml::utils::GetStrAttr(compiledBlobNode, "runtime_info");

return stream;
}
Expand All @@ -217,6 +221,7 @@ std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header)
auto compiledBlobNode = document.append_child("compiled_blob");
compiledBlobNode.append_attribute("ie_version").set_value(header.m_ieVersion.c_str());
compiledBlobNode.append_attribute("file_info").set_value(header.m_fileInfo.c_str());
compiledBlobNode.append_attribute("runtime_info").set_value(header.m_runtimeInfo.c_str());

document.save(stream, nullptr, pugi::format_raw);
document.reset();
Expand Down
7 changes: 6 additions & 1 deletion src/inference/src/compilation_context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,11 @@ struct ModelCache final {
class CompiledBlobHeader final {
std::string m_ieVersion;
std::string m_fileInfo;
std::string m_runtimeInfo;

public:
CompiledBlobHeader();
CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo);
CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo, const std::string& runtimeInfo);

const std::string& getIeVersion() const {
return m_ieVersion;
Expand All @@ -44,6 +45,10 @@ class CompiledBlobHeader final {
return m_fileInfo;
}

const std::string& getRuntimeInfo() const {
return m_runtimeInfo;
}

friend std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header);

friend std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header);
Expand Down
27 changes: 22 additions & 5 deletions src/inference/src/dev/core_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1436,9 +1436,15 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model_and_cache(const std::s
try {
// need to export network for further import from "cache"
OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::compile_model::Export");
std::string compiled_model_runtime_properties;
if (device_supports_internal_property(plugin, ov::internal::compiled_model_runtime_properties.name())) {
compiled_model_runtime_properties =
plugin.get_property(ov::internal::compiled_model_runtime_properties.name(), {}).as<std::string>();
}
cacheContent.cacheManager->write_cache_entry(cacheContent.blobId, [&](std::ostream& networkStream) {
networkStream << ov::CompiledBlobHeader(InferenceEngine::GetInferenceEngineVersion()->buildNumber,
ov::ModelCache::calculate_file_info(cacheContent.modelPath));
ov::ModelCache::calculate_file_info(cacheContent.modelPath),
compiled_model_runtime_properties);
execNetwork->export_model(networkStream);
});
} catch (...) {
Expand Down Expand Up @@ -1467,14 +1473,25 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
try {
ov::CompiledBlobHeader header;
networkStream >> header;
if (header.getIeVersion() != ov::get_openvino_version().buildNumber) {
// Build number mismatch, don't use this cache
OPENVINO_THROW("Version does not match");
}
if (header.getFileInfo() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) {
// Original file is changed, don't use cache
OPENVINO_THROW("Original model file is changed");
}
if (util::contains(plugin.get_property(ov::internal::supported_properties),
ov::internal::compiled_model_runtime_properties_supported.name())) {
ov::AnyMap compiled_model_runtime_properties = {
{ov::internal::compiled_model_runtime_properties.name(), std::string(header.getRuntimeInfo())}};
auto res = plugin.get_property(ov::internal::compiled_model_runtime_properties_supported.name(),
compiled_model_runtime_properties);
if (!res.as<bool>()) {
OPENVINO_THROW("Original model runtime properties have been changed, not supported anymore!");
}
} else {
if (header.getIeVersion() != ov::get_openvino_version().buildNumber) {
wangleis marked this conversation as resolved.
Show resolved Hide resolved
// Build number mismatch, don't use this cache
OPENVINO_THROW("Version does not match");
}
}
} catch (...) {
throw HeaderException();
}
Expand Down
93 changes: 93 additions & 0 deletions src/inference/tests/functional/caching_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1703,6 +1703,99 @@ TEST_P(CachingTest, TestCacheFileOldVersion) {
}
}

// Checks the cache behaviour when the runtime properties stored in a cached blob stop matching the plugin:
//   Step 1 - first load compiles the model and exports it to the cache;
//   (the cached blob files are then edited so the stored runtime-properties string differs);
//   Step 2 - the next load must not import the blob, it compiles and exports again;
//   Step 3 - a further load imports from the re-created cache without compiling.
TEST_P(CachingTest, TestCacheFileWithCompiledModelRuntimeProperties) {
// Generic property queries may be issued any number of times.
EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::device::capability::EXPORT_IMPORT, _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::device::architecture.name(), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::internal::caching_properties.name(), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::device::capabilities.name(), _)).Times(AnyNumber());
// The mock plugin advertises both runtime-properties internal properties next to caching_properties.
// NOTE(review): this is a second expectation on internal::supported_properties; presumably it takes
// precedence over the plain one declared a few lines above - confirm against gmock matching rules.
EXPECT_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _))
.Times(AnyNumber())
.WillRepeatedly(Invoke([&](const std::string&, const ov::AnyMap&) {
return std::vector<ov::PropertyName>{ov::internal::caching_properties.name(),
ov::internal::compiled_model_runtime_properties.name(),
ov::internal::compiled_model_runtime_properties_supported.name()};
}));
// Runtime-properties string reported by the mock plugin; it must be queried at least once.
const std::string compiled_model_runtime_properties("Mock compiled model format segment.");
EXPECT_CALL(*mockPlugin, get_property(ov::internal::compiled_model_runtime_properties.name(), _))
.Times(AtLeast(1))
.WillRepeatedly(Return(compiled_model_runtime_properties));
// The "supported" query answers true only when the options map carries exactly the string above.
EXPECT_CALL(*mockPlugin, get_property(ov::internal::compiled_model_runtime_properties_supported.name(), _))
.Times(AtLeast(1))
.WillRepeatedly(Invoke([&](const std::string&, const ov::AnyMap& options) {
auto it = options.find(ov::internal::compiled_model_runtime_properties.name());
ov::Any ret = true;
if (it == options.end() || it->second.as<std::string>() != compiled_model_runtime_properties)
ret = false;
return ret;
}));
// Step 1: first load - one compile_model call, one export_model call, no import_model calls.
{
EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _))
.Times(!m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0);
EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(0);
m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) {
EXPECT_CALL(net, export_model(_)).Times(1);
});
testLoad([&](ov::Core& core) {
EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir)));
EXPECT_NO_THROW(m_testFunction(core));
});
}
// Rewrite every "blob" file in the cache directory: the stored runtime-properties string is overwritten
// with the same number of '0' characters, so only that string changes and the content length stays the same.
{
auto blobs = ov::test::utils::listFilesWithExt(m_cacheDir, "blob");
for (const auto& fileName : blobs) {
std::string content;
{
// Read the whole blob file into memory.
std::ifstream inp(fileName, std::ios_base::binary);
std::ostringstream ostr;
ostr << inp.rdbuf();
content = ostr.str();
}
auto index = content.find(compiled_model_runtime_properties.c_str());
std::string new_compiled_model_runtime_properties(compiled_model_runtime_properties.size(), '0');
if (index != std::string::npos) {
content.replace(index, compiled_model_runtime_properties.size(), new_compiled_model_runtime_properties);
} else {
// The string was not found in this blob: the test body returns here, so steps 2 and 3 are not run.
return; // skip test
}
// Write the modified content back over the original file.
std::ofstream out(fileName, std::ios_base::binary);
out.write(content.c_str(), static_cast<std::streamsize>(content.size()));
}
}
// Drop the export expectation callback registered for step 1 before registering a new one.
m_post_mock_net_callbacks.pop_back();
{ // Step 2. compiled_model_runtime_properties mismatch, cache will be silently removed
EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _))
.Times(!m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0);
EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(0);
m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) {
EXPECT_CALL(net, export_model(_)).Times(1);
});
testLoad([&](ov::Core& core) {
EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir)));
EXPECT_NO_THROW(m_testFunction(core));
});
}
// Drop the export expectation callback registered for step 2.
m_post_mock_net_callbacks.pop_back();
{ // Step 3: same load, should be ok now due to re-creation of cache
EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(!m_remoteContext ? 1 : 0);
// No export is expected from any of the compiled models tracked in comp_models.
for (auto& net : comp_models) {
EXPECT_CALL(*net, export_model(_)).Times(0);
}
testLoad([&](ov::Core& core) {
EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir)));
EXPECT_NO_THROW(m_testFunction(core));
});
}
}

TEST_P(CachingTest, LoadHetero_NoCacheMetric) {
EXPECT_CALL(*mockPlugin, query_model(_, _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _))
Expand Down
30 changes: 28 additions & 2 deletions src/plugins/intel_cpu/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ Engine::Engine() :
#if defined(OV_CPU_WITH_ACL)
scheduler_guard = SchedulerGuard::instance();
#endif
auto& ov_version = ov::get_openvino_version();
m_compiled_model_runtime_properties["OV_VERSION"] = std::string(ov_version.buildNumber);
}

Engine::~Engine() {
Expand Down Expand Up @@ -690,6 +692,26 @@ ov::Any Engine::get_property(const std::string& name, const ov::AnyMap& options)
return decltype(ov::hint::num_requests)::value_type(engConfig.hintNumRequests);
} else if (name == ov::hint::execution_mode) {
return engConfig.executionMode;
} else if (name == ov::internal::compiled_model_runtime_properties.name()) {
auto model_runtime_properties = ov::Any(m_compiled_model_runtime_properties);
return decltype(ov::internal::compiled_model_runtime_properties)::value_type(
std::move(model_runtime_properties.as<std::string>()));
} else if (name == ov::internal::compiled_model_runtime_properties_supported.name()) {
ov::Any res = true;
auto it = options.find(ov::internal::compiled_model_runtime_properties.name());
if (it == options.end()) {
res = false;
} else {
ov::AnyMap input_map = it->second.as<ov::AnyMap>();
for (auto& item : m_compiled_model_runtime_properties) {
auto it = input_map.find(item.first);
if (it == input_map.end() || it->second.as<std::string>() != item.second.as<std::string>()) {
res = false;
break;
}
}
}
return res;
}
return get_ro_property(name, options);
}
Expand Down Expand Up @@ -740,7 +762,9 @@ ov::Any Engine::get_metric_legacy(const std::string& name, const ov::AnyMap& opt
} else if (ov::internal::supported_properties.name() == name) {
return decltype(ov::internal::supported_properties)::value_type{
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO}};
} else if (name == ov::internal::caching_properties) {
std::vector<ov::PropertyName> cachingProperties = {ov::device::full_name.name()};
return decltype(ov::internal::caching_properties)::value_type(std::move(cachingProperties));
Expand Down Expand Up @@ -798,7 +822,9 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio
} else if (ov::internal::supported_properties == name) {
return decltype(ov::internal::supported_properties)::value_type{
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO}};
} else if (name == ov::device::full_name) {
return decltype(ov::device::full_name)::value_type(deviceFullName);
} else if (name == ov::available_devices) {
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_cpu/src/plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class Engine : public ov::IPlugin {
So track if streams is set explicitly (not auto-configured) */
bool streamsExplicitlySetForEngine = false;
const std::string deviceFullName;
ov::AnyMap m_compiled_model_runtime_properties;

std::shared_ptr<void> specialSetup;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Plugin : public ov::IPlugin {
std::string m_default_device_id = "0";
std::map<std::string, cldnn::device::ptr> m_device_map;
std::map<std::string, ExecutionConfig> m_configs_map;
ov::AnyMap m_compiled_model_runtime_properties;

mutable std::map<std::string, std::shared_ptr<RemoteContextImpl>> m_default_contexts;
mutable std::once_flag m_default_contexts_once;
Expand Down
Loading
Loading