Skip to content

Commit

Permalink
Generalize model cache reusing
Browse files Browse the repository at this point in the history
  • Loading branch information
riverlijunjie committed Dec 7, 2023
1 parent 35330d5 commit 599496a
Show file tree
Hide file tree
Showing 9 changed files with 221 additions and 11 deletions.
19 changes: 19 additions & 0 deletions src/inference/dev_api/openvino/runtime/internal_properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,25 @@ static constexpr Property<ov::threading::IStreamsExecutor::ThreadBindingType, Pr
*/
static constexpr Property<size_t, PropertyMutability::RW> threads_per_stream{"THREADS_PER_STREAM"};

/**
 * @brief Contains the plugin's compiled-model-format descriptor so that the plugin runtime can
 * check whether it is compatible with a cached compiled model; the value is returned by
 * get_property.
 *
 * The information details are defined by the plugin itself, and each plugin can have different
 * contents. For example, the CPU plugin will contain the OV version, while the GPU plugin will
 * contain the OV and GPU driver versions, etc.
 * Core does not interpret the content; it only reads it from the plugin and writes it into the
 * blob header.
 *
 * @ingroup ov_dev_api_plugin_api
 */
static constexpr Property<std::string, PropertyMutability::RO> compiled_model_format{"COMPILED_MODEL_FORMAT"};

/**
 * @brief Checks whether the compiled_model_format attached in the options map is supported by
 * this plugin (queried by Core when validating a cached blob's header).
 * @ingroup ov_dev_api_plugin_api
 */
static constexpr Property<bool, PropertyMutability::RO> compiled_model_format_supported{
"COMPILED_MODEL_FORMAT_SUPPORTED"};

} // namespace internal
OPENVINO_DEPRECATED(
"This property is deprecated and will be removed soon. Use ov::internal::caching_properties instead of it.")
Expand Down
9 changes: 7 additions & 2 deletions src/inference/src/compilation_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,12 @@ std::string ModelCache::compute_hash(const std::string& modelStr,

CompiledBlobHeader::CompiledBlobHeader() {}

CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo)
CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion,
const std::string& fileInfo,
const std::string& runtimeInfo)
: m_ieVersion(ieVersion),
m_fileInfo(fileInfo) {}
m_fileInfo(fileInfo),
m_runtimeInfo(runtimeInfo) {}

std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) {
std::string xmlStr;
Expand All @@ -208,6 +211,7 @@ std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) {
pugi::xml_node compiledBlobNode = document.document_element();
header.m_ieVersion = pugixml::utils::GetStrAttr(compiledBlobNode, "ie_version");
header.m_fileInfo = pugixml::utils::GetStrAttr(compiledBlobNode, "file_info");
header.m_runtimeInfo = pugixml::utils::GetStrAttr(compiledBlobNode, "runtime_info");

return stream;
}
Expand All @@ -217,6 +221,7 @@ std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header)
auto compiledBlobNode = document.append_child("compiled_blob");
compiledBlobNode.append_attribute("ie_version").set_value(header.m_ieVersion.c_str());
compiledBlobNode.append_attribute("file_info").set_value(header.m_fileInfo.c_str());
compiledBlobNode.append_attribute("runtime_info").set_value(header.m_runtimeInfo.c_str());

document.save(stream, nullptr, pugi::format_raw);
document.reset();
Expand Down
7 changes: 6 additions & 1 deletion src/inference/src/compilation_context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,11 @@ struct ModelCache final {
class CompiledBlobHeader final {
std::string m_ieVersion;
std::string m_fileInfo;
std::string m_runtimeInfo;

public:
CompiledBlobHeader();
CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo);
CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo, const std::string& runtimeInfo);

const std::string& getIeVersion() const {
return m_ieVersion;
Expand All @@ -44,6 +45,10 @@ class CompiledBlobHeader final {
return m_fileInfo;
}

const std::string& getRuntimeInfo() const {
return m_runtimeInfo;
}

friend std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header);

friend std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header);
Expand Down
27 changes: 22 additions & 5 deletions src/inference/src/dev/core_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1435,9 +1435,15 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model_and_cache(const std::s
try {
// need to export network for further import from "cache"
OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::compile_model::Export");
std::string compiled_model_format("");
if (device_supports_internal_property(plugin, ov::internal::compiled_model_format.name())) {
compiled_model_format =
plugin.get_property(ov::internal::compiled_model_format.name(), {}).as<std::string>();
}
cacheContent.cacheManager->write_cache_entry(cacheContent.blobId, [&](std::ostream& networkStream) {
networkStream << ov::CompiledBlobHeader(InferenceEngine::GetInferenceEngineVersion()->buildNumber,
ov::ModelCache::calculate_file_info(cacheContent.modelPath));
ov::ModelCache::calculate_file_info(cacheContent.modelPath),
compiled_model_format);
execNetwork->export_model(networkStream);
});
} catch (...) {
Expand Down Expand Up @@ -1466,14 +1472,25 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
try {
ov::CompiledBlobHeader header;
networkStream >> header;
if (header.getIeVersion() != ov::get_openvino_version().buildNumber) {
// Build number mismatch, don't use this cache
OPENVINO_THROW("Version does not match");
}
if (header.getFileInfo() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) {
// Original file is changed, don't use cache
OPENVINO_THROW("Original model file is changed");
}
if (util::contains(plugin.get_property(ov::internal::supported_properties),
ov::internal::compiled_model_format_supported.name())) {
ov::AnyMap compiled_model_format = {
{ov::internal::compiled_model_format_supported.name(), std::string(header.getRuntimeInfo())}};
auto res = plugin.get_property(ov::internal::compiled_model_format_supported.name(),
compiled_model_format);
if (!res.as<bool>()) {
OPENVINO_THROW("Original model format has been changed, not supported anymore!");
}
} else {
if (header.getIeVersion() != ov::get_openvino_version().buildNumber) {
// Build number mismatch, don't use this cache
OPENVINO_THROW("Version does not match");
}
}
} catch (...) {
throw HeaderException();
}
Expand Down
93 changes: 93 additions & 0 deletions src/inference/tests/functional/caching_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1700,6 +1700,99 @@ TEST_P(CachingTest, TestCacheFileOldVersion) {
}
}

// Verifies that the "runtime_info" (compiled_model_format) field written into the cached blob
// header gates cache reuse: a matching format allows import, a corrupted format silently forces
// recompilation, and the refreshed cache entry is importable again afterwards.
TEST_P(CachingTest, TestCacheFileWithCompiledModelFormat) {
    EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _)).Times(AnyNumber());
    EXPECT_CALL(*mockPlugin, get_property(ov::device::capability::EXPORT_IMPORT, _)).Times(AnyNumber());
    EXPECT_CALL(*mockPlugin, get_property(ov::device::architecture.name(), _)).Times(AnyNumber());
    EXPECT_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _)).Times(AnyNumber());
    EXPECT_CALL(*mockPlugin, get_property(ov::internal::caching_properties.name(), _)).Times(AnyNumber());
    EXPECT_CALL(*mockPlugin, get_property(ov::device::capabilities.name(), _)).Times(AnyNumber());
    // Advertise both compiled_model_format properties so Core writes the format into the blob
    // header on export and consults the plugin when validating a cached blob on import.
    EXPECT_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _))
        .Times(AnyNumber())
        .WillRepeatedly(Invoke([&](const std::string&, const ov::AnyMap&) {
            return std::vector<ov::PropertyName>{ov::internal::caching_properties.name(),
                                                 ov::internal::compiled_model_format.name(),
                                                 ov::internal::compiled_model_format_supported.name()};
        }));
    const std::string compiled_model_format("Mock compiled model format segment.");
    EXPECT_CALL(*mockPlugin, get_property(ov::internal::compiled_model_format.name(), _))
        .Times(AtLeast(1))
        .WillRepeatedly(Return(compiled_model_format));
    // The mock plugin accepts a cached blob only when the attached runtime info matches exactly.
    EXPECT_CALL(*mockPlugin, get_property(ov::internal::compiled_model_format_supported.name(), _))
        .Times(AtLeast(1))
        .WillRepeatedly(Invoke([&](const std::string&, const ov::AnyMap& options) {
            auto it = options.find(ov::internal::compiled_model_format_supported.name());
            ov::Any ret = true;
            if (it == options.end() || it->second.as<std::string>() != compiled_model_format)
                ret = false;
            return ret;
        }));
    {  // Step 1: cold load - the model is compiled and exported into the cache directory.
        EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
        EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _))
            .Times(!m_remoteContext ? 1 : 0);
        EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0);
        EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(0);
        m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) {
            EXPECT_CALL(net, export_model(_)).Times(1);
        });
        testLoad([&](ov::Core& core) {
            EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir)));
            EXPECT_NO_THROW(m_testFunction(core));
        });
    }
    {  // Corrupt the cached blob: overwrite the stored format string with same-length filler so
       // the header becomes incompatible without otherwise damaging the file layout.
        auto blobs = ov::test::utils::listFilesWithExt(m_cacheDir, "blob");
        for (const auto& fileName : blobs) {
            std::string content;
            {
                std::ifstream inp(fileName, std::ios_base::binary);
                std::ostringstream ostr;
                ostr << inp.rdbuf();
                content = ostr.str();
            }
            auto index = content.find(compiled_model_format.c_str());
            std::string new_compiled_model_format(compiled_model_format.size(), '0');
            if (index != std::string::npos) {
                content.replace(index, compiled_model_format.size(), new_compiled_model_format);
            } else {
                return;  // skip test
            }
            std::ofstream out(fileName, std::ios_base::binary);
            out.write(content.c_str(), static_cast<std::streamsize>(content.size()));
        }
    }
    m_post_mock_net_callbacks.pop_back();
    {  // Step 2. compiled_model_format mismatch, cache will be silently removed:
       // the model is recompiled and re-exported, never imported from the stale blob.
        EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
        EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _))
            .Times(!m_remoteContext ? 1 : 0);
        EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0);
        EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(0);
        m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) {
            EXPECT_CALL(net, export_model(_)).Times(1);
        });
        testLoad([&](ov::Core& core) {
            EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir)));
            EXPECT_NO_THROW(m_testFunction(core));
        });
    }
    m_post_mock_net_callbacks.pop_back();
    {  // Step 3: same load, should be ok now due to re-creation of cache - blob is imported,
       // nothing is compiled or exported.
        EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
        EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
        EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
        EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(!m_remoteContext ? 1 : 0);
        for (auto& net : comp_models) {
            EXPECT_CALL(*net, export_model(_)).Times(0);
        }
        testLoad([&](ov::Core& core) {
            EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir)));
            EXPECT_NO_THROW(m_testFunction(core));
        });
    }
}

TEST_P(CachingTest, LoadHetero_NoCacheMetric) {
EXPECT_CALL(*mockPlugin, query_model(_, _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _))
Expand Down
19 changes: 17 additions & 2 deletions src/plugins/intel_cpu/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ Engine::Engine() :
#if defined(OV_CPU_WITH_ACL)
scheduler_guard = SchedulerGuard::instance();
#endif
auto& ov_version = ov::get_openvino_version();
compiled_model_format_info = std::string(ov_version.buildNumber);
}

Engine::~Engine() {
Expand Down Expand Up @@ -688,6 +690,15 @@ ov::Any Engine::get_property(const std::string& name, const ov::AnyMap& options)
return decltype(ov::hint::num_requests)::value_type(engConfig.hintNumRequests);
} else if (name == ov::hint::execution_mode) {
return engConfig.executionMode;
} else if (name == ov::internal::compiled_model_format.name()) {
return decltype(ov::internal::compiled_model_format)::value_type(compiled_model_format_info);
} else if (name == ov::internal::compiled_model_format_supported.name()) {
ov::Any res = false;
auto it = options.find(ov::internal::compiled_model_format_supported.name());
if (it != options.end() && it->second.as<std::string>() == compiled_model_format_info) {
res = true;
}
return res;
}
return get_ro_property(name, options);
}
Expand Down Expand Up @@ -738,7 +749,9 @@ ov::Any Engine::get_metric_legacy(const std::string& name, const ov::AnyMap& opt
} else if (ov::internal::supported_properties.name() == name) {
return decltype(ov::internal::supported_properties)::value_type{
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_format.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_format_supported.name(), ov::PropertyMutability::RO}};
} else if (name == ov::internal::caching_properties) {
std::vector<ov::PropertyName> cachingProperties = {ov::device::full_name.name()};
return decltype(ov::internal::caching_properties)::value_type(std::move(cachingProperties));
Expand Down Expand Up @@ -796,7 +809,9 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio
} else if (ov::internal::supported_properties == name) {
return decltype(ov::internal::supported_properties)::value_type{
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_format.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_format_supported.name(), ov::PropertyMutability::RO}};
} else if (name == ov::device::full_name) {
return decltype(ov::device::full_name)::value_type(deviceFullName);
} else if (name == ov::available_devices) {
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_cpu/src/plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class Engine : public ov::IPlugin {
So track if streams is set explicitly (not auto-configured) */
bool streamsExplicitlySetForEngine = false;
const std::string deviceFullName;
std::string compiled_model_format_info;

std::shared_ptr<void> specialSetup;

Expand Down
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Plugin : public ov::IPlugin {
std::string m_default_device_id = "0";
std::map<std::string, cldnn::device::ptr> m_device_map;
std::map<std::string, ExecutionConfig> m_configs_map;
ov::AnyMap m_compiled_model_format;

mutable std::map<std::string, std::shared_ptr<RemoteContextImpl>> m_default_contexts;
mutable std::once_flag m_default_contexts_once;
Expand All @@ -38,6 +39,7 @@ class Plugin : public ov::IPlugin {
std::vector<std::string> get_device_capabilities(const cldnn::device_info& info) const;
uint32_t get_optimal_batch_size(const ov::AnyMap& options) const;
uint32_t get_max_batch_size(const ov::AnyMap& options) const;
ov::AnyMap parse_compiled_model_format(const std::string& input) const;

ov::AnyMap preprocess_config(const ov::AnyMap& orig_config) const;
bool is_metric(const std::string& name) const;
Expand Down
Loading

0 comments on commit 599496a

Please sign in to comment.