Skip to content

Commit

Permalink
Generalize model cache reusing
Browse files Browse the repository at this point in the history
  • Loading branch information
riverlijunjie committed Dec 6, 2023
1 parent 35330d5 commit 824da2b
Show file tree
Hide file tree
Showing 9 changed files with 217 additions and 7 deletions.
19 changes: 19 additions & 0 deletions src/inference/dev_api/openvino/runtime/internal_properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,25 @@ static constexpr Property<ov::threading::IStreamsExecutor::ThreadBindingType, Pr
*/
static constexpr Property<size_t, PropertyMutability::RW> threads_per_stream{"THREADS_PER_STREAM"};

/**
* @brief Contains compiled_model_format information so that the plugin runtime can check whether it is
* compatible with the cached compiled model; it is returned by get_property.
*
* The information details are defined by the plugin itself, and each plugin can have different contents.
* For example, the CPU plugin will contain the OV version, while the GPU plugin will contain the OV and
* GPU driver versions, etc.
* Core doesn't interpret its content; it only reads it from the plugin and writes it into the blob header.
*
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<std::string, PropertyMutability::RO> compiled_model_format{"COMPILED_MODEL_FORMAT"};

/**
* @brief Check whether the attached compiled_model_format is supported by this plugin.
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<bool, PropertyMutability::RO> compiled_model_format_supported{
"COMPILED_MODEL_FORMAT_SUPPORTED"};

} // namespace internal
OPENVINO_DEPRECATED(
"This property is deprecated and will be removed soon. Use ov::internal::caching_properties instead of it.")
Expand Down
9 changes: 7 additions & 2 deletions src/inference/src/compilation_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,12 @@ std::string ModelCache::compute_hash(const std::string& modelStr,

CompiledBlobHeader::CompiledBlobHeader() {}

CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo)
CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion,
const std::string& fileInfo,
const std::string& runtimeInfo)
: m_ieVersion(ieVersion),
m_fileInfo(fileInfo) {}
m_fileInfo(fileInfo),
m_runtimeInfo(runtimeInfo) {}

std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) {
std::string xmlStr;
Expand All @@ -208,6 +211,7 @@ std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) {
pugi::xml_node compiledBlobNode = document.document_element();
header.m_ieVersion = pugixml::utils::GetStrAttr(compiledBlobNode, "ie_version");
header.m_fileInfo = pugixml::utils::GetStrAttr(compiledBlobNode, "file_info");
header.m_runtimeInfo = pugixml::utils::GetStrAttr(compiledBlobNode, "runtime_info");

return stream;
}
Expand All @@ -217,6 +221,7 @@ std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header)
auto compiledBlobNode = document.append_child("compiled_blob");
compiledBlobNode.append_attribute("ie_version").set_value(header.m_ieVersion.c_str());
compiledBlobNode.append_attribute("file_info").set_value(header.m_fileInfo.c_str());
compiledBlobNode.append_attribute("runtime_info").set_value(header.m_runtimeInfo.c_str());

document.save(stream, nullptr, pugi::format_raw);
document.reset();
Expand Down
7 changes: 6 additions & 1 deletion src/inference/src/compilation_context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,11 @@ struct ModelCache final {
class CompiledBlobHeader final {
std::string m_ieVersion;
std::string m_fileInfo;
std::string m_runtimeInfo;

public:
CompiledBlobHeader();
CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo);
CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo, const std::string& runtimeInfo);

const std::string& getIeVersion() const {
return m_ieVersion;
Expand All @@ -44,6 +45,10 @@ class CompiledBlobHeader final {
return m_fileInfo;
}

const std::string& getRuntimeInfo() const {
return m_runtimeInfo;
}

friend std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header);

friend std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header);
Expand Down
19 changes: 18 additions & 1 deletion src/inference/src/dev/core_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1435,9 +1435,15 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model_and_cache(const std::s
try {
// need to export network for further import from "cache"
OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::compile_model::Export");
std::string compiled_model_format("");
if (device_supports_internal_property(plugin, ov::internal::compiled_model_format.name())) {
compiled_model_format =
plugin.get_property(ov::internal::compiled_model_format.name(), {}).as<std::string>();
}
cacheContent.cacheManager->write_cache_entry(cacheContent.blobId, [&](std::ostream& networkStream) {
networkStream << ov::CompiledBlobHeader(InferenceEngine::GetInferenceEngineVersion()->buildNumber,
ov::ModelCache::calculate_file_info(cacheContent.modelPath));
ov::ModelCache::calculate_file_info(cacheContent.modelPath),
compiled_model_format);
execNetwork->export_model(networkStream);
});
} catch (...) {
Expand Down Expand Up @@ -1466,6 +1472,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
try {
ov::CompiledBlobHeader header;
networkStream >> header;
// TODO: it will be moved into plugin's compiled_model_format
if (header.getIeVersion() != ov::get_openvino_version().buildNumber) {
// Build number mismatch, don't use this cache
OPENVINO_THROW("Version does not match");
Expand All @@ -1474,6 +1481,16 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
// Original file is changed, don't use cache
OPENVINO_THROW("Original model file is changed");
}
if (util::contains(plugin.get_property(ov::internal::supported_properties),
ov::internal::compiled_model_format_supported.name())) {
ov::AnyMap compiled_model_format = {
{ov::internal::compiled_model_format_supported.name(), std::string(header.getRuntimeInfo())}};
auto res = plugin.get_property(ov::internal::compiled_model_format_supported.name(),
compiled_model_format);
if (!res.as<bool>()) {
OPENVINO_THROW("Original model format has been changed, not supported anymore!");
}
}
} catch (...) {
throw HeaderException();
}
Expand Down
93 changes: 93 additions & 0 deletions src/inference/tests/functional/caching_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1700,6 +1700,99 @@ TEST_P(CachingTest, TestCacheFileOldVersion) {
}
}

// Exercises the compiled_model_format compatibility check in the model-cache flow:
//   Step 1: compile + export — the plugin's compiled_model_format string is written into the blob header;
//   Step 2: the format segment inside the cached blob file is overwritten on disk, so the plugin reports
//           the blob as unsupported and the model is recompiled (re-exporting the cache entry);
//   Step 3: loading again succeeds from the re-created cache via import_model.
TEST_P(CachingTest, TestCacheFileWithCompiledModelFormat) {
    // Generic property queries may happen any number of times.
    EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _)).Times(AnyNumber());
    EXPECT_CALL(*mockPlugin, get_property(ov::device::capability::EXPORT_IMPORT, _)).Times(AnyNumber());
    EXPECT_CALL(*mockPlugin, get_property(ov::device::architecture.name(), _)).Times(AnyNumber());
    EXPECT_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _)).Times(AnyNumber());
    EXPECT_CALL(*mockPlugin, get_property(ov::internal::caching_properties.name(), _)).Times(AnyNumber());
    EXPECT_CALL(*mockPlugin, get_property(ov::device::capabilities.name(), _)).Times(AnyNumber());
    // Advertise both compiled_model_format internal properties so Core takes the new cache-check path.
    EXPECT_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _))
        .Times(AnyNumber())
        .WillRepeatedly(Invoke([&](const std::string&, const ov::AnyMap&) {
            return std::vector<ov::PropertyName>{ov::internal::caching_properties.name(),
                                                 ov::internal::compiled_model_format.name(),
                                                 ov::internal::compiled_model_format_supported.name()};
        }));
    // Marker string the Core writes into the blob header (as the "runtime_info" field).
    const std::string compiled_model_format("Mock compiled model format segment.");
    EXPECT_CALL(*mockPlugin, get_property(ov::internal::compiled_model_format.name(), _))
        .Times(AtLeast(1))
        .WillRepeatedly(Return(compiled_model_format));
    // Mock plugin accepts a cached blob only when the header carries exactly the same format string.
    EXPECT_CALL(*mockPlugin, get_property(ov::internal::compiled_model_format_supported.name(), _))
        .Times(AtLeast(1))
        .WillRepeatedly(Invoke([&](const std::string&, const ov::AnyMap& options) {
            auto it = options.find(ov::internal::compiled_model_format_supported.name());
            ov::Any ret = true;
            if (it == options.end() || it->second.as<std::string>() != compiled_model_format)
                ret = false;
            return ret;
        }));
    {  // Step 1: cold start — model is compiled and exported into the cache directory (no import).
        EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
        EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _))
            .Times(!m_remoteContext ? 1 : 0);
        EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0);
        EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(0);
        m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) {
            EXPECT_CALL(net, export_model(_)).Times(1);
        });
        testLoad([&](ov::Core& core) {
            EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir)));
            EXPECT_NO_THROW(m_testFunction(core));
        });
    }
    {  // Corrupt the cached blob: replace the format marker with '0's of the same length so the
       // file size (and everything else in the blob) stays unchanged.
        auto blobs = ov::test::utils::listFilesWithExt(m_cacheDir, "blob");
        for (const auto& fileName : blobs) {
            std::string content;
            {
                std::ifstream inp(fileName, std::ios_base::binary);
                std::ostringstream ostr;
                ostr << inp.rdbuf();
                content = ostr.str();
            }
            auto index = content.find(compiled_model_format.c_str());
            std::string new_compiled_model_format(compiled_model_format.size(), '0');
            if (index != std::string::npos) {
                content.replace(index, compiled_model_format.size(), new_compiled_model_format);
            } else {
                return;  // skip test — the format marker was not found inside the blob
            }
            std::ofstream out(fileName, std::ios_base::binary);
            out.write(content.c_str(), static_cast<std::streamsize>(content.size()));
        }
    }
    m_post_mock_net_callbacks.pop_back();
    {  // Step 2. compiled_model_format mismatch: the cache entry is rejected and silently re-created —
       // the model is compiled and exported again; import_model is never called.
        EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
        EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _))
            .Times(!m_remoteContext ? 1 : 0);
        EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0);
        EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(0);
        m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) {
            EXPECT_CALL(net, export_model(_)).Times(1);
        });
        testLoad([&](ov::Core& core) {
            EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir)));
            EXPECT_NO_THROW(m_testFunction(core));
        });
    }
    m_post_mock_net_callbacks.pop_back();
    {  // Step 3: same load, should be ok now due to re-creation of cache — imported, never recompiled.
        EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
        EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
        EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
        EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(!m_remoteContext ? 1 : 0);
        for (auto& net : comp_models) {
            EXPECT_CALL(*net, export_model(_)).Times(0);
        }
        testLoad([&](ov::Core& core) {
            EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir)));
            EXPECT_NO_THROW(m_testFunction(core));
        });
    }
}

TEST_P(CachingTest, LoadHetero_NoCacheMetric) {
EXPECT_CALL(*mockPlugin, query_model(_, _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _))
Expand Down
19 changes: 17 additions & 2 deletions src/plugins/intel_cpu/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ Engine::Engine() :
#if defined(OV_CPU_WITH_ACL)
scheduler_guard = SchedulerGuard::instance();
#endif
auto& ov_version = ov::get_openvino_version();
compiled_model_format_info = std::string(ov_version.buildNumber);
}

Engine::~Engine() {
Expand Down Expand Up @@ -688,6 +690,15 @@ ov::Any Engine::get_property(const std::string& name, const ov::AnyMap& options)
return decltype(ov::hint::num_requests)::value_type(engConfig.hintNumRequests);
} else if (name == ov::hint::execution_mode) {
return engConfig.executionMode;
} else if (name == ov::internal::compiled_model_format.name()) {
return decltype(ov::internal::compiled_model_format)::value_type(compiled_model_format_info);
} else if (name == ov::internal::compiled_model_format_supported.name()) {
ov::Any res = false;
auto it = options.find(ov::internal::compiled_model_format_supported.name());
if (it != options.end() && it->second.as<std::string>() == compiled_model_format_info) {
res = true;
}
return res;
}
return get_ro_property(name, options);
}
Expand Down Expand Up @@ -738,7 +749,9 @@ ov::Any Engine::get_metric_legacy(const std::string& name, const ov::AnyMap& opt
} else if (ov::internal::supported_properties.name() == name) {
return decltype(ov::internal::supported_properties)::value_type{
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_format.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_format_supported.name(), ov::PropertyMutability::RO}};
} else if (name == ov::internal::caching_properties) {
std::vector<ov::PropertyName> cachingProperties = {ov::device::full_name.name()};
return decltype(ov::internal::caching_properties)::value_type(std::move(cachingProperties));
Expand Down Expand Up @@ -796,7 +809,9 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio
} else if (ov::internal::supported_properties == name) {
return decltype(ov::internal::supported_properties)::value_type{
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_format.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_format_supported.name(), ov::PropertyMutability::RO}};
} else if (name == ov::device::full_name) {
return decltype(ov::device::full_name)::value_type(deviceFullName);
} else if (name == ov::available_devices) {
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_cpu/src/plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class Engine : public ov::IPlugin {
So track if streams is set explicitly (not auto-configured) */
bool streamsExplicitlySetForEngine = false;
const std::string deviceFullName;
std::string compiled_model_format_info;

std::shared_ptr<void> specialSetup;

Expand Down
2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Plugin : public ov::IPlugin {
std::string m_default_device_id = "0";
std::map<std::string, cldnn::device::ptr> m_device_map;
std::map<std::string, ExecutionConfig> m_configs_map;
ov::AnyMap m_compiled_model_format;

mutable std::map<std::string, std::shared_ptr<RemoteContextImpl>> m_default_contexts;
mutable std::once_flag m_default_contexts_once;
Expand All @@ -38,6 +39,7 @@ class Plugin : public ov::IPlugin {
std::vector<std::string> get_device_capabilities(const cldnn::device_info& info) const;
uint32_t get_optimal_batch_size(const ov::AnyMap& options) const;
uint32_t get_max_batch_size(const ov::AnyMap& options) const;
ov::AnyMap parse_compiled_model_format(const std::string& input) const;

ov::AnyMap preprocess_config(const ov::AnyMap& orig_config) const;
bool is_metric(const std::string& name) const;
Expand Down
55 changes: 54 additions & 1 deletion src/plugins/intel_gpu/src/plugin/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,27 @@ std::string Plugin::get_device_id(const ov::AnyMap& config) const {
return id;
}

/** Parse a compiled model format string into an ov::AnyMap.
 * input: "aaa:1234;ccc:xyzw;"  (a trailing ';' after the last pair is optional)
 * output:
 *   out["aaa"] = "1234"
 *   out["ccc"] = "xyzw"
 * Segments without a ':' key/value separator are skipped instead of being merged
 * into a neighboring pair.
 */
ov::AnyMap Plugin::parse_compiled_model_format(const std::string& input) const {
    ov::AnyMap res = {};
    size_t begin = 0;
    while (begin < input.size()) {
        // Each "key:value" pair is terminated by ';'; also accept a final pair without one,
        // which the previous implementation silently dropped.
        auto end = input.find(';', begin);
        if (end == std::string::npos)
            end = input.size();
        const auto sep = input.find(':', begin);
        // Accept the segment only when the ':' actually belongs to it (sep < end); this avoids
        // corrupt keys spanning a ';' when a malformed segment has no separator of its own.
        if (sep != std::string::npos && sep < end) {
            res[input.substr(begin, sep - begin)] = input.substr(sep + 1, end - sep - 1);
        }
        begin = end + 1;
    }
    return res;
}

void Plugin::transform_model(std::shared_ptr<ov::Model>& model, const ExecutionConfig& config) const {
OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::transform_model");
auto deviceInfo = m_device_map.at(config.get_property(ov::device::id))->get_info();
Expand Down Expand Up @@ -174,6 +195,13 @@ Plugin::Plugin() {
for (const auto& device : m_device_map) {
m_configs_map.insert({device.first, ExecutionConfig(ov::device::id(device.first))});
}

// Set runtime info
auto& ov_version = ov::get_openvino_version();
m_compiled_model_format["OV_VERSION"] = ov_version.buildNumber;
for (const auto& device : m_device_map) {
m_compiled_model_format[device.first] = device.second->get_info().driver_version;
}
}

std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<const ov::Model>& model, const ov::AnyMap& orig_config) const {
Expand Down Expand Up @@ -331,6 +359,29 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options)
return decltype(ov::available_devices)::value_type {available_devices};
} else if (name == ov::internal::caching_properties) {
return decltype(ov::internal::caching_properties)::value_type(get_caching_properties());
} else if (name == ov::internal::compiled_model_format.name()) {
std::string format_info;
for (auto& it : m_compiled_model_format) {
format_info += it.first + ":" + it.second.as<std::string>() + ";";
}
return decltype(ov::internal::compiled_model_format)::value_type(format_info);
} else if (name == ov::internal::compiled_model_format_supported.name()) {
ov::Any res = true;
auto it = options.find(ov::internal::compiled_model_format_supported.name());
if (it == options.end()) {
res = false;
} else {
const auto data = it->second.as<std::string>();
auto input = parse_compiled_model_format(data);
for (auto& item : m_compiled_model_format) {
auto it = input.find(item.first);
if (it == input.end() || it->second.as<std::string>() != item.second.as<std::string>()) {
res = false;
break;
}
}
}
return res;
}

OPENVINO_SUPPRESS_DEPRECATED_START
Expand Down Expand Up @@ -572,7 +623,9 @@ std::vector<ov::PropertyName> Plugin::get_supported_internal_properties() const
static const std::vector<ov::PropertyName> supported_internal_properties = {
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::config_device_id.name(), ov::PropertyMutability::WO},
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_format.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_format_supported.name(), ov::PropertyMutability::RO}};
return supported_internal_properties;
}

Expand Down

0 comments on commit 824da2b

Please sign in to comment.