Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generalize model cache reusing #21492

Merged
19 changes: 19 additions & 0 deletions src/inference/dev_api/openvino/runtime/internal_properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,25 @@ static constexpr Property<ov::threading::IStreamsExecutor::ThreadBindingType, Pr
*/
static constexpr Property<size_t, PropertyMutability::RW> threads_per_stream{"THREADS_PER_STREAM"};

/**
 * @brief Contains compiled_model_format information so that the plugin runtime can check whether it is
 * compatible with the cached compiled model; it is returned by get_property.
 *
 * The information details are defined by the plugin itself, and each plugin can have different contents.
 * For example, the CPU plugin will contain the OV version, while the GPU plugin will contain the OV and GPU driver versions, etc.
 * The Core doesn't interpret its content; it only reads it from the plugin and writes it into the blob header.
*
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<std::string, PropertyMutability::RO> compiled_model_format{"COMPILED_MODEL_FORMAT"};

/**
 * @brief Checks whether the attached compiled_model_format is supported by this plugin.
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<bool, PropertyMutability::RO> compiled_model_format_supported{
"COMPILED_MODEL_FORMAT_SUPPORTED"};

} // namespace internal
OPENVINO_DEPRECATED(
"This property is deprecated and will be removed soon. Use ov::internal::caching_properties instead of it.")
Expand Down
9 changes: 7 additions & 2 deletions src/inference/src/compilation_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,12 @@ std::string ModelCache::compute_hash(const std::string& modelStr,

CompiledBlobHeader::CompiledBlobHeader() {}

CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo)
CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion,
const std::string& fileInfo,
const std::string& runtimeInfo)
: m_ieVersion(ieVersion),
m_fileInfo(fileInfo) {}
m_fileInfo(fileInfo),
m_runtimeInfo(runtimeInfo) {}

std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) {
std::string xmlStr;
Expand All @@ -208,6 +211,7 @@ std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header) {
pugi::xml_node compiledBlobNode = document.document_element();
header.m_ieVersion = pugixml::utils::GetStrAttr(compiledBlobNode, "ie_version");
header.m_fileInfo = pugixml::utils::GetStrAttr(compiledBlobNode, "file_info");
header.m_runtimeInfo = pugixml::utils::GetStrAttr(compiledBlobNode, "runtime_info");

return stream;
}
Expand All @@ -217,6 +221,7 @@ std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header)
auto compiledBlobNode = document.append_child("compiled_blob");
compiledBlobNode.append_attribute("ie_version").set_value(header.m_ieVersion.c_str());
compiledBlobNode.append_attribute("file_info").set_value(header.m_fileInfo.c_str());
compiledBlobNode.append_attribute("runtime_info").set_value(header.m_runtimeInfo.c_str());

document.save(stream, nullptr, pugi::format_raw);
document.reset();
Expand Down
7 changes: 6 additions & 1 deletion src/inference/src/compilation_context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,11 @@ struct ModelCache final {
class CompiledBlobHeader final {
std::string m_ieVersion;
std::string m_fileInfo;
std::string m_runtimeInfo;

public:
CompiledBlobHeader();
CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo);
CompiledBlobHeader(const std::string& ieVersion, const std::string& fileInfo, const std::string& runtimeInfo);

const std::string& getIeVersion() const {
return m_ieVersion;
Expand All @@ -44,6 +45,10 @@ class CompiledBlobHeader final {
return m_fileInfo;
}

const std::string& getRuntimeInfo() const {
return m_runtimeInfo;
}

friend std::istream& operator>>(std::istream& stream, CompiledBlobHeader& header);

friend std::ostream& operator<<(std::ostream& stream, const CompiledBlobHeader& header);
Expand Down
27 changes: 22 additions & 5 deletions src/inference/src/dev/core_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1435,9 +1435,15 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model_and_cache(const std::s
try {
// need to export network for further import from "cache"
OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::compile_model::Export");
std::string compiled_model_format("");
riverlijunjie marked this conversation as resolved.
Show resolved Hide resolved
if (device_supports_internal_property(plugin, ov::internal::compiled_model_format.name())) {
compiled_model_format =
plugin.get_property(ov::internal::compiled_model_format.name(), {}).as<std::string>();
}
cacheContent.cacheManager->write_cache_entry(cacheContent.blobId, [&](std::ostream& networkStream) {
networkStream << ov::CompiledBlobHeader(InferenceEngine::GetInferenceEngineVersion()->buildNumber,
ov::ModelCache::calculate_file_info(cacheContent.modelPath));
ov::ModelCache::calculate_file_info(cacheContent.modelPath),
compiled_model_format);
execNetwork->export_model(networkStream);
});
} catch (...) {
Expand Down Expand Up @@ -1466,14 +1472,25 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
try {
ov::CompiledBlobHeader header;
networkStream >> header;
if (header.getIeVersion() != ov::get_openvino_version().buildNumber) {
// Build number mismatch, don't use this cache
OPENVINO_THROW("Version does not match");
}
if (header.getFileInfo() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) {
// Original file is changed, don't use cache
OPENVINO_THROW("Original model file is changed");
}
if (util::contains(plugin.get_property(ov::internal::supported_properties),
ov::internal::compiled_model_format_supported.name())) {
ov::AnyMap compiled_model_format = {
{ov::internal::compiled_model_format.name(), std::string(header.getRuntimeInfo())}};
auto res = plugin.get_property(ov::internal::compiled_model_format_supported.name(),
compiled_model_format);
if (!res.as<bool>()) {
OPENVINO_THROW("Original model format has been changed, not supported anymore!");
}
} else {
if (header.getIeVersion() != ov::get_openvino_version().buildNumber) {
wangleis marked this conversation as resolved.
Show resolved Hide resolved
// Build number mismatch, don't use this cache
OPENVINO_THROW("Version does not match");
}
}
} catch (...) {
throw HeaderException();
}
Expand Down
93 changes: 93 additions & 0 deletions src/inference/tests/functional/caching_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1700,6 +1700,99 @@ TEST_P(CachingTest, TestCacheFileOldVersion) {
}
}

TEST_P(CachingTest, TestCacheFileWithCompiledModelFormat) {
EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::device::capability::EXPORT_IMPORT, _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::device::architecture.name(), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::internal::caching_properties.name(), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::device::capabilities.name(), _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _))
.Times(AnyNumber())
.WillRepeatedly(Invoke([&](const std::string&, const ov::AnyMap&) {
return std::vector<ov::PropertyName>{ov::internal::caching_properties.name(),
ov::internal::compiled_model_format.name(),
ov::internal::compiled_model_format_supported.name()};
}));
const std::string compiled_model_format("Mock compiled model format segment.");
EXPECT_CALL(*mockPlugin, get_property(ov::internal::compiled_model_format.name(), _))
.Times(AtLeast(1))
.WillRepeatedly(Return(compiled_model_format));
EXPECT_CALL(*mockPlugin, get_property(ov::internal::compiled_model_format_supported.name(), _))
.Times(AtLeast(1))
.WillRepeatedly(Invoke([&](const std::string&, const ov::AnyMap& options) {
auto it = options.find(ov::internal::compiled_model_format.name());
ov::Any ret = true;
if (it == options.end() || it->second.as<std::string>() != compiled_model_format)
ret = false;
return ret;
}));
{
EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _))
.Times(!m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0);
EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(0);
m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) {
EXPECT_CALL(net, export_model(_)).Times(1);
});
testLoad([&](ov::Core& core) {
EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir)));
EXPECT_NO_THROW(m_testFunction(core));
});
}
{
auto blobs = ov::test::utils::listFilesWithExt(m_cacheDir, "blob");
for (const auto& fileName : blobs) {
std::string content;
{
std::ifstream inp(fileName, std::ios_base::binary);
std::ostringstream ostr;
ostr << inp.rdbuf();
content = ostr.str();
}
auto index = content.find(compiled_model_format.c_str());
std::string new_compiled_model_format(compiled_model_format.size(), '0');
if (index != std::string::npos) {
content.replace(index, compiled_model_format.size(), new_compiled_model_format);
} else {
return; // skip test
}
std::ofstream out(fileName, std::ios_base::binary);
out.write(content.c_str(), static_cast<std::streamsize>(content.size()));
}
}
m_post_mock_net_callbacks.pop_back();
{ // Step 2. compiled_model_format mismatch, cache will be silently removed
EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _))
.Times(!m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0);
EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(0);
m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) {
EXPECT_CALL(net, export_model(_)).Times(1);
});
testLoad([&](ov::Core& core) {
EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir)));
EXPECT_NO_THROW(m_testFunction(core));
});
}
m_post_mock_net_callbacks.pop_back();
{ // Step 3: same load, should be ok now due to re-creation of cache
EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(!m_remoteContext ? 1 : 0);
for (auto& net : comp_models) {
EXPECT_CALL(*net, export_model(_)).Times(0);
}
testLoad([&](ov::Core& core) {
EXPECT_NO_THROW(core.set_property(ov::cache_dir(m_cacheDir)));
EXPECT_NO_THROW(m_testFunction(core));
});
}
}

TEST_P(CachingTest, LoadHetero_NoCacheMetric) {
EXPECT_CALL(*mockPlugin, query_model(_, _)).Times(AnyNumber());
EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _))
Expand Down
29 changes: 27 additions & 2 deletions src/plugins/intel_cpu/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ Engine::Engine() :
#if defined(OV_CPU_WITH_ACL)
scheduler_guard = SchedulerGuard::instance();
#endif
auto& ov_version = ov::get_openvino_version();
m_compiled_model_format["OV_VERSION"] = std::string(ov_version.buildNumber);
}

Engine::~Engine() {
Expand Down Expand Up @@ -688,6 +690,25 @@ ov::Any Engine::get_property(const std::string& name, const ov::AnyMap& options)
return decltype(ov::hint::num_requests)::value_type(engConfig.hintNumRequests);
} else if (name == ov::hint::execution_mode) {
return engConfig.executionMode;
} else if (name == ov::internal::compiled_model_format.name()) {
auto model_format = ov::Any(m_compiled_model_format);
return decltype(ov::internal::compiled_model_format)::value_type(model_format.as<std::string>());
} else if (name == ov::internal::compiled_model_format_supported.name()) {
ov::Any res = true;
auto it = options.find(ov::internal::compiled_model_format.name());
if (it == options.end()) {
res = false;
} else {
ov::AnyMap input_map = it->second.as<ov::AnyMap>();
for (auto& item : m_compiled_model_format) {
auto it = input_map.find(item.first);
if (it == input_map.end() || it->second.as<std::string>() != item.second.as<std::string>()) {
res = false;
break;
}
}
}
return res;
}
return get_ro_property(name, options);
}
Expand Down Expand Up @@ -738,7 +759,9 @@ ov::Any Engine::get_metric_legacy(const std::string& name, const ov::AnyMap& opt
} else if (ov::internal::supported_properties.name() == name) {
return decltype(ov::internal::supported_properties)::value_type{
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_format.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_format_supported.name(), ov::PropertyMutability::RO}};
} else if (name == ov::internal::caching_properties) {
std::vector<ov::PropertyName> cachingProperties = {ov::device::full_name.name()};
return decltype(ov::internal::caching_properties)::value_type(std::move(cachingProperties));
Expand Down Expand Up @@ -796,7 +819,9 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio
} else if (ov::internal::supported_properties == name) {
return decltype(ov::internal::supported_properties)::value_type{
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_format.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_format_supported.name(), ov::PropertyMutability::RO}};
} else if (name == ov::device::full_name) {
return decltype(ov::device::full_name)::value_type(deviceFullName);
} else if (name == ov::available_devices) {
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_cpu/src/plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class Engine : public ov::IPlugin {
So track if streams is set explicitly (not auto-configured) */
bool streamsExplicitlySetForEngine = false;
const std::string deviceFullName;
ov::AnyMap m_compiled_model_format;

std::shared_ptr<void> specialSetup;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Plugin : public ov::IPlugin {
std::string m_default_device_id = "0";
std::map<std::string, cldnn::device::ptr> m_device_map;
std::map<std::string, ExecutionConfig> m_configs_map;
ov::AnyMap m_compiled_model_format;

mutable std::map<std::string, std::shared_ptr<RemoteContextImpl>> m_default_contexts;
mutable std::once_flag m_default_contexts_once;
Expand Down
30 changes: 29 additions & 1 deletion src/plugins/intel_gpu/src/plugin/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,13 @@ Plugin::Plugin() {
for (const auto& device : m_device_map) {
m_configs_map.insert({device.first, ExecutionConfig(ov::device::id(device.first))});
}

// Set runtime info
auto& ov_version = ov::get_openvino_version();
m_compiled_model_format["OV_VERSION"] = ov_version.buildNumber;
for (const auto& device : m_device_map) {
m_compiled_model_format[device.first] = device.second->get_info().driver_version;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose we need to take the driver version specific to the device we call compile_model on.
But I hope the driver is the same for all devices, so we can collect only the driver of the first device here (otherwise, depending on the actual number of devices on the system, we could have a different m_compiled_model_format)

@isanghao could you please confirm?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We print the GPU info in one iGPU + dGPU machine:

    for (const auto& device : m_device_map) {
        auto driver_info = device.second->get_info();
        std::cout << device.first << ": (" << driver_info.dev_name << ", " << driver_info.vendor_id << ", "
                  << driver_info.device_id << ", " << driver_info.driver_version << ")" << std::endl;
    }
0: (Intel(R) UHD Graphics 770,         32902, 42880, 23.17.26241.33)
1: (Intel(R) Arc(TM) A770 Graphics, 32902, 22176, 23.17.26241.33)
2: (Intel(R) Arc(TM) A770 Graphics, 32902, 22176, 23.17.26241.33)

It seems the iGPU and dGPU have the same driver version; I'm not sure whether they can have different driver versions on the same machine.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose driver version is defined by driver_info.driver_version which is the same for all devices, while other values are per-device properties. I hope that device_id and vendor_id are driver independent.

CC @isanghao

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@riverlijunjie I suppose we need to store driver version for a single device, because depending on a number of devices on the system we will have different value for m_compiled_model_format

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, will do it!

}
}

std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<const ov::Model>& model, const ov::AnyMap& orig_config) const {
Expand Down Expand Up @@ -331,6 +338,25 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options)
return decltype(ov::available_devices)::value_type {available_devices};
} else if (name == ov::internal::caching_properties) {
return decltype(ov::internal::caching_properties)::value_type(get_caching_properties());
} else if (name == ov::internal::compiled_model_format.name()) {
auto model_format = ov::Any(m_compiled_model_format);
return decltype(ov::internal::compiled_model_format)::value_type(model_format.as<std::string>());
} else if (name == ov::internal::compiled_model_format_supported.name()) {
ov::Any res = true;
auto it = options.find(ov::internal::compiled_model_format.name());
if (it == options.end()) {
res = false;
} else {
ov::AnyMap input_map = it->second.as<ov::AnyMap>();
for (auto& item : m_compiled_model_format) {
auto it = input_map.find(item.first);
if (it == input_map.end() || it->second.as<std::string>() != item.second.as<std::string>()) {
res = false;
break;
}
}
}
return res;
}

OPENVINO_SUPPRESS_DEPRECATED_START
Expand Down Expand Up @@ -572,7 +598,9 @@ std::vector<ov::PropertyName> Plugin::get_supported_internal_properties() const
static const std::vector<ov::PropertyName> supported_internal_properties = {
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::config_device_id.name(), ov::PropertyMutability::WO},
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_format.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_format_supported.name(), ov::PropertyMutability::RO}};
return supported_internal_properties;
}

Expand Down
Loading