Skip to content

Commit

Permalink
[NPUW] Properties for npuw::CompiledModel are split into private and public (#25310)
Browse files Browse the repository at this point in the history

### Details:
This PR defines public and private properties in `npuw::CompiledModel`:
- All public properties and hints from `intel_npu::CompiledModel` become public properties in `npuw::CompiledModel`
- All `*NPUW*` properties are private

### Tickets:
 - *ticket-id*
  • Loading branch information
AsyaPronina authored Jul 9, 2024
1 parent d55523c commit 76c00c3
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 100 deletions.
209 changes: 109 additions & 100 deletions src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -654,15 +654,94 @@ void ov::npuw::CompiledModel::log_device_dist() const {
void ov::npuw::CompiledModel::implement_properties() {
// This function fills the map: {`property name`: `getter for property value`},
// that can be used later to return requested properties by user.
// It does it in 4 steps:
// It does it in 3 steps:
//
// 1. Create mappings for all NPUW-specific properties to getters of their
// values from config.
// 2. Create mappings for all copied from HETERO plugin properties, to
// their copied implementations.
// 3. Fill `m_all_supported_props` with all properties, mentioned above.
// 1. Create mappings for OV public properties and hints, exposed
// in ::intel_npu::CompiledModel.
// 2. Fill `m_all_supported_props` vector with property names from
// the 1st step. It will be returned as response to `ov::supported_properties`
// request. So the vector will define public properties.
// 3. Create mappings for all remaining (private) NPUW-specific properties
// to getters of their values from config.

#define GET_PLUGIN_PROP(property) return get_plugin()->get_property(property.name(), ov::AnyMap());

// 1.
// OV Public
// ===============================================
m_prop_to_opt = {{ov::supported_properties.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) {
return m_all_supported_props;
}}},
{ov::device::id.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) {
GET_PLUGIN_PROP(ov::device::id);
}}},
{ov::enable_profiling.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) {
GET_PLUGIN_PROP(ov::enable_profiling);
}}},
{ov::model_name.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) {
return m_name;
}}},
{ov::optimal_number_of_infer_requests.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) {
return 1u;
}}},
{ov::execution_devices.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) {
return "NPU";
}}},
{ov::loaded_from_cache.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) {
return m_loaded_from_cache;
}}},
// OV Public Hints
// =====================================================
{ov::hint::performance_mode.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) {
GET_PLUGIN_PROP(ov::hint::performance_mode);
}}},
{ov::hint::execution_mode.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) {
GET_PLUGIN_PROP(ov::hint::execution_mode);
}}},
{ov::hint::num_requests.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) {
GET_PLUGIN_PROP(ov::hint::num_requests);
}}},
{ov::hint::inference_precision.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) {
GET_PLUGIN_PROP(ov::hint::inference_precision);
}}},
{ov::hint::enable_cpu_pinning.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) {
GET_PLUGIN_PROP(ov::hint::enable_cpu_pinning);
}}},
{ov::hint::model_priority.name(), {ov::PropertyMutability::RO, [&](const ::intel_npu::Config&) {
GET_PLUGIN_PROP(ov::hint::model_priority);
}}}};
#undef GET_PLUGIN_PROP

// 2.
for (auto& p : m_prop_to_opt) {
m_all_supported_props.emplace_back(ov::PropertyName(p.first, std::get<0>(p.second)));
}

// 3.
#define BIND(N, T) \
{ \
ov::intel_npu::N.name(), { \
Expand All @@ -672,101 +751,31 @@ void ov::npuw::CompiledModel::implement_properties() {
} \
}

m_prop_to_opt = {BIND(use_npuw, NPU_USE_NPUW),
BIND(npuw::devices, NPUW_DEVICES),
BIND(npuw::submodel_device, NPUW_SUBMODEL_DEVICE),
BIND(npuw::partitioning::online::pipeline, NPUW_ONLINE_PIPELINE),
BIND(npuw::partitioning::online::min_size, NPUW_ONLINE_MIN_SIZE),
BIND(npuw::partitioning::online::avoid, NPUW_ONLINE_AVOID),
BIND(npuw::partitioning::online::dump_plan, NPUW_ONLINE_DUMP_PLAN),
BIND(npuw::partitioning::plan, NPUW_PLAN),
BIND(npuw::partitioning::fold, NPUW_FOLD),
BIND(npuw::partitioning::cwai, NPUW_CWAI),
BIND(npuw::partitioning::funcall_for_all, NPUW_FUNCALL_FOR_ALL),
BIND(npuw::parallel_compilation, NPUW_PARALLEL_COMPILE),
BIND(npuw::partitioning::dcoff_type, NPUW_DCOFF_TYPE),
BIND(npuw::partitioning::dcoff_with_scale, NPUW_DCOFF_SCALE),
BIND(npuw::funcall_async, NPUW_FUNCALL_ASYNC),
BIND(npuw::accuracy::check, NPUW_ACC_CHECK),
BIND(npuw::accuracy::threshold, NPUW_ACC_THRESH),
BIND(npuw::accuracy::reference_device, NPUW_ACC_DEVICE),
m_prop_to_opt.insert({BIND(use_npuw, NPU_USE_NPUW),
BIND(npuw::devices, NPUW_DEVICES),
BIND(npuw::submodel_device, NPUW_SUBMODEL_DEVICE),
BIND(npuw::partitioning::online::pipeline, NPUW_ONLINE_PIPELINE),
BIND(npuw::partitioning::online::min_size, NPUW_ONLINE_MIN_SIZE),
BIND(npuw::partitioning::online::avoid, NPUW_ONLINE_AVOID),
BIND(npuw::partitioning::online::dump_plan, NPUW_ONLINE_DUMP_PLAN),
BIND(npuw::partitioning::plan, NPUW_PLAN),
BIND(npuw::partitioning::fold, NPUW_FOLD),
BIND(npuw::partitioning::cwai, NPUW_CWAI),
BIND(npuw::partitioning::funcall_for_all, NPUW_FUNCALL_FOR_ALL),
BIND(npuw::parallel_compilation, NPUW_PARALLEL_COMPILE),
BIND(npuw::partitioning::dcoff_type, NPUW_DCOFF_TYPE),
BIND(npuw::partitioning::dcoff_with_scale, NPUW_DCOFF_SCALE),
BIND(npuw::funcall_async, NPUW_FUNCALL_ASYNC),
BIND(npuw::accuracy::check, NPUW_ACC_CHECK),
BIND(npuw::accuracy::threshold, NPUW_ACC_THRESH),
BIND(npuw::accuracy::reference_device, NPUW_ACC_DEVICE),
#ifdef NPU_PLUGIN_DEVELOPER_BUILD
BIND(npuw::dump::full, NPUW_DUMP_FULL),
BIND(npuw::dump::subgraphs, NPUW_DUMP_SUBS),
BIND(npuw::dump::subgraphs_on_fail, NPUW_DUMP_SUBS_ON_FAIL),
BIND(npuw::dump::inputs_outputs, NPUW_DUMP_IO),
BIND(npuw::dump::io_iters, NPUW_DUMP_IO_ITERS)
BIND(npuw::dump::full, NPUW_DUMP_FULL),
BIND(npuw::dump::subgraphs, NPUW_DUMP_SUBS),
BIND(npuw::dump::subgraphs_on_fail, NPUW_DUMP_SUBS_ON_FAIL),
BIND(npuw::dump::inputs_outputs, NPUW_DUMP_IO),
BIND(npuw::dump::io_iters, NPUW_DUMP_IO_ITERS)
#endif
};
});
#undef BIND
// 2.
m_prop_to_opt.insert(
{{ov::supported_properties.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) -> const std::vector<ov::PropertyName, std::allocator<ov::PropertyName>>& {
return m_all_supported_props;
}}},
{ov::device::properties.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) {
ov::AnyMap all_devices = {};
for (size_t i = 0; i < m_compiled_submodels.size(); ++i) {
const auto& comp_model_desc = m_compiled_submodels[i];
if (!comp_model_desc.compiled_model) // Handle if optimized out
continue;
ov::AnyMap device_properties = {};
if (all_devices.count(submodel_device(i)) == 0) {
auto device_supported_props =
comp_model_desc.compiled_model->get_property(ov::supported_properties.name());
for (auto&& property_name : device_supported_props.as<std::vector<ov::PropertyName>>())
device_properties[property_name] =
comp_model_desc.compiled_model->get_property(property_name);
all_devices[submodel_device(i)] = device_properties;
}
}
return all_devices;
}}},
{ov::model_name.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) -> const std::string& {
return m_name;
}}},
{ov::optimal_number_of_infer_requests.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) {
unsigned int value = 0u;
for (const auto& comp_model_desc : m_compiled_submodels) {
if (comp_model_desc.compiled_model) { // Some models may be optimized out
value = std::max(
value,
comp_model_desc.compiled_model->get_property(ov::optimal_number_of_infer_requests.name())
.as<unsigned int>());
}
}
return value;
}}},
{ov::execution_devices.name(),
{ov::PropertyMutability::RO,
[&](const ::intel_npu::Config&) {
std::vector<std::string> device_names;
std::set<std::string> s;
for (size_t i = 0; i < m_compiled_submodels.size(); ++i) {
const auto& comp_model_desc = m_compiled_submodels[i];
if (!comp_model_desc.compiled_model) // handle optimized out
continue;
if (s.count(submodel_device(i)) != 0)
continue;
s.insert(submodel_device(i));
device_names.push_back(submodel_device(i));
}
return decltype(ov::execution_devices)::value_type{std::move(device_names)};
}}},
{ov::loaded_from_cache.name(), {ov::PropertyMutability::RO, [&](const ::intel_npu::Config&) {
return m_loaded_from_cache;
}}}});

// 3.
for (auto& p : m_prop_to_opt) {
m_all_supported_props.emplace_back(ov::PropertyName(p.first, std::get<0>(p.second)));
}
}
4 changes: 4 additions & 0 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,10 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
// activate the NPUW path
auto useNpuwKey = ov::intel_npu::use_npuw.name();
if (properties.count(useNpuwKey) && properties.at(useNpuwKey).as<bool>()) {
// CACHE_DIR isn't supported with NPU_USE_NPUW
if (properties.count(ov::cache_dir.name()) || !_globalConfig.get<CACHE_DIR>().empty()) {
OPENVINO_THROW("Option 'CACHE_DIR' is not supported with NPU_USE_NPUW");
}
return std::make_shared<ov::npuw::CompiledModel>(model->clone(), shared_from_this(), properties);
}

Expand Down

0 comments on commit 76c00c3

Please sign in to comment.