diff --git a/src/bindings/python/src/openvino/properties/__init__.py b/src/bindings/python/src/openvino/properties/__init__.py
index e4685f5495f4a4..371660bcd9f214 100644
--- a/src/bindings/python/src/openvino/properties/__init__.py
+++ b/src/bindings/python/src/openvino/properties/__init__.py
@@ -5,6 +5,7 @@
 # Enums
 from openvino._pyopenvino.properties import Affinity
 from openvino._pyopenvino.properties import CacheMode
+from openvino._pyopenvino.properties import WorkloadType
 
 # Properties
 import openvino._pyopenvino.properties as __properties
diff --git a/src/bindings/python/src/openvino/runtime/properties/__init__.py b/src/bindings/python/src/openvino/runtime/properties/__init__.py
index 2a6b7a8fa416a9..c25db0bfa4d884 100644
--- a/src/bindings/python/src/openvino/runtime/properties/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/properties/__init__.py
@@ -5,6 +5,7 @@
 # Enums
 from openvino._pyopenvino.properties import Affinity
 from openvino._pyopenvino.properties import CacheMode
+from openvino._pyopenvino.properties import WorkloadType
 
 # Properties
 from openvino._pyopenvino.properties import enable_profiling
diff --git a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
index e008d28e610814..ad53a1b64a1e0f 100644
--- a/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
+++ b/src/bindings/python/src/pyopenvino/core/properties/properties.cpp
@@ -21,6 +21,10 @@ void regmodule_properties(py::module m) {
         .value("NUMA", ov::Affinity::NUMA)
         .value("HYBRID_AWARE", ov::Affinity::HYBRID_AWARE);
 
+    py::enum_<ov::WorkloadType>(m_properties, "WorkloadType", py::arithmetic())
+        .value("DEFAULT", ov::WorkloadType::DEFAULT)
+        .value("EFFICIENT", ov::WorkloadType::EFFICIENT);
+
     py::enum_<ov::CacheMode>(m_properties, "CacheMode", py::arithmetic())
         .value("OPTIMIZE_SIZE", ov::CacheMode::OPTIMIZE_SIZE)
         .value("OPTIMIZE_SPEED", ov::CacheMode::OPTIMIZE_SPEED);
@@ -28,6 +32,7 @@ void regmodule_properties(py::module m) {
     // Submodule properties - properties
    wrap_property_RW(m_properties, ov::enable_profiling, "enable_profiling");
    wrap_property_RW(m_properties, ov::cache_dir, "cache_dir");
+   wrap_property_RW(m_properties, ov::workload_type, "workload_type");
    wrap_property_RW(m_properties, ov::cache_mode, "cache_mode");
    wrap_property_RW(m_properties, ov::auto_batch_timeout, "auto_batch_timeout");
    wrap_property_RW(m_properties, ov::num_streams, "num_streams");
diff --git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp
index d216c805c5f4aa..845691749b64f4 100644
--- a/src/bindings/python/src/pyopenvino/utils/utils.cpp
+++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp
@@ -17,6 +17,7 @@
 #include "openvino/core/meta_data.hpp"
 #include "openvino/frontend/decoder.hpp"
 #include "openvino/frontend/graph_iterator.hpp"
+#include "openvino/runtime/properties.hpp"
 
 using Version = ov::pass::Serialize::Version;
 
@@ -218,6 +219,8 @@ py::object from_ov_any(const ov::Any& any) {
         return py::cast(any.as());
     } else if (any.is()) {
         return py::cast(any.as());
+    } else if (any.is<ov::WorkloadType>()) {
+        return py::cast(any.as<ov::WorkloadType>());
     } else if (any.is()) {
         return py::cast(any.as());
     } else if (any.is()) {
@@ -401,6 +404,8 @@ ov::Any py_object_to_any(const py::object& py_obj) {
         return py::cast(py_obj);
     } else if (py::isinstance(py_obj)) {
         return py::cast(py_obj);
+    } else if (py::isinstance<ov::WorkloadType>(py_obj)) {
+        return py::cast<ov::WorkloadType>(py_obj);
     } else if (py::isinstance(py_obj)) {
         return py::cast(py_obj);
     } else if (py::isinstance>(py_obj)) {
diff --git a/src/bindings/python/tests/test_runtime/test_properties.py b/src/bindings/python/tests/test_runtime/test_properties.py
index d4ad725679a351..e8d3162c362f4f 100644
--- a/src/bindings/python/tests/test_runtime/test_properties.py
+++ b/src/bindings/python/tests/test_runtime/test_properties.py
@@ -61,6 +61,13 @@ def test_properties_rw_base():
             (props.CacheMode.OPTIMIZE_SPEED, "CacheMode.OPTIMIZE_SPEED", 1),
         ),
     ),
+    (
+        props.WorkloadType,
+        (
+            (props.WorkloadType.DEFAULT, "WorkloadType.DEFAULT", 0),
+            (props.WorkloadType.EFFICIENT, "WorkloadType.EFFICIENT", 1),
+        ),
+    ),
     (
         hints.Priority,
         (
diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp
index 6d57ce51167d05..35fd40d32fafdf 100644
--- a/src/inference/include/openvino/runtime/properties.hpp
+++ b/src/inference/include/openvino/runtime/properties.hpp
@@ -10,7 +10,9 @@
  */
 #pragma once
 
+#include <algorithm>
 #include <array>
+#include <cctype>
 #include <map>
 #include <string>
 #include <vector>
@@ -690,6 +692,52 @@ static constexpr Property cache_dir{"CACHE_DIR"};
  */
 static constexpr Property<bool, PropertyMutability::RO> loaded_from_cache{"LOADED_FROM_CACHE"};
 
+/**
+ * @brief Enum to define possible workload types
+ *
+ * Workload type represents the execution priority for an inference.
+ *
+ * @ingroup ov_runtime_cpp_prop_api
+ */
+enum class WorkloadType {
+    DEFAULT = 0,    // Default execution priority
+    EFFICIENT = 1,  // Lower execution priority
+};
+
+/** @cond INTERNAL */
+inline std::ostream& operator<<(std::ostream& os, const WorkloadType& mode) {
+    switch (mode) {
+    case WorkloadType::DEFAULT:
+        return os << "Default";
+    case WorkloadType::EFFICIENT:
+        return os << "Efficient";
+    default:
+        OPENVINO_THROW("Unsupported workload type");
+    }
+}
+
+inline std::istream& operator>>(std::istream& is, WorkloadType& mode) {
+    std::string str;
+    is >> str;
+    std::transform(str.begin(), str.end(), str.begin(), tolower);
+    if (str == "default") {
+        mode = WorkloadType::DEFAULT;
+    } else if (str == "efficient") {
+        mode = WorkloadType::EFFICIENT;
+    } else {
+        OPENVINO_THROW("Unsupported workload type: ", str);
+    }
+    return is;
+}
+/** @endcond */
+
+/**
+ * @brief Read-write property to select the mode in which the workload will be executed.
+ * This is only supported by NPU.
+ * @ingroup ov_runtime_cpp_prop_api
+ */
+static constexpr Property<WorkloadType> workload_type{"WORKLOAD_TYPE"};
+
 /**
  * @brief Enum to define possible cache mode
  * @ingroup ov_runtime_cpp_prop_api
@@ -810,7 +858,6 @@ static constexpr Property enable_mmap{"ENABLE_MMAP
  * @brief Namespace with device properties
  */
 namespace device {
-
 /**
  * @brief the property for setting of required device to execute on
  * values: device id starts from "0" - first device, "1" - second device, etc
@@ -1042,8 +1089,8 @@ inline std::istream& operator>>(std::istream& is, Type& device_type) {
 static constexpr Property<Type, PropertyMutability::RO> type{"DEVICE_TYPE"};
 
 /**
- * @brief Read-only property which defines Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions supported
- * by specified device
+ * @brief Read-only property which defines Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions
+ * supported by specified device
  * @ingroup ov_runtime_cpp_prop_api
 */
 static constexpr Property<std::map<element::Type, float>, PropertyMutability::RO> gops{"DEVICE_GOPS"};
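For context, the new key plugs into the usual ov::Property flow, so an application selects the workload type either at compile time or later on the compiled model. A minimal usage sketch (the model path and the "NPU" device string are illustrative, not part of this change):

    #include "openvino/runtime/core.hpp"

    int main() {
        ov::Core core;
        const auto model = core.read_model("model.xml");  // illustrative path

        // Request the lower execution priority up front...
        auto compiled = core.compile_model(model, "NPU", ov::workload_type(ov::WorkloadType::EFFICIENT));

        // ...or change it on the fly; WORKLOAD_TYPE is registered as a RW property.
        compiled.set_property(ov::workload_type(ov::WorkloadType::DEFAULT));
        return 0;
    }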
diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/al/config/runtime.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/al/config/runtime.hpp
index aa855f188c078f..cf3be645c470c7 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/al/config/runtime.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/al/config/runtime.hpp
@@ -182,4 +182,26 @@ struct ENABLE_CPU_PINNING final : OptionBase {
         return OptionMode::RunTime;
     }
 };
+
+//
+// WORKLOAD_TYPE
+//
+
+struct WORKLOAD_TYPE final : OptionBase<WORKLOAD_TYPE, ov::WorkloadType> {
+    static std::string_view key() {
+        return ov::workload_type.name();
+    }
+
+    static ov::WorkloadType defaultValue() {
+        return ov::WorkloadType::DEFAULT;
+    }
+
+    static constexpr std::string_view getTypeName() {
+        return "ov::WorkloadType";
+    }
+
+    static ov::WorkloadType parse(std::string_view val);
+
+    static std::string toString(const ov::WorkloadType& val);
+};
 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/al/include/npu.hpp b/src/plugins/intel_npu/src/al/include/npu.hpp
index e30d2a9c13dd88..8853b6d8d24de5 100644
--- a/src/plugins/intel_npu/src/al/include/npu.hpp
+++ b/src/plugins/intel_npu/src/al/include/npu.hpp
@@ -35,6 +35,8 @@ class IEngineBackend : public std::enable_shared_from_this<IEngineBackend> {
     virtual const std::string getName() const = 0;
     /** @brief Backend has support for concurrency batching */
     virtual bool isBatchingSupported() const = 0;
+    /** @brief Backend has support for workload type */
+    virtual bool isWorkloadTypeSupported() const = 0;
     /** @brief Register backend-specific options */
     virtual void registerOptions(OptionsDesc& options) const;
 
@@ -47,6 +49,8 @@ class IEngineBackend : public std::enable_shared_from_this<IEngineBackend> {
 class IExecutor {
 public:
     virtual ~IExecutor() = default;
+
+    virtual void setWorkloadType(const ov::WorkloadType workloadType) const = 0;
 };
 
 //------------------------------------------------------------------------------
diff --git a/src/plugins/intel_npu/src/al/src/config/runtime.cpp b/src/plugins/intel_npu/src/al/src/config/runtime.cpp
index bf731d96dd50e9..8dd7e3b4b58bec 100644
--- a/src/plugins/intel_npu/src/al/src/config/runtime.cpp
+++ b/src/plugins/intel_npu/src/al/src/config/runtime.cpp
@@ -4,7 +4,10 @@
 
 #include "intel_npu/al/config/runtime.hpp"
 
+#include <sstream>
+
 #include "intel_npu/al/config/common.hpp"
+#include "openvino/runtime/properties.hpp"
 
 using namespace intel_npu;
 using namespace ov::intel_npu;
@@ -20,6 +23,7 @@ void intel_npu::registerRunTimeOptions(OptionsDesc& desc) {
     desc.add();
     desc.add();
     desc.add();
+    desc.add<WORKLOAD_TYPE>();
 }
 
 // Heuristically obtained number. Varies depending on the values of PLATFORM and PERFORMANCE_HINT
@@ -128,3 +132,22 @@ std::string intel_npu::NUM_STREAMS::toString(const ov::streams::Num& val) {
 
     return stringStream.str();
 }
+
+//
+// WORKLOAD_TYPE
+//
+
+ov::WorkloadType intel_npu::WORKLOAD_TYPE::parse(std::string_view val) {
+    std::istringstream ss = std::istringstream(std::string(val));
+    ov::WorkloadType workloadType;
+
+    ss >> workloadType;
+
+    return workloadType;
+}
+
+std::string intel_npu::WORKLOAD_TYPE::toString(const ov::WorkloadType& val) {
+    std::ostringstream ss;
+    ss << val;
+    return ss.str();
+}
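The parse/toString pair above delegates to the stream operators declared in properties.hpp, which makes the option value case-insensitive on input and canonical on output. A small round-trip sketch using only those operators:

    #include <cassert>
    #include <sstream>

    #include "openvino/runtime/properties.hpp"

    int main() {
        // operator>> lowercases its input, so "EFFICIENT", "Efficient" and
        // "efficient" all parse to the same enumerator.
        std::istringstream in("EFFICIENT");
        ov::WorkloadType type;
        in >> type;
        assert(type == ov::WorkloadType::EFFICIENT);

        // operator<< always prints the canonical spelling.
        std::ostringstream out;
        out << type;
        assert(out.str() == "Efficient");
        return 0;
    }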
diff --git a/src/plugins/intel_npu/src/backend/include/zero_backend.hpp b/src/plugins/intel_npu/src/backend/include/zero_backend.hpp
index 7f4524ec8127ca..e7e8ecedb444ff 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_backend.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_backend.hpp
@@ -26,6 +26,7 @@ class ZeroEngineBackend final : public IEngineBackend {
     uint32_t getDriverExtVersion() const override;
 
     bool isBatchingSupported() const override;
+    bool isWorkloadTypeSupported() const override;
 
 private:
     std::shared_ptr<ZeroInitStructsHolder> _instance;
diff --git a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp b/src/plugins/intel_npu/src/backend/include/zero_executor.hpp
index e516eb75b8254f..bcfdfa34d877c6 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_executor.hpp
@@ -9,6 +9,7 @@
 
 #include "intel_npu/utils/logger/logger.hpp"
 #include "npu.hpp"
+#include "openvino/runtime/properties.hpp"
 #include "zero_init.hpp"
 #include "zero_wrappers.hpp"
 
@@ -32,6 +33,7 @@ class ZeroExecutor final : public IExecutor {
     };
 
     void setArgumentValue(uint32_t argi_, const void* argv_) const;
+    void setWorkloadType(const ov::WorkloadType workloadType) const override;
     inline ze_graph_handle_t graph() const {
         return _graph;
     }
diff --git a/src/plugins/intel_npu/src/backend/include/zero_init.hpp b/src/plugins/intel_npu/src/backend/include/zero_init.hpp
index 634f1dd586b8a0..23227fec2575aa 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_init.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_init.hpp
@@ -10,7 +10,8 @@
 #include
 
 #include "intel_npu/utils/logger/logger.hpp"
-#include "ze_intel_vpu_uuid.h"
+#include "ze_command_queue_npu_ext.h"
+#include "ze_intel_npu_uuid.h"
 #include "zero_types.hpp"
 
 namespace intel_npu {
@@ -39,6 +40,9 @@ class ZeroInitStructsHolder final {
     inline ze_graph_dditable_ext_curr_t* getGraphDdiTable() const {
         return graph_dditable_ext_decorator.get();
     }
+    inline ze_command_queue_npu_dditable_ext_curr_t* getCommandQueueDdiTable() const {
+        return _command_queue_npu_dditable_ext;
+    }
     inline ze_graph_profiling_dditable_ext_t* getProfilingDdiTable() const {
         return _graph_profiling_ddi_table_ext;
     }
@@ -57,6 +61,7 @@ class ZeroInitStructsHolder final {
     ze_device_handle_t device_handle = nullptr;
     ze_context_handle_t context = nullptr;
     std::unique_ptr<ze_graph_dditable_ext_decorator> graph_dditable_ext_decorator;
+    ze_command_queue_npu_dditable_ext_curr_t* _command_queue_npu_dditable_ext = nullptr;
     ze_graph_profiling_dditable_ext_t* _graph_profiling_ddi_table_ext = nullptr;
 
     ze_driver_properties_t driver_properties = {};
diff --git a/src/plugins/intel_npu/src/backend/include/zero_types.hpp b/src/plugins/intel_npu/src/backend/include/zero_types.hpp
index 508439698639c7..06b4818197a418 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_types.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_types.hpp
@@ -8,6 +8,7 @@
 #include
 
 #include "intel_npu/al/config/runtime.hpp"
+#include "ze_command_queue_npu_ext.h"
 
 /**
  * @brief Last version of Table of Graph Extension functions used within plugin
 */
@@ -127,3 +128,4 @@ struct ze_graph_dditable_ext_decorator final {
 };
 
 using ze_graph_dditable_ext_curr_t = ze_graph_dditable_ext_decorator;
+using ze_command_queue_npu_dditable_ext_curr_t = ze_command_queue_npu_dditable_ext_1_0_t;
diff --git a/src/plugins/intel_npu/src/backend/include/zero_wrappers.hpp b/src/plugins/intel_npu/src/backend/include/zero_wrappers.hpp
index 3ec0e402e62b13..c90a37d2b8d90e 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_wrappers.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_wrappers.hpp
@@ -131,6 +131,7 @@ class CommandQueue {
     CommandQueue(const ze_device_handle_t& device_handle,
                  const ze_context_handle_t& context,
                  const ze_command_queue_priority_t& priority,
+                 ze_command_queue_npu_dditable_ext_curr_t* command_queue_npu_dditable_ext,
                  const Config& config,
                  const uint32_t& group_ordinal);
     CommandQueue(const CommandQueue&) = delete;
@@ -140,6 +141,7 @@ class CommandQueue {
 
     void executeCommandList(CommandList& command_list) const;
     void executeCommandList(CommandList& command_list, Fence& fence) const;
+    void setWorkloadType(ze_command_queue_workload_type_t workloadType) const;
 
     ~CommandQueue();
     inline ze_command_queue_handle_t handle() const {
         return _handle;
@@ -148,6 +150,7 @@ class CommandQueue {
 private:
     ze_command_queue_handle_t _handle = nullptr;
     ze_context_handle_t _context = nullptr;
+    ze_command_queue_npu_dditable_ext_curr_t* _command_queue_npu_dditable_ext = nullptr;
 
     Logger _log;
 };
diff --git a/src/plugins/intel_npu/src/backend/src/zero_backend.cpp b/src/plugins/intel_npu/src/backend/src/zero_backend.cpp
index 94a87ab725dae6..ed135dad4c7bc4 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_backend.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_backend.cpp
@@ -34,6 +34,10 @@ bool ZeroEngineBackend::isBatchingSupported() const {
     return _instance->getDriverExtVersion() >= ZE_GRAPH_EXT_VERSION_1_6;
 }
 
+bool ZeroEngineBackend::isWorkloadTypeSupported() const {
+    return _instance->getCommandQueueDdiTable() != nullptr;
+}
+
 ZeroEngineBackend::~ZeroEngineBackend() = default;
 
 const std::shared_ptr<IDevice> ZeroEngineBackend::getDevice() const {
diff --git a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp b/src/plugins/intel_npu/src/backend/src/zero_executor.cpp
index 4882a552155883..c4de9a5440b852 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_executor.cpp
@@ -14,6 +14,8 @@
 #include "intel_npu/al/config/common.hpp"
 #include "intel_npu/al/itt.hpp"
 #include "intel_npu/al/prefix.hpp"
+#include "openvino/runtime/properties.hpp"
+#include "ze_command_queue_npu_ext.h"
 #include "zero_device.hpp"
 #include "zero_utils.hpp"
 
@@ -32,16 +34,19 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i
       _command_queues{{std::make_shared<CommandQueue>(_initStructs->getDevice(),
                                                       _initStructs->getContext(),
                                                       zeroUtils::toZeQueuePriority(_config.get<MODEL_PRIORITY>()),
+                                                      _initStructs->getCommandQueueDdiTable(),
                                                       _config,
                                                       group_ordinal),
                       std::make_shared<CommandQueue>(_initStructs->getDevice(),
                                                      _initStructs->getContext(),
                                                      zeroUtils::toZeQueuePriority(_config.get<MODEL_PRIORITY>()),
+                                                     _initStructs->getCommandQueueDdiTable(),
                                                      _config,
                                                      group_ordinal),
                      std::make_shared<CommandQueue>(_initStructs->getDevice(),
                                                     _initStructs->getContext(),
                                                     zeroUtils::toZeQueuePriority(_config.get<MODEL_PRIORITY>()),
+                                                    _initStructs->getCommandQueueDdiTable(),
                                                     _config,
                                                     group_ordinal)}} {
     _logger.debug("ZeroExecutor::ZeroExecutor - create graph_command_list");
@@ -55,6 +60,7 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i
     CommandQueue graph_command_queue(_initStructs->getDevice(),
                                      _initStructs->getContext(),
                                      ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
+                                     _initStructs->getCommandQueueDdiTable(),
                                      _config,
                                      _group_ordinal);
     _logger.debug("ZeroExecutor::ZeroExecutor - create fence");
@@ -120,6 +126,28 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i
     _logger.debug("ZeroExecutor::ZeroExecutor - performing hostSynchronize");
     fence.hostSynchronize();
     _logger.debug("ZeroExecutor::ZeroExecutor - hostSynchronize completed");
+
+    if (config.has<WORKLOAD_TYPE>()) {
+        setWorkloadType(config.get<WORKLOAD_TYPE>());
+    }
+}
+
+void ZeroExecutor::setWorkloadType(const ov::WorkloadType workloadType) const {
+    ze_command_queue_workload_type_t zeWorkloadType;
+    switch (workloadType) {
+    case ov::WorkloadType::DEFAULT:
+        zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_DEFAULT;
+        break;
+    case ov::WorkloadType::EFFICIENT:
+        zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_BACKGROUND;
+        break;
+    default:
+        OPENVINO_THROW("Unknown value for WorkloadType!");
+    }
+
+    for (auto& queue : _command_queues) {
+        queue->setWorkloadType(zeWorkloadType);
+    }
 }
 
 void ZeroExecutor::setArgumentValue(uint32_t argi_, const void* argv_) const {
diff --git a/src/plugins/intel_npu/src/backend/src/zero_init.cpp b/src/plugins/intel_npu/src/backend/src/zero_init.cpp
index 316069e856112e..9eefe680078360 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_init.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_init.cpp
@@ -6,11 +6,13 @@
 
 #include "intel_npu/al/itt.hpp"
 #include "intel_npu/utils/zero/zero_api.hpp"
+#include "ze_api.h"
+#include "ze_command_queue_npu_ext.h"
 #include "zero_utils.hpp"
 
 namespace intel_npu {
 
-const ze_driver_uuid_t ZeroInitStructsHolder::uuid = ze_intel_vpu_driver_uuid;
+const ze_driver_uuid_t ZeroInitStructsHolder::uuid = ze_intel_npu_driver_uuid;
 
 static std::tuple queryDriverExtensionVersion(ze_driver_handle_t _driverHandle) {
     // query the extension properties
@@ -118,6 +120,17 @@ ZeroInitStructsHolder::ZeroInitStructsHolder() : log("NPUZeroInitStructsHolder",
               ZE_MINOR_VERSION(driver_ext_version),
               graph_ext_name.c_str());
 
+    // Load our command queue extension
+    try {
+        zeroUtils::throwOnFail(
+            "zeDriverGetExtensionFunctionAddress " + std::string(ZE_COMMAND_QUEUE_NPU_EXT_NAME),
+            zeDriverGetExtensionFunctionAddress(driver_handle,
+                                                ZE_COMMAND_QUEUE_NPU_EXT_NAME,
+                                                reinterpret_cast<void**>(&_command_queue_npu_dditable_ext)));
+    } catch (const ov::Exception& error) {
+        log.debug("Current Driver Version does not have the command queue extension: %s", error.what());
+    }
+
     // Load our graph extension
     ze_graph_dditable_ext_last_t* graph_ddi_table_ext = nullptr;
     zeroUtils::throwOnFail("zeDriverGetExtensionFunctionAddress",
diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp
index dcd7d127b79925..669bb107be24c5 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp
@@ -11,6 +11,7 @@
 #include "intel_npu/al/prefix.hpp"
 #include "intel_npu/utils/logger/logger.hpp"
 #include "intel_npu/utils/zero/zero_api.hpp"
+#include "zero_types.hpp"
 
 namespace intel_npu {
 
diff --git a/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp b/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp
index 721a0a0957efd7..b0790644fbe6d4 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp
@@ -6,6 +6,7 @@
 
 #include "intel_npu/al/config/common.hpp"
 #include "intel_npu/utils/zero/zero_api.hpp"
+#include "zero_types.hpp"
 
 namespace intel_npu {
 
@@ -107,9 +108,11 @@ CommandList::~CommandList() {
 CommandQueue::CommandQueue(const ze_device_handle_t& device_handle,
                            const ze_context_handle_t& context,
                            const ze_command_queue_priority_t& priority,
+                           ze_command_queue_npu_dditable_ext_curr_t* command_queue_npu_dditable_ext,
                            const Config& config,
                            const uint32_t& group_ordinal)
     : _context(context),
+      _command_queue_npu_dditable_ext(command_queue_npu_dditable_ext),
      _log("CommandQueue", config.get<LOG_LEVEL>()) {
     ze_command_queue_desc_t queue_desc =
         {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, nullptr, group_ordinal, 0, 0, ZE_COMMAND_QUEUE_MODE_DEFAULT, priority};
@@ -124,6 +127,16 @@ void CommandQueue::executeCommandList(CommandList& command_list, Fence& fence) c
     zeroUtils::throwOnFail("zeCommandQueueExecuteCommandLists",
                            zeCommandQueueExecuteCommandLists(_handle, 1, &command_list._handle, fence.handle()));
 }
+
+void CommandQueue::setWorkloadType(ze_command_queue_workload_type_t workloadType) const {
+    if (_command_queue_npu_dditable_ext != nullptr) {
+        zeroUtils::throwOnFail("zeSetWorkloadType",
+                               _command_queue_npu_dditable_ext->pfnSetWorkloadType(_handle, workloadType));
+    } else {
+        OPENVINO_THROW("The WorkloadType property is not supported by the current Driver Version!");
+    }
+}
+
 CommandQueue::~CommandQueue() {
     auto result = zeCommandQueueDestroy(_handle);
     if (ZE_RESULT_SUCCESS != result) {
diff --git a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp
index 012c2213f5039e..88fe7ee3edc459 100644
--- a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp
+++ b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp
@@ -8,7 +8,7 @@
 #include "intel_npu/al/config/common.hpp"
 #include "intel_npu/utils/zero/zero_api.hpp"
 #include "intel_npu/utils/zero/zero_result.hpp"
-#include "ze_intel_vpu_uuid.h"
+#include "ze_intel_npu_uuid.h"
 #include "zero_compiler_in_driver.hpp"
 
 namespace intel_npu {
@@ -48,7 +48,7 @@ LevelZeroCompilerAdapter::LevelZeroCompilerAdapter() : _logger("LevelZeroCompile
                       uint64_t(result));
     }
 
-    const ze_driver_uuid_t uuid = ze_intel_vpu_driver_uuid;
+    const ze_driver_uuid_t uuid = ze_intel_npu_driver_uuid;
     ze_driver_properties_t props = {};
     props.stype = ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES;
     // Get our target driver
diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
index ed740a02c58d89..f494f2e7c17e83 100644
--- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
+++ b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
@@ -507,6 +507,12 @@ std::string LevelZeroCompilerInDriver::serializeConfig(
         content = std::regex_replace(content, std::regex(batchstr.str()), "");
     }
 
+    // Remove the properties that are not used by the compiler.
+    // WorkloadType is used only by the compiled model.
+    std::ostringstream workloadtypestr;
+    workloadtypestr << ov::workload_type.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER;
+    content = std::regex_replace(content, std::regex(workloadtypestr.str()), "");
+
     // FINAL step to convert prefixes of remaining params, to ensure backwards compatibility
     // From 5.0.0, driver compiler start to use NPU_ prefix, the old version uses VPU_ prefix
     if (compilerVersion.major < 5) {
diff --git a/src/plugins/intel_npu/src/plugin/include/backends.hpp b/src/plugins/intel_npu/src/plugin/include/backends.hpp
index 195c5a1199227b..f896121d579951 100644
--- a/src/plugins/intel_npu/src/plugin/include/backends.hpp
+++ b/src/plugins/intel_npu/src/plugin/include/backends.hpp
@@ -32,6 +32,7 @@ class NPUBackends final {
     uint32_t getDriverVersion() const;
     uint32_t getDriverExtVersion() const;
     bool isBatchingSupported() const;
+    bool isWorkloadTypeSupported() const;
     void registerOptions(OptionsDesc& options) const;
     std::string getCompilationPlatform(const std::string_view platform, const std::string& deviceId) const;
 
diff --git a/src/plugins/intel_npu/src/plugin/include/compiled_model.hpp b/src/plugins/intel_npu/src/plugin/include/compiled_model.hpp
index 083e41072ac32f..0608e83e4e7dab 100644
--- a/src/plugins/intel_npu/src/plugin/include/compiled_model.hpp
+++ b/src/plugins/intel_npu/src/plugin/include/compiled_model.hpp
@@ -83,7 +83,7 @@ class CompiledModel final : public ICompiledModel {
     std::shared_ptr<const NetworkDescription> _networkPtr;
     const std::shared_ptr<const ov::Model> _model;
-    const Config _config;
+    Config _config;
     Logger _logger;
     const std::shared_ptr<IDevice> _device;
     mutable std::shared_ptr<IExecutor> _executorPtr;
diff --git a/src/plugins/intel_npu/src/plugin/src/backends.cpp b/src/plugins/intel_npu/src/plugin/src/backends.cpp
index 29a22e6c423027..12dce3776bd4fe 100644
--- a/src/plugins/intel_npu/src/plugin/src/backends.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/backends.cpp
@@ -163,6 +163,14 @@ bool NPUBackends::isBatchingSupported() const {
     return false;
 }
 
+bool NPUBackends::isWorkloadTypeSupported() const {
+    if (_backend != nullptr) {
+        return _backend->isWorkloadTypeSupported();
+    }
+
+    return false;
+}
+
 std::shared_ptr<IDevice> NPUBackends::getDevice(const std::string& specificName) const {
     _logger.debug("Searching for device %s to use started...", specificName.c_str());
     // TODO iterate over all available backends
diff --git a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
index cb454ea4c80cab..2dfe0b5bb59650 100644
--- a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
@@ -140,8 +140,26 @@ std::shared_ptr CompiledModel::get_runtime_model() const {
     return _model;
 }
 
-void CompiledModel::set_property(const ov::AnyMap& /*properties*/) {
-    OPENVINO_NOT_IMPLEMENTED;
+void CompiledModel::set_property(const ov::AnyMap& properties) {
+    std::map<std::string, std::string> config;
+    for (auto&& value : properties) {
+        config.emplace(value.first, value.second.as<std::string>());
+    }
+    for (const auto& configEntry : config) {
+        if (_properties.find(configEntry.first) == _properties.end()) {
+            OPENVINO_THROW("Unsupported configuration key: ", configEntry.first);
+        } else {
+            if (std::get<1>(_properties[configEntry.first]) == ov::PropertyMutability::RO) {
+                OPENVINO_THROW("READ-ONLY configuration key: ", configEntry.first);
+            }
+        }
+    }
+
+    _config.update(config);
+    if (_executorPtr != nullptr && config.find(ov::workload_type.name()) != config.end()) {
+        const auto workloadType = properties.at(ov::workload_type.name()).as<ov::WorkloadType>();
+        _executorPtr->setWorkloadType(workloadType);
+    }
 }
 
 ov::Any CompiledModel::get_property(const std::string& name) const {
@@ -191,6 +209,15 @@ void CompiledModel::configure_stream_executors() {
 }
 
 void CompiledModel::initialize_properties() {
+    const auto pluginSupportedProperties =
+        get_plugin()->get_property(ov::supported_properties.name(), {}).as<std::vector<ov::PropertyName>>();
+    const auto isPropertySupported = [&pluginSupportedProperties](const std::string& name) {
+        return std::any_of(pluginSupportedProperties.begin(),
+                           pluginSupportedProperties.end(),
+                           [&name](const ov::PropertyName& property) {
+                               return property == name;
+                           });
+    };
     _properties = {
         // OV Public
         // =========
@@ -238,6 +265,12 @@ void CompiledModel::initialize_properties() {
          [](const Config& config) {
              return config.get();
          }}},
+        {ov::workload_type.name(),
+         {isPropertySupported(ov::workload_type.name()),
+          ov::PropertyMutability::RW,
+          [](const Config& config) {
+              return config.get<WORKLOAD_TYPE>();
+          }}},
         // OV Public Hints
         // =========
         {ov::hint::performance_mode.name(),
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 82d4bd733ce268..9973c1fc0a8d6e 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -11,14 +11,14 @@
 #include "device_helpers.hpp"
 #include "intel_npu/al/config/common.hpp"
 #include "intel_npu/al/config/compiler.hpp"
-#include "intel_npu/al/config/runtime.hpp"
 #include "intel_npu/al/config/npuw.hpp"
+#include "intel_npu/al/config/runtime.hpp"
 #include "intel_npu/al/itt.hpp"
+#include "npuw/compiled_model.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/parameter.hpp"
 #include "openvino/runtime/intel_npu/properties.hpp"
-
-#include "npuw/compiled_model.hpp"
+#include "openvino/runtime/properties.hpp"
 
 using namespace intel_npu;
 
@@ -306,6 +306,12 @@ Plugin::Plugin()
           [&](const Config&) {
              return _metrics->GetAvailableDevicesNames();
          }}},
+         {ov::workload_type.name(),
+          {_backends->isWorkloadTypeSupported(),
+           ov::PropertyMutability::RW,
+           [](const Config& config) {
+               return config.get<WORKLOAD_TYPE>();
+           }}},
          {ov::device::capabilities.name(),
           {true,
            ov::PropertyMutability::RO,
@@ -528,13 +534,9 @@ Plugin::Plugin()
           [](const Config& config) {
              return config.getString();
          }}},
-        {ov::intel_npu::batch_mode.name(),
-         {false,
-          ov::PropertyMutability::RW,
-          [](const Config& config) {
-              return config.getString<BATCH_MODE>();
-          }}}
-    };
+        {ov::intel_npu::batch_mode.name(), {false, ov::PropertyMutability::RW, [](const Config& config) {
+             return config.getString<BATCH_MODE>();
+         }}}};
 
     for (auto& property : _properties) {
         if (std::get<0>(property.second)) {
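With set_property implemented, a runtime switch travels CompiledModel -> Config -> ZeroExecutor -> Level Zero command queues. A sketch of the application-side view (the model path and device string are illustrative; on drivers without the command queue extension the call throws, as the tests below exercise):

    #include <iostream>

    #include "openvino/runtime/core.hpp"

    int main() {
        ov::Core core;
        auto compiled = core.compile_model("model.xml", "NPU");  // illustrative

        // Validated against the property map, merged into the Config and, when an
        // executor already exists, forwarded to ZeroExecutor::setWorkloadType().
        compiled.set_property(ov::workload_type(ov::WorkloadType::EFFICIENT));

        // Read back through the Config-backed getter registered above.
        std::cout << compiled.get_property(ov::workload_type) << std::endl;  // prints "Efficient"
        return 0;
    }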
diff --git a/src/plugins/intel_npu/tests/functional/CMakeLists.txt b/src/plugins/intel_npu/tests/functional/CMakeLists.txt
index bdac0a256f3f87..824d7975b2016d 100644
--- a/src/plugins/intel_npu/tests/functional/CMakeLists.txt
+++ b/src/plugins/intel_npu/tests/functional/CMakeLists.txt
@@ -1,9 +1,8 @@
-# Copyright (C) 2018-2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2018-2024 Intel Corporation SPDX-License-Identifier: Apache-2.0
 #
 
 if(ENABLE_LTO)
-    set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE ON)
+  set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE ON)
 endif()
 
 set(TARGET_NAME ov_npu_func_tests)
@@ -12,50 +11,45 @@ set(OPTIONAL_FUNC_TESTS_INCLUDES "")
 set(OPTIONAL_FUNC_TESTS_LIBS "")
 
 if(ENABLE_DRIVER_COMPILER_ADAPTER)
-    list(APPEND OPTIONAL_FUNC_TESTS_INCLUDES
-        "${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/compiler/include"
-    )
+  list(APPEND OPTIONAL_FUNC_TESTS_INCLUDES
+       "${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/src/compiler/include")
 
-    list(APPEND OPTIONAL_FUNC_TESTS_LIBS
-        openvino_npu_driver_compiler_adapter
-    )
+  list(APPEND OPTIONAL_FUNC_TESTS_LIBS openvino_npu_driver_compiler_adapter
+       openvino_npu_level_zero_backend openvino_npu_zero_result_parser)
 endif()
 
 ov_add_test_target(
-    NAME ${TARGET_NAME}
-    ROOT ${CMAKE_CURRENT_SOURCE_DIR}
-    EXCLUDED_SOURCE_PATHS
-        ${EXCLUDED_FUNC_TESTS_DIR}
-    INCLUDES
-        ${CMAKE_CURRENT_SOURCE_DIR}
-        ${OPTIONAL_FUNC_TESTS_INCLUDES}
-        "${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instances"
-        "${CMAKE_CURRENT_SOURCE_DIR}/behavior"
-        "${CMAKE_CURRENT_SOURCE_DIR}/internal"
-    LINK_LIBRARIES
-        ${OPTIONAL_FUNC_TESTS_LIBS}
-        openvino::func_test_utils
-        openvino::funcSharedTests
-        openvino::format_reader
-        openvino::reference
-        openvino::runtime
-        openvino::npu_al
-)
-
-set_target_properties(${TARGET_NAME} PROPERTIES
-    FOLDER ${CMAKE_CURRENT_SOURCE_DIR}
-    CXX_STANDARD 17)
+  NAME
+  ${TARGET_NAME}
+  ROOT
+  ${CMAKE_CURRENT_SOURCE_DIR}
+  EXCLUDED_SOURCE_PATHS
+  ${EXCLUDED_FUNC_TESTS_DIR}
+  INCLUDES
+  ${CMAKE_CURRENT_SOURCE_DIR}
+  ${OPTIONAL_FUNC_TESTS_INCLUDES}
+  "${CMAKE_CURRENT_SOURCE_DIR}/shared_tests_instances"
+  "${CMAKE_CURRENT_SOURCE_DIR}/behavior"
+  "${CMAKE_CURRENT_SOURCE_DIR}/internal"
+  LINK_LIBRARIES
+  ${OPTIONAL_FUNC_TESTS_LIBS}
+  openvino::func_test_utils
+  openvino::funcSharedTests
+  openvino::format_reader
+  openvino::reference
+  openvino::runtime
+  openvino::npu_al)
+
+set_target_properties(
+  ${TARGET_NAME} PROPERTIES FOLDER ${CMAKE_CURRENT_SOURCE_DIR} CXX_STANDARD 17)
 
 if(MSVC)
-    # Enforce standards conformance on MSVC
-    target_compile_options(${TARGET_NAME}
-        PRIVATE
-            /Zc:preprocessor
-    )
+  # Enforce standards conformance on MSVC
+  target_compile_options(${TARGET_NAME} PRIVATE /Zc:preprocessor)
 endif()
 
-install(TARGETS ${TARGET_NAME}
-        RUNTIME DESTINATION tests
-        COMPONENT tests
-        EXCLUDE_FROM_ALL
-)
+install(
+  TARGETS ${TARGET_NAME}
+  RUNTIME DESTINATION tests
+  COMPONENT tests
+  EXCLUDE_FROM_ALL)
diff --git a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp
index c8728650201e6b..52091a4743fb75 100644
--- a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp
+++ b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp
@@ -2,9 +2,19 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
+#include
+
+#include
 #include
+#include
+
 #include "base/ov_behavior_test_utils.hpp"
+#include "intel_npu/al/config/common.hpp"
+#include "npu_private_properties.hpp"
+#include "openvino/core/except.hpp"
 #include "openvino/opsets/opset8.hpp"
+#include "openvino/runtime/properties.hpp"
+#include "zero_init.hpp"
 
 namespace ov {
 namespace test {
@@ -23,15 +33,18 @@ inline std::shared_ptr<Model> getConstantGraph(element::Type type) {
     return std::make_shared<Model>(results, params);
 }
 
+inline bool isWorkloadTypeSupported() {
+    return std::make_shared<::intel_npu::ZeroInitStructsHolder>()->getCommandQueueDdiTable() != nullptr;
+}
+
 typedef std::tuple<std::shared_ptr<ov::Model>,  // Model
                    std::string,                 // Device name
                    ov::AnyMap                   // Config
                    >
-        CompileAndInferRequestParams;
+    CompileAndInferRequestParams;
 
-class OVCompileAndInferRequest :
-    public testing::WithParamInterface<CompileAndInferRequestParams>,
-    public OVInferRequestTestBase {
+class OVCompileAndInferRequest : public testing::WithParamInterface<CompileAndInferRequestParams>,
+                                 public OVInferRequestTestBase {
 public:
     static std::string getTestCaseName(testing::TestParamInfo<CompileAndInferRequestParams> obj) {
         std::shared_ptr<Model> model;
@@ -79,6 +92,97 @@ TEST_P(OVCompileAndInferRequest, AsyncInferRequest) {
     ASSERT_TRUE(is_called);
 }
 
+TEST_P(OVCompileAndInferRequest, PluginWorkloadType) {
+    configuration[workload_type.name()] = WorkloadType::DEFAULT;
+    auto supportedProperties = core->get_property("NPU", supported_properties.name()).as<std::vector<PropertyName>>();
+    bool workloadTypeSupported =
+        std::any_of(supportedProperties.begin(), supportedProperties.end(), [](const PropertyName& property) {
+            return property == workload_type.name();
+        });
+
+    if (isWorkloadTypeSupported()) {
+        ASSERT_TRUE(workloadTypeSupported);
+        ov::InferRequest req;
+        OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration));
+
+        const auto properties = execNet.get_property(supported_properties.name()).as<std::vector<PropertyName>>();
+        ASSERT_TRUE(std::any_of(properties.begin(), properties.end(), [](const PropertyName& property) {
+            return property == workload_type.name();
+        }));
+
+        OV_ASSERT_NO_THROW(req = execNet.create_infer_request());
+        bool is_called = false;
+        OV_ASSERT_NO_THROW(req.set_callback([&](std::exception_ptr exception_ptr) {
+            ASSERT_EQ(exception_ptr, nullptr);
+            is_called = true;
+        }));
+        OV_ASSERT_NO_THROW(req.start_async());
+        OV_ASSERT_NO_THROW(req.wait());
+        ASSERT_TRUE(is_called);
+    } else {
+        ASSERT_FALSE(workloadTypeSupported);
+        OV_EXPECT_THROW_HAS_SUBSTRING(core->compile_model(function, target_device, configuration),
+                                      ov::Exception,
+                                      "WorkloadType property is not supported by the current Driver Version!");
+    }
+}
+
+TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadType) {
+    OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration));
+    ov::AnyMap modelConfiguration;
+    modelConfiguration[workload_type.name()] = WorkloadType::DEFAULT;
+    auto supportedProperties = execNet.get_property(supported_properties.name()).as<std::vector<PropertyName>>();
+    bool workloadTypeSupported =
+        std::any_of(supportedProperties.begin(), supportedProperties.end(), [](const PropertyName& property) {
+            return property == workload_type.name();
+        });
+
+    if (isWorkloadTypeSupported()) {
+        ASSERT_TRUE(workloadTypeSupported);
+        OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration));
+        ov::InferRequest req;
+        OV_ASSERT_NO_THROW(req = execNet.create_infer_request());
+        bool is_called = false;
+        OV_ASSERT_NO_THROW(req.set_callback([&](std::exception_ptr exception_ptr) {
+            ASSERT_EQ(exception_ptr, nullptr);
+            is_called = true;
+        }));
+        OV_ASSERT_NO_THROW(req.start_async());
+        OV_ASSERT_NO_THROW(req.wait());
+        ASSERT_TRUE(is_called);
+    } else {
+        ASSERT_FALSE(workloadTypeSupported);
+        OV_EXPECT_THROW_HAS_SUBSTRING(execNet.set_property(modelConfiguration),
+                                      ov::Exception,
+                                      "WorkloadType property is not supported by the current Driver Version!");
+    }
+}
+
+TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeDelayedExecutor) {
+    configuration[intel_npu::create_executor.name()] = 0;
+    OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration));
+    ov::AnyMap modelConfiguration;
+    modelConfiguration[workload_type.name()] = WorkloadType::DEFAULT;
+    OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration));
+
+    if (isWorkloadTypeSupported()) {
+        ov::InferRequest req;
+        OV_ASSERT_NO_THROW(req = execNet.create_infer_request());
+        bool is_called = false;
+        OV_ASSERT_NO_THROW(req.set_callback([&](std::exception_ptr exception_ptr) {
+            ASSERT_EQ(exception_ptr, nullptr);
+            is_called = true;
+        }));
+        OV_ASSERT_NO_THROW(req.start_async());
+        OV_ASSERT_NO_THROW(req.wait());
+        ASSERT_TRUE(is_called);
+    } else {
+        OV_EXPECT_THROW_HAS_SUBSTRING(execNet.create_infer_request(),
+                                      ov::Exception,
+                                      "WorkloadType property is not supported by the current Driver Version!");
+    }
+}
+
 }  // namespace behavior
 }  // namespace test
 }  // namespace ov
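The tests gate their expectations on isWorkloadTypeSupported(), which needs plugin-internal headers; an application can make the same decision portably by scanning ov::supported_properties first. A sketch of that guard (the helper name is illustrative):

    #include <algorithm>

    #include "openvino/runtime/core.hpp"

    // True when the device reports WORKLOAD_TYPE, i.e. when the driver exposes
    // the command queue extension picked up by ZeroInitStructsHolder.
    bool npuSupportsWorkloadType(ov::Core& core) {
        const auto props = core.get_property("NPU", ov::supported_properties);
        return std::any_of(props.begin(), props.end(), [](const ov::PropertyName& name) {
            return name == ov::workload_type.name();
        });
    }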
diff --git a/src/plugins/intel_npu/thirdparty/level-zero-ext b/src/plugins/intel_npu/thirdparty/level-zero-ext
index d490a130fbb80e..518d64125521cd 160000
--- a/src/plugins/intel_npu/thirdparty/level-zero-ext
+++ b/src/plugins/intel_npu/thirdparty/level-zero-ext
@@ -1 +1 @@
-Subproject commit d490a130fbb80e600b3aed3886c305abcb60d77c
+Subproject commit 518d64125521cd0f8c98d65f9a0fb40013e95d15
diff --git a/src/tests/functional/plugin/shared/include/behavior/compiled_model/compiled_model_base.hpp b/src/tests/functional/plugin/shared/include/behavior/compiled_model/compiled_model_base.hpp
index 8618d53fddccf1..74f6c4193bcf4c 100644
--- a/src/tests/functional/plugin/shared/include/behavior/compiled_model/compiled_model_base.hpp
+++ b/src/tests/functional/plugin/shared/include/behavior/compiled_model/compiled_model_base.hpp
@@ -3,22 +3,22 @@
 //
 
 #include
-#include
 #include
 #include
+#include
 
 #include "base/ov_behavior_test_utils.hpp"
 #include "common_test_utils/file_utils.hpp"
 #include "common_test_utils/ov_test_utils.hpp"
-#include "openvino/op/concat.hpp"
-#include "openvino/runtime/exec_model_info.hpp"
-#include "openvino/runtime/tensor.hpp"
+#include "common_test_utils/subgraph_builders/concat_with_params.hpp"
 #include "common_test_utils/subgraph_builders/conv_pool_relu.hpp"
 #include "common_test_utils/subgraph_builders/multiple_input_outpput_double_concat.hpp"
 #include "common_test_utils/subgraph_builders/single_concat_with_constant.hpp"
-#include "common_test_utils/subgraph_builders/concat_with_params.hpp"
 #include "common_test_utils/subgraph_builders/single_split.hpp"
 #include "common_test_utils/subgraph_builders/split_concat.hpp"
+#include "openvino/op/concat.hpp"
+#include "openvino/runtime/exec_model_info.hpp"
+#include "openvino/runtime/tensor.hpp"
 
 namespace ov {
 namespace test {
@@ -98,7 +98,7 @@ TEST_P(OVCompiledModelBaseTest, canCompileModel) {
 }
 
 TEST_P(OVCompiledModelBaseTest, canCompileModelFromMemory) {
-  std::string model = R"V0G0N(
+    std::string model = R"V0G0N(
@@ -165,11 +165,11 @@
     )V0G0N";
-    EXPECT_NO_THROW(auto execNet = core ->compile_model(model, ov::Tensor(), target_device, configuration));
+    EXPECT_NO_THROW(auto execNet = core->compile_model(model, ov::Tensor(), target_device, configuration));
 }
 
 TEST_P(OVCompiledModelBaseTest, canCompileModelwithBrace) {
-  std::string model = R"V0G0N(
+    std::string model = R"V0G0N(
@@ -628,9 +628,9 @@ TEST_P(OVCompiledModelBaseTest, canLoadCorrectNetworkToGetExecutableWithIncorrec
     for (const auto& confItem : configuration) {
         config.emplace(confItem.first, confItem.second);
     }
-    bool is_meta_devices =
-        target_device.find("AUTO") != std::string::npos || target_device.find("MULTI") != std::string::npos ||
-        target_device.find("HETERO") != std::string::npos;
+    bool is_meta_devices = target_device.find("AUTO") != std::string::npos ||
+                           target_device.find("MULTI") != std::string::npos ||
+                           target_device.find("HETERO") != std::string::npos;
     if (is_meta_devices) {
         EXPECT_NO_THROW(auto execNet = core->compile_model(function, target_device, config));
     } else {
@@ -638,20 +638,11 @@ TEST_P(OVCompiledModelBaseTest, canLoadCorrectNetworkToGetExecutableWithIncorrec
     }
 }
 
-TEST_P(OVAutoExecutableNetworkTest, AutoNotImplementedSetConfigToExecNet) {
-    std::map config;
-    for (const auto& confItem : configuration) {
-        config.emplace(confItem.first, confItem.second);
-    }
-    auto execNet = core->compile_model(function, target_device, config);
-    EXPECT_ANY_THROW(execNet.set_property(config));
-}
-
-typedef std::tuple<
-    ov::element::Type,  // Type to convert
-    std::string,        // Device name
-    ov::AnyMap          // Config
->
-    CompiledModelSetTypeParams;
+typedef std::tuple<ov::element::Type,  // Type to convert
+                   std::string,        // Device name
+                   ov::AnyMap          // Config
+                   >
+    CompiledModelSetTypeParams;
 
 class CompiledModelSetType : public testing::WithParamInterface<CompiledModelSetTypeParams>,
                              public OVCompiledNetworkTestBase {
@@ -726,4 +717,4 @@ TEST_P(CompiledModelSetType, canSetInputOutputTypeAndCompileModel) {
     }
 }  // namespace behavior
 }  // namespace test
-}  // namespace ov
\ No newline at end of file
+}  // namespace ov