[NPU] Adding Workload Type (openvinotoolkit#25382)
### Details:
 - Adding a `workload_type` property (`ov::WorkloadType` enum with `DEFAULT` and `EFFICIENT` values) to control execution priority, exposed through the Python bindings and wired into the NPU plugin
 - *...*

### Tickets:
 - CVS-143714

---------

Co-authored-by: Anastasia Kuporosova <[email protected]>
David Pava and akuporos authored Jul 10, 2024
1 parent 5fd3ba9 commit 3f98a75
Showing 30 changed files with 419 additions and 93 deletions.
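
From application code, the new property can be used roughly as follows (an illustrative C++ sketch, not part of this diff; the model path is hypothetical):

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    const auto model = core.read_model("model.xml");  // hypothetical model path

    // Request lower execution priority for this compiled model on NPU.
    auto compiled = core.compile_model(model, "NPU",
                                       ov::workload_type(ov::WorkloadType::EFFICIENT));

    // The property is read-write, so it can also be changed after compilation.
    compiled.set_property(ov::workload_type(ov::WorkloadType::DEFAULT));
    return 0;
}
```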
1 change: 1 addition & 0 deletions src/bindings/python/src/openvino/properties/__init__.py
@@ -5,6 +5,7 @@
# Enums
from openvino._pyopenvino.properties import Affinity
from openvino._pyopenvino.properties import CacheMode
from openvino._pyopenvino.properties import WorkloadType

# Properties
import openvino._pyopenvino.properties as __properties
@@ -5,6 +5,7 @@
# Enums
from openvino._pyopenvino.properties import Affinity
from openvino._pyopenvino.properties import CacheMode
from openvino._pyopenvino.properties import WorkloadType

# Properties
from openvino._pyopenvino.properties import enable_profiling
@@ -21,13 +21,18 @@ void regmodule_properties(py::module m) {
.value("NUMA", ov::Affinity::NUMA)
.value("HYBRID_AWARE", ov::Affinity::HYBRID_AWARE);

py::enum_<ov::WorkloadType>(m_properties, "WorkloadType", py::arithmetic())
.value("DEFAULT", ov::WorkloadType::DEFAULT)
.value("EFFICIENT", ov::WorkloadType::EFFICIENT);

py::enum_<ov::CacheMode>(m_properties, "CacheMode", py::arithmetic())
.value("OPTIMIZE_SIZE", ov::CacheMode::OPTIMIZE_SIZE)
.value("OPTIMIZE_SPEED", ov::CacheMode::OPTIMIZE_SPEED);

// Submodule properties - properties
wrap_property_RW(m_properties, ov::enable_profiling, "enable_profiling");
wrap_property_RW(m_properties, ov::cache_dir, "cache_dir");
wrap_property_RW(m_properties, ov::workload_type, "workload_type");
wrap_property_RW(m_properties, ov::cache_mode, "cache_mode");
wrap_property_RW(m_properties, ov::auto_batch_timeout, "auto_batch_timeout");
wrap_property_RW(m_properties, ov::num_streams, "num_streams");
5 changes: 5 additions & 0 deletions src/bindings/python/src/pyopenvino/utils/utils.cpp
@@ -17,6 +17,7 @@
#include "openvino/core/meta_data.hpp"
#include "openvino/frontend/decoder.hpp"
#include "openvino/frontend/graph_iterator.hpp"
#include "openvino/runtime/properties.hpp"

using Version = ov::pass::Serialize::Version;

@@ -218,6 +219,8 @@ py::object from_ov_any(const ov::Any& any) {
return py::cast(any.as<ov::streams::Num>());
} else if (any.is<ov::Affinity>()) {
return py::cast(any.as<ov::Affinity>());
} else if (any.is<ov::WorkloadType>()) {
return py::cast(any.as<ov::WorkloadType>());
} else if (any.is<ov::CacheMode>()) {
return py::cast(any.as<ov::CacheMode>());
} else if (any.is<ov::device::UUID>()) {
@@ -401,6 +404,8 @@ ov::Any py_object_to_any(const py::object& py_obj) {
return py::cast<ov::streams::Num>(py_obj);
} else if (py::isinstance<ov::Affinity>(py_obj)) {
return py::cast<ov::Affinity>(py_obj);
} else if (py::isinstance<ov::WorkloadType>(py_obj)) {
return py::cast<ov::WorkloadType>(py_obj);
} else if (py::isinstance<ov::Tensor>(py_obj)) {
return py::cast<ov::Tensor>(py_obj);
} else if (py::isinstance<ov::Output<ov::Node>>(py_obj)) {
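
These two branches are what let the enum cross the C++/Python boundary through `ov::Any`. A small C++-side sketch of the round trip they rely on (illustrative only):

```cpp
#include <openvino/runtime/properties.hpp>

#include <cassert>

int main() {
    // Store the enum in an Any, as the bindings do when passing properties.
    ov::Any any = ov::WorkloadType::EFFICIENT;

    // Typed extraction, mirroring the new from_ov_any() branch.
    assert(any.as<ov::WorkloadType>() == ov::WorkloadType::EFFICIENT);
    return 0;
}
```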
7 changes: 7 additions & 0 deletions src/bindings/python/tests/test_runtime/test_properties.py
@@ -61,6 +61,13 @@ def test_properties_rw_base():
(props.CacheMode.OPTIMIZE_SPEED, "CacheMode.OPTIMIZE_SPEED", 1),
),
),
(
props.WorkloadType,
(
(props.WorkloadType.DEFAULT, "WorkloadType.DEFAULT", 0),
(props.WorkloadType.EFFICIENT, "WorkloadType.EFFICIENT", 1),
),
),
(
hints.Priority,
(
53 changes: 50 additions & 3 deletions src/inference/include/openvino/runtime/properties.hpp
@@ -10,7 +10,9 @@
*/
#pragma once

#include <algorithm>
#include <array>
#include <cctype>
#include <iomanip>
#include <istream>
#include <map>
@@ -690,6 +692,52 @@ static constexpr Property<std::string> cache_dir{"CACHE_DIR"};
*/
static constexpr Property<bool, PropertyMutability::RO> loaded_from_cache{"LOADED_FROM_CACHE"};

/**
* @brief Enum to define possible workload types
*
* Workload type represents the execution priority for an inference.
*
* @ingroup ov_runtime_cpp_prop_api
*/
enum class WorkloadType {
DEFAULT = 0, // Default execution priority
EFFICIENT = 1, // Lower execution priority
};

/** @cond INTERNAL */
inline std::ostream& operator<<(std::ostream& os, const WorkloadType& mode) {
switch (mode) {
case WorkloadType::DEFAULT:
return os << "Default";
case WorkloadType::EFFICIENT:
return os << "Efficient";
default:
OPENVINO_THROW("Unsupported workload type");
}
}

inline std::istream& operator>>(std::istream& is, WorkloadType& mode) {
std::string str;
is >> str;
std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) { return std::tolower(c); });
if (str == "default") {
mode = WorkloadType::DEFAULT;
} else if (str == "efficient") {
mode = WorkloadType::EFFICIENT;
} else {
OPENVINO_THROW("Unsupported workload type: ", str);
}
return is;
}
/** @endcond */

/**
* @brief Read-write property to select the mode in which the workload will be executed.
* This is only supported by the NPU plugin.
* @ingroup ov_runtime_cpp_prop_api
*/
static constexpr Property<WorkloadType, PropertyMutability::RW> workload_type{"WORKLOAD_TYPE"};

/**
* @brief Enum to define possible cache modes
* @ingroup ov_runtime_cpp_prop_api
@@ -810,7 +858,6 @@ static constexpr Property<bool, PropertyMutability::RW> enable_mmap{"ENABLE_MMAP"};
* @brief Namespace with device properties
*/
namespace device {

/**
* @brief the property for setting of required device to execute on
* values: device id starts from "0" - first device, "1" - second device, etc
@@ -1042,8 +1089,8 @@ inline std::istream& operator>>(std::istream& is, Type& device_type) {
static constexpr Property<Type, PropertyMutability::RO> type{"DEVICE_TYPE"};

/**
* @brief Read-only property which defines Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions supported
* by specified device
* @brief Read-only property which defines Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions
* supported by specified device
* @ingroup ov_runtime_cpp_prop_api
*/
static constexpr Property<std::map<element::Type, float>, PropertyMutability::RO> gops{"DEVICE_GOPS"};
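
The stream operators added above give the enum a textual form, which is what lets the property travel through string-based configuration. A quick sketch of the round trip (note the writer emits capitalized names while the reader is case-insensitive):

```cpp
#include <openvino/runtime/properties.hpp>

#include <iostream>
#include <sstream>

int main() {
    std::stringstream ss;
    ss << ov::WorkloadType::EFFICIENT;  // writes "Efficient"

    ov::WorkloadType parsed;
    ss >> parsed;  // input is lowercased, so "Efficient" parses back

    std::cout << (parsed == ov::WorkloadType::EFFICIENT) << "\n";  // prints 1
    return 0;
}
```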
@@ -182,4 +182,26 @@ struct ENABLE_CPU_PINNING final : OptionBase<ENABLE_CPU_PINNING, bool> {
return OptionMode::RunTime;
}
};

//
// WORKLOAD_TYPE
//

struct WORKLOAD_TYPE final : OptionBase<WORKLOAD_TYPE, ov::WorkloadType> {
static std::string_view key() {
return ov::workload_type.name();
}

static ov::WorkloadType defaultValue() {
return ov::WorkloadType::DEFAULT;
}

static constexpr std::string_view getTypeName() {
return "ov::WorkloadType";
}

static ov::WorkloadType parse(std::string_view val);

static std::string toString(const ov::WorkloadType& val);
};
} // namespace intel_npu
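
Declared this way, the option participates in the plugin's generic option machinery. A hedged sketch of how plugin code would typically read it, assuming the `Config::get<Option>()` pattern used elsewhere in intel_npu (the function name here is illustrative):

```cpp
#include "intel_npu/al/config/common.hpp"   // assumed location of intel_npu::Config
#include "intel_npu/al/config/runtime.hpp"  // declares WORKLOAD_TYPE (this diff)

// Illustrative only: "config" is an intel_npu::Config populated from user properties.
ov::WorkloadType resolveWorkloadType(const intel_npu::Config& config) {
    return config.get<intel_npu::WORKLOAD_TYPE>();
}
```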
4 changes: 4 additions & 0 deletions src/plugins/intel_npu/src/al/include/npu.hpp
@@ -35,6 +35,8 @@ class IEngineBackend : public std::enable_shared_from_this<IEngineBackend> {
virtual const std::string getName() const = 0;
/** @brief Backend has support for concurrency batching */
virtual bool isBatchingSupported() const = 0;
/** @brief Backend has support for workload type */
virtual bool isWorkloadTypeSupported() const = 0;
/** @brief Register backend-specific options */
virtual void registerOptions(OptionsDesc& options) const;

@@ -47,6 +49,8 @@ class IEngineBackend : public std::enable_shared_from_this<IEngineBackend> {
class IExecutor {
public:
virtual ~IExecutor() = default;

virtual void setWorkloadType(const ov::WorkloadType workloadType) const = 0;
};

//------------------------------------------------------------------------------
23 changes: 23 additions & 0 deletions src/plugins/intel_npu/src/al/src/config/runtime.cpp
@@ -4,7 +4,10 @@

#include "intel_npu/al/config/runtime.hpp"

#include <sstream>

#include "intel_npu/al/config/common.hpp"
#include "openvino/runtime/properties.hpp"

using namespace intel_npu;
using namespace ov::intel_npu;
@@ -20,6 +23,7 @@ void intel_npu::registerRunTimeOptions(OptionsDesc& desc) {
desc.add<CREATE_EXECUTOR>();
desc.add<NUM_STREAMS>();
desc.add<ENABLE_CPU_PINNING>();
desc.add<WORKLOAD_TYPE>();
}

// Heuristically obtained number. Varies depending on the values of PLATFORM and PERFORMANCE_HINT
@@ -128,3 +132,22 @@ std::string intel_npu::NUM_STREAMS::toString(const ov::streams::Num& val) {

return stringStream.str();
}

//
// WORKLOAD_TYPE
//

ov::WorkloadType intel_npu::WORKLOAD_TYPE::parse(std::string_view val) {
std::istringstream ss = std::istringstream(std::string(val));
ov::WorkloadType workloadType;

ss >> workloadType;

return workloadType;
}

std::string intel_npu::WORKLOAD_TYPE::toString(const ov::WorkloadType& val) {
std::ostringstream ss;
ss << val;
return ss.str();
}
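
Given the stream operators defined in properties.hpp, these helpers round-trip as expected; for instance (illustrative):

```cpp
#include <cassert>

#include "intel_npu/al/config/runtime.hpp"

int main() {
    // parse() is case-insensitive because operator>> lowercases its input.
    const ov::WorkloadType wt = intel_npu::WORKLOAD_TYPE::parse("EFFICIENT");
    assert(wt == ov::WorkloadType::EFFICIENT);

    // toString() goes through operator<<, so it yields "Efficient".
    assert(intel_npu::WORKLOAD_TYPE::toString(wt) == "Efficient");
    return 0;
}
```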
1 change: 1 addition & 0 deletions src/plugins/intel_npu/src/backend/include/zero_backend.hpp
@@ -26,6 +26,7 @@ class ZeroEngineBackend final : public IEngineBackend {
uint32_t getDriverExtVersion() const override;

bool isBatchingSupported() const override;
bool isWorkloadTypeSupported() const override;

private:
std::shared_ptr<ZeroInitStructsHolder> _instance;
2 changes: 2 additions & 0 deletions src/plugins/intel_npu/src/backend/include/zero_executor.hpp
@@ -9,6 +9,7 @@

#include "intel_npu/utils/logger/logger.hpp"
#include "npu.hpp"
#include "openvino/runtime/properties.hpp"
#include "zero_init.hpp"
#include "zero_wrappers.hpp"

@@ -32,6 +33,7 @@ class ZeroExecutor final : public IExecutor {
};

void setArgumentValue(uint32_t argi_, const void* argv_) const;
void setWorkloadType(const ov::WorkloadType workloadType) const override;
inline ze_graph_handle_t graph() const {
return _graph;
}
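
The override declared here is presumably where the OpenVINO-level enum gets translated into the Level Zero extension's queue workload type. A heavily hedged sketch of that mapping (the `ZE_WORKLOAD_TYPE_*` value names are assumptions, not taken from this diff):

```cpp
#include "openvino/core/except.hpp"
#include "openvino/runtime/properties.hpp"
#include "ze_command_queue_npu_ext.h"

// Sketch only: the ZE_WORKLOAD_TYPE_* value names below are assumed.
ze_command_queue_workload_type_t toZeWorkloadType(const ov::WorkloadType type) {
    switch (type) {
    case ov::WorkloadType::DEFAULT:
        return ZE_WORKLOAD_TYPE_DEFAULT;
    case ov::WorkloadType::EFFICIENT:
        return ZE_WORKLOAD_TYPE_BACKGROUND;  // lower priority
    default:
        OPENVINO_THROW("Unsupported workload type");
    }
}
```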
7 changes: 6 additions & 1 deletion src/plugins/intel_npu/src/backend/include/zero_init.hpp
@@ -10,7 +10,8 @@
#include <memory>

#include "intel_npu/utils/logger/logger.hpp"
#include "ze_intel_vpu_uuid.h"
#include "ze_command_queue_npu_ext.h"
#include "ze_intel_npu_uuid.h"
#include "zero_types.hpp"

namespace intel_npu {
@@ -39,6 +40,9 @@ class ZeroInitStructsHolder final {
inline ze_graph_dditable_ext_curr_t* getGraphDdiTable() const {
return graph_dditable_ext_decorator.get();
}
inline ze_command_queue_npu_dditable_ext_curr_t* getCommandQueueDdiTable() const {
return _command_queue_npu_dditable_ext;
}
inline ze_graph_profiling_dditable_ext_t* getProfilingDdiTable() const {
return _graph_profiling_ddi_table_ext;
}
@@ -57,6 +61,7 @@ class ZeroInitStructsHolder final {
ze_device_handle_t device_handle = nullptr;
ze_context_handle_t context = nullptr;
std::unique_ptr<ze_graph_dditable_ext_decorator> graph_dditable_ext_decorator;
ze_command_queue_npu_dditable_ext_curr_t* _command_queue_npu_dditable_ext = nullptr;
ze_graph_profiling_dditable_ext_t* _graph_profiling_ddi_table_ext = nullptr;

ze_driver_properties_t driver_properties = {};
2 changes: 2 additions & 0 deletions src/plugins/intel_npu/src/backend/include/zero_types.hpp
@@ -8,6 +8,7 @@
#include <ze_graph_ext.h>

#include "intel_npu/al/config/runtime.hpp"
#include "ze_command_queue_npu_ext.h"

/**
* @brief Last version of Table of Graph Extension functions used within plugin
@@ -127,3 +128,4 @@ struct ze_graph_dditable_ext_decorator final {
};

using ze_graph_dditable_ext_curr_t = ze_graph_dditable_ext_decorator;
using ze_command_queue_npu_dditable_ext_curr_t = ze_command_queue_npu_dditable_ext_1_0_t;
3 changes: 3 additions & 0 deletions src/plugins/intel_npu/src/backend/include/zero_wrappers.hpp
@@ -131,6 +131,7 @@ class CommandQueue {
CommandQueue(const ze_device_handle_t& device_handle,
const ze_context_handle_t& context,
const ze_command_queue_priority_t& priority,
ze_command_queue_npu_dditable_ext_curr_t* command_queue_npu_dditable_ext,
const Config& config,
const uint32_t& group_ordinal);
CommandQueue(const CommandQueue&) = delete;
@@ -140,6 +141,7 @@

void executeCommandList(CommandList& command_list) const;
void executeCommandList(CommandList& command_list, Fence& fence) const;
void setWorkloadType(ze_command_queue_workload_type_t workloadType) const;
~CommandQueue();
inline ze_command_queue_handle_t handle() const {
return _handle;
@@ -148,6 +150,7 @@
private:
ze_command_queue_handle_t _handle = nullptr;
ze_context_handle_t _context = nullptr;
ze_command_queue_npu_dditable_ext_curr_t* _command_queue_npu_dditable_ext = nullptr;

Logger _log;
};
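
With the DDI-table pointer now stored on the queue, the new `setWorkloadType` presumably forwards to the driver extension. A hedged sketch of the implementation (the `pfnSetWorkloadType` entry-point name is an assumption):

```cpp
// Sketch only: the extension entry-point name is assumed, and the real
// implementation likely also checks the returned ze_result_t.
void CommandQueue::setWorkloadType(ze_command_queue_workload_type_t workloadType) const {
    if (_command_queue_npu_dditable_ext == nullptr) {
        OPENVINO_THROW("Workload type is not supported by the current driver");
    }
    _command_queue_npu_dditable_ext->pfnSetWorkloadType(_handle, workloadType);
}
```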
4 changes: 4 additions & 0 deletions src/plugins/intel_npu/src/backend/src/zero_backend.cpp
@@ -34,6 +34,10 @@ bool ZeroEngineBackend::isBatchingSupported() const {
return _instance->getDriverExtVersion() >= ZE_GRAPH_EXT_VERSION_1_6;
}

bool ZeroEngineBackend::isWorkloadTypeSupported() const {
return _instance->getCommandQueueDdiTable() != nullptr;
}

ZeroEngineBackend::~ZeroEngineBackend() = default;

const std::shared_ptr<IDevice> ZeroEngineBackend::getDevice() const {
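
`isWorkloadTypeSupported()` keys off whether the driver exposed the command-queue extension table, so callers can use it as a capability guard before applying the property. An illustrative fragment (`backend` and `executor` are assumed handles, not names from this diff):

```cpp
// Illustrative guard, tying the new capability query to the executor call.
if (!backend->isWorkloadTypeSupported()) {
    OPENVINO_THROW("WORKLOAD_TYPE is not supported by the current NPU driver");
}
executor->setWorkloadType(ov::WorkloadType::EFFICIENT);
```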