Merge branch 'river/cpu_api_2.0_TensorDesc' into river/cpu_plugin_api_2.0_tensor_desc
riverlijunjie committed Nov 8, 2023
2 parents c42a88a + c15a0bb commit 2cd6d23
Showing 34 changed files with 706 additions and 1,127 deletions.
76 changes: 76 additions & 0 deletions src/inference/dev_api/openvino/runtime/internal_properties.hpp
@@ -43,6 +43,46 @@ static constexpr Property<bool, PropertyMutability::RW> exclusive_async_requests
*/
static constexpr Property<std::string, PropertyMutability::WO> config_device_id{"CONFIG_DEVICE_ID"};

/**
* @brief Allow low-precision transformations
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<bool, PropertyMutability::RW> lp_transforms_mode{"LP_TRANSFORMS_MODE"};

/** @cond INTERNAL */
inline std::ostream& operator<<(std::ostream& os, const ov::threading::IStreamsExecutor::ThreadBindingType& type) {
switch (type) {
case ov::threading::IStreamsExecutor::NONE:
return os << "NONE";
case ov::threading::IStreamsExecutor::CORES:
return os << "CORES";
case ov::threading::IStreamsExecutor::NUMA:
return os << "NUMA";
case ov::threading::IStreamsExecutor::HYBRID_AWARE:
return os << "HYBRID_AWARE";
default:
OPENVINO_THROW("Unsupported thread binding type value");
}
}

inline std::istream& operator>>(std::istream& is, ov::threading::IStreamsExecutor::ThreadBindingType& type) {
std::string str;
is >> str;
if (str == "NONE") {
type = ov::threading::IStreamsExecutor::NONE;
} else if (str == "CORES") {
type = ov::threading::IStreamsExecutor::CORES;
} else if (str == "NUMA") {
type = ov::threading::IStreamsExecutor::NUMA;
} else if (str == "HYBRID_AWARE") {
type = ov::threading::IStreamsExecutor::HYBRID_AWARE;
} else {
OPENVINO_THROW("Unsupported thread binding type: ", str);
}
return is;
}
/** @endcond */

/**
* @brief The name of the option for setting CPU affinity per thread.
*
@@ -61,6 +101,42 @@ static constexpr Property<std::string, PropertyMutability::WO> config_device_id{
static constexpr Property<ov::threading::IStreamsExecutor::ThreadBindingType, PropertyMutability::RW> cpu_bind_thread{
"CPU_BIND_THREAD"};

/**
* @brief Number of streams on Performance-cores (big cores)
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<size_t, PropertyMutability::RW> big_core_streams{"BIG_CORE_STREAMS"};

/**
* @brief Number of streams on Efficient-cores (small cores) on a hybrid-core machine
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<size_t, PropertyMutability::RW> small_core_streams{"SMALL_CORE_STREAMS"};

/**
* @brief Number of threads per stream on big cores
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<size_t, PropertyMutability::RW> threads_per_stream_big{"THREADS_PER_STREAM_BIG"};

/**
* @brief Number of threads per stream on small cores on a hybrid-core machine
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<size_t, PropertyMutability::RW> threads_per_stream_small{"THREADS_PER_STREAM_SMALL"};

/**
* @brief Small-core start offset used when binding CPU cores
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<size_t, PropertyMutability::RW> small_core_offset{"SMALL_CORE_OFFSET"};

/**
* @brief Enable hyper-threading
* @ingroup ov_dev_api_plugin_api
*/
static constexpr Property<bool, PropertyMutability::RW> enable_hyper_thread{"ENABLE_HYPER_THREAD"};

/**
* @brief Limit \#threads that are used by IStreamsExecutor to execute `parallel_for` calls
* @ingroup ov_dev_api_plugin_api
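For orientation, below is a minimal, hypothetical sketch of how a plugin-side config parser might consume the internal properties added above through ov::AnyMap. The exact helpers differ per plugin; the ov:: qualification of the internal property objects is an assumption, and the snippet relies on ov::Any's string-to-value conversion, which is exactly what the operator<< / operator>> overloads above enable:

#include "openvino/runtime/internal_properties.hpp"

// Hypothetical plugin-side parsing of the hybrid-core internal properties.
// Property keys come from the typed declarations above; namespace
// qualification of the property objects is an assumption.
static void parse_hybrid_core_config(const ov::AnyMap& raw_config) {
    const auto streams = raw_config.at(ov::big_core_streams.name()).as<size_t>();
    const auto threads = raw_config.at(ov::threads_per_stream_big.name()).as<size_t>();
    // as<ThreadBindingType>() parses the textual value via the operator>> added above
    const auto binding = raw_config.at(ov::cpu_bind_thread.name())
                             .as<ov::threading::IStreamsExecutor::ThreadBindingType>();
    (void)streams; (void)threads; (void)binding;  // a real plugin would store these in its Config
}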
@@ -63,5 +63,11 @@ static constexpr Property<bool> denormals_optimization{"CPU_DENORMALS_OPTIMIZATI
*/
static constexpr Property<float> sparse_weights_decompression_rate{"CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE"};

/**
* @brief Defines how many records can be stored in the CPU runtime parameters cache per CPU runtime parameter type per
* stream.
*/
static constexpr Property<int32_t, PropertyMutability::RW> cpu_runtime_cache_capacity{"CPU_RUNTIME_CACHE_CAPACITY"};

} // namespace intel_cpu
} // namespace ov
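A hedged usage sketch for the new cache-capacity knob. It is declared in the intel_cpu namespace as an internal property, so whether the plugin accepts it from application code may vary; the string key shown is the one registered by the declaration above:

#include <openvino/runtime/core.hpp>

int main() {
    ov::Core core;
    // Cap the CPU runtime parameter cache at 64 records per parameter type, per stream.
    // "CPU_RUNTIME_CACHE_CAPACITY" is the key registered by cpu_runtime_cache_capacity above.
    core.set_property("CPU", {{"CPU_RUNTIME_CACHE_CAPACITY", 64}});
    return 0;
}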
61 changes: 61 additions & 0 deletions src/inference/include/openvino/runtime/properties.hpp
@@ -1109,6 +1109,67 @@ inline std::istream& operator>>(std::istream& is, Affinity& affinity) {
*/
static constexpr Property<Affinity> affinity{"AFFINITY"};

/**
* @brief The name of the option to execute in bfloat16 precision whenever possible
* @ingroup ov_runtime_cpp_prop_api
*
* This option lets the plugin know that it may downscale precision wherever it sees a performance
* benefit from bfloat16 execution.
* It does not guarantee network accuracy: accuracy in this mode should be verified separately by
* the user, and the decision to use the option should be based on the observed performance and
* accuracy results.
*/
static constexpr Property<bool> enforce_bf16{"ENFORCE_BF16"};

/**
* @brief Enum to define possible snippets mode hints
* @ingroup ov_runtime_cpp_prop_api
*/
enum class SnippetsMode {
ENABLE = 0, //!< Enable
IGNORE_CALLBACK = 1, //!< Ignore callback
DISABLE = 2, //!< Disable
};

/** @cond INTERNAL */
inline std::ostream& operator<<(std::ostream& os, const SnippetsMode& mode) {
switch (mode) {
case SnippetsMode::ENABLE:
return os << "ENABLE";
case SnippetsMode::IGNORE_CALLBACK:
return os << "IGNORE_CALLBACK";
case SnippetsMode::DISABLE:
return os << "DISABLE";
default:
OPENVINO_THROW("Unsupported snippets mode value");
}
}

inline std::istream& operator>>(std::istream& is, SnippetsMode& mode) {
std::string str;
is >> str;
if (str == "ENABLE") {
mode = SnippetsMode::ENABLE;
} else if (str == "IGNORE_CALLBACK") {
mode = SnippetsMode::IGNORE_CALLBACK;
} else if (str == "DISABLE") {
mode = SnippetsMode::DISABLE;
} else {
OPENVINO_THROW("Unsupported snippets mode: ", str);
}
return is;
}
/** @endcond */

/**
* @brief Defines the Snippets tokenization mode
* @param ENABLE - default pipeline
* @param IGNORE_CALLBACK - disable the Snippets markup transformation and tokenization callback
* @param DISABLE - turn off Snippets
* @ingroup ov_runtime_cpp_prop_api
*/
static constexpr Property<SnippetsMode, PropertyMutability::RW> snippets_mode{"SNIPPETS_MODE"};

/**
* @brief The devices on which the inference task has been executed.
* @ingroup ov_runtime_cpp_prop_api
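A minimal sketch of why the stream operators matter: they let SnippetsMode values round-trip through ov::Any as text, which is how string-typed plugin configs carry them. This is a sketch only; the unqualified SnippetsMode name assumes the namespace it is declared in within properties.hpp above:

#include <openvino/runtime/properties.hpp>

// Sketch only: relies on ov::Any converting to and from std::string via the
// operator<< / operator>> overloads defined above.
static void snippets_mode_round_trip() {
    ov::Any stored = SnippetsMode::IGNORE_CALLBACK;
    std::string text = stored.as<std::string>();            // "IGNORE_CALLBACK"
    auto restored = ov::Any("DISABLE").as<SnippetsMode>();  // SnippetsMode::DISABLE
    (void)text; (void)restored;
}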
6 changes: 2 additions & 4 deletions src/plugins/intel_cpu/src/compiled_model.cpp
@@ -312,8 +312,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
} else if (name == ov::hint::inference_precision) {
return decltype(ov::hint::inference_precision)::value_type(config.inferencePrecision);
} else if (name == ov::hint::performance_mode) {
const auto perfHint = ov::util::from_string(config.perfHintsConfig.ovPerfHint, ov::hint::performance_mode);
return perfHint;
return decltype(ov::hint::performance_mode)::value_type(config.hintPerfMode);
} else if (name == ov::hint::enable_cpu_pinning.name()) {
const bool use_pin = config.enableCpuPinning;
return decltype(ov::hint::enable_cpu_pinning)::value_type(use_pin);
@@ -326,8 +325,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
} else if (name == ov::hint::execution_mode) {
return config.executionMode;
} else if (name == ov::hint::num_requests) {
const auto perfHintNumRequests = config.perfHintsConfig.ovPerfHintNumRequests;
return decltype(ov::hint::num_requests)::value_type(perfHintNumRequests);
return decltype(ov::hint::num_requests)::value_type(config.hintNumRequests);
} else if (name == ov::execution_devices) {
return decltype(ov::execution_devices)::value_type{m_plugin->get_device_name()};
} else if (name == ov::intel_cpu::denormals_optimization) {
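On the application side nothing changes: the same hints are read back through the public API, now sourced from config.hintPerfMode and config.hintNumRequests as shown above. A small hedged example ("model.xml" is a placeholder path):

#include <openvino/runtime/core.hpp>

int main() {
    ov::Core core;
    auto compiled = core.compile_model("model.xml", "CPU");  // placeholder model path
    // Returns ov::hint::PerformanceMode, backed by config.hintPerfMode above
    auto perf_mode = compiled.get_property(ov::hint::performance_mode);
    // Returns the requests hint, backed by config.hintNumRequests above
    auto num_requests = compiled.get_property(ov::hint::num_requests);
    (void)perf_mode; (void)num_requests;
    return 0;
}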