[GPU] Use stream executor and exceptions from new api (#18531)
* [Common] Handle ov::Exception too in CompiledModel wrapper

* [GPU] Use new threading and exception interfaces where possible
vladimir-paramuzov authored Jul 14, 2023
1 parent 08cd757 commit 325d02b
Showing 92 changed files with 350 additions and 415 deletions.
37 changes: 23 additions & 14 deletions src/inference/src/dev/icompiled_model_wrapper.cpp
@@ -44,6 +44,24 @@ ov::Any InferenceEngine::ICompiledModelWrapper::get_property(const std::string&
if (ov::loaded_from_cache == name) {
return m_model->isLoadedFromCache();
}

auto get_supported_properties = [&]() {
auto ro_properties = m_model->GetMetric(METRIC_KEY(SUPPORTED_METRICS)).as<std::vector<std::string>>();
auto rw_properties = m_model->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS)).as<std::vector<std::string>>();
std::vector<ov::PropertyName> supported_properties;
for (auto&& ro_property : ro_properties) {
if (ro_property != METRIC_KEY(SUPPORTED_METRICS) && ro_property != METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
supported_properties.emplace_back(ro_property, ov::PropertyMutability::RO);
}
}
for (auto&& rw_property : rw_properties) {
supported_properties.emplace_back(rw_property, ov::PropertyMutability::RW);
}
supported_properties.emplace_back(ov::supported_properties.name(), ov::PropertyMutability::RO);
supported_properties.emplace_back(ov::loaded_from_cache.name(), ov::PropertyMutability::RO);
return supported_properties;
};

if (ov::supported_properties == name) {
try {
auto supported_properties = m_model->GetMetric(name).as<std::vector<ov::PropertyName>>();
@@ -55,25 +73,16 @@ ov::Any InferenceEngine::ICompiledModelWrapper::get_property(const std::string&
}),
supported_properties.end());
return supported_properties;
} catch (ov::Exception&) {
return get_supported_properties();
} catch (InferenceEngine::Exception&) {
auto ro_properties = m_model->GetMetric(METRIC_KEY(SUPPORTED_METRICS)).as<std::vector<std::string>>();
auto rw_properties = m_model->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS)).as<std::vector<std::string>>();
std::vector<ov::PropertyName> supported_properties;
for (auto&& ro_property : ro_properties) {
if (ro_property != METRIC_KEY(SUPPORTED_METRICS) && ro_property != METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
supported_properties.emplace_back(ro_property, ov::PropertyMutability::RO);
}
}
for (auto&& rw_property : rw_properties) {
supported_properties.emplace_back(rw_property, ov::PropertyMutability::RW);
}
supported_properties.emplace_back(ov::supported_properties.name(), ov::PropertyMutability::RO);
supported_properties.emplace_back(ov::loaded_from_cache.name(), ov::PropertyMutability::RO);
return supported_properties;
return get_supported_properties();
}
}
try {
return m_model->GetMetric(name);
} catch (ov::Exception&) {
return m_model->GetConfig(name);
} catch (InferenceEngine::Exception&) {
return m_model->GetConfig(name);
}
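The net effect of this hunk is that the wrapper now treats the new ov::Exception the same way as the legacy InferenceEngine::Exception, with the duplicated legacy fallback factored into the get_supported_properties lambda above. Below is a minimal, hedged sketch of that control flow only; the callables stand in for m_model->GetMetric(name) and the legacy-metric assembly and are not part of the actual wrapper code.

#include <functional>

#include "ie_common.h"               // InferenceEngine::Exception (legacy API)
#include "openvino/core/any.hpp"     // ov::Any
#include "openvino/core/except.hpp"  // ov::Exception

// Sketch: one fallback callable shared by both exception paths, mirroring the
// refactoring above. `query` stands in for m_model->GetMetric(name); `fallback`
// stands in for the SUPPORTED_METRICS / SUPPORTED_CONFIG_KEYS based assembly.
ov::Any get_with_fallback(const std::function<ov::Any()>& query,
                          const std::function<ov::Any()>& fallback) {
    try {
        return query();
    } catch (ov::Exception&) {
        return fallback();
    } catch (InferenceEngine::Exception&) {
        return fallback();
    }
}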
10 changes: 6 additions & 4 deletions src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp
@@ -4,6 +4,8 @@

#pragma once

#include "openvino/runtime/threading/cpu_streams_executor.hpp"

#include "intel_gpu/graph/topology.hpp"
#include "intel_gpu/graph/program.hpp"
#include "intel_gpu/graph/serialization/binary_buffer.hpp"
@@ -81,12 +83,12 @@ struct network {
const topology& topo,
const ExecutionConfig& config = {},
bool is_internal = false,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor = nullptr);
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor = nullptr);

network(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor,
bool is_internal);

network(program::ptr program, uint16_t stream_id = 0);
@@ -103,13 +105,13 @@ struct network {
static ptr build_network(engine& engine,
const topology& topology,
const ExecutionConfig& config = {},
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor = nullptr,
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor = nullptr,
bool is_internal = false);

static ptr build_network(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor,
bool is_internal);

static ptr allocate_network(stream::ptr stream,
16 changes: 9 additions & 7 deletions src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp
@@ -4,6 +4,8 @@

#pragma once

#include "openvino/runtime/threading/cpu_streams_executor.hpp"

#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/runtime/stream.hpp"
#include "intel_gpu/runtime/lru_cache.hpp"
@@ -125,22 +127,22 @@ struct program {
program(engine& engine_ref,
topology const& topology,
const ExecutionConfig& config,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor,
bool is_internal = false,
bool no_optimizations = false,
bool is_body_program = false);

program(engine& engine_ref,
std::set<std::shared_ptr<program_node>> const& nodes,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor,
bool is_internal);

explicit program(engine& engine);
~program();
engine& get_engine() const { return _engine; }
const ExecutionConfig& get_config() const { return _config; }
InferenceEngine::CPUStreamsExecutor::Ptr get_task_executor() const { return _task_executor; }
std::shared_ptr<ov::threading::IStreamsExecutor> get_task_executor() const { return _task_executor; }
std::list<program_node*>& get_inputs() {
return inputs;
} // ToDo: redesign trim to ouptut pass to make it const as_well as get_engine and get options
@@ -240,14 +242,14 @@ struct program {
static ptr build_program(engine& engine,
const topology& topology,
const ExecutionConfig& config,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor,
bool is_internal = false,
bool no_optimizations = false,
bool is_body_program = false);
static ptr build_program(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor,
bool is_internal);
static void init_primitives();
kernels_cache& get_kernels_cache() const;
@@ -261,7 +263,7 @@ struct program {
ICompilationContext& get_compilation_context() const { return *_compilation_context; }
void cancel_compilation_context();

static std::shared_ptr<InferenceEngine::CPUStreamsExecutor> make_task_executor(const ExecutionConfig& config);
static std::shared_ptr<ov::threading::IStreamsExecutor> make_task_executor(const ExecutionConfig& config);

private:
uint32_t prog_id = 0;
@@ -270,7 +272,7 @@ struct program {
// TODO: Consider moving it to engine
std::unique_ptr<kernels_cache> _kernels_cache;
ExecutionConfig _config;
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> _task_executor = nullptr;
std::shared_ptr<ov::threading::IStreamsExecutor> _task_executor = nullptr;
std::list<program_node*> inputs;
std::vector<program_node*> outputs;
nodes_ordering processing_order;
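For orientation, here is a hedged sketch of how the migrated executor type flows through this API. The engine, topology, and config are assumed to exist already, and the namespace home of ExecutionConfig is taken from the surrounding hunk rather than verified, so treat this as illustrative only.

#include "intel_gpu/graph/program.hpp"
#include "openvino/runtime/threading/istreams_executor.hpp"

// Sketch: make_task_executor now returns the new interface type, which is passed
// straight into build_program (the remaining defaulted arguments are omitted).
cldnn::program::ptr build_with_executor(cldnn::engine& engine,
                                        const cldnn::topology& topology,
                                        const cldnn::ExecutionConfig& config) {
    std::shared_ptr<ov::threading::IStreamsExecutor> executor =
        cldnn::program::make_task_executor(config);
    return cldnn::program::build_program(engine, topology, config, executor);
}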
@@ -7,11 +7,13 @@
#define RUN_ALL_MODEL_CACHING_TESTS

#include <unordered_map>
#include "openvino/core/deprecated.hpp"
#include "ie/ie_common.h"

namespace cldnn {
class serial_util {
public:
OPENVINO_SUPPRESS_DEPRECATED_START
static InferenceEngine::Layout layout_from_string(const std::string& name) {
static const std::unordered_map<std::string, InferenceEngine::Layout> layouts = {
{ "ANY", InferenceEngine::Layout::ANY },
@@ -36,8 +38,9 @@ class serial_util {
if (it != layouts.end()) {
return it->second;
}
IE_THROW(NetworkNotRead) << "Unknown layout with name '" << name << "'";
OPENVINO_THROW("Unknown layout with name '", name, "'");
}
OPENVINO_SUPPRESS_DEPRECATED_END
};

class membuf : public std::streambuf {
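Callers of this helper still receive the deprecated InferenceEngine::Layout type, so usage has to sit inside the same suppression guards added above. A small usage sketch (the wrapper function is illustrative, not part of this commit):

#include "openvino/core/deprecated.hpp"
// plus the serialization utils header this hunk belongs to (file name not shown above)

void layout_from_string_example() {
    OPENVINO_SUPPRESS_DEPRECATED_START
    auto layout = cldnn::serial_util::layout_from_string("NCHW");  // -> InferenceEngine::Layout::NCHW
    (void)layout;
    OPENVINO_SUPPRESS_DEPRECATED_END
    // An unknown name now surfaces as ov::Exception via OPENVINO_THROW instead of
    // the former IE_THROW(NetworkNotRead).
}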
24 changes: 6 additions & 18 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp
@@ -7,6 +7,7 @@
#include <ie_layouts.h>
#include "intel_gpu/runtime/layout.hpp"
#include "openvino/core/layout.hpp"
#include "openvino/core/deprecated.hpp"

#include "ngraph/type/element_type.hpp"

@@ -15,7 +16,7 @@ namespace intel_gpu {

#define TensorValue(val) static_cast<cldnn::tensor::value_type>(val)

inline cldnn::tensor tensor_from_dims(const InferenceEngine::SizeVector& dims, int def = 1) {
inline cldnn::tensor tensor_from_dims(const ov::Shape& dims, int def = 1) {
switch (dims.size()) {
case 0: return cldnn::tensor(cldnn::batch(def), cldnn::feature(def), cldnn::spatial(def, def));
case 1: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(def), cldnn::spatial(def, def));
@@ -24,10 +25,11 @@ inline cldnn::tensor tensor_from_dims(const InferenceEngine::SizeVector& dims, i
case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2]));
case 5: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[4], dims[3], dims[2]));
case 6: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[5], dims[4], dims[3], dims[2]));
default: IE_THROW() << "Invalid dimensions size(" << dims.size() << ") for gpu tensor";
default: OPENVINO_THROW("Invalid dimensions size(", dims.size(), ") for gpu tensor");
}
}

OPENVINO_SUPPRESS_DEPRECATED_START
inline cldnn::data_types DataTypeFromPrecision(InferenceEngine::Precision p) {
switch (p) {
case InferenceEngine::Precision::I16:
@@ -74,7 +76,7 @@ inline InferenceEngine::Precision PrecisionFromDataType(cldnn::data_types dt) {
case cldnn::data_types::i64:
return InferenceEngine::Precision::ePrecision::I64;
default:
IE_THROW(ParameterMismatch) << "The plugin does not support " << cldnn::data_type_traits::name(dt) << " data type";
OPENVINO_THROW("The plugin does not support ", cldnn::data_type_traits::name(dt), " data type");
}
}

@@ -140,21 +142,7 @@ inline cldnn::format ImageFormatFromLayout(InferenceEngine::Layout l) {
<< "The plugin does not support " << l << " image layout";
}
}

inline InferenceEngine::Layout InferenceEngineLayoutFromOVLayout(ov::Layout l) {
if (l == ov::Layout("C")) return InferenceEngine::Layout::C;
if (l == ov::Layout("CN")) return InferenceEngine::Layout::CN;
if (l == ov::Layout("HW")) return InferenceEngine::Layout::HW;
if (l == ov::Layout("NC")) return InferenceEngine::Layout::NC;
if (l == ov::Layout("CHW")) return InferenceEngine::Layout::CHW;
if (l == ov::Layout("HWC")) return InferenceEngine::Layout::HWC;
if (l == ov::Layout("NCHW")) return InferenceEngine::Layout::NCHW;
if (l == ov::Layout("NC??")) return InferenceEngine::Layout::NCHW;
if (l == ov::Layout("NHWC")) return InferenceEngine::Layout::NHWC;
if (l == ov::Layout("NCDHW")) return InferenceEngine::Layout::NCDHW;
if (l == ov::Layout("NDHWC")) return InferenceEngine::Layout::NDHWC;
IE_THROW() << "The plugin does not support " << l.to_string() << " layout";
}
OPENVINO_SUPPRESS_DEPRECATED_END

/// WA: Force exit. Any opencl api call can be hang after CL_OUT_OF_RESOURCES.
inline void ForceExit() {
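The tensor_from_dims overload now takes ov::Shape directly instead of InferenceEngine::SizeVector. A short usage sketch, assuming the ov::intel_gpu namespace suggested by the surrounding hunk:

#include "intel_gpu/plugin/common_utils.hpp"
#include "openvino/core/shape.hpp"

void tensor_from_dims_example() {
    // 4D NCHW-like shape: batch=1, feature=3, spatial passed as (x, y),
    // i.e. dims[3] then dims[2], per the switch above.
    auto t4 = ov::intel_gpu::tensor_from_dims(ov::Shape{1, 3, 224, 224});
    // Ranks below 4 are padded with `def` (1 by default); unsupported ranks now throw
    // ov::Exception via OPENVINO_THROW rather than IE_THROW.
    auto t1 = ov::intel_gpu::tensor_from_dims(ov::Shape{8});
    (void)t4; (void)t1;
}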
@@ -3,6 +3,12 @@
//

#include "intel_gpu/runtime/execution_config.hpp"
#include "ie_metric_helpers.hpp"
#include <ie_ngraph_utils.hpp>
#include "ie_plugin_config.hpp"
#include "gpu/gpu_config.hpp"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#include "ie_icore.hpp"

namespace ov {
namespace intel_gpu {
25 changes: 12 additions & 13 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp
@@ -31,16 +31,15 @@ enum class reduce_mode : uint16_t;
enum class eltwise_mode : int32_t;
} // namespace cldnn

#define REGISTER_FACTORY_IMPL(op_version, op_name) \
void __register ## _ ## op_name ## _ ## op_version(); \
void __register ## _ ## op_name ## _ ## op_version() { \
Program::RegisterFactory<ov::op::op_version::op_name>( \
[](Program& p, const std::shared_ptr<ov::Node>& op) { \
auto op_casted = std::dynamic_pointer_cast<ov::op::op_version::op_name>(op); \
if (!op_casted) \
IE_THROW() << "Invalid ov Node type passed into " << __PRETTY_FUNCTION__; \
Create##op_name##Op(p, op_casted); \
}); \
#define REGISTER_FACTORY_IMPL(op_version, op_name) \
void __register ## _ ## op_name ## _ ## op_version(); \
void __register ## _ ## op_name ## _ ## op_version() { \
Program::RegisterFactory<ov::op::op_version::op_name>( \
[](Program& p, const std::shared_ptr<ov::Node>& op) { \
auto op_casted = std::dynamic_pointer_cast<ov::op::op_version::op_name>(op); \
OPENVINO_ASSERT(op_casted, "[GPU] Invalid ov Node type passed into ", __PRETTY_FUNCTION__); \
Create##op_name##Op(p, op_casted); \
}); \
}

namespace ov {
@@ -84,7 +83,7 @@ class Program {
Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config,
bool createTopologyOnly = false, bool partialBuild = false,
InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor = nullptr, bool innerProgram = false);
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor = nullptr, bool innerProgram = false);
Program(cldnn::engine& engine, const ExecutionConfig& config,
InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);

@@ -159,7 +158,7 @@ class Program {
bool use_new_shape_infer() const { return allow_new_shape_infer; }
bool requires_new_shape_infer(const ngraph::Node& op) const;

InferenceEngine::CPUStreamsExecutor::Ptr get_task_executor() { return m_task_executor; }
std::shared_ptr<ov::threading::IStreamsExecutor> get_task_executor() { return m_task_executor; }

private:
static factories_map_t factories_map;
@@ -177,7 +176,7 @@ class Program {

bool queryMode;

InferenceEngine::CPUStreamsExecutor::Ptr m_task_executor;
std::shared_ptr<ov::threading::IStreamsExecutor> m_task_executor;

void EnableQueryMode() { queryMode = true; }
void DisableQueryMode() { queryMode = false; }
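Inside the REGISTER_FACTORY_IMPL macro the failed dynamic cast is now reported with OPENVINO_ASSERT instead of IE_THROW. A minimal standalone sketch of that assertion idiom (the target op type is an arbitrary example, not taken from this commit):

#include <memory>

#include "openvino/core/except.hpp"   // OPENVINO_ASSERT / ov::Exception
#include "openvino/core/node.hpp"
#include "openvino/op/parameter.hpp"

void check_node_type(const std::shared_ptr<ov::Node>& op) {
    auto casted = std::dynamic_pointer_cast<ov::op::v0::Parameter>(op);  // example target type
    // Throws ov::Exception with the concatenated message when the cast fails,
    // replacing the old `if (!casted) IE_THROW() << ...` pattern.
    OPENVINO_ASSERT(casted, "[GPU] Invalid ov Node type passed into ", __func__);
}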
3 changes: 0 additions & 3 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp
@@ -11,7 +11,6 @@
#include "layout.hpp"
#include "execution_config.hpp"
#include "engine_configuration.hpp"
#include <threading/ie_cpu_streams_executor.hpp>

#include <memory>
#include <set>
@@ -147,7 +146,6 @@ class engine {

/// Factory method which creates engine object with impl configured by @p engine_type
/// @param engine_type requested engine type
/// @param task_executor GPU plugin internal task executor
/// @param runtime_type requested execution runtime for the engine. @note some runtime/engine types configurations might be unsupported
/// @param device specifies the device which the engine is created for
/// @param configuration options for the engine
@@ -156,7 +154,6 @@
/// Factory method which creates engine object with impl configured by @p engine_type
/// @param engine_type requested engine type
/// @param runtime_type requested execution runtime for the engine. @note some runtime/engine types configurations might be unsupported
/// @param task_executor GPU plugin internal task executor
/// @param configuration options for the engine
/// @note engine is created for the first device returned by devices query
static std::shared_ptr<cldnn::engine> create(engine_types engine_type, runtime_types runtime_type);
@@ -4,12 +4,7 @@

#pragma once

#include "utils.hpp"

#include <string>
#include <stdexcept>
#include <thread>
#include <threading/ie_cpu_streams_executor.hpp>

namespace cldnn {

2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/arg_max_min.cpp
@@ -60,7 +60,7 @@ layout arg_max_min_inst::calc_output_layout(arg_max_min_node const& node, kernel
auto format = input_layout.format;
auto sizes = input_layout.get_dims();
if (desc->axis >= static_cast<int64_t>(sizes.size()) || desc->axis < 0) {
IE_THROW() << "Incorrect arg_max_min axis.";
OPENVINO_THROW("Incorrect arg_max_min axis.");
}
sizes[desc->axis] = desc->top_k;
return layout{output_data_type, format, tensor(format::get_default_format(input_layout.get_rank()), sizes)};
10 changes: 5 additions & 5 deletions src/plugins/intel_gpu/src/graph/compilation_context.cpp
@@ -12,9 +12,9 @@
namespace cldnn {
class CompilationContext : public ICompilationContext {
public:
CompilationContext(InferenceEngine::CPUStreamsExecutor::Config task_executor_config) : _task_executor_config(task_executor_config) {
CompilationContext(ov::threading::IStreamsExecutor::Config task_executor_config) : _task_executor_config(task_executor_config) {
_task_executor_config._streams = 4;
_task_executor = std::make_shared<InferenceEngine::CPUStreamsExecutor>(_task_executor_config);
_task_executor = std::make_shared<ov::threading::CPUStreamsExecutor>(_task_executor_config);
}

void push_task(size_t key, Task&& task) override {
@@ -62,14 +62,14 @@ class CompilationContext : public ICompilationContext {
}

private:
InferenceEngine::CPUStreamsExecutor::Config _task_executor_config;
InferenceEngine::CPUStreamsExecutor::Ptr _task_executor;
ov::threading::IStreamsExecutor::Config _task_executor_config;
std::shared_ptr<ov::threading::IStreamsExecutor> _task_executor;
std::mutex _mutex;
std::unordered_set<size_t> _task_keys;
std::atomic_bool _stop_compilation{false};
};

std::unique_ptr<ICompilationContext> ICompilationContext::create(InferenceEngine::CPUStreamsExecutor::Config task_executor_config) {
std::unique_ptr<ICompilationContext> ICompilationContext::create(ov::threading::IStreamsExecutor::Config task_executor_config) {
return cldnn::make_unique<CompilationContext>(task_executor_config);
}

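For reference, a hedged sketch of constructing and using the new executor the way CompilationContext does above. The Config constructor taking a name and the public _streams field follow the 2023.0-era API visible in this hunk and may differ in later releases.

#include <memory>

#include "openvino/runtime/threading/cpu_streams_executor.hpp"

void compilation_executor_example() {
    ov::threading::IStreamsExecutor::Config config("GPUCompilationContext");
    config._streams = 4;  // four parallel compilation streams, as in the constructor above
    std::shared_ptr<ov::threading::IStreamsExecutor> executor =
        std::make_shared<ov::threading::CPUStreamsExecutor>(config);
    // run() comes from ov::threading::ITaskExecutor; a compilation task would be enqueued here.
    executor->run([]() { /* compile a kernel asynchronously */ });
}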