[GPU] Use stream executor and exceptions from new api (#18531)
* [Common] Handle ov::Exception too in CompiledModel wrapper

* [GPU] Use new threading and exception interfaces where possible
vladimir-paramuzov authored Jul 14, 2023
1 parent 08cd757 commit 325d02b
Showing 92 changed files with 350 additions and 415 deletions.
37 changes: 23 additions & 14 deletions src/inference/src/dev/icompiled_model_wrapper.cpp
@@ -44,6 +44,24 @@ ov::Any InferenceEngine::ICompiledModelWrapper::get_property(const std::string&
if (ov::loaded_from_cache == name) {
return m_model->isLoadedFromCache();
}

auto get_supported_properties = [&]() {
auto ro_properties = m_model->GetMetric(METRIC_KEY(SUPPORTED_METRICS)).as<std::vector<std::string>>();
auto rw_properties = m_model->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS)).as<std::vector<std::string>>();
std::vector<ov::PropertyName> supported_properties;
for (auto&& ro_property : ro_properties) {
if (ro_property != METRIC_KEY(SUPPORTED_METRICS) && ro_property != METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
supported_properties.emplace_back(ro_property, ov::PropertyMutability::RO);
}
}
for (auto&& rw_property : rw_properties) {
supported_properties.emplace_back(rw_property, ov::PropertyMutability::RW);
}
supported_properties.emplace_back(ov::supported_properties.name(), ov::PropertyMutability::RO);
supported_properties.emplace_back(ov::loaded_from_cache.name(), ov::PropertyMutability::RO);
return supported_properties;
};

if (ov::supported_properties == name) {
try {
auto supported_properties = m_model->GetMetric(name).as<std::vector<ov::PropertyName>>();
@@ -55,25 +73,16 @@ ov::Any InferenceEngine::ICompiledModelWrapper::get_property(const std::string&
}),
supported_properties.end());
return supported_properties;
} catch (ov::Exception&) {
return get_supported_properties();
} catch (InferenceEngine::Exception&) {
auto ro_properties = m_model->GetMetric(METRIC_KEY(SUPPORTED_METRICS)).as<std::vector<std::string>>();
auto rw_properties = m_model->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS)).as<std::vector<std::string>>();
std::vector<ov::PropertyName> supported_properties;
for (auto&& ro_property : ro_properties) {
if (ro_property != METRIC_KEY(SUPPORTED_METRICS) && ro_property != METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
supported_properties.emplace_back(ro_property, ov::PropertyMutability::RO);
}
}
for (auto&& rw_property : rw_properties) {
supported_properties.emplace_back(rw_property, ov::PropertyMutability::RW);
}
supported_properties.emplace_back(ov::supported_properties.name(), ov::PropertyMutability::RO);
supported_properties.emplace_back(ov::loaded_from_cache.name(), ov::PropertyMutability::RO);
return supported_properties;
return get_supported_properties();
}
}
try {
return m_model->GetMetric(name);
} catch (ov::Exception&) {
return m_model->GetConfig(name);
} catch (InferenceEngine::Exception&) {
return m_model->GetConfig(name);
}
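The net effect of this hunk is that the wrapper now treats the new ov::Exception the same way as the legacy InferenceEngine::Exception, with the duplicated legacy fallback factored into the get_supported_properties lambda above. Below is a minimal, hedged sketch of that control flow only; the callables stand in for m_model->GetMetric(name) and the legacy-metric assembly and are not part of the actual wrapper code.

#include <functional>

#include "ie_common.h"               // InferenceEngine::Exception (legacy API)
#include "openvino/core/any.hpp"     // ov::Any
#include "openvino/core/except.hpp"  // ov::Exception

// Sketch: one fallback callable shared by both exception paths, mirroring the
// refactoring above. `query` stands in for m_model->GetMetric(name); `fallback`
// stands in for the SUPPORTED_METRICS / SUPPORTED_CONFIG_KEYS based assembly.
ov::Any get_with_fallback(const std::function<ov::Any()>& query,
                          const std::function<ov::Any()>& fallback) {
    try {
        return query();
    } catch (ov::Exception&) {
        return fallback();
    } catch (InferenceEngine::Exception&) {
        return fallback();
    }
}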
10 changes: 6 additions & 4 deletions src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp
@@ -4,6 +4,8 @@

#pragma once

#include "openvino/runtime/threading/cpu_streams_executor.hpp"

#include "intel_gpu/graph/topology.hpp"
#include "intel_gpu/graph/program.hpp"
#include "intel_gpu/graph/serialization/binary_buffer.hpp"
@@ -81,12 +83,12 @@ struct network {
const topology& topo,
const ExecutionConfig& config = {},
bool is_internal = false,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor = nullptr);
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor = nullptr);

network(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor,
bool is_internal);

network(program::ptr program, uint16_t stream_id = 0);
@@ -103,13 +105,13 @@ struct network {
static ptr build_network(engine& engine,
const topology& topology,
const ExecutionConfig& config = {},
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor = nullptr,
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor = nullptr,
bool is_internal = false);

static ptr build_network(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor,
bool is_internal);

static ptr allocate_network(stream::ptr stream,
16 changes: 9 additions & 7 deletions src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp
@@ -4,6 +4,8 @@

#pragma once

#include "openvino/runtime/threading/cpu_streams_executor.hpp"

#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/runtime/stream.hpp"
#include "intel_gpu/runtime/lru_cache.hpp"
@@ -125,22 +127,22 @@ struct program {
program(engine& engine_ref,
topology const& topology,
const ExecutionConfig& config,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor,
bool is_internal = false,
bool no_optimizations = false,
bool is_body_program = false);

program(engine& engine_ref,
std::set<std::shared_ptr<program_node>> const& nodes,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor,
bool is_internal);

explicit program(engine& engine);
~program();
engine& get_engine() const { return _engine; }
const ExecutionConfig& get_config() const { return _config; }
InferenceEngine::CPUStreamsExecutor::Ptr get_task_executor() const { return _task_executor; }
std::shared_ptr<ov::threading::IStreamsExecutor> get_task_executor() const { return _task_executor; }
std::list<program_node*>& get_inputs() {
return inputs;
} // ToDo: redesign trim to ouptut pass to make it const as_well as get_engine and get options
@@ -240,14 +242,14 @@ struct program {
static ptr build_program(engine& engine,
const topology& topology,
const ExecutionConfig& config,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor,
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor,
bool is_internal = false,
bool no_optimizations = false,
bool is_body_program = false);
static ptr build_program(engine& engine,
const std::set<std::shared_ptr<program_node>>& nodes,
const ExecutionConfig& config,
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> task_executor,
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor,
bool is_internal);
static void init_primitives();
kernels_cache& get_kernels_cache() const;
@@ -261,7 +263,7 @@ struct program {
ICompilationContext& get_compilation_context() const { return *_compilation_context; }
void cancel_compilation_context();

static std::shared_ptr<InferenceEngine::CPUStreamsExecutor> make_task_executor(const ExecutionConfig& config);
static std::shared_ptr<ov::threading::IStreamsExecutor> make_task_executor(const ExecutionConfig& config);

private:
uint32_t prog_id = 0;
@@ -270,7 +272,7 @@ struct program {
// TODO: Consider moving it to engine
std::unique_ptr<kernels_cache> _kernels_cache;
ExecutionConfig _config;
std::shared_ptr<InferenceEngine::CPUStreamsExecutor> _task_executor = nullptr;
std::shared_ptr<ov::threading::IStreamsExecutor> _task_executor = nullptr;
std::list<program_node*> inputs;
std::vector<program_node*> outputs;
nodes_ordering processing_order;
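For orientation, here is a hedged sketch of how the migrated executor type flows through this API. The engine, topology, and config are assumed to exist already, and the namespace home of ExecutionConfig is taken from the surrounding hunk rather than verified, so treat this as illustrative only.

#include "intel_gpu/graph/program.hpp"
#include "openvino/runtime/threading/istreams_executor.hpp"

// Sketch: make_task_executor now returns the new interface type, which is passed
// straight into build_program (the remaining defaulted arguments are omitted).
cldnn::program::ptr build_with_executor(cldnn::engine& engine,
                                        const cldnn::topology& topology,
                                        const cldnn::ExecutionConfig& config) {
    std::shared_ptr<ov::threading::IStreamsExecutor> executor =
        cldnn::program::make_task_executor(config);
    return cldnn::program::build_program(engine, topology, config, executor);
}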
@@ -7,11 +7,13 @@
#define RUN_ALL_MODEL_CACHING_TESTS

#include <unordered_map>
#include "openvino/core/deprecated.hpp"
#include "ie/ie_common.h"

namespace cldnn {
class serial_util {
public:
OPENVINO_SUPPRESS_DEPRECATED_START
static InferenceEngine::Layout layout_from_string(const std::string& name) {
static const std::unordered_map<std::string, InferenceEngine::Layout> layouts = {
{ "ANY", InferenceEngine::Layout::ANY },
@@ -36,8 +38,9 @@ class serial_util {
if (it != layouts.end()) {
return it->second;
}
IE_THROW(NetworkNotRead) << "Unknown layout with name '" << name << "'";
OPENVINO_THROW("Unknown layout with name '", name, "'");
}
OPENVINO_SUPPRESS_DEPRECATED_END
};

class membuf : public std::streambuf {
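Callers of this helper still receive the deprecated InferenceEngine::Layout type, so usage has to sit inside the same suppression guards added above. A small usage sketch (the wrapper function is illustrative, not part of this commit):

#include "openvino/core/deprecated.hpp"
// plus the serialization utils header this hunk belongs to (file name not shown above)

void layout_from_string_example() {
    OPENVINO_SUPPRESS_DEPRECATED_START
    auto layout = cldnn::serial_util::layout_from_string("NCHW");  // -> InferenceEngine::Layout::NCHW
    (void)layout;
    OPENVINO_SUPPRESS_DEPRECATED_END
    // An unknown name now surfaces as ov::Exception via OPENVINO_THROW instead of
    // the former IE_THROW(NetworkNotRead).
}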
24 changes: 6 additions & 18 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp
@@ -7,6 +7,7 @@
#include <ie_layouts.h>
#include "intel_gpu/runtime/layout.hpp"
#include "openvino/core/layout.hpp"
#include "openvino/core/deprecated.hpp"

#include "ngraph/type/element_type.hpp"

@@ -15,7 +16,7 @@ namespace intel_gpu {

#define TensorValue(val) static_cast<cldnn::tensor::value_type>(val)

inline cldnn::tensor tensor_from_dims(const InferenceEngine::SizeVector& dims, int def = 1) {
inline cldnn::tensor tensor_from_dims(const ov::Shape& dims, int def = 1) {
switch (dims.size()) {
case 0: return cldnn::tensor(cldnn::batch(def), cldnn::feature(def), cldnn::spatial(def, def));
case 1: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(def), cldnn::spatial(def, def));
@@ -24,10 +25,11 @@ inline cldnn::tensor tensor_from_dims(const InferenceEngine::SizeVector& dims, i
case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2]));
case 5: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[4], dims[3], dims[2]));
case 6: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[5], dims[4], dims[3], dims[2]));
default: IE_THROW() << "Invalid dimensions size(" << dims.size() << ") for gpu tensor";
default: OPENVINO_THROW("Invalid dimensions size(", dims.size(), ") for gpu tensor");
}
}

OPENVINO_SUPPRESS_DEPRECATED_START
inline cldnn::data_types DataTypeFromPrecision(InferenceEngine::Precision p) {
switch (p) {
case InferenceEngine::Precision::I16:
@@ -74,7 +76,7 @@ inline InferenceEngine::Precision PrecisionFromDataType(cldnn::data_types dt) {
case cldnn::data_types::i64:
return InferenceEngine::Precision::ePrecision::I64;
default:
IE_THROW(ParameterMismatch) << "The plugin does not support " << cldnn::data_type_traits::name(dt) << " data type";
OPENVINO_THROW("The plugin does not support ", cldnn::data_type_traits::name(dt), " data type");
}
}

@@ -140,21 +142,7 @@ inline cldnn::format ImageFormatFromLayout(InferenceEngine::Layout l) {
<< "The plugin does not support " << l << " image layout";
}
}

inline InferenceEngine::Layout InferenceEngineLayoutFromOVLayout(ov::Layout l) {
if (l == ov::Layout("C")) return InferenceEngine::Layout::C;
if (l == ov::Layout("CN")) return InferenceEngine::Layout::CN;
if (l == ov::Layout("HW")) return InferenceEngine::Layout::HW;
if (l == ov::Layout("NC")) return InferenceEngine::Layout::NC;
if (l == ov::Layout("CHW")) return InferenceEngine::Layout::CHW;
if (l == ov::Layout("HWC")) return InferenceEngine::Layout::HWC;
if (l == ov::Layout("NCHW")) return InferenceEngine::Layout::NCHW;
if (l == ov::Layout("NC??")) return InferenceEngine::Layout::NCHW;
if (l == ov::Layout("NHWC")) return InferenceEngine::Layout::NHWC;
if (l == ov::Layout("NCDHW")) return InferenceEngine::Layout::NCDHW;
if (l == ov::Layout("NDHWC")) return InferenceEngine::Layout::NDHWC;
IE_THROW() << "The plugin does not support " << l.to_string() << " layout";
}
OPENVINO_SUPPRESS_DEPRECATED_END

/// WA: Force exit. Any opencl api call can be hang after CL_OUT_OF_RESOURCES.
inline void ForceExit() {
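The tensor_from_dims overload now takes ov::Shape directly instead of InferenceEngine::SizeVector. A short usage sketch, assuming the ov::intel_gpu namespace suggested by the surrounding hunk:

#include "intel_gpu/plugin/common_utils.hpp"
#include "openvino/core/shape.hpp"

void tensor_from_dims_example() {
    // 4D NCHW-like shape: batch=1, feature=3, spatial passed as (x, y),
    // i.e. dims[3] then dims[2], per the switch above.
    auto t4 = ov::intel_gpu::tensor_from_dims(ov::Shape{1, 3, 224, 224});
    // Ranks below 4 are padded with `def` (1 by default); unsupported ranks now throw
    // ov::Exception via OPENVINO_THROW rather than IE_THROW.
    auto t1 = ov::intel_gpu::tensor_from_dims(ov::Shape{8});
    (void)t4; (void)t1;
}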
@@ -3,6 +3,12 @@
//

#include "intel_gpu/runtime/execution_config.hpp"
#include "ie_metric_helpers.hpp"
#include <ie_ngraph_utils.hpp>
#include "ie_plugin_config.hpp"
#include "gpu/gpu_config.hpp"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"
#include "ie_icore.hpp"

namespace ov {
namespace intel_gpu {
25 changes: 12 additions & 13 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/program.hpp
@@ -31,16 +31,15 @@ enum class reduce_mode : uint16_t;
enum class eltwise_mode : int32_t;
} // namespace cldnn

#define REGISTER_FACTORY_IMPL(op_version, op_name) \
void __register ## _ ## op_name ## _ ## op_version(); \
void __register ## _ ## op_name ## _ ## op_version() { \
Program::RegisterFactory<ov::op::op_version::op_name>( \
[](Program& p, const std::shared_ptr<ov::Node>& op) { \
auto op_casted = std::dynamic_pointer_cast<ov::op::op_version::op_name>(op); \
if (!op_casted) \
IE_THROW() << "Invalid ov Node type passed into " << __PRETTY_FUNCTION__; \
Create##op_name##Op(p, op_casted); \
}); \
#define REGISTER_FACTORY_IMPL(op_version, op_name) \
void __register ## _ ## op_name ## _ ## op_version(); \
void __register ## _ ## op_name ## _ ## op_version() { \
Program::RegisterFactory<ov::op::op_version::op_name>( \
[](Program& p, const std::shared_ptr<ov::Node>& op) { \
auto op_casted = std::dynamic_pointer_cast<ov::op::op_version::op_name>(op); \
OPENVINO_ASSERT(op_casted, "[GPU] Invalid ov Node type passed into ", __PRETTY_FUNCTION__); \
Create##op_name##Op(p, op_casted); \
}); \
}

namespace ov {
@@ -84,7 +83,7 @@ class Program {
Program(InferenceEngine::CNNNetwork& network, cldnn::engine& engine, const ExecutionConfig& config,
bool createTopologyOnly = false, bool partialBuild = false,
InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr,
InferenceEngine::CPUStreamsExecutor::Ptr task_executor = nullptr, bool innerProgram = false);
std::shared_ptr<ov::threading::IStreamsExecutor> task_executor = nullptr, bool innerProgram = false);
Program(cldnn::engine& engine, const ExecutionConfig& config,
InferenceEngine::InputsDataMap* inputs = nullptr, InferenceEngine::OutputsDataMap* outputs = nullptr);

@@ -159,7 +158,7 @@ class Program {
bool use_new_shape_infer() const { return allow_new_shape_infer; }
bool requires_new_shape_infer(const ngraph::Node& op) const;

InferenceEngine::CPUStreamsExecutor::Ptr get_task_executor() { return m_task_executor; }
std::shared_ptr<ov::threading::IStreamsExecutor> get_task_executor() { return m_task_executor; }

private:
static factories_map_t factories_map;
@@ -177,7 +176,7 @@ class Program {

bool queryMode;

InferenceEngine::CPUStreamsExecutor::Ptr m_task_executor;
std::shared_ptr<ov::threading::IStreamsExecutor> m_task_executor;

void EnableQueryMode() { queryMode = true; }
void DisableQueryMode() { queryMode = false; }
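Inside the REGISTER_FACTORY_IMPL macro the failed dynamic cast is now reported with OPENVINO_ASSERT instead of IE_THROW. A minimal standalone sketch of that assertion idiom (the target op type is an arbitrary example, not taken from this commit):

#include <memory>

#include "openvino/core/except.hpp"   // OPENVINO_ASSERT / ov::Exception
#include "openvino/core/node.hpp"
#include "openvino/op/parameter.hpp"

void check_node_type(const std::shared_ptr<ov::Node>& op) {
    auto casted = std::dynamic_pointer_cast<ov::op::v0::Parameter>(op);  // example target type
    // Throws ov::Exception with the concatenated message when the cast fails,
    // replacing the old `if (!casted) IE_THROW() << ...` pattern.
    OPENVINO_ASSERT(casted, "[GPU] Invalid ov Node type passed into ", __func__);
}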
3 changes: 0 additions & 3 deletions src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp
@@ -11,7 +11,6 @@
#include "layout.hpp"
#include "execution_config.hpp"
#include "engine_configuration.hpp"
#include <threading/ie_cpu_streams_executor.hpp>

#include <memory>
#include <set>
@@ -147,7 +146,6 @@ class engine {

/// Factory method which creates engine object with impl configured by @p engine_type
/// @param engine_type requested engine type
/// @param task_executor GPU plugin internal task executor
/// @param runtime_type requested execution runtime for the engine. @note some runtime/engine types configurations might be unsupported
/// @param device specifies the device which the engine is created for
/// @param configuration options for the engine
@@ -156,7 +154,6 @@
/// Factory method which creates engine object with impl configured by @p engine_type
/// @param engine_type requested engine type
/// @param runtime_type requested execution runtime for the engine. @note some runtime/engine types configurations might be unsupported
/// @param task_executor GPU plugin internal task executor
/// @param configuration options for the engine
/// @note engine is created for the first device returned by devices query
static std::shared_ptr<cldnn::engine> create(engine_types engine_type, runtime_types runtime_type);
@@ -4,12 +4,7 @@

#pragma once

#include "utils.hpp"

#include <string>
#include <stdexcept>
#include <thread>
#include <threading/ie_cpu_streams_executor.hpp>

namespace cldnn {

2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/arg_max_min.cpp
@@ -60,7 +60,7 @@ layout arg_max_min_inst::calc_output_layout(arg_max_min_node const& node, kernel
auto format = input_layout.format;
auto sizes = input_layout.get_dims();
if (desc->axis >= static_cast<int64_t>(sizes.size()) || desc->axis < 0) {
IE_THROW() << "Incorrect arg_max_min axis.";
OPENVINO_THROW("Incorrect arg_max_min axis.");
}
sizes[desc->axis] = desc->top_k;
return layout{output_data_type, format, tensor(format::get_default_format(input_layout.get_rank()), sizes)};
10 changes: 5 additions & 5 deletions src/plugins/intel_gpu/src/graph/compilation_context.cpp
@@ -12,9 +12,9 @@
namespace cldnn {
class CompilationContext : public ICompilationContext {
public:
CompilationContext(InferenceEngine::CPUStreamsExecutor::Config task_executor_config) : _task_executor_config(task_executor_config) {
CompilationContext(ov::threading::IStreamsExecutor::Config task_executor_config) : _task_executor_config(task_executor_config) {
_task_executor_config._streams = 4;
_task_executor = std::make_shared<InferenceEngine::CPUStreamsExecutor>(_task_executor_config);
_task_executor = std::make_shared<ov::threading::CPUStreamsExecutor>(_task_executor_config);
}

void push_task(size_t key, Task&& task) override {
@@ -62,14 +62,14 @@ class CompilationContext : public ICompilationContext {
}

private:
InferenceEngine::CPUStreamsExecutor::Config _task_executor_config;
InferenceEngine::CPUStreamsExecutor::Ptr _task_executor;
ov::threading::IStreamsExecutor::Config _task_executor_config;
std::shared_ptr<ov::threading::IStreamsExecutor> _task_executor;
std::mutex _mutex;
std::unordered_set<size_t> _task_keys;
std::atomic_bool _stop_compilation{false};
};

std::unique_ptr<ICompilationContext> ICompilationContext::create(InferenceEngine::CPUStreamsExecutor::Config task_executor_config) {
std::unique_ptr<ICompilationContext> ICompilationContext::create(ov::threading::IStreamsExecutor::Config task_executor_config) {
return cldnn::make_unique<CompilationContext>(task_executor_config);
}

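For reference, a hedged sketch of constructing and using the new executor the way CompilationContext does above. The Config constructor taking a name and the public _streams field follow the 2023.0-era API visible in this hunk and may differ in later releases.

#include <memory>

#include "openvino/runtime/threading/cpu_streams_executor.hpp"

void compilation_executor_example() {
    ov::threading::IStreamsExecutor::Config config("GPUCompilationContext");
    config._streams = 4;  // four parallel compilation streams, as in the constructor above
    std::shared_ptr<ov::threading::IStreamsExecutor> executor =
        std::make_shared<ov::threading::CPUStreamsExecutor>(config);
    // run() comes from ov::threading::ITaskExecutor; a compilation task would be enqueued here.
    executor->run([]() { /* compile a kernel asynchronously */ });
}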