From 7f8786d9aaa2a1926c84aff1ca838ec226c914ab Mon Sep 17 00:00:00 2001
From: Maxim Vafin
Date: Mon, 20 Mar 2023 22:08:24 +0100
Subject: [PATCH 001/296] [PT FE] Make NodeContext constant inside conversion
 rules (#16165)

* Make NodeContext constant inside conversion rules

* Use shared_ptr

* Fix ptr

* Fix logical not
---
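Note: the bulk of this patch is one mechanical change repeated across the
op/*.cpp converters below: every translate_* entry point now takes the node
context by const reference, and the context state that still has to change
during translation moved behind shared_ptr members so that const methods can
update it. A minimal sketch of the pattern, using translate_relu6 from this
patch as the example; template arguments are abbreviated here the same way
the hunks below abbreviate them, and the exact CreatorFunction signature is
assumed to be OutputVector(const NodeContext&):

    // Before: a conversion rule received a mutable context.
    OutputVector translate_relu6(NodeContext& context);

    // After: the context is read-only inside the rule. Bookkeeping such as
    // the set of mutated tensor ids lives in shared_ptr members
    // (m_tensor_map, m_mutated_tensors), so const methods like
    // mutate_input() and add_tensor_to_context() can still record state.
    OutputVector translate_relu6(const NodeContext& context);

    // Registration alias, renamed from PytorchCreatorFunction:
    using CreatorFunction = std::function<OutputVector(const NodeContext&)>;
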
 .../openvino/frontend/pytorch/frontend.hpp | 2 +-
 .../frontend/pytorch/node_context.hpp | 34 ++++++-------
 src/frontends/pytorch/src/node_context.cpp | 10 ++--
 .../pytorch/src/op/adaptive_avg_pool3d.cpp | 2 +-
 .../pytorch/src/op/adaptive_max_pool2d.cpp | 2 +-
 src/frontends/pytorch/src/op/add.cpp | 2 +-
 src/frontends/pytorch/src/op/addcmul.cpp | 2 +-
 src/frontends/pytorch/src/op/addmm.cpp | 2 +-
 src/frontends/pytorch/src/op/arange.cpp | 2 +-
 src/frontends/pytorch/src/op/as_tensor.cpp | 2 +-
 src/frontends/pytorch/src/op/avg_poolnd.cpp | 2 +-
 src/frontends/pytorch/src/op/batch_norm.cpp | 2 +-
 src/frontends/pytorch/src/op/bitwise_not.cpp | 2 +-
 src/frontends/pytorch/src/op/bool.cpp | 2 +-
 src/frontends/pytorch/src/op/cat.cpp | 2 +-
 src/frontends/pytorch/src/op/clamp.cpp | 2 +-
 src/frontends/pytorch/src/op/constant.cpp | 2 +-
 .../pytorch/src/op/conv_transposend.cpp | 2 +-
 src/frontends/pytorch/src/op/convnd.cpp | 2 +-
 src/frontends/pytorch/src/op/convolution.cpp | 2 +-
 .../pytorch/src/op/convolution_mode.cpp | 2 +-
 src/frontends/pytorch/src/op/cumsum.cpp | 2 +-
 src/frontends/pytorch/src/op/dim.cpp | 2 +-
 src/frontends/pytorch/src/op/div.cpp | 2 +-
 src/frontends/pytorch/src/op/elu.cpp | 2 +-
 src/frontends/pytorch/src/op/embedding.cpp | 2 +-
 src/frontends/pytorch/src/op/expand.cpp | 4 +-
 src/frontends/pytorch/src/op/eye.cpp | 2 +-
 src/frontends/pytorch/src/op/flatten.cpp | 2 +-
 src/frontends/pytorch/src/op/floor_divide.cpp | 2 +-
 src/frontends/pytorch/src/op/floordiv.cpp | 2 +-
 src/frontends/pytorch/src/op/full.cpp | 22 ++++-----
 src/frontends/pytorch/src/op/gelu.cpp | 2 +-
 src/frontends/pytorch/src/op/get_attr.cpp | 2 +-
 src/frontends/pytorch/src/op/getitem.cpp | 2 +-
 src/frontends/pytorch/src/op/glu.cpp | 2 +-
 src/frontends/pytorch/src/op/grid_sampler.cpp | 2 +-
 src/frontends/pytorch/src/op/group_norm.cpp | 2 +-
 src/frontends/pytorch/src/op/hardtanh.cpp | 2 +-
 src/frontends/pytorch/src/op/if.cpp | 2 +-
 src/frontends/pytorch/src/op/im2col.cpp | 2 +-
 src/frontends/pytorch/src/op/index_put_.cpp | 4 +-
 .../pytorch/src/op/instance_norm.cpp | 2 +-
 src/frontends/pytorch/src/op/int.cpp | 2 +-
 src/frontends/pytorch/src/op/layer_norm.cpp | 2 +-
 src/frontends/pytorch/src/op/len.cpp | 2 +-
 src/frontends/pytorch/src/op/linear.cpp | 2 +-
 .../pytorch/src/op/list_construct.cpp | 2 +-
 src/frontends/pytorch/src/op/log.cpp | 4 +-
 src/frontends/pytorch/src/op/loop.cpp | 2 +-
 src/frontends/pytorch/src/op/masked_fill.cpp | 2 +-
 src/frontends/pytorch/src/op/max_poolnd.cpp | 2 +-
 src/frontends/pytorch/src/op/mean.cpp | 2 +-
 src/frontends/pytorch/src/op/meshgrid.cpp | 2 +-
 src/frontends/pytorch/src/op/min_max.cpp | 4 +-
 src/frontends/pytorch/src/op/narrow.cpp | 2 +-
 src/frontends/pytorch/src/op/neg.cpp | 2 +-
 src/frontends/pytorch/src/op/nms.cpp | 2 +-
 src/frontends/pytorch/src/op/nonzero.cpp | 2 +-
 src/frontends/pytorch/src/op/norm.cpp | 2 +-
 src/frontends/pytorch/src/op/numel.cpp | 2 +-
 src/frontends/pytorch/src/op/pad.cpp | 2 +-
 src/frontends/pytorch/src/op/pow.cpp | 2 +-
 src/frontends/pytorch/src/op/pythonop.cpp | 2 +-
 src/frontends/pytorch/src/op/reciprocal.cpp | 2 +-
 src/frontends/pytorch/src/op/relu6.cpp | 2 +-
 src/frontends/pytorch/src/op/remainder.cpp | 2 +-
 src/frontends/pytorch/src/op/repeat.cpp | 2 +-
 .../pytorch/src/op/repeat_interleave.cpp | 2 +-
 src/frontends/pytorch/src/op/reshape.cpp | 2 +-
 src/frontends/pytorch/src/op/reshape_as.cpp | 2 +-
 src/frontends/pytorch/src/op/roi_align.cpp | 2 +-
 src/frontends/pytorch/src/op/roll.cpp | 2 +-
 src/frontends/pytorch/src/op/rsqrt.cpp | 2 +-
 src/frontends/pytorch/src/op/rsub.cpp | 2 +-
 src/frontends/pytorch/src/op/select.cpp | 2 +-
 src/frontends/pytorch/src/op/selu.cpp | 2 +-
 src/frontends/pytorch/src/op/set_item.cpp | 2 +-
 src/frontends/pytorch/src/op/size.cpp | 2 +-
 src/frontends/pytorch/src/op/slice.cpp | 2 +-
 src/frontends/pytorch/src/op/softmax.cpp | 2 +-
 src/frontends/pytorch/src/op/sort.cpp | 4 +-
 src/frontends/pytorch/src/op/square.cpp | 2 +-
 src/frontends/pytorch/src/op/squeeze.cpp | 2 +-
 src/frontends/pytorch/src/op/sub.cpp | 2 +-
 src/frontends/pytorch/src/op/sum.cpp | 2 +-
 src/frontends/pytorch/src/op/to.cpp | 2 +-
 src/frontends/pytorch/src/op/topk.cpp | 2 +-
 src/frontends/pytorch/src/op/transpose.cpp | 2 +-
 src/frontends/pytorch/src/op/trilu.cpp | 4 +-
 src/frontends/pytorch/src/op/unfold.cpp | 2 +-
 src/frontends/pytorch/src/op/upsample.cpp | 14 +++---
 src/frontends/pytorch/src/op/var_mean.cpp | 4 +-
 src/frontends/pytorch/src/op/where.cpp | 2 +-
 src/frontends/pytorch/src/op_table.cpp | 4 +-
 src/frontends/pytorch/src/op_table.hpp | 2 +-
 .../pytorch/src/translate_session.cpp | 49 +++++++++----------
 .../pytorch/src/translate_session.hpp | 6 +--
 src/frontends/pytorch/src/utils.cpp | 2 +-
 src/frontends/pytorch/src/utils.hpp | 16 +++---
 100 files changed, 173 insertions(+), 180 deletions(-)

diff --git a/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp b/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp
index 123f24fd4a16a1..9bd62ada8ff706 100644
--- a/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp
+++ b/src/frontends/pytorch/include/openvino/frontend/pytorch/frontend.hpp
@@ -60,7 +60,7 @@ class PYTORCH_API FrontEnd : public ov::frontend::FrontEnd {
     bool supported_impl(const std::vector& variants) const override;
     ov::frontend::InputModel::Ptr load_impl(const std::vector& variants) const override;
 
-    std::map m_op_translators;
+    std::map m_op_translators;
 };
 
 } // namespace pytorch
diff --git a/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp b/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp
index 41205130137c09..a3c5504c5c0f3a 100644
--- a/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp
+++ b/src/frontends/pytorch/include/openvino/frontend/pytorch/node_context.hpp
@@ -19,20 +19,22 @@ typedef std::unordered_map> TensorMap;
 
 class NodeContext : public frontend::NodeContext {
 public:
     NodeContext(std::shared_ptr decoder,
-                TensorMap* tensor_map,
-                ParameterVector* external_parameters,
                 const TensorMap& ext_tensor_map,
+                std::shared_ptr tensor_map,
+                std::shared_ptr external_parameters,
+                std::shared_ptr> mutated_tensors,
                 TranslateSession* translate_session)
         : frontend::NodeContext(decoder->get_op_type()),
           m_decoder(decoder),
-          m_tensor_map(tensor_map),
           m_ext_tensor_map(ext_tensor_map),
+          m_tensor_map(tensor_map),
           m_external_parameters(external_parameters),
+          m_mutated_tensors(mutated_tensors),
           m_translate_session(translate_session),
          m_decoder_inputs(decoder->inputs()),
           m_decoder_outputs(decoder->outputs()) {
-        FRONT_END_GENERAL_CHECK(tensor_map != nullptr && external_parameters != nullptr &&
-                                translate_session != nullptr);
+        FRONT_END_GENERAL_CHECK(m_tensor_map != nullptr && m_external_parameters != nullptr &&
+                                m_mutated_tensors != nullptr && m_translate_session != nullptr);
     }
 
     // Do not search for input in tensor map; try to access it as a constant of specified type T and return its value
@@ -106,11 +108,7 @@ class NodeContext : public frontend::NodeContext {
             "There is no any named attributes in PyTorch node, query by attribute name is not implemented");
     }
 
-    void mutate_input(size_t index, Output ov_output);
-
-    std::set get_mutated_tensors() const {
-        return m_mutated_tensors;
-    }
+    void mutate_input(size_t index, Output ov_output) const;
 
     std::shared_ptr get_decoder() const {
         return m_decoder;
@@ -120,7 +118,7 @@
         return m_translate_session;
     }
 
-    void add_tensor_to_context(size_t index, Output ov_output);
+    void add_tensor_to_context(size_t index, Output ov_output) const;
 
     Output get_tensor_from_model(size_t index) const {
         if (m_tensor_map->find(index) != m_tensor_map->end()) {
@@ -130,22 +128,22 @@
         }
     }
 
-    Output get_tensor_from_model_or_create_input(size_t index);
+    Output get_tensor_from_model_or_create_input(size_t index) const;
 
     Output get_input_from_visible_context(size_t index) const;
 
-    std::shared_ptr convert_subgraph(size_t index);
+    std::shared_ptr convert_subgraph(size_t index) const;
 
 private:
     std::shared_ptr m_decoder;
-    std::set m_mutated_tensors;
-    TensorMap* m_tensor_map;
     const TensorMap& m_ext_tensor_map;
-    ParameterVector* m_external_parameters;
-    TranslateSession* m_translate_session;
+    std::shared_ptr m_tensor_map;
+    std::shared_ptr m_external_parameters;
+    std::shared_ptr> m_mutated_tensors;
+    TranslateSession* m_translate_session = nullptr;
     const std::vector m_decoder_inputs;
     const std::vector m_decoder_outputs;
 };
 
-using PytorchCreatorFunction = std::function;
+using CreatorFunction = std::function;
 
 } // namespace pytorch
 } // namespace frontend
diff --git a/src/frontends/pytorch/src/node_context.cpp b/src/frontends/pytorch/src/node_context.cpp
index 10ac4fc5d005e9..49495749d570f4 100644
--- a/src/frontends/pytorch/src/node_context.cpp
+++ b/src/frontends/pytorch/src/node_context.cpp
@@ -42,16 +42,16 @@ std::shared_ptr NodeContext::mark_node(std::shared_ptr ov_node) cons
     return m_decoder->mark_node(ov_node);
 }
 
-void NodeContext::mutate_input(size_t index, Output ov_output) {
+void NodeContext::mutate_input(size_t index, Output ov_output) const {
     FRONT_END_GENERAL_CHECK(!m_decoder->input_is_none(index), "Input is none with index: ", index);
     auto input_id = m_decoder_inputs.at(index);
     FRONT_END_GENERAL_CHECK(m_tensor_map->count(input_id), "No tensor corresponding input: ", input_id, " exist.");
     m_translate_session->encode_tensor_name(ov_output, input_id, m_decoder->get_input_debug_name(index));
     (*m_tensor_map)[input_id] = ov_output;
-    m_mutated_tensors.insert(input_id);
+    m_mutated_tensors->insert(input_id);
 }
 
-void NodeContext::add_tensor_to_context(size_t index, Output ov_output) {
+void NodeContext::add_tensor_to_context(size_t index, Output ov_output) const {
     if (m_tensor_map->count(index)) {
         OPENVINO_DEBUG << "[ WARNING ] Current context has tensor. Rewriting.\n";
     }
@@ -59,7 +59,7 @@ void NodeContext::add_tensor_to_context(size_t index, Output ov_output) {
     (*m_tensor_map)[index] = ov_output;
 }
 
-Output NodeContext::get_tensor_from_model_or_create_input(size_t index) {
+Output NodeContext::get_tensor_from_model_or_create_input(size_t index) const {
     if (m_tensor_map->find(index) != m_tensor_map->end()) {
         return m_tensor_map->at(index);
     } else {
@@ -87,7 +87,7 @@ Output NodeContext::get_input_from_visible_context(size_t index) const {
     return input_tensor;
 }
 
-std::shared_ptr NodeContext::convert_subgraph(size_t index) {
+std::shared_ptr NodeContext::convert_subgraph(size_t index) const {
     auto subgraph_decoder = m_decoder->get_subgraph_decoder(index);
 
     // Extend external context with internal tensors except Parameter nodes, because internal Parameters are created to
diff --git a/src/frontends/pytorch/src/op/adaptive_avg_pool3d.cpp b/src/frontends/pytorch/src/op/adaptive_avg_pool3d.cpp
index 75d000ce1d7da8..42aa3da1d8586b 100644
--- a/src/frontends/pytorch/src/op/adaptive_avg_pool3d.cpp
+++ b/src/frontends/pytorch/src/op/adaptive_avg_pool3d.cpp
@@ -19,7 +19,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_adaptive_avg_pool3d(NodeContext& context) {
+OutputVector translate_adaptive_avg_pool3d(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto const_tile_params = context.mark_node(v0::Constant::create(element::i32, Shape{5}, {1, 1, 1, 1, 1}));
     auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {0}));
diff --git a/src/frontends/pytorch/src/op/adaptive_max_pool2d.cpp b/src/frontends/pytorch/src/op/adaptive_max_pool2d.cpp
index bbd72927fc6fac..5705fd22e70f47 100644
--- a/src/frontends/pytorch/src/op/adaptive_max_pool2d.cpp
+++ b/src/frontends/pytorch/src/op/adaptive_max_pool2d.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_adaptive_max_pool2d(NodeContext& context) {
+OutputVector translate_adaptive_max_pool2d(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/add.cpp b/src/frontends/pytorch/src/op/add.cpp
index 3c4976efe2cdef..65ecfe021c5b7e 100644
--- a/src/frontends/pytorch/src/op/add.cpp
+++ b/src/frontends/pytorch/src/op/add.cpp
@@ -15,7 +15,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_add(NodeContext& context) {
+OutputVector translate_add(const NodeContext& context) {
     num_inputs_check(context, 2, 3);
     auto lhs = context.get_input(0);
     auto rhs = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/addcmul.cpp b/src/frontends/pytorch/src/op/addcmul.cpp
index 50d2cec4a3d045..7933acfde1d176 100644
--- a/src/frontends/pytorch/src/op/addcmul.cpp
+++ b/src/frontends/pytorch/src/op/addcmul.cpp
@@ -17,7 +17,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_addcmul(NodeContext& context) {
+OutputVector translate_addcmul(const NodeContext& context) {
     num_inputs_check(context, 4, 4);
     const auto eltwise_mult = std::make_shared(context.get_input(1), context.get_input(2));
     const auto value = context.get_input(3);
diff --git a/src/frontends/pytorch/src/op/addmm.cpp b/src/frontends/pytorch/src/op/addmm.cpp
index 4a2e16906b2b09..e8ba9b7e9159ab 100644
--- a/src/frontends/pytorch/src/op/addmm.cpp
+++ b/src/frontends/pytorch/src/op/addmm.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_addmm(NodeContext& context) {
+OutputVector translate_addmm(const NodeContext& context) {
     num_inputs_check(context, 5, 5);
     auto input = context.get_input(0);
     auto m1 = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/arange.cpp b/src/frontends/pytorch/src/op/arange.cpp
index bacd8d113bc3c3..c238cd12bbc394 100644
--- a/src/frontends/pytorch/src/op/arange.cpp
+++ b/src/frontends/pytorch/src/op/arange.cpp
@@ -17,7 +17,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_arange(NodeContext& context) {
+OutputVector translate_arange(const NodeContext& context) {
     auto zero = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0}));
     auto one = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1}));
     int dtype_port = -1;
diff --git a/src/frontends/pytorch/src/op/as_tensor.cpp b/src/frontends/pytorch/src/op/as_tensor.cpp
index 114f4e6fe6ad34..ae2c15d0a1eba9 100644
--- a/src/frontends/pytorch/src/op/as_tensor.cpp
+++ b/src/frontends/pytorch/src/op/as_tensor.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_as_tensor(NodeContext& context) {
+OutputVector translate_as_tensor(const NodeContext& context) {
     // aten::tensor(t[] data, *, ScalarType? dtype=None, Device? device=None, bool requires_grad=False) -> Tensor
     num_inputs_check(context, 1, 4);
     auto dtype = element::f32;
diff --git a/src/frontends/pytorch/src/op/avg_poolnd.cpp b/src/frontends/pytorch/src/op/avg_poolnd.cpp
index bb1d16b99df2cb..77f35a0569e76b 100644
--- a/src/frontends/pytorch/src/op/avg_poolnd.cpp
+++ b/src/frontends/pytorch/src/op/avg_poolnd.cpp
@@ -18,7 +18,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_avg_poolnd(NodeContext& context) {
+OutputVector translate_avg_poolnd(const NodeContext& context) {
     num_inputs_check(context, 6, 7);
     auto input = context.get_input(0);
     auto kernel = context.const_input(1);
diff --git a/src/frontends/pytorch/src/op/batch_norm.cpp b/src/frontends/pytorch/src/op/batch_norm.cpp
index 495295e212e213..a306dd21832e82 100644
--- a/src/frontends/pytorch/src/op/batch_norm.cpp
+++ b/src/frontends/pytorch/src/op/batch_norm.cpp
@@ -32,7 +32,7 @@ Output broadcast_const_to_channel_dim(const NodeContext& context,
 }
 } // namespace
 
-OutputVector translate_batch_norm(NodeContext& context) {
+OutputVector translate_batch_norm(const NodeContext& context) {
     // Schema: aten::batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var,
     // bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor
     num_inputs_check(context, 8, 9);
diff --git a/src/frontends/pytorch/src/op/bitwise_not.cpp b/src/frontends/pytorch/src/op/bitwise_not.cpp
index dcb953990e0d82..55ba1203b80a04 100644
--- a/src/frontends/pytorch/src/op/bitwise_not.cpp
+++ b/src/frontends/pytorch/src/op/bitwise_not.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_bitwise_not(NodeContext& context) {
+OutputVector translate_bitwise_not(const NodeContext& context) {
     num_inputs_check(context, 1, 2);
     auto x = context.get_input(0);
     FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean),
diff --git a/src/frontends/pytorch/src/op/bool.cpp b/src/frontends/pytorch/src/op/bool.cpp
index 60d7a4e0158631..0d0d53ad608fb1 100644
--- a/src/frontends/pytorch/src/op/bool.cpp
+++ b/src/frontends/pytorch/src/op/bool.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_bool(NodeContext& context) {
+OutputVector translate_bool(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     return {context.mark_node(std::make_shared(context.get_input(0), element::boolean))};
 };
diff --git a/src/frontends/pytorch/src/op/cat.cpp b/src/frontends/pytorch/src/op/cat.cpp
index ce359c6384885f..8fbdd0f3e6f32f 100644
--- a/src/frontends/pytorch/src/op/cat.cpp
+++ b/src/frontends/pytorch/src/op/cat.cpp
@@ -12,7 +12,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_cat(NodeContext& context) {
+OutputVector translate_cat(const NodeContext& context) {
     // This translator is only needed to get axis as constant from external scope
     num_inputs_check(context, 2, 2);
     const auto&& list_elems = get_list_as_outputs(context.get_input(0));
diff --git a/src/frontends/pytorch/src/op/clamp.cpp b/src/frontends/pytorch/src/op/clamp.cpp
index afbe349cf4a25f..fa28ca301df48a 100644
--- a/src/frontends/pytorch/src/op/clamp.cpp
+++ b/src/frontends/pytorch/src/op/clamp.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_clamp(NodeContext& context) {
+OutputVector translate_clamp(const NodeContext& context) {
     num_inputs_check(context, 1, 3);
     auto x = context.get_input(0);
     if (!context.input_is_none(1)) {
diff --git a/src/frontends/pytorch/src/op/constant.cpp b/src/frontends/pytorch/src/op/constant.cpp
index 944a1e197867dc..6fc6444f04fa35 100644
--- a/src/frontends/pytorch/src/op/constant.cpp
+++ b/src/frontends/pytorch/src/op/constant.cpp
@@ -9,7 +9,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_constant(NodeContext& context) {
+OutputVector translate_constant(const NodeContext& context) {
     return context.as_constant();
 };
 
diff --git a/src/frontends/pytorch/src/op/conv_transposend.cpp b/src/frontends/pytorch/src/op/conv_transposend.cpp
index d4a0cc2e047114..1f281f90486fad 100644
--- a/src/frontends/pytorch/src/op/conv_transposend.cpp
+++ b/src/frontends/pytorch/src/op/conv_transposend.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_conv_transposend(NodeContext& context) {
+OutputVector translate_conv_transposend(const NodeContext& context) {
     num_inputs_check(context, 8, 8);
     auto strides = context.const_input(3);
     // PyTorch support only symmetric padding, padding sizes are the same for begins and ends for each dimension
diff --git a/src/frontends/pytorch/src/op/convnd.cpp b/src/frontends/pytorch/src/op/convnd.cpp
index 9b91985182479a..861cb68f1fa30f 100644
--- a/src/frontends/pytorch/src/op/convnd.cpp
+++ b/src/frontends/pytorch/src/op/convnd.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_convnd(NodeContext& context) {
+OutputVector translate_convnd(const NodeContext& context) {
     num_inputs_check(context, 7, 7);
     auto strides = context.const_input(3);
     // In torch pads at beginning are same as at end
diff --git a/src/frontends/pytorch/src/op/convolution.cpp b/src/frontends/pytorch/src/op/convolution.cpp
index d0e77e4adadafe..7a250115b54a51 100644
--- a/src/frontends/pytorch/src/op/convolution.cpp
+++ b/src/frontends/pytorch/src/op/convolution.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_convolution(NodeContext& context) {
+OutputVector translate_convolution(const NodeContext& context) {
     // Schema: aten::_convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[]
     // dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool
     // cudnn_enabled, bool allow_tf32) -> Tensor
diff --git a/src/frontends/pytorch/src/op/convolution_mode.cpp b/src/frontends/pytorch/src/op/convolution_mode.cpp
index c732aa6688941f..7aa9d8f991b97b 100644
--- a/src/frontends/pytorch/src/op/convolution_mode.cpp
+++ b/src/frontends/pytorch/src/op/convolution_mode.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_convolution_mode(NodeContext& context) {
+OutputVector translate_convolution_mode(const NodeContext& context) {
     // Schema: aten::_convolution_mode(Tensor input, Tensor weight, Tensor? bias, int[] stride, str padding, int[]
     // dilation, int groups) -> Tensor
     num_inputs_check(context, 7, 7);
diff --git a/src/frontends/pytorch/src/op/cumsum.cpp b/src/frontends/pytorch/src/op/cumsum.cpp
index 55d46500427e6d..c396521a9e402b 100644
--- a/src/frontends/pytorch/src/op/cumsum.cpp
+++ b/src/frontends/pytorch/src/op/cumsum.cpp
@@ -13,7 +13,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_cumsum(NodeContext& context) {
+OutputVector translate_cumsum(const NodeContext& context) {
     // aten::cumsum(Tensor self, int dim, *, ScalarType? dtype=None, Tensor out=None)
     num_inputs_check(context, 2, 4);
     auto x = context.get_input(0);
diff --git a/src/frontends/pytorch/src/op/dim.cpp b/src/frontends/pytorch/src/op/dim.cpp
index 7af4aa8fe3147e..2d69cb3e37a796 100644
--- a/src/frontends/pytorch/src/op/dim.cpp
+++ b/src/frontends/pytorch/src/op/dim.cpp
@@ -12,7 +12,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_dim(NodeContext& context) {
+OutputVector translate_dim(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     Output rank;
     std::tie(std::ignore, rank) = get_shape_rank(context, context.get_input(0), true);
diff --git a/src/frontends/pytorch/src/op/div.cpp b/src/frontends/pytorch/src/op/div.cpp
index 54cab6325ae7f2..e9dd7136e35bef 100644
--- a/src/frontends/pytorch/src/op/div.cpp
+++ b/src/frontends/pytorch/src/op/div.cpp
@@ -17,7 +17,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_div(NodeContext& context) {
+OutputVector translate_div(const NodeContext& context) {
     num_inputs_check(context, 2, 3);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/elu.cpp b/src/frontends/pytorch/src/op/elu.cpp
index f60d76b96638e5..4f96371ee83ebd 100644
--- a/src/frontends/pytorch/src/op/elu.cpp
+++ b/src/frontends/pytorch/src/op/elu.cpp
@@ -12,7 +12,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_elu(NodeContext& context) {
+OutputVector translate_elu(const NodeContext& context) {
     // aten::elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor
     num_inputs_check(context, 2, 4);
     auto x = context.get_input(0);
diff --git a/src/frontends/pytorch/src/op/embedding.cpp b/src/frontends/pytorch/src/op/embedding.cpp
index c920992bdaafec..e5dc85a0ddfebf 100644
--- a/src/frontends/pytorch/src/op/embedding.cpp
+++ b/src/frontends/pytorch/src/op/embedding.cpp
@@ -13,7 +13,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_embedding(NodeContext& context) {
+OutputVector translate_embedding(const NodeContext& context) {
     // aten::embedding(Tensor weight, Tensor indices, SymInt padding_idx=-1, bool scale_grad_by_freq=False, bool
     // sparse=False)
     num_inputs_check(context, 5, 5);
diff --git a/src/frontends/pytorch/src/op/expand.cpp b/src/frontends/pytorch/src/op/expand.cpp
index 34f0a9d70c323b..9210cedc6eba6f 100644
--- a/src/frontends/pytorch/src/op/expand.cpp
+++ b/src/frontends/pytorch/src/op/expand.cpp
@@ -30,7 +30,7 @@ OutputVector base_expand(const NodeContext& context, const Output& x, cons
 };
 } // namespace
 
-OutputVector translate_expand(NodeContext& context) {
+OutputVector translate_expand(const NodeContext& context) {
     // aten::expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a)
     num_inputs_check(context, 2, 3);
     auto x = context.get_input(0);
@@ -41,7 +41,7 @@ OutputVector translate_expand(NodeContext& context) {
     return base_expand(context, x, sizes);
 };
 
-OutputVector translate_expand_as(NodeContext& context) {
+OutputVector translate_expand_as(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/eye.cpp b/src/frontends/pytorch/src/op/eye.cpp
index ab35c56569c5c4..9b7f7ef8c3bc29 100644
--- a/src/frontends/pytorch/src/op/eye.cpp
+++ b/src/frontends/pytorch/src/op/eye.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_eye(NodeContext& context) {
+OutputVector translate_eye(const NodeContext& context) {
     size_t num_inputs = context.get_input_size();
     auto x = context.get_input(0);
     // num rows and cols should be integer, but at the moment conversion their data type can be unknown yet
diff --git a/src/frontends/pytorch/src/op/flatten.cpp b/src/frontends/pytorch/src/op/flatten.cpp
index 6d9005a64b8643..6022661c3aa8cf 100644
--- a/src/frontends/pytorch/src/op/flatten.cpp
+++ b/src/frontends/pytorch/src/op/flatten.cpp
@@ -18,7 +18,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_flatten(NodeContext& context) {
+OutputVector translate_flatten(const NodeContext& context) {
     num_inputs_check(context, 1, 3);
     auto x = context.get_input(0);
     int64_t start_dim = 0;
diff --git a/src/frontends/pytorch/src/op/floor_divide.cpp b/src/frontends/pytorch/src/op/floor_divide.cpp
index 5731006dd770aa..4fb1b230d44c21 100644
--- a/src/frontends/pytorch/src/op/floor_divide.cpp
+++ b/src/frontends/pytorch/src/op/floor_divide.cpp
@@ -14,7 +14,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_floor_divide(NodeContext& context) {
+OutputVector translate_floor_divide(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/floordiv.cpp b/src/frontends/pytorch/src/op/floordiv.cpp
index b85cacf3fc5145..91c03e74d7f0b1 100644
--- a/src/frontends/pytorch/src/op/floordiv.cpp
+++ b/src/frontends/pytorch/src/op/floordiv.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_floordiv(NodeContext& context) {
+OutputVector translate_floordiv(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/full.cpp b/src/frontends/pytorch/src/op/full.cpp
index abfacbf872f913..bbb7f98022f42b 100644
--- a/src/frontends/pytorch/src/op/full.cpp
+++ b/src/frontends/pytorch/src/op/full.cpp
@@ -42,7 +42,7 @@ Output base_translate_full_with_convert(const NodeContext& context,
 }
 } // namespace
 
-OutputVector translate_full(NodeContext& context) {
+OutputVector translate_full(const NodeContext& context) {
     num_inputs_check(context, 2, 6);
     auto sizes = context.get_input(0);
     auto value = context.get_input(1);
@@ -59,7 +59,7 @@ OutputVector translate_full(NodeContext& context) {
     return {base_translate_full_with_convert(context, sizes, value, dtype_id)};
 };
 
-OutputVector translate_full_like(NodeContext& context) {
+OutputVector translate_full_like(const NodeContext& context) {
     num_inputs_check(context, 2, 7);
     auto input = context.get_input(0);
     auto value = context.get_input(1);
@@ -71,7 +71,7 @@
     return {base_translate_full_with_convertlike(context, sizes, value, out)};
 };
 
-OutputVector translate_fill_(NodeContext& context) {
+OutputVector translate_fill_(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto input = context.get_input(0);
     auto value = context.get_input(1);
@@ -79,7 +79,7 @@ OutputVector translate_fill_(NodeContext& context) {
     return {base_translate_full_with_convertlike(context, sizes, value, input)};
 };
 
-OutputVector translate_new_full(NodeContext& context) {
+OutputVector translate_new_full(const NodeContext& context) {
     num_inputs_check(context, 3, 7);
     auto input = context.get_input(0);
     auto sizes = context.get_input(1);
@@ -90,7 +90,7 @@ OutputVector translate_new_full(NodeContext& context) {
     return {base_translate_full_with_convertlike(context, sizes, value, input)};
 };
 
-OutputVector translate_zeros(NodeContext& context) {
+OutputVector translate_zeros(const NodeContext& context) {
     num_inputs_check(context, 2, 5);
     auto sizes = context.get_input(0);
     auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {0}));
@@ -107,7 +107,7 @@ OutputVector translate_zeros(NodeContext& context) {
     return {base_translate_full_with_convert(context, sizes, value, dtype_id)};
 };
 
-OutputVector translate_zeros_like(NodeContext& context) {
+OutputVector translate_zeros_like(const NodeContext& context) {
     num_inputs_check(context, 1, 6);
     auto input = context.get_input(0);
     auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {0}));
@@ -119,7 +119,7 @@ OutputVector translate_zeros_like(NodeContext& context) {
     return {base_translate_full_with_convertlike(context, sizes, value, out)};
 };
 
-OutputVector translate_new_zeros(NodeContext& context) {
+OutputVector translate_new_zeros(const NodeContext& context) {
     num_inputs_check(context, 2, 6);
     auto input = context.get_input(0);
     auto sizes = context.get_input(1);
@@ -130,7 +130,7 @@ OutputVector translate_new_zeros(NodeContext& context) {
     return {base_translate_full_with_convertlike(context, sizes, value, input)};
 };
 
-OutputVector translate_ones(NodeContext& context) {
+OutputVector translate_ones(const NodeContext& context) {
     num_inputs_check(context, 1, 5);
     auto sizes = context.get_input(0);
     auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {1}));
@@ -147,7 +147,7 @@ OutputVector translate_ones(NodeContext& context) {
     return {base_translate_full_with_convert(context, sizes, value, dtype_id)};
 };
 
-OutputVector translate_ones_like(NodeContext& context) {
+OutputVector translate_ones_like(const NodeContext& context) {
     num_inputs_check(context, 1, 6);
     auto input = context.get_input(0);
     auto value = context.mark_node(v0::Constant::create(element::f32, Shape{}, {1}));
@@ -159,7 +159,7 @@ OutputVector translate_ones_like(NodeContext& context) {
     return {base_translate_full_with_convertlike(context, sizes, value, out)};
 };
 
-OutputVector translate_new_ones(NodeContext& context) {
+OutputVector translate_new_ones(const NodeContext& context) {
     num_inputs_check(context, 2, 6);
     auto input = context.get_input(0);
     auto sizes = context.get_input(1);
@@ -170,7 +170,7 @@ OutputVector translate_new_ones(NodeContext& context) {
     return {base_translate_full_with_convertlike(context, sizes, value, input)};
 };
 
-OutputVector translate_empty(NodeContext& context) {
+OutputVector translate_empty(const NodeContext& context) {
     // aten::empty(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool?
     // pin_memory=None, MemoryFormat? memory_format=None) -> Tensor layout, device and work with memory ignored on our
     // side, so just skip these parameters
diff --git a/src/frontends/pytorch/src/op/gelu.cpp b/src/frontends/pytorch/src/op/gelu.cpp
index 598f6865b3e2b0..c64ea647688584 100644
--- a/src/frontends/pytorch/src/op/gelu.cpp
+++ b/src/frontends/pytorch/src/op/gelu.cpp
@@ -12,7 +12,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_gelu(NodeContext& context) {
+OutputVector translate_gelu(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto approximate = context.const_input(1);
diff --git a/src/frontends/pytorch/src/op/get_attr.cpp b/src/frontends/pytorch/src/op/get_attr.cpp
index 3575a5210a8518..1d0ae0e4d13d1b 100644
--- a/src/frontends/pytorch/src/op/get_attr.cpp
+++ b/src/frontends/pytorch/src/op/get_attr.cpp
@@ -12,7 +12,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_get_attr(NodeContext& context) {
+OutputVector translate_get_attr(const NodeContext& context) {
     auto res = context.get_decoder()->try_decode_get_attr();
     FRONT_END_OP_CONVERSION_CHECK(res.size() > 0, "GetAttr must have at least one output.");
     return res;
diff --git a/src/frontends/pytorch/src/op/getitem.cpp b/src/frontends/pytorch/src/op/getitem.cpp
index 1bf9f4a0e8a274..0a1243196f4d6c 100644
--- a/src/frontends/pytorch/src/op/getitem.cpp
+++ b/src/frontends/pytorch/src/op/getitem.cpp
@@ -13,7 +13,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_getitem(NodeContext& context) {
+OutputVector translate_getitem(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto input = context.get_input(0);
     if (std::dynamic_pointer_cast(input.get_node_shared_ptr())) {
diff --git a/src/frontends/pytorch/src/op/glu.cpp b/src/frontends/pytorch/src/op/glu.cpp
index e650e9c4a54c05..dbe979fb1f2870 100644
--- a/src/frontends/pytorch/src/op/glu.cpp
+++ b/src/frontends/pytorch/src/op/glu.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_glu(NodeContext& context) {
+OutputVector translate_glu(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto dim = context.input_is_none(1) ? context.mark_node(v0::Constant::create(element::i32, Shape{}, {-1}))
diff --git a/src/frontends/pytorch/src/op/grid_sampler.cpp b/src/frontends/pytorch/src/op/grid_sampler.cpp
index 9011abd8566a67..8c603813d888f7 100644
--- a/src/frontends/pytorch/src/op/grid_sampler.cpp
+++ b/src/frontends/pytorch/src/op/grid_sampler.cpp
@@ -13,7 +13,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_grid_sampler(NodeContext& context) {
+OutputVector translate_grid_sampler(const NodeContext& context) {
     num_inputs_check(context, 4, 5);
     auto x = context.get_input(0);
     auto grid = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/group_norm.cpp b/src/frontends/pytorch/src/op/group_norm.cpp
index 7b3ac53bbb4772..6ce36aac6601b1 100644
--- a/src/frontends/pytorch/src/op/group_norm.cpp
+++ b/src/frontends/pytorch/src/op/group_norm.cpp
@@ -20,7 +20,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_group_norm(NodeContext& context) {
+OutputVector translate_group_norm(const NodeContext& context) {
     // aten::group_norm(Tensor input, int num_groups, Tensor? weight=None, Tensor? bias=None, float
     // eps=1.0000000000000001e-05, bool cudnn_enabled=True) -> Tensor
     num_inputs_check(context, 2, 6);
diff --git a/src/frontends/pytorch/src/op/hardtanh.cpp b/src/frontends/pytorch/src/op/hardtanh.cpp
index 52551a259a97d2..a85bedbf00626b 100644
--- a/src/frontends/pytorch/src/op/hardtanh.cpp
+++ b/src/frontends/pytorch/src/op/hardtanh.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_hardtanh(NodeContext& context) {
+OutputVector translate_hardtanh(const NodeContext& context) {
     num_inputs_check(context, 1, 3);
     float min = -1;
     float max = 1;
diff --git a/src/frontends/pytorch/src/op/if.cpp b/src/frontends/pytorch/src/op/if.cpp
index 1e5d3a26778cc1..7fb3ecce123a26 100644
--- a/src/frontends/pytorch/src/op/if.cpp
+++ b/src/frontends/pytorch/src/op/if.cpp
@@ -13,7 +13,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_if(NodeContext& context) {
+OutputVector translate_if(const NodeContext& context) {
     auto if_node = std::make_shared(context.get_input(0));
     context.mark_node(if_node);
     auto decoder = context.get_decoder();
diff --git a/src/frontends/pytorch/src/op/im2col.cpp b/src/frontends/pytorch/src/op/im2col.cpp
index 12fb4f3b7c4a04..718e0eadaa4ca0 100644
--- a/src/frontends/pytorch/src/op/im2col.cpp
+++ b/src/frontends/pytorch/src/op/im2col.cpp
@@ -56,7 +56,7 @@ std::shared_ptr get_im2col_indices_along_dim(const NodeContext& context,
 }
 } // namespace
 
-OutputVector translate_im2col(NodeContext& context) {
+OutputVector translate_im2col(const NodeContext& context) {
     num_inputs_check(context, 5, 5);
     auto input = context.get_input(0);
     auto kernel_size = context.const_input>(1);
diff --git a/src/frontends/pytorch/src/op/index_put_.cpp b/src/frontends/pytorch/src/op/index_put_.cpp
index 1ce4ea0e96d6a1..d8a599eaf5abcf 100644
--- a/src/frontends/pytorch/src/op/index_put_.cpp
+++ b/src/frontends/pytorch/src/op/index_put_.cpp
@@ -10,9 +10,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-using namespace ov::op;
-
-OutputVector translate_index_put_(NodeContext& context) {
+OutputVector translate_index_put_(const NodeContext& context) {
     // Pass as PtFrameworkNode to register as `inplace_op`. Conversion to OV operators is done as transformation.
     auto node = std::make_shared(context.get_decoder(), context.inputs());
     return {context.mark_node(node)};
diff --git a/src/frontends/pytorch/src/op/instance_norm.cpp b/src/frontends/pytorch/src/op/instance_norm.cpp
index b00b7bff260bd1..ff53bd11e92ffb 100644
--- a/src/frontends/pytorch/src/op/instance_norm.cpp
+++ b/src/frontends/pytorch/src/op/instance_norm.cpp
@@ -88,7 +88,7 @@ OutputVector translate_instance_norm_train(const NodeContext& context,
 
 } // namespace
 
-OutputVector translate_instance_norm(NodeContext& context) {
+OutputVector translate_instance_norm(const NodeContext& context) {
     num_inputs_check(context, 8, 9);
     auto input = context.get_input(0);
     auto eps = context.const_input(7);
diff --git a/src/frontends/pytorch/src/op/int.cpp b/src/frontends/pytorch/src/op/int.cpp
index e11397cb6f11e3..5a407a1a7254c3 100644
--- a/src/frontends/pytorch/src/op/int.cpp
+++ b/src/frontends/pytorch/src/op/int.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_int(NodeContext& context) {
+OutputVector translate_int(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     return {context.mark_node(std::make_shared(context.get_input(0), element::i32))};
 };
diff --git a/src/frontends/pytorch/src/op/layer_norm.cpp b/src/frontends/pytorch/src/op/layer_norm.cpp
index c954110111e799..204d7164531c72 100644
--- a/src/frontends/pytorch/src/op/layer_norm.cpp
+++ b/src/frontends/pytorch/src/op/layer_norm.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_layer_norm(NodeContext& context) {
+OutputVector translate_layer_norm(const NodeContext& context) {
     num_inputs_check(context, 5, 6);
     auto eps = context.const_input(4);
     auto normalized_shape = context.const_input(1);
diff --git a/src/frontends/pytorch/src/op/len.cpp b/src/frontends/pytorch/src/op/len.cpp
index 71f8bdf3a32b88..9a22658500913f 100644
--- a/src/frontends/pytorch/src/op/len.cpp
+++ b/src/frontends/pytorch/src/op/len.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_len(NodeContext& context) {
+OutputVector translate_len(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {0}));
     auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1}));
diff --git a/src/frontends/pytorch/src/op/linear.cpp b/src/frontends/pytorch/src/op/linear.cpp
index e94ff7c9168003..8288220f320251 100644
--- a/src/frontends/pytorch/src/op/linear.cpp
+++ b/src/frontends/pytorch/src/op/linear.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_linear(NodeContext& context) {
+OutputVector translate_linear(const NodeContext& context) {
     // schema: aten::linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
     num_inputs_check(context, 2, 3);
     auto x = context.get_input(0);
diff --git a/src/frontends/pytorch/src/op/list_construct.cpp b/src/frontends/pytorch/src/op/list_construct.cpp
index e69188e23d89a3..e58a3c4744ff61 100644
--- a/src/frontends/pytorch/src/op/list_construct.cpp
+++ b/src/frontends/pytorch/src/op/list_construct.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_list_construct(NodeContext& context) {
+OutputVector translate_list_construct(const NodeContext& context) {
     // Process the case when prim::ListConstruct has all inputs constant
     auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0}));
     ov::OutputVector consts;
diff --git a/src/frontends/pytorch/src/op/log.cpp b/src/frontends/pytorch/src/op/log.cpp
index 85947b7694ee06..808dff6ed32822 100644
--- a/src/frontends/pytorch/src/op/log.cpp
+++ b/src/frontends/pytorch/src/op/log.cpp
@@ -17,7 +17,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_log(NodeContext& context) {
+OutputVector translate_log(const NodeContext& context) {
     // torch.log returns a tensor with the natural logarithm of the elements of input.
     num_inputs_check(context, 1, 1);
     auto x = context.get_input(0);
@@ -26,7 +26,7 @@ OutputVector translate_log(NodeContext& context) {
     return {log};
 };
 
-OutputVector translate_log2(NodeContext& context) {
+OutputVector translate_log2(const NodeContext& context) {
     // torch.log2 returns a tensor with the logarithm to the base 2 of the elements of input.
     num_inputs_check(context, 1, 1);
     auto x = context.get_input(0);
diff --git a/src/frontends/pytorch/src/op/loop.cpp b/src/frontends/pytorch/src/op/loop.cpp
index 7bf03cfcd30138..36369ea63bd4ee 100644
--- a/src/frontends/pytorch/src/op/loop.cpp
+++ b/src/frontends/pytorch/src/op/loop.cpp
@@ -13,7 +13,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_loop(NodeContext& context) {
+OutputVector translate_loop(const NodeContext& context) {
     const auto& inputs = context.inputs();
     FRONT_END_OP_CONVERSION_CHECK(inputs.size() >= 2, "Loop must have at least 2 inputs.");
     auto loop = std::make_shared(inputs[0], inputs[1]);
diff --git a/src/frontends/pytorch/src/op/masked_fill.cpp b/src/frontends/pytorch/src/op/masked_fill.cpp
index 2a071755b3a145..5ed090e0b619b7 100644
--- a/src/frontends/pytorch/src/op/masked_fill.cpp
+++ b/src/frontends/pytorch/src/op/masked_fill.cpp
@@ -18,7 +18,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_masked_fill(NodeContext& context) {
+OutputVector translate_masked_fill(const NodeContext& context) {
     num_inputs_check(context, 3, 3);
     auto data = context.get_input(0);
     auto mask = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/max_poolnd.cpp b/src/frontends/pytorch/src/op/max_poolnd.cpp
index f594b0a2b0798c..f756b1488ce9ea 100644
--- a/src/frontends/pytorch/src/op/max_poolnd.cpp
+++ b/src/frontends/pytorch/src/op/max_poolnd.cpp
@@ -13,7 +13,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_max_poolnd(NodeContext& context) {
+OutputVector translate_max_poolnd(const NodeContext& context) {
     num_inputs_check(context, 6, 6);
     auto kernel = context.const_input(1);
     auto strides = context.const_input(2);
diff --git a/src/frontends/pytorch/src/op/mean.cpp b/src/frontends/pytorch/src/op/mean.cpp
index b7a5acfb6fb22f..46c42f6be1a4b7 100644
--- a/src/frontends/pytorch/src/op/mean.cpp
+++ b/src/frontends/pytorch/src/op/mean.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_mean(NodeContext& context) {
+OutputVector translate_mean(const NodeContext& context) {
     num_inputs_check(context, 3, 4);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/meshgrid.cpp b/src/frontends/pytorch/src/op/meshgrid.cpp
index 841de80fcbf494..c9b5833ae68d60 100644
--- a/src/frontends/pytorch/src/op/meshgrid.cpp
+++ b/src/frontends/pytorch/src/op/meshgrid.cpp
@@ -10,7 +10,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_meshgrid(NodeContext& context) {
+OutputVector translate_meshgrid(const NodeContext& context) {
     std::string indexing = "ij";
     if (!context.input_is_none(1)) {
         indexing = context.const_input(1);
diff --git a/src/frontends/pytorch/src/op/min_max.cpp b/src/frontends/pytorch/src/op/min_max.cpp
index 34c70219f1137c..898403bf82b7cf 100644
--- a/src/frontends/pytorch/src/op/min_max.cpp
+++ b/src/frontends/pytorch/src/op/min_max.cpp
@@ -20,7 +20,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_max(NodeContext& context) {
+OutputVector translate_max(const NodeContext& context) {
     // torch.max (same for torch.min) actually has two interfaces smashed together:
     // torch.max(x, dim, keepdim) and torch.max(x, y)
     num_inputs_check(context, 1, 3);
@@ -49,7 +49,7 @@ OutputVector translate_max(NodeContext& context) {
     return {values, indicies};
 };
 
-OutputVector translate_min(NodeContext& context) {
+OutputVector translate_min(const NodeContext& context) {
     // torch.min (same for torch.max) actually has two interfaces smashed together:
     // torch.min(x, dim, keepdim) and torch.min(x, y)
     num_inputs_check(context, 1, 3);
diff --git a/src/frontends/pytorch/src/op/narrow.cpp b/src/frontends/pytorch/src/op/narrow.cpp
index a212b22503434e..ffae7d2ec55a03 100644
--- a/src/frontends/pytorch/src/op/narrow.cpp
+++ b/src/frontends/pytorch/src/op/narrow.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_narrow(NodeContext& context) {
+OutputVector translate_narrow(const NodeContext& context) {
     num_inputs_check(context, 4, 4);
 
     auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1}));
diff --git a/src/frontends/pytorch/src/op/neg.cpp b/src/frontends/pytorch/src/op/neg.cpp
index e902eb0f21fefb..423118c3b2fc24 100644
--- a/src/frontends/pytorch/src/op/neg.cpp
+++ b/src/frontends/pytorch/src/op/neg.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_neg(NodeContext& context) {
+OutputVector translate_neg(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto x = context.get_input(0);
     auto const_neg_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {-1}));
diff --git a/src/frontends/pytorch/src/op/nms.cpp b/src/frontends/pytorch/src/op/nms.cpp
index 2454d94a78e6a8..86ecb3df73cf87 100644
--- a/src/frontends/pytorch/src/op/nms.cpp
+++ b/src/frontends/pytorch/src/op/nms.cpp
@@ -18,7 +18,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_nms(NodeContext& context) {
+OutputVector translate_nms(const NodeContext& context) {
     num_inputs_check(context, 3, 3);
     auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0}));
     auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1}));
diff --git a/src/frontends/pytorch/src/op/nonzero.cpp b/src/frontends/pytorch/src/op/nonzero.cpp
index 80edef3f079b6b..29a6aa51175008 100644
--- a/src/frontends/pytorch/src/op/nonzero.cpp
+++ b/src/frontends/pytorch/src/op/nonzero.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_nonzero(NodeContext& context) {
+OutputVector translate_nonzero(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto cond = context.get_input(0);
     auto non_zero = context.mark_node(std::make_shared(cond));
diff --git a/src/frontends/pytorch/src/op/norm.cpp b/src/frontends/pytorch/src/op/norm.cpp
index d35c0fec25867f..34a0bdd01c4cfa 100644
--- a/src/frontends/pytorch/src/op/norm.cpp
+++ b/src/frontends/pytorch/src/op/norm.cpp
@@ -20,7 +20,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_norm(NodeContext& context) {
+OutputVector translate_norm(const NodeContext& context) {
     num_inputs_check(context, 4, 4);
     auto input_tensor = context.get_input(0);
     auto p = context.const_input(1);
diff --git a/src/frontends/pytorch/src/op/numel.cpp b/src/frontends/pytorch/src/op/numel.cpp
index 721ed7e173bdc6..a4d2a836c6a2bd 100644
--- a/src/frontends/pytorch/src/op/numel.cpp
+++ b/src/frontends/pytorch/src/op/numel.cpp
@@ -10,7 +10,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_numel(NodeContext& context) {
+OutputVector translate_numel(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     return {numel(context, context.get_input(0))};
 };
diff --git a/src/frontends/pytorch/src/op/pad.cpp b/src/frontends/pytorch/src/op/pad.cpp
index 8a0568ece9cc3d..170544654542e5 100644
--- a/src/frontends/pytorch/src/op/pad.cpp
+++ b/src/frontends/pytorch/src/op/pad.cpp
@@ -22,7 +22,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_pad(NodeContext& context) {
+OutputVector translate_pad(const NodeContext& context) {
     num_inputs_check(context, 2, 4);
     auto data = context.get_input(0);
     auto paddings = context.const_input>(1);
diff --git a/src/frontends/pytorch/src/op/pow.cpp b/src/frontends/pytorch/src/op/pow.cpp
index d418f3385340bf..d3a39694bf3953 100644
--- a/src/frontends/pytorch/src/op/pow.cpp
+++ b/src/frontends/pytorch/src/op/pow.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_pow(NodeContext& context) {
+OutputVector translate_pow(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto lhs = context.get_input(0);
     auto rhs = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/pythonop.cpp b/src/frontends/pytorch/src/op/pythonop.cpp
index 36a4b388738c02..4aa142f04b58ed 100644
--- a/src/frontends/pytorch/src/op/pythonop.cpp
+++ b/src/frontends/pytorch/src/op/pythonop.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_pythonop(NodeContext& context) {
+OutputVector translate_pythonop(const NodeContext& context) {
     auto decoder = context.get_decoder();
     FRONT_END_OP_CONVERSION_CHECK(decoder->get_subgraph_size() == 1,
                                   "PythonOp must have 1 subgraph to be able to translate it to OV.");
diff --git a/src/frontends/pytorch/src/op/reciprocal.cpp b/src/frontends/pytorch/src/op/reciprocal.cpp
index 67c5bdba78b39f..38b12fee06cb18 100644
--- a/src/frontends/pytorch/src/op/reciprocal.cpp
+++ b/src/frontends/pytorch/src/op/reciprocal.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_reciprocal(NodeContext& context) {
+OutputVector translate_reciprocal(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto x = context.get_input(0);
     auto const_neg_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {-1}));
diff --git a/src/frontends/pytorch/src/op/relu6.cpp b/src/frontends/pytorch/src/op/relu6.cpp
index 5dd5906061bc7f..08996811249dcc 100644
--- a/src/frontends/pytorch/src/op/relu6.cpp
+++ b/src/frontends/pytorch/src/op/relu6.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_relu6(NodeContext& context) {
+OutputVector translate_relu6(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto x = context.get_input(0);
     return {context.mark_node(std::make_shared(x, 0., 6.))};
diff --git a/src/frontends/pytorch/src/op/remainder.cpp b/src/frontends/pytorch/src/op/remainder.cpp
index 55d33e00c3f321..622e20eba52fa4 100644
--- a/src/frontends/pytorch/src/op/remainder.cpp
+++ b/src/frontends/pytorch/src/op/remainder.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_remainder(NodeContext& context) {
+OutputVector translate_remainder(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/repeat.cpp b/src/frontends/pytorch/src/op/repeat.cpp
index 574951aaf82c20..15dc03a466ec92 100644
--- a/src/frontends/pytorch/src/op/repeat.cpp
+++ b/src/frontends/pytorch/src/op/repeat.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_repeat(NodeContext& context) {
+OutputVector translate_repeat(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto x = context.get_input(0);
     auto repeats = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/repeat_interleave.cpp b/src/frontends/pytorch/src/op/repeat_interleave.cpp
index 06d8333e04cceb..64971f6e3f28bc 100644
--- a/src/frontends/pytorch/src/op/repeat_interleave.cpp
+++ b/src/frontends/pytorch/src/op/repeat_interleave.cpp
@@ -34,7 +34,7 @@ OutputVector generate_indices_from_repeats_tensor(const NodeContext& context, co
 };
 } // namespace
 
-OutputVector translate_repeat_interleave(NodeContext& context) {
+OutputVector translate_repeat_interleave(const NodeContext& context) {
     num_inputs_check(context, 2, 3);
     // constants
     auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0}));
diff --git a/src/frontends/pytorch/src/op/reshape.cpp b/src/frontends/pytorch/src/op/reshape.cpp
index b0d669e47be63b..c5c33f4f6e61da 100644
--- a/src/frontends/pytorch/src/op/reshape.cpp
+++ b/src/frontends/pytorch/src/op/reshape.cpp
@@ -12,7 +12,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_reshape(NodeContext& context) {
+OutputVector translate_reshape(const NodeContext& context) {
     // Translation is used by both aten::view and aten::reshape.
     // Schema: aten::view(Tensor input, int[] shape) -> Tensor
     // Schema: aten::reshape(Tensor input, int[] shape) -> Tensor
diff --git a/src/frontends/pytorch/src/op/reshape_as.cpp b/src/frontends/pytorch/src/op/reshape_as.cpp
index 63d18ee468f087..1c9be43d4a2d15 100644
--- a/src/frontends/pytorch/src/op/reshape_as.cpp
+++ b/src/frontends/pytorch/src/op/reshape_as.cpp
@@ -12,7 +12,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_reshape_as(NodeContext& context) {
+OutputVector translate_reshape_as(const NodeContext& context) {
     num_inputs_check(context, 2, 2);
     auto input_tensor = context.get_input(0);
     auto shape_tesnor = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/roi_align.cpp b/src/frontends/pytorch/src/op/roi_align.cpp
index d3a389c59654b9..fb2ad3a41d7455 100644
--- a/src/frontends/pytorch/src/op/roi_align.cpp
+++ b/src/frontends/pytorch/src/op/roi_align.cpp
@@ -19,7 +19,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_roi_align(NodeContext& context) {
+OutputVector translate_roi_align(const NodeContext& context) {
     num_inputs_check(context, 7, 7);
     auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1}));
     auto const_neg_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1}));
diff --git a/src/frontends/pytorch/src/op/roll.cpp b/src/frontends/pytorch/src/op/roll.cpp
index b0aef51a6872b9..9f358368fbce8c 100644
--- a/src/frontends/pytorch/src/op/roll.cpp
+++ b/src/frontends/pytorch/src/op/roll.cpp
@@ -17,7 +17,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_roll(NodeContext& context) {
+OutputVector translate_roll(const NodeContext& context) {
     num_inputs_check(context, 3, 3);
     const auto data = context.get_input(0);
     const auto shifts = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/rsqrt.cpp b/src/frontends/pytorch/src/op/rsqrt.cpp
index 9e9ba9330c87ca..d4f56040da27c5 100644
--- a/src/frontends/pytorch/src/op/rsqrt.cpp
+++ b/src/frontends/pytorch/src/op/rsqrt.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_rsqrt(NodeContext& context) {
+OutputVector translate_rsqrt(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto data = context.get_input(0);
     auto input_shape = context.mark_node(std::make_shared(data, element::i32));
diff --git a/src/frontends/pytorch/src/op/rsub.cpp b/src/frontends/pytorch/src/op/rsub.cpp
index 21b109e9037182..200094b6eecede 100644
--- a/src/frontends/pytorch/src/op/rsub.cpp
+++ b/src/frontends/pytorch/src/op/rsub.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_rsub(NodeContext& context) {
+OutputVector translate_rsub(const NodeContext& context) {
     num_inputs_check(context, 3, 3);
     auto self = context.get_input(0);
     auto other = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/select.cpp b/src/frontends/pytorch/src/op/select.cpp
index c6d7cb0048f325..ea5255f2410ffa 100644
--- a/src/frontends/pytorch/src/op/select.cpp
+++ b/src/frontends/pytorch/src/op/select.cpp
@@ -20,7 +20,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_select(NodeContext& context) {
+OutputVector translate_select(const NodeContext& context) {
     num_inputs_check(context, 3, 3);
     auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1}));
     auto const_minus_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1}));
diff --git a/src/frontends/pytorch/src/op/selu.cpp b/src/frontends/pytorch/src/op/selu.cpp
index 9ec08af77facc8..aef54491e74bd1 100644
--- a/src/frontends/pytorch/src/op/selu.cpp
+++ b/src/frontends/pytorch/src/op/selu.cpp
@@ -16,7 +16,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_selu(NodeContext& context) {
+OutputVector translate_selu(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto x = context.get_input(0);
     auto alpha = context.mark_node(v0::Constant::create(element::f64, Shape{}, {1.6732632423543772848170429916717}));
diff --git a/src/frontends/pytorch/src/op/set_item.cpp b/src/frontends/pytorch/src/op/set_item.cpp
index 9ce33fce24e8d2..ef11a2a391c39a 100644
--- a/src/frontends/pytorch/src/op/set_item.cpp
+++ b/src/frontends/pytorch/src/op/set_item.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_set_item(NodeContext& context) {
+OutputVector translate_set_item(const NodeContext& context) {
     // schema: aten::_set_item.t(t[](a!) l, int idx, t(b -> *) el) -> t[](a!)
     // _set_item inserts element in list
     num_inputs_check(context, 3, 3);
diff --git a/src/frontends/pytorch/src/op/size.cpp b/src/frontends/pytorch/src/op/size.cpp
index a4d70cef19ad2c..289facd0fe7f44 100644
--- a/src/frontends/pytorch/src/op/size.cpp
+++ b/src/frontends/pytorch/src/op/size.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_size(NodeContext& context) {
+OutputVector translate_size(const NodeContext& context) {
     num_inputs_check(context, 1, 2);
     auto shape = context.mark_node(std::make_shared(context.get_input(0), element::i32));
     if (context.input_is_none(1)) {
diff --git a/src/frontends/pytorch/src/op/slice.cpp b/src/frontends/pytorch/src/op/slice.cpp
index 756efc1590b796..391b1c834fd535 100644
--- a/src/frontends/pytorch/src/op/slice.cpp
+++ b/src/frontends/pytorch/src/op/slice.cpp
@@ -18,7 +18,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_slice(NodeContext& context) {
+OutputVector translate_slice(const NodeContext& context) {
     // aten::slice.t(t[] l, int? start=None, int? end=None, int step=1) -> (t[])
     // aten::slice.Tensor(Tensor(a) self, int dim=0, int? start=None, int? end=None, int step=1) -> (Tensor(a))
     ov::Output dim;
diff --git a/src/frontends/pytorch/src/op/softmax.cpp b/src/frontends/pytorch/src/op/softmax.cpp
index 1b94a3560ad972..10c3afea7cda0b 100644
--- a/src/frontends/pytorch/src/op/softmax.cpp
+++ b/src/frontends/pytorch/src/op/softmax.cpp
@@ -13,7 +13,7 @@ namespace pytorch {
 namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_softmax(NodeContext& context) {
+OutputVector translate_softmax(const NodeContext& context) {
     num_inputs_check(context, 2, 3);
     auto x = context.get_input(0);
     auto axis = context.const_input(1);
diff --git a/src/frontends/pytorch/src/op/sort.cpp b/src/frontends/pytorch/src/op/sort.cpp
index c0e54d54d9be2a..715a7a52d3efe8 100644
--- a/src/frontends/pytorch/src/op/sort.cpp
+++ b/src/frontends/pytorch/src/op/sort.cpp
@@ -9,7 +9,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_sort(NodeContext& context) {
+OutputVector translate_sort(const NodeContext& context) {
     num_inputs_check(context, 3, 4);
     const auto input_tensor = context.get_input(0);
     bool stable, descending;
@@ -40,7 +40,7 @@ OutputVector translate_sort(NodeContext& context) {
     return topk->outputs();
 };
 
-OutputVector translate_argsort(NodeContext& context) {
+OutputVector translate_argsort(const NodeContext& context) {
     auto sort = translate_sort(context);
     return {sort[1]};
 };
diff --git a/src/frontends/pytorch/src/op/square.cpp b/src/frontends/pytorch/src/op/square.cpp
index 7194aafd9abb36..2310fda75aa574 100644
--- a/src/frontends/pytorch/src/op/square.cpp
+++ b/src/frontends/pytorch/src/op/square.cpp
@@ -14,7 +14,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_square(NodeContext& context) {
+OutputVector translate_square(const NodeContext& context) {
     num_inputs_check(context, 1, 1);
     auto input_0 = context.get_input(0);
     auto const_2 = context.mark_node(v0::Constant::create(input_0.get_element_type(), Shape{1}, {2}));
diff --git a/src/frontends/pytorch/src/op/squeeze.cpp b/src/frontends/pytorch/src/op/squeeze.cpp
index dacf2c55a4d596..fb15801367a564 100644
--- a/src/frontends/pytorch/src/op/squeeze.cpp
+++ b/src/frontends/pytorch/src/op/squeeze.cpp
@@ -12,7 +12,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_squeeze(NodeContext& context) {
+OutputVector translate_squeeze(const NodeContext& context) {
     num_inputs_check(context, 1, 2);
     auto x = context.get_input(0);
     if (context.input_is_none(1)) {
diff --git a/src/frontends/pytorch/src/op/sub.cpp b/src/frontends/pytorch/src/op/sub.cpp
index fd449c12bbd2d3..94963ed9bdb61f 100644
--- a/src/frontends/pytorch/src/op/sub.cpp
+++ b/src/frontends/pytorch/src/op/sub.cpp
@@ -15,7 +15,7 @@ namespace op {
 
 using namespace ov::op;
 
-OutputVector translate_sub(NodeContext& context) {
+OutputVector translate_sub(const NodeContext& context) {
     num_inputs_check(context, 2, 3);
     auto x = context.get_input(0);
     auto y = context.get_input(1);
diff --git a/src/frontends/pytorch/src/op/sum.cpp b/src/frontends/pytorch/src/op/sum.cpp
index 3dc4601b1083a9..7a87dc0c507f6c 100644
--- a/src/frontends/pytorch/src/op/sum.cpp
+++ b/src/frontends/pytorch/src/op/sum.cpp
@@ -11,7 +11,7 @@ namespace frontend {
 namespace pytorch {
 namespace op {
 
-OutputVector translate_sum(NodeContext& context) {
+OutputVector translate_sum(const NodeContext& context) {
     num_inputs_check(context, 1, 3);
     bool keep_dims = false;
     ov::Output axes;
diff --git a/src/frontends/pytorch/src/op/to.cpp b/src/frontends/pytorch/src/op/to.cpp
index 6e5b0ebda639c4..2499b8346f5f02 100644
--- a/src/frontends/pytorch/src/op/to.cpp +++ b/src/frontends/pytorch/src/op/to.cpp @@ -16,7 +16,7 @@ namespace op { using namespace ov::op; -OutputVector translate_to(NodeContext& context) { +OutputVector translate_to(const NodeContext& context) { int dtype_idx; int memory_format_idx; if (context.get_input_size() == 5) { diff --git a/src/frontends/pytorch/src/op/topk.cpp b/src/frontends/pytorch/src/op/topk.cpp index 26addb856c6445..06916c4ea03e2f 100644 --- a/src/frontends/pytorch/src/op/topk.cpp +++ b/src/frontends/pytorch/src/op/topk.cpp @@ -15,7 +15,7 @@ namespace op { using namespace ov::op; -OutputVector translate_topk(NodeContext& context) { +OutputVector translate_topk(const NodeContext& context) { num_inputs_check(context, 5, 5); const auto input_tensor = context.get_input(0); const auto largest = context.const_input(3); diff --git a/src/frontends/pytorch/src/op/transpose.cpp b/src/frontends/pytorch/src/op/transpose.cpp index 60fee576613374..9a6cddb3ffb896 100644 --- a/src/frontends/pytorch/src/op/transpose.cpp +++ b/src/frontends/pytorch/src/op/transpose.cpp @@ -20,7 +20,7 @@ namespace op { using namespace ov::op; -OutputVector translate_transpose(NodeContext& context) { +OutputVector translate_transpose(const NodeContext& context) { num_inputs_check(context, 3, 3); auto dim0 = context.const_input(1); auto dim1 = context.const_input(2); diff --git a/src/frontends/pytorch/src/op/trilu.cpp b/src/frontends/pytorch/src/op/trilu.cpp index 1726cf2f895956..1ef4d50fd6affa 100644 --- a/src/frontends/pytorch/src/op/trilu.cpp +++ b/src/frontends/pytorch/src/op/trilu.cpp @@ -60,11 +60,11 @@ OutputVector translate_base_triu_tril(const NodeContext& context, bool upper) { } }; // namespace -OutputVector translate_triu(NodeContext& context) { +OutputVector translate_triu(const NodeContext& context) { return translate_base_triu_tril(context, true); }; -OutputVector translate_tril(NodeContext& context) { +OutputVector translate_tril(const NodeContext& context) { return translate_base_triu_tril(context, false); }; diff --git a/src/frontends/pytorch/src/op/unfold.cpp b/src/frontends/pytorch/src/op/unfold.cpp index 949f7991391b7e..e7aa129b2935cc 100644 --- a/src/frontends/pytorch/src/op/unfold.cpp +++ b/src/frontends/pytorch/src/op/unfold.cpp @@ -13,7 +13,7 @@ namespace frontend { namespace pytorch { namespace op { -OutputVector translate_unfold(NodeContext& context) { +OutputVector translate_unfold(const NodeContext& context) { num_inputs_check(context, 4, 4); // constants auto const_0 = context.mark_node(Constant::create(element::i32, Shape{}, {0})); diff --git a/src/frontends/pytorch/src/op/upsample.cpp b/src/frontends/pytorch/src/op/upsample.cpp index 111a07a28c70e9..484387b8f3931d 100644 --- a/src/frontends/pytorch/src/op/upsample.cpp +++ b/src/frontends/pytorch/src/op/upsample.cpp @@ -69,32 +69,32 @@ OutputVector base_translate_upsample(const NodeContext& context, }; } // namespace -OutputVector translate_upsample_linear1d(NodeContext& context) { +OutputVector translate_upsample_linear1d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX, 1); }; -OutputVector translate_upsample_bilinear2d(NodeContext& context) { +OutputVector translate_upsample_bilinear2d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX, 2); }; -OutputVector translate_upsample_trilinear3d(NodeContext& context) { +OutputVector translate_upsample_trilinear3d(const NodeContext& context) { return 
base_translate_upsample(context, v4::Interpolate::InterpolateMode::LINEAR_ONNX, 3); }; -OutputVector translate_upsample_nearest1d(NodeContext& context) { +OutputVector translate_upsample_nearest1d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::NEAREST, 1); }; -OutputVector translate_upsample_nearest2d(NodeContext& context) { +OutputVector translate_upsample_nearest2d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::NEAREST, 2); }; -OutputVector translate_upsample_nearest3d(NodeContext& context) { +OutputVector translate_upsample_nearest3d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::NEAREST, 3); }; // bicubic is only supported for 2d in pytorch -OutputVector translate_upsample_bicubic2d(NodeContext& context) { +OutputVector translate_upsample_bicubic2d(const NodeContext& context) { return base_translate_upsample(context, v4::Interpolate::InterpolateMode::CUBIC, 2); }; diff --git a/src/frontends/pytorch/src/op/var_mean.cpp b/src/frontends/pytorch/src/op/var_mean.cpp index 936038fecdcc2c..f021161722cd39 100644 --- a/src/frontends/pytorch/src/op/var_mean.cpp +++ b/src/frontends/pytorch/src/op/var_mean.cpp @@ -20,7 +20,7 @@ namespace op { using namespace ov::op; -OutputVector translate_var_mean(NodeContext& context) { +OutputVector translate_var_mean(const NodeContext& context) { num_inputs_check(context, 1, 4); auto data = context.get_input(0); bool unbiased = true; @@ -75,7 +75,7 @@ OutputVector translate_var_mean(NodeContext& context) { return {var, mean}; }; -OutputVector translate_var(NodeContext& context) { +OutputVector translate_var(const NodeContext& context) { auto res = translate_var_mean(context); return {res[0]}; } diff --git a/src/frontends/pytorch/src/op/where.cpp b/src/frontends/pytorch/src/op/where.cpp index 454d23938a2c0c..4a9de9f69edab8 100644 --- a/src/frontends/pytorch/src/op/where.cpp +++ b/src/frontends/pytorch/src/op/where.cpp @@ -14,7 +14,7 @@ namespace op { using namespace ov::op; -OutputVector translate_where(NodeContext& context) { +OutputVector translate_where(const NodeContext& context) { num_inputs_check(context, 1, 3); auto cond = context.get_input(0); FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(1), "aten::where(cond) unsupported"); diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index b3e54233f50feb..c42024fa36f4df 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -12,7 +12,7 @@ namespace frontend { namespace pytorch { namespace op { -#define OP_CONVERTER(op) OutputVector op(NodeContext& node) +#define OP_CONVERTER(op) OutputVector op(const NodeContext& node) OP_CONVERTER(translate_adaptive_avg_pool3d); OP_CONVERTER(translate_adaptive_max_pool2d); @@ -130,7 +130,7 @@ OP_CONVERTER(translate_zeros_like); } // namespace op -const std::map get_supported_ops() { +const std::map get_supported_ops() { return { {"aten::__and__", op::translate_1to1_match_2_inputs}, // TODO: cover numerical cases {"aten::__getitem__", op::translate_getitem}, diff --git a/src/frontends/pytorch/src/op_table.hpp b/src/frontends/pytorch/src/op_table.hpp index 7a67c9101578b0..e15a988e98175b 100644 --- a/src/frontends/pytorch/src/op_table.hpp +++ b/src/frontends/pytorch/src/op_table.hpp @@ -10,7 +10,7 @@ namespace ov { namespace frontend { namespace pytorch { -const std::map get_supported_ops(); +const std::map 
get_supported_ops(); } // namespace pytorch } // namespace frontend diff --git a/src/frontends/pytorch/src/translate_session.cpp b/src/frontends/pytorch/src/translate_session.cpp index 025a325eebf939..376b466c0a268c 100644 --- a/src/frontends/pytorch/src/translate_session.cpp +++ b/src/frontends/pytorch/src/translate_session.cpp @@ -20,7 +20,7 @@ namespace pytorch { using namespace ov::op; TranslateSession::TranslateSession(const ov::frontend::InputModel::Ptr& input_model, - const std::map& translator_map) + const std::map& translator_map) : m_input_model(input_model), m_translator_map(translator_map), m_ov_model(nullptr) {} @@ -45,9 +45,9 @@ std::shared_ptr TranslateSession::convert_pytorch_model( const std::unordered_map& external_descriptors) { std::shared_ptr resulting_model; // define here to make a conversion in a nested scope { - ParameterVector parameters; - TensorMap tensor_map; // tensor map of the current context - std::set mutated_tensors; + auto parameters = std::make_shared(); + auto tensor_map = std::make_shared(); // tensor map of the current context + auto mutated_tensors = std::make_shared>(); // Go over all pytorch_model inputs and register them in the tensor map: auto inputs = pytorch_model->inputs(); @@ -74,7 +74,7 @@ std::shared_ptr TranslateSession::convert_pytorch_model( if (!input_node) { auto parameter = std::make_shared(type, pshape); encode_tensor_name(parameter->output(0), inputs.at(i), pytorch_model->get_input_debug_name(i)); - parameters.push_back(parameter); + parameters->push_back(parameter); input_node = parameter; auto order = pytorch_model->get_input_transpose_order(i); if (order.size() > 0 && !std::is_sorted(order.begin(), order.end())) { @@ -91,7 +91,7 @@ std::shared_ptr TranslateSession::convert_pytorch_model( input_node = transpose; } } - tensor_map[inputs.at(i)] = input_node; + (*tensor_map)[inputs.at(i)] = input_node; } auto node_visitor = [&](std::shared_ptr node) { @@ -102,7 +102,7 @@ std::shared_ptr TranslateSession::convert_pytorch_model( auto raw_inputs = node->inputs(); for (size_t i = 0; i < raw_inputs.size(); ++i) { auto input = raw_inputs.at(i); - if (tensor_map.find(input) == tensor_map.end()) { + if (tensor_map->find(input) == tensor_map->end()) { // Input refers value in the outer scope, need to create a new Parameter in the current scope // Linkage to external scope will be performed on the level of the parent operation (if or loop) // TODO: Eliminate duplication with the main code for Parameters creation @@ -111,18 +111,15 @@ std::shared_ptr TranslateSession::convert_pytorch_model( // TODO: Use special API to set custom type specification auto parameter = std::make_shared(element::dynamic, ps); // TODO: Missing get_input_transpose_order handling for not trivial layouts - tensor_map[input] = parameter; + (*tensor_map)[input] = parameter; // set name of parameter to the index of node in the model encode_tensor_name(parameter->output(0), input); - parameters.push_back(parameter); + parameters->push_back(parameter); } } - auto context = NodeContext(node, &tensor_map, ¶meters, external_tensor_map, this); + auto context = NodeContext(node, external_tensor_map, tensor_map, parameters, mutated_tensors, this); auto converted_outputs = convert_node(context); - auto mutated_t = context.get_mutated_tensors(); - mutated_tensors.insert(mutated_t.begin(), mutated_t.end()); - auto fw_outputs = node->outputs(); // Ops with subgraphs or with mutated inputs may have more outputs after conversion compared to pytorch ones 
FRONT_END_OP_CONVERSION_CHECK(fw_outputs.size() <= converted_outputs.size(), @@ -134,10 +131,10 @@ std::shared_ptr TranslateSession::convert_pytorch_model( // FIXME: Now it is not true for at least prim::Constant for (size_t i = 0; i < fw_outputs.size(); ++i) { size_t fw_tensor_id = node->output(i); - FRONT_END_GENERAL_CHECK(tensor_map.find(fw_tensor_id) == tensor_map.end(), + FRONT_END_GENERAL_CHECK(tensor_map->find(fw_tensor_id) == tensor_map->end(), "Duplicated producer for PT value with unique ID: ", fw_tensor_id); - tensor_map[fw_tensor_id] = converted_outputs[i]; + (*tensor_map)[fw_tensor_id] = converted_outputs[i]; encode_tensor_name(converted_outputs[i], fw_tensor_id, node->get_output_debug_name(i)); } }; @@ -148,14 +145,14 @@ std::shared_ptr TranslateSession::convert_pytorch_model( ResultVector results; for (size_t i = 0; i < pytorch_model->num_of_outputs(); ++i) { size_t id = pytorch_model->output(i); - if (tensor_map.find(id) == tensor_map.end()) { + if (tensor_map->find(id) == tensor_map->end()) { // Not found in this scope, adding Parameter to connect to external scope auto parameter = std::make_shared(element::dynamic, PartialShape::dynamic()); encode_tensor_name(parameter->output(0), id); - parameters.push_back(parameter); - tensor_map[id] = parameter; + parameters->push_back(parameter); + (*tensor_map)[id] = parameter; } - auto ov_output = tensor_map[id]; + auto ov_output = tensor_map->at(id); auto order = pytorch_model->get_output_transpose_order(i); FRONT_END_GENERAL_CHECK(order.size() == 0 || std::is_sorted(order.begin(), order.end()), "Output strides have wrong order."); @@ -168,32 +165,32 @@ std::shared_ptr TranslateSession::convert_pytorch_model( // Since parameters can be added we need to list all current parameters std::set param_names; - for (const auto& param : parameters) { + for (const auto& param : *parameters) { auto input_idx = decode_tensor_name(param->output(0)); param_names.insert(input_idx); } - for (const auto& tensor_id : mutated_tensors) { + for (const auto& tensor_id : *mutated_tensors) { if (param_names.count(tensor_id)) { - FRONT_END_GENERAL_CHECK(tensor_map.count(tensor_id), + FRONT_END_GENERAL_CHECK(tensor_map->count(tensor_id), "Tensor with id: ", tensor_id, " doesn't exist in tensor map."); // model input was mutated we need to make a result for it - auto mutated_tensor = tensor_map.at(tensor_id); + auto mutated_tensor = tensor_map->at(tensor_id); // empty external_tensor_map means this is main body of the model and we don't want to create // additional outputs in that case. 
if (mutated_tensor.get_target_inputs().empty() && !external_tensor_map.empty()) - results.push_back(std::make_shared(tensor_map.at(tensor_id))); + results.push_back(std::make_shared(tensor_map->at(tensor_id))); } } - resulting_model = std::make_shared(results, parameters); + resulting_model = std::make_shared(results, *parameters); // Did a conversion in a nested scope to automatically remove any holders of nodes except those in the graph } return resulting_model; } -OutputVector TranslateSession::convert_node(NodeContext& context) { +OutputVector TranslateSession::convert_node(const NodeContext& context) { try { auto it = m_translator_map.find(context.get_op_type()); if (it != m_translator_map.end()) { diff --git a/src/frontends/pytorch/src/translate_session.hpp b/src/frontends/pytorch/src/translate_session.hpp index e33ea31c63091d..4931c274984485 100644 --- a/src/frontends/pytorch/src/translate_session.hpp +++ b/src/frontends/pytorch/src/translate_session.hpp @@ -17,7 +17,7 @@ namespace pytorch { class TranslateSession { public: TranslateSession(const frontend::InputModel::Ptr& input_model, - const std::map& translator_map); + const std::map& translator_map); std::shared_ptr get_converted_model(); std::shared_ptr translate_graph(const frontend::InputModel::Ptr& input_model); @@ -38,10 +38,10 @@ class TranslateSession { size_t m_friendly_name_counter = 0; private: - OutputVector convert_node(NodeContext& context); + OutputVector convert_node(const NodeContext& context); const frontend::InputModel::Ptr m_input_model; - const std::map& m_translator_map; + const std::map& m_translator_map; std::shared_ptr m_ov_model; std::map>> m_counter_map; diff --git a/src/frontends/pytorch/src/utils.cpp b/src/frontends/pytorch/src/utils.cpp index dd7d1dad5db255..bdae3e9e75e397 100644 --- a/src/frontends/pytorch/src/utils.cpp +++ b/src/frontends/pytorch/src/utils.cpp @@ -177,7 +177,7 @@ std::shared_ptr concat_list_construct(std::shared_ptr input) { return input; } -OutputVector make_framework_node(NodeContext& context) { +OutputVector make_framework_node(const NodeContext& context) { auto schema = context.get_schema(); // TODO: properly process schema to get the actual position of mutable input // Hack. Can indicate mutable inputs, but can it be reliable? 
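Taken together, the changes above make every conversion rule take a const `NodeContext`, with shared state (tensor map, parameters, mutated tensors) held behind shared_ptr members. For reference, a minimal rule in the new style looks roughly like the sketch below; it is illustrative only — `translate_example` is a made-up name, while `num_inputs_check`, `get_input` and `mark_node` are the helpers shown in the diffs above, and the sketch assumes it lives inside the ov::frontend::pytorch::op namespace:

    #include <memory>

    #include "openvino/op/relu.hpp"
    #include "utils.hpp"

    // A conversion rule in the const-correct style: the rule only reads from
    // the context; bookkeeping such as the tensor map and the mutated-tensor
    // set is updated through the shared_ptr members NodeContext now owns.
    OutputVector translate_example(const NodeContext& context) {
        num_inputs_check(context, 1, 1);  // validate the expected arity
        auto x = context.get_input(0);    // read-only access to inputs
        return {context.mark_node(std::make_shared<ov::op::v0::Relu>(x))};
    }
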
diff --git a/src/frontends/pytorch/src/utils.hpp b/src/frontends/pytorch/src/utils.hpp index 07235a2152d8b0..029b349c77bac2 100644 --- a/src/frontends/pytorch/src/utils.hpp +++ b/src/frontends/pytorch/src/utils.hpp @@ -48,7 +48,7 @@ op::PadType convert_pad(const std::string& pt_pad); std::shared_ptr concat_list_construct(std::shared_ptr input); -OutputVector make_framework_node(NodeContext& context); +OutputVector make_framework_node(const NodeContext& context); std::shared_ptr cast_fw_node(std::shared_ptr node, const std::string& type); @@ -63,8 +63,8 @@ void align_eltwise_input_types(const NodeContext& context, std::deque> get_list_as_outputs(const Output& start); namespace op { -template -OutputVector inplace_op(NodeContext& context) { +template +OutputVector inplace_op(const NodeContext& context) { auto translation_res = T(context); FRONT_END_OP_CONVERSION_CHECK(translation_res.size() == 1, "inplace_op function must be used on single output translators"); @@ -73,21 +73,21 @@ OutputVector inplace_op(NodeContext& context) { } template -OutputVector translate_1to1_match_1_inputs(NodeContext& context) { +OutputVector translate_1to1_match_1_inputs(const NodeContext& context) { num_inputs_check(context, 1, 1); FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0), "Input should not be None."); return {context.mark_node(std::make_shared(context.get_input(0)))}; } template -OutputVector translate_1to1_match_2_inputs(NodeContext& context) { +OutputVector translate_1to1_match_2_inputs(const NodeContext& context) { num_inputs_check(context, 2, 2); FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0) && !context.input_is_none(1), "Inputs should not be None."); return {context.mark_node(std::make_shared(context.get_input(0), context.get_input(1)))}; } template -OutputVector translate_1to1_match_2_inputs_align_types(NodeContext& context) { +OutputVector translate_1to1_match_2_inputs_align_types(const NodeContext& context) { num_inputs_check(context, 2, 2); FRONT_END_OP_CONVERSION_CHECK(!context.input_is_none(0) && !context.input_is_none(1), "Inputs should not be None."); auto lhs = context.get_input(0); @@ -96,11 +96,11 @@ OutputVector translate_1to1_match_2_inputs_align_types(NodeContext& context) { return {context.mark_node(std::make_shared(lhs, rhs))}; } -inline OutputVector return_false_scalar(NodeContext& context) { +inline OutputVector return_false_scalar(const NodeContext& context) { return {context.mark_node(ov::op::v0::Constant::create(element::boolean, Shape{}, {false}))}; } -inline OutputVector skip_node(NodeContext& context) { +inline OutputVector skip_node(const NodeContext& context) { return {context.get_input(0).get_node_shared_ptr()}; } From 98237b06b5b61a4df0b4ec2789a3cbb8804674fc Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Tue, 21 Mar 2023 08:52:52 +0400 Subject: [PATCH 002/296] [GPU] Update memory_statistics property impl (#16399) --- .../include/intel_gpu/plugin/plugin.hpp | 6 +--- src/plugins/intel_gpu/src/plugin/plugin.cpp | 29 ++----------------- src/plugins/intel_gpu/src/runtime/engine.cpp | 1 + 3 files changed, 4 insertions(+), 32 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp index 36fad099a90320..9fad8f53b6a164 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp @@ -23,17 +23,13 @@ class Plugin : public InferenceEngine::IInferencePlugin { // key: device_id, value: cldnn device 
std::map device_map; std::map m_configs_map; - // key: cldnn context, value: memory statistics - mutable std::map> statistics_map; - mutable std::mutex engine_mutex; - mutable std::map m_default_contexts; + std::map m_default_contexts; InferenceEngine::CNNNetwork clone_and_transform_model(const InferenceEngine::CNNNetwork& network, const ExecutionConfig& config) const; void transform_model(std::shared_ptr& model, const ExecutionConfig& config) const; void register_primitives(); - void update_memory_statistics(const RemoteContextImpl::Ptr& context) const; std::string get_device_id_from_config(const std::map& config) const; std::string get_device_id(const std::map& config) const; RemoteCLContext::Ptr get_default_context(const std::string& device_id) const; diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index ac366d192aa361..e68a9094f221dc 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -175,18 +175,6 @@ auto check_inputs = [](InferenceEngine::InputsDataMap _networkInputs) { } }; -void Plugin::update_memory_statistics(const RemoteContextImpl::Ptr& context) const { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::update_memory_statistics"); - { - std::lock_guard lock(engine_mutex); - - // if the same context exists, the statistics is replaced with the latest one - // (currently, memory usage is accumulated for several networks in the same context) - // if it does not exist, a new statistics is added - statistics_map[context] = context->get_engine().get_memory_statistics(); - } -} - IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std::map &orig_config) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl"); @@ -208,7 +196,6 @@ IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::LoadExeNetworkImpl::CreateExeNetwork"); CompiledModel::Ptr exeNetwork = std::make_shared(transformedNetwork, context, config); - update_memory_statistics(context->get_impl()); return exeNetwork; } } @@ -542,7 +529,6 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Plugin::ImportNetwork(std::istr exeNetwork->setNetworkOutputs(outputs); exeNetwork->setInputs(new_params); exeNetwork->setOutputs(new_results); - update_memory_statistics(context->get_impl()); return exeNetwork; } } @@ -672,19 +658,8 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map statistics; - for (auto const &item : statistics_map) { - // Before collecting memory statistics of each context, it's updated with the latest memory statistics from engine. 
- update_memory_statistics(item.first); - for (auto const &kv : item.second) { - if (!statistics.count(kv.first)) { - statistics[kv.first] = kv.second; - } else { - statistics[kv.first] += kv.second; - } - } - } - return decltype(ov::intel_gpu::memory_statistics)::value_type {statistics}; + const auto& ctx = get_default_context(device_id)->get_impl(); + return decltype(ov::intel_gpu::memory_statistics)::value_type {ctx->get_engine().get_memory_statistics()}; } else if (name == METRIC_KEY(MAX_BATCH_SIZE) || name == ov::max_batch_size) { return decltype(ov::max_batch_size)::value_type {static_cast(get_max_batch_size(options))}; diff --git a/src/plugins/intel_gpu/src/runtime/engine.cpp b/src/plugins/intel_gpu/src/runtime/engine.cpp index ad5cc79cca3a26..9cabc43e876257 100644 --- a/src/plugins/intel_gpu/src/runtime/engine.cpp +++ b/src/plugins/intel_gpu/src/runtime/engine.cpp @@ -215,6 +215,7 @@ uint64_t engine::get_used_device_memory(allocation_type type) const { } std::map engine::get_memory_statistics() const { + std::lock_guard guard(_mutex); std::map statistics; for (auto const& m : _memory_usage_map) { std::ostringstream oss; From 5cb20f8858c60c016f404627a3768b8a387a709e Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 21 Mar 2023 08:54:48 +0400 Subject: [PATCH 003/296] [TF FE] Refactor StridedSlice translator and add layer test to precommit (#16376) Signed-off-by: Kazantsev, Roman --- .../src/op/strided_slice.cpp | 57 ++++++---- .../tensorflow_tests/test_tf_StridedSlice.py | 100 ++++++++++-------- 2 files changed, 88 insertions(+), 69 deletions(-) diff --git a/src/frontends/tensorflow_common/src/op/strided_slice.cpp b/src/frontends/tensorflow_common/src/op/strided_slice.cpp index e0e7e6761bd8f9..e71097557e7e44 100644 --- a/src/frontends/tensorflow_common/src/op/strided_slice.cpp +++ b/src/frontends/tensorflow_common/src/op/strided_slice.cpp @@ -16,42 +16,55 @@ namespace tensorflow { namespace op { OutputVector translate_strided_slice_op(const NodeContext& node) { + default_op_checks(node, 4, {"StridedSlice", "STRIDED_SLICE"}); auto input = node.get_input(0); auto begin = node.get_input(1); auto end = node.get_input(2); auto strides = node.get_input(3); - auto begin_mask = node.get_attribute("begin_mask", 0); - auto end_mask = node.get_attribute("end_mask", 0); - auto new_axis_mask = node.get_attribute("new_axis_mask", 0); - auto ellipsis_mask = node.get_attribute("ellipsis_mask", 0); - auto shrink_axis_mask = node.get_attribute("shrink_axis_mask", 0); - auto mask_to_vector = [](int64_t mask) { - size_t length = sizeof(mask) * CHAR_BIT; - vector vec(length, 0); if (mask == 0) { - return vec; + return vector{}; } - for (size_t i = 0; i < length; ++i) { - if (static_cast(mask >> i & 0x1) == 1) { + size_t max_length = sizeof(mask) * CHAR_BIT; + vector vec{}; + for (size_t i = 0; i < max_length; ++i) { + if ((mask >> i & 0x1) == 1) { + // resize the vector by appending with required number of zeros + vec.resize(i + 1, 0); vec[i] = 1; } } return vec; }; - auto res = make_shared(input, - begin, - end, - strides, - mask_to_vector(begin_mask), - mask_to_vector(end_mask), - mask_to_vector(new_axis_mask), - mask_to_vector(shrink_axis_mask), - mask_to_vector(ellipsis_mask)); - set_node_name(node.get_name(), res); - return res->outputs(); + // retrieve attributes for StridedSlice operation + auto begin_mask = mask_to_vector(node.get_attribute("begin_mask", 0)); + auto end_mask = mask_to_vector(node.get_attribute("end_mask", 0)); + auto new_axis_mask = 
mask_to_vector(node.get_attribute("new_axis_mask", 0)); + auto ellipsis_mask = mask_to_vector(node.get_attribute("ellipsis_mask", 0)); + auto shrink_axis_mask = mask_to_vector(node.get_attribute("shrink_axis_mask", 0)); + + // the masks can be of different length and we need to align them by the maximum length + size_t max_length = std::max( + {begin_mask.size(), end_mask.size(), new_axis_mask.size(), ellipsis_mask.size(), shrink_axis_mask.size()}); + begin_mask.resize(max_length, 0); + end_mask.resize(max_length, 0); + new_axis_mask.resize(max_length, 0); + ellipsis_mask.resize(max_length, 0); + shrink_axis_mask.resize(max_length, 0); + + auto strided_slice = make_shared(input, + begin, + end, + strides, + begin_mask, + end_mask, + new_axis_mask, + shrink_axis_mask, + ellipsis_mask); + set_node_name(node.get_name(), strided_slice); + return {strided_slice}; } } // namespace op diff --git a/tests/layer_tests/tensorflow_tests/test_tf_StridedSlice.py b/tests/layer_tests/tensorflow_tests/test_tf_StridedSlice.py index dac01d739186e0..eb8afc65019cda 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_StridedSlice.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_StridedSlice.py @@ -7,61 +7,72 @@ class TestStridedSlice(CommonTFLayerTest): - - @staticmethod - def create_strided_slice_net(input_shape, begin, end, strides, begin_mask, end_mask, + def create_strided_slice_net(self, input_shape, begin_value, end_value, strides_value, begin_mask, end_mask, ellipsis_mask, - new_axis_mask, shrink_axis_mask, ir_version, use_new_frontend): - + new_axis_mask, shrink_axis_mask): import tensorflow as tf - tf.compat.v1.reset_default_graph() with tf.compat.v1.Session() as sess: - input_node = tf.compat.v1.placeholder(tf.float32, input_shape, 'Input') - strided_slice = tf.compat.v1.strided_slice(input_node, begin=begin, end=end, - strides=strides, - begin_mask=begin_mask, end_mask=end_mask, - ellipsis_mask=ellipsis_mask, - new_axis_mask=new_axis_mask, - shrink_axis_mask=shrink_axis_mask) + input = tf.compat.v1.placeholder(tf.float32, input_shape, 'Input') + begin = tf.constant(begin_value, dtype=tf.int32) + end = tf.constant(end_value, dtype=tf.int32) + strides = tf.constant(strides_value, dtype=tf.int32) + tf.raw_ops.StridedSlice(input=input, begin=begin, end=end, strides=strides, begin_mask=begin_mask, + end_mask=end_mask, ellipsis_mask=ellipsis_mask, new_axis_mask=new_axis_mask, + shrink_axis_mask=shrink_axis_mask) tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def - ref_net = None - return tf_net, ref_net + return tf_net, None + + test_basic_data = [ + dict(input_shape=[2, 5, 4, 3], begin_value=[1, 0, 2, 0], end_value=[2, 5, 4, 2], strides_value=[1, 2, 1, 1], + begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=1), + dict(input_shape=[1, 5, 5, 3], begin_value=[0, 0, 0, 0], end_value=[1, 5, 5, 3], strides_value=[1, 2, 3, 1], + begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=8, shrink_axis_mask=0), + dict(input_shape=[3, 4, 5, 7], begin_value=[2, 0, 3], end_value=[3, 0, 6], strides_value=[1, 1, 1], + begin_mask=6, end_mask=6, ellipsis_mask=2, new_axis_mask=0, shrink_axis_mask=1), + ] + + @pytest.mark.parametrize('params', test_basic_data) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_strided_slice_basic(self, params, ie_device, precision, ir_version, + temp_dir, use_new_frontend, use_old_api): + self._test(*self.create_strided_slice_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + 
use_new_frontend=use_new_frontend, use_old_api=use_old_api) test_squeeze_data = [ - dict(input_shape=[1, 5], begin=[0, 0], end=[1, 5], strides=[1, 1], begin_mask=0, + dict(input_shape=[1, 5], begin_value=[0, 0], end_value=[1, 5], strides_value=[1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=1), - dict(input_shape=[5, 1], begin=[0, 0], end=[5, 1], strides=[1, 1], begin_mask=0, + dict(input_shape=[5, 1], begin_value=[0, 0], end_value=[5, 1], strides_value=[1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=2), - dict(input_shape=[1, 5, 3], begin=[0, 0, 0], end=[1, 5, 3], strides=[1, 1, 1], begin_mask=0, + dict(input_shape=[1, 5, 3], begin_value=[0, 0, 0], end_value=[1, 5, 3], strides_value=[1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=1), - dict(input_shape=[1, 1, 3], begin=[0, 0, 0], end=[1, 1, 3], strides=[1, 1, 1], begin_mask=0, + dict(input_shape=[1, 1, 3], begin_value=[0, 0, 0], end_value=[1, 1, 3], strides_value=[1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=2), - dict(input_shape=[1, 5, 1], begin=[0, 0, 0], end=[1, 5, 1], strides=[1, 1, 1], begin_mask=0, + dict(input_shape=[1, 5, 1], begin_value=[0, 0, 0], end_value=[1, 5, 1], strides_value=[1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=4), - pytest.param(dict(input_shape=[1, 5, 5, 3], begin=[0, 0, 0, 0], end=[1, 5, 5, 3], strides=[1, 1, 1, 1], - begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=1), - marks=pytest.mark.precommit_tf_fe), - dict(input_shape=[1, 1, 5, 3], begin=[0, 0, 0, 0], end=[1, 1, 5, 3], strides=[1, 1, 1, 1], + dict(input_shape=[1, 1, 5, 3], begin_value=[0, 0, 0, 0], end_value=[1, 1, 5, 3], strides_value=[1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=2), - dict(input_shape=[1, 5, 1, 3], begin=[0, 0, 0, 0], end=[1, 5, 1, 3], strides=[1, 1, 1, 1], + dict(input_shape=[1, 5, 1, 3], begin_value=[0, 0, 0, 0], end_value=[1, 5, 1, 3], strides_value=[1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=4), - dict(input_shape=[1, 5, 5, 1], begin=[0, 0, 0, 0], end=[1, 5, 1, 1], strides=[1, 1, 1, 1], + dict(input_shape=[1, 5, 5, 1], begin_value=[0, 0, 0, 0], end_value=[1, 5, 1, 1], strides_value=[1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=8), - dict(input_shape=[1, 1, 5, 5, 3], begin=[0, 0, 0, 0, 0], end=[1, 1, 5, 5, 3], - strides=[1, 1, 1, 1, 1], + dict(input_shape=[1, 1, 5, 5, 3], begin_value=[0, 0, 0, 0, 0], end_value=[1, 1, 5, 5, 3], + strides_value=[1, 1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=3), - dict(input_shape=[1, 5, 1, 5, 3], begin=[0, 0, 0, 0, 0], end=[1, 5, 1, 5, 3], - strides=[1, 1, 1, 1, 1], + dict(input_shape=[1, 5, 1, 5, 3], begin_value=[0, 0, 0, 0, 0], end_value=[1, 5, 1, 5, 3], + strides_value=[1, 1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=5), - dict(input_shape=[1, 5, 1, 5, 1], begin=[0, 0, 0, 0, 0], end=[1, 5, 1, 5, 1], - strides=[1, 1, 1, 1, 1], + dict(input_shape=[1, 5, 1, 5, 1], begin_value=[0, 0, 0, 0, 0], end_value=[1, 5, 1, 5, 1], + strides_value=[1, 1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=21), ] @@ -69,32 +80,28 @@ def create_strided_slice_net(input_shape, begin, end, strides, begin_mask, end_m @pytest.mark.nightly 
def test_strided_slice_replace_with_squeeze(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): - self._test(*self.create_strided_slice_net(**params, ir_version=ir_version, - use_new_frontend=use_new_frontend), + self._test(*self.create_strided_slice_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, use_new_frontend=use_new_frontend, use_old_api=use_old_api) test_unsqueeze_data = [ - dict(input_shape=[1, 5], begin=[0, 0], end=[1, 5], strides=[1, 1], begin_mask=0, + dict(input_shape=[1, 5], begin_value=[0, 0], end_value=[1, 5], strides_value=[1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=1, shrink_axis_mask=0), - dict(input_shape=[1, 5], begin=[0, 0], end=[1, 5], strides=[1, 1], begin_mask=0, + dict(input_shape=[1, 5], begin_value=[0, 0], end_value=[1, 5], strides_value=[1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=3, shrink_axis_mask=0), - dict(input_shape=[1, 5, 3], begin=[0, 0, 0], end=[1, 5, 3], strides=[1, 1, 1], begin_mask=0, + dict(input_shape=[1, 5, 3], begin_value=[0, 0, 0], end_value=[1, 5, 3], strides_value=[1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=3, shrink_axis_mask=0), - dict(input_shape=[1, 5, 3], begin=[0, 0, 0], end=[1, 5, 3], strides=[1, 1, 1], begin_mask=0, + dict(input_shape=[1, 5, 3], begin_value=[0, 0, 0], end_value=[1, 5, 3], strides_value=[1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=4, shrink_axis_mask=0), - dict(input_shape=[1, 5, 3], begin=[0, 0, 0], end=[1, 5, 3], strides=[1, 1, 1], begin_mask=0, + dict(input_shape=[1, 5, 3], begin_value=[0, 0, 0], end_value=[1, 5, 3], strides_value=[1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=5, shrink_axis_mask=0), - dict(input_shape=[1, 5, 5, 3], begin=[0, 0, 0, 0], end=[1, 5, 5, 3], strides=[1, 1, 1, 1], - begin_mask=0, - end_mask=0, ellipsis_mask=0, new_axis_mask=8, shrink_axis_mask=0), - dict(input_shape=[1, 5, 5, 3], begin=[0, 0, 0, 0], end=[1, 5, 5, 3], strides=[1, 1, 1, 1], + dict(input_shape=[1, 5, 5, 3], begin_value=[0, 0, 0, 0], end_value=[1, 5, 5, 3], strides_value=[1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=4, shrink_axis_mask=0), - dict(input_shape=[1, 5, 5, 3], begin=[0, 0, 0, 0], end=[1, 5, 5, 3], strides=[1, 1, 1, 1], + dict(input_shape=[1, 5, 5, 3], begin_value=[0, 0, 0, 0], end_value=[1, 5, 5, 3], strides_value=[1, 1, 1, 1], begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=2, shrink_axis_mask=0), - dict(input_shape=[16, 4, 64], begin=[0, 0, 0, 0], end=[0, 0, 0, 0], strides=[1, 1, 1, 1], + dict(input_shape=[16, 4, 64], begin_value=[0, 0, 0, 0], end_value=[0, 0, 0, 0], strides_value=[1, 1, 1, 1], begin_mask=19, end_mask=19, ellipsis_mask=0, new_axis_mask=12, shrink_axis_mask=0), ] @@ -103,7 +110,6 @@ def test_strided_slice_replace_with_squeeze(self, params, ie_device, precision, @pytest.mark.nightly def test_strided_slice_replace_with_unsqueeze(self, params, ie_device, precision, ir_version, temp_dir, use_new_frontend, use_old_api): - self._test(*self.create_strided_slice_net(**params, ir_version=ir_version, - use_new_frontend=use_new_frontend), + self._test(*self.create_strided_slice_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, use_new_frontend=use_new_frontend, use_old_api=use_old_api) From 60436dee5ab9a5598656ba60181e53dc51283bb7 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 21 Mar 2023 10:52:45 +0400 Subject: [PATCH 004/296] Updated AsyncInferRequest documentation + leftovers 
(#16420) --- docs/IE_PLUGIN_DG/AsyncInferRequest.md | 42 ++++++++++++-------------- docs/IE_PLUGIN_DG/CompiledModel.md | 2 +- docs/IE_PLUGIN_DG/InferRequest.md | 4 +-- docs/IE_PLUGIN_DG/Intro.md | 6 ++-- docs/IE_PLUGIN_DG/Plugin.md | 2 +- docs/IE_PLUGIN_DG/PluginTesting.md | 2 +- docs/IE_PLUGIN_DG/QuantizedNetworks.md | 8 ++--- docs/IE_PLUGIN_DG/detailed_guides.md | 4 +-- 8 files changed, 33 insertions(+), 37 deletions(-) diff --git a/docs/IE_PLUGIN_DG/AsyncInferRequest.md b/docs/IE_PLUGIN_DG/AsyncInferRequest.md index d8f45d528b22c9..e45a5799deea8b 100644 --- a/docs/IE_PLUGIN_DG/AsyncInferRequest.md +++ b/docs/IE_PLUGIN_DG/AsyncInferRequest.md @@ -1,49 +1,45 @@ -# Asynchronous Inference Request {#openvino_docs_ie_plugin_dg_async_infer_request} +# Asynchronous Inference Request {#openvino_docs_ov_plugin_dg_async_infer_request} Asynchronous Inference Request runs an inference pipeline asynchronously in one or several task executors depending on a device pipeline structure. -OpenVINO Runtime Plugin API provides the base InferenceEngine::AsyncInferRequestThreadSafeDefault class: +OpenVINO Runtime Plugin API provides the base ov::IAsyncInferRequest class: -- The class has the `_pipeline` field of `std::vector >`, which contains pairs of an executor and executed task. +- The class has the `m_pipeline` field of `std::vector, ov::threading::Task> >`, which contains pairs of an executor and executed task. - All executors are passed as arguments to a class constructor and they are in the running state and ready to run tasks. -- The class has the InferenceEngine::AsyncInferRequestThreadSafeDefault::StopAndWait method, which waits for `_pipeline` to finish in a class destructor. The method does not stop task executors and they are still in the running stage, because they belong to the executable network instance and are not destroyed. +- The class has the ov::IAsyncInferRequest::stop_and_wait method, which waits for `m_pipeline` to finish in a class destructor. The method does not stop task executors and they are still in the running stage, because they belong to the compiled model instance and are not destroyed. -`AsyncInferRequest` Class +AsyncInferRequest Class ------------------------ -OpenVINO Runtime Plugin API provides the base InferenceEngine::AsyncInferRequestThreadSafeDefault class for a custom asynchronous inference request implementation: +OpenVINO Runtime Plugin API provides the base ov::IAsyncInferRequest class for a custom asynchronous inference request implementation: @snippet src/async_infer_request.hpp async_infer_request:header -#### Class Fields +### Class Fields -- `_inferRequest` - a reference to the [synchronous inference request](@ref openvino_docs_ov_plugin_dg_infer_request) implementation. Its methods are reused in the `AsyncInferRequest` constructor to define a device pipeline. -- `_waitExecutor` - a task executor that waits for a response from a device about device tasks completion +- `m_wait_executor` - a task executor that waits for a response from a device about device tasks completion -> **NOTE**: If a plugin can work with several instances of a device, `_waitExecutor` must be device-specific. Otherwise, having a single task executor for several devices does not allow them to work in parallel. +> **NOTE**: If a plugin can work with several instances of a device, `m_wait_executor` must be device-specific. Otherwise, having a single task executor for several devices does not allow them to work in parallel. 
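For illustration, a derived request might populate `m_pipeline` along the lines of the sketch below. This is schematic only and is not the template plugin code behind the @snippet markers; the stage names follow the constructor description in the next section, and the choice of executor for the postprocess stage is an assumption based on it being described as a CPU compute task:

    // Schematic: m_pipeline is an ordered list of {executor, task} pairs;
    // each task is scheduled on its executor after the previous stage completes.
    m_pipeline = {
        {m_request_executor, [this] { /* prepare inputs, submit work to the device */ }},
        {m_wait_executor,    [this] { /* wait for the device to finish */ }},
        {m_request_executor, [this] { /* postprocess outputs */ }}  // assumed executor
    };
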
-### `AsyncInferRequest()`
+### AsyncInferRequest()
-The main goal of the `AsyncInferRequest` constructor is to define a device pipeline `_pipeline`. The example below demonstrates `_pipeline` creation with the following stages:
+The main goal of the `AsyncInferRequest` constructor is to define a device pipeline `m_pipeline`. The example below demonstrates `m_pipeline` creation with the following stages:
-- `inferPreprocess` is a CPU compute task.
-- `startPipeline` is a CPU ligthweight task to submit tasks to a remote device.
-- `waitPipeline` is a CPU non-compute task that waits for a response from a remote device.
-- `inferPostprocess` is a CPU compute task.
+- `infer_preprocess_and_start_pipeline` is a CPU lightweight task to submit tasks to a remote device.
+- `wait_pipeline` is a CPU non-compute task that waits for a response from a remote device.
+- `infer_postprocess` is a CPU compute task.
@snippet src/async_infer_request.cpp async_infer_request:ctor
The stages are distributed among two task executors in the following way:
-- `inferPreprocess` and `startPipeline` are combined into a single task and run on `_requestExecutor`, which computes CPU tasks.
+- `infer_preprocess_and_start_pipeline` prepares input tensors and runs on `m_request_executor`, which computes CPU tasks.
- You need at least two executors to overlap compute tasks of a CPU and a remote device the plugin works with. Otherwise, CPU and device tasks are executed serially one by one.
-- `waitPipeline` is sent to `_waitExecutor`, which works with the device.
+- `wait_pipeline` is sent to `m_wait_executor`, which works with the device.
-> **NOTE**: `callbackExecutor` is also passed to the constructor and it is used in the base InferenceEngine::AsyncInferRequestThreadSafeDefault class, which adds a pair of `callbackExecutor` and a callback function set by the user to the end of the pipeline.
+> **NOTE**: `m_callback_executor` is also passed to the constructor and it is used in the base ov::IAsyncInferRequest class, which adds a pair of `callback_executor` and a callback function set by the user to the end of the pipeline.
-Inference request stages are also profiled using IE_PROFILING_AUTO_SCOPE, which shows how pipelines of multiple asynchronous inference requests are run in parallel via the [Intel® VTune™ Profiler](https://software.intel.com/en-us/vtune) tool.
-### `~AsyncInferRequest()`
+### ~AsyncInferRequest()
-In the asynchronous request destructor, it is necessary to wait for a pipeline to finish. It can be done using the InferenceEngine::AsyncInferRequestThreadSafeDefault::StopAndWait method of the base class.
+In the asynchronous request destructor, it is necessary to wait for a pipeline to finish. It can be done using the ov::IAsyncInferRequest::stop_and_wait method of the base class.
@snippet src/async_infer_request.cpp async_infer_request:dtor
diff --git a/docs/IE_PLUGIN_DG/CompiledModel.md b/docs/IE_PLUGIN_DG/CompiledModel.md
index 45d990101d132a..fa80ee81cb75ab 100644
--- a/docs/IE_PLUGIN_DG/CompiledModel.md
+++ b/docs/IE_PLUGIN_DG/CompiledModel.md
@@ -54,7 +54,7 @@ The method creates a synchronous inference request and returns it.
While the public OpenVINO API has a single interface for inference request, which can be executed in synchronous and asynchronous modes, a plugin library implementation has two separate classes:
- [Synchronous inference request](@ref openvino_docs_ov_plugin_dg_infer_request), which defines pipeline stages and runs them synchronously in the `infer` method.
-- [Asynchronous inference request](@ref openvino_docs_ie_plugin_dg_async_infer_request), which is a wrapper for a synchronous inference request and can run a pipeline asynchronously. Depending on a device pipeline structure, it can has one or several stages:
+- [Asynchronous inference request](@ref openvino_docs_ov_plugin_dg_async_infer_request), which is a wrapper for a synchronous inference request and can run a pipeline asynchronously. Depending on a device pipeline structure, it can have one or several stages:
- For single-stage pipelines, there is no need to define this method and create a class derived from ov::IAsyncInferRequest. For single stage pipelines, a default implementation of this method creates ov::IAsyncInferRequest wrapping a synchronous inference request and runs it asynchronously in the `m_request_executor` executor.
- For pipelines with multiple stages, such as performing some preprocessing on host, uploading input data to a device, running inference on a device, or downloading and postprocessing output data, schedule stages on several task executors to achieve better device use and performance. You can do it by creating a sufficient number of inference requests running in parallel. In this case, device stages of different inference requests are overlapped with preprocessing and postprocessing stage giving better performance.
> **IMPORTANT**: It is up to you to decide how many task executors you need to optimally execute a device pipeline.
diff --git a/docs/IE_PLUGIN_DG/InferRequest.md b/docs/IE_PLUGIN_DG/InferRequest.md
index bd08278e90cd0c..b40f23221fe90f 100644
--- a/docs/IE_PLUGIN_DG/InferRequest.md
+++ b/docs/IE_PLUGIN_DG/InferRequest.md
@@ -2,7 +2,7 @@
`InferRequest` class functionality:
- Allocate input and output tensors needed for a backend-dependent network inference.
-- Define functions for inference process stages (for example, `preprocess`, `upload`, `infer`, `download`, `postprocess`). These functions can later be used to define an execution pipeline during [Asynchronous Inference Request](@ref openvino_docs_ie_plugin_dg_async_infer_request) implementation.
+- Define functions for inference process stages (for example, `preprocess`, `upload`, `infer`, `download`, `postprocess`). These functions can later be used to define an execution pipeline during [Asynchronous Inference Request](@ref openvino_docs_ov_plugin_dg_async_infer_request) implementation.
- Call inference stages one by one synchronously.
InferRequest Class
@@ -81,4 +81,4 @@ The method returns the profiling info which was measured during pipeline stages
@snippet src/sync_infer_request.cpp infer_request:get_profiling_info
-The next step in the plugin library implementation is the [Asynchronous Inference Request](@ref openvino_docs_ie_plugin_dg_async_infer_request) class.
+The next step in the plugin library implementation is the [Asynchronous Inference Request](@ref openvino_docs_ov_plugin_dg_async_infer_request) class.
diff --git a/docs/IE_PLUGIN_DG/Intro.md b/docs/IE_PLUGIN_DG/Intro.md
index d1a6a12e104036..ed3d101ea4a6bc 100644
--- a/docs/IE_PLUGIN_DG/Intro.md
+++ b/docs/IE_PLUGIN_DG/Intro.md
@@ -10,7 +10,7 @@
Implement Plugin Functionality
Implement Compiled Model Functionality
Implement Synchronous Inference Request
- Implement Asynchronous Inference Request
+ Implement Asynchronous Inference Request
Implement Remote Context
Implement Remote Tensor
openvino_docs_ov_plugin_dg_plugin_build
@@ -43,7 +43,7 @@ OpenVINO plugin dynamic library consists of several main components:
3.
[Inference Request class](@ref openvino_docs_ov_plugin_dg_infer_request): - Runs an inference pipeline serially. - Can extract performance counters for an inference pipeline execution profiling. -4. [Asynchronous Inference Request class](@ref openvino_docs_ie_plugin_dg_async_infer_request): +4. [Asynchronous Inference Request class](@ref openvino_docs_ov_plugin_dg_async_infer_request): - Wraps the [Inference Request](@ref openvino_docs_ov_plugin_dg_infer_request) class and runs pipeline stages in parallel on several task executors based on a device-specific pipeline structure. 5. [Remote Context](@ref openvino_docs_ov_plugin_dg_remote_context): - Provides the device specific remote context. Context allows to create remote tensors. @@ -61,7 +61,7 @@ Detailed guides * [Build](@ref openvino_docs_ov_plugin_dg_plugin_build) a plugin library using CMake * Plugin and its components [testing](@ref openvino_docs_ov_plugin_dg_plugin_testing) -* [Quantized networks](@ref openvino_docs_ie_plugin_dg_quantized_networks) +* [Quantized networks](@ref openvino_docs_ov_plugin_dg_quantized_models) * [Low precision transformations](@ref openvino_docs_OV_UG_lpt) guide * [Writing OpenVINO™ transformations](@ref openvino_docs_transformations) guide diff --git a/docs/IE_PLUGIN_DG/Plugin.md b/docs/IE_PLUGIN_DG/Plugin.md index 2fa02a009e8056..96326fabcb574a 100644 --- a/docs/IE_PLUGIN_DG/Plugin.md +++ b/docs/IE_PLUGIN_DG/Plugin.md @@ -85,7 +85,7 @@ Actual model compilation is done in the `CompiledModel` constructor. Refer to th The function accepts a const shared pointer to `ov::Model` object and applies common and device-specific transformations on a copied model to make it more friendly to hardware operations. For details how to write custom device-specific transformation, please, refer to [Writing OpenVINO™ transformations](@ref openvino_docs_transformations) guide. See detailed topics about model representation: * [Intermediate Representation and Operation Sets](@ref openvino_docs_MO_DG_IR_and_opsets) - * [Quantized models](@ref openvino_docs_ie_plugin_dg_quantized_networks). + * [Quantized models](@ref openvino_docs_ov_plugin_dg_quantized_models). @snippet template/src/plugin.cpp plugin:transform_model diff --git a/docs/IE_PLUGIN_DG/PluginTesting.md b/docs/IE_PLUGIN_DG/PluginTesting.md index 6a0cecfb05a1c4..ca19d5ea2fbbae 100644 --- a/docs/IE_PLUGIN_DG/PluginTesting.md +++ b/docs/IE_PLUGIN_DG/PluginTesting.md @@ -8,7 +8,7 @@ OpenVINO Plugin tests are included in the `openvino::funcSharedTests` CMake targ Test definitions are split into tests class declaration (see `src/tests/functional/plugin/shared/include`) and tests class implementation (see `src/tests/functional/plugin/shared/src`) and include the following scopes of plugin conformance tests: -1. **Behavior tests** (`behavior` sub-folder), which are a separate test group to check that a plugin satisfies basic OpenVINO concepts: plugin creation, multiple executable networks support, multiple synchronous and asynchronous inference requests support, and so on. See the next section with details how to instantiate the tests definition class with plugin-specific parameters. +1. **Behavior tests** (`behavior` sub-folder), which are a separate test group to check that a plugin satisfies basic OpenVINO concepts: plugin creation, multiple compiled models support, multiple synchronous and asynchronous inference requests support, and so on. See the next section with details how to instantiate the tests definition class with plugin-specific parameters. 2. 
**Single layer tests** (`single_layer_tests` sub-folder). This group of tests checks that a particular single layer can be inferenced on a device. An example of test instantiation based on test definition from `openvino::funcSharedTests` library:
diff --git a/docs/IE_PLUGIN_DG/QuantizedNetworks.md b/docs/IE_PLUGIN_DG/QuantizedNetworks.md
index 57deb94281de05..f3c712e2f618b5 100644
--- a/docs/IE_PLUGIN_DG/QuantizedNetworks.md
+++ b/docs/IE_PLUGIN_DG/QuantizedNetworks.md
@@ -1,8 +1,8 @@
-# Quantized networks compute and restrictions {#openvino_docs_ie_plugin_dg_quantized_networks}
+# Quantized models compute and restrictions {#openvino_docs_ov_plugin_dg_quantized_models}
-One of the feature of Inference Engine is the support of quantized networks with different precisions: INT8, INT4, etc.
+One of the features of OpenVINO is the support of quantized models with different precisions: INT8, INT4, etc.
However, it is up to the plugin to define what exact precisions are supported by the particular HW.
-All quantized networks which can be expressed in IR have a unified representation by means of *FakeQuantize* operation.
+All quantized models which can be expressed in IR have a unified representation by means of *FakeQuantize* operation.
For more details about low-precision model representation please refer to this [document](@ref openvino_docs_ie_plugin_dg_lp_representation).
### Interpreting FakeQuantize at runtime
@@ -44,6 +44,6 @@ Below we define these rules as follows:
- Per-channel quantization of activations for channel-wise and element-wise operations, e.g. Depthwise Convolution, Eltwise Add/Mul, ScaleShift.
- Symmetric and asymmetric quantization of weights and activations with the support of per-channel scales and zero-points.
- Non-unified quantization parameters for Eltwise and Concat operations.
-- Non-quantized network output, i.e. there are no quantization parameters for it.
+- Non-quantized model output, i.e. there are no quantization parameters for it.
[qdq_propagation]: images/qdq_propagation.png diff --git a/docs/IE_PLUGIN_DG/detailed_guides.md b/docs/IE_PLUGIN_DG/detailed_guides.md index 934c53cc1e2ca5..2076afb0d94447 100644 --- a/docs/IE_PLUGIN_DG/detailed_guides.md +++ b/docs/IE_PLUGIN_DG/detailed_guides.md @@ -6,13 +6,13 @@ :maxdepth: 1 :hidden: - openvino_docs_ie_plugin_dg_quantized_networks + openvino_docs_ov_plugin_dg_quantized_models openvino_docs_OV_UG_lpt @endsphinxdirective The guides below provide extra information about specific features of OpenVINO that are needed during OpenVINO plugin development: -* [Quantized networks](@ref openvino_docs_ie_plugin_dg_quantized_networks) +* [Quantized networks](@ref openvino_docs_ov_plugin_dg_quantized_models) * [Low precision transformations](@ref openvino_docs_OV_UG_lpt) guide * [Writing OpenVINO™ transformations](@ref openvino_docs_transformations) guide From 82a992b95d2264b8c26c5d3af6feb1d877bcf3ac Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 21 Mar 2023 12:31:10 +0400 Subject: [PATCH 005/296] [TF FE] Fix leftovers from code review (#16422) Signed-off-by: Kazantsev, Roman --- src/frontends/tensorflow_common/src/op/strided_slice.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/frontends/tensorflow_common/src/op/strided_slice.cpp b/src/frontends/tensorflow_common/src/op/strided_slice.cpp index e71097557e7e44..50ceb763a80658 100644 --- a/src/frontends/tensorflow_common/src/op/strided_slice.cpp +++ b/src/frontends/tensorflow_common/src/op/strided_slice.cpp @@ -27,9 +27,10 @@ OutputVector translate_strided_slice_op(const NodeContext& node) { return vector{}; } size_t max_length = sizeof(mask) * CHAR_BIT; - vector vec{}; + vector vec; + vec.reserve(max_length); for (size_t i = 0; i < max_length; ++i) { - if ((mask >> i & 0x1) == 1) { + if (((mask >> i) & 0x1) == 1) { // resize the vector by appending the required number of zeros vec.resize(i + 1, 0); vec[i] = 1; From 63797db257f1a1130a59e17470a04aa49ad85114 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Tue, 21 Mar 2023 10:02:37 +0100 Subject: [PATCH 006/296] Review ROIPooling class for shape inference aspects (#16403) * Review ROIPooling class - check interval shape and label propagation - add template shape_infer - add shape infer into cpu plugin - add test with StaticShape * Use get_output_roi instead of get_output_size * Add missing includes --- src/core/include/openvino/op/roi_pooling.hpp | 20 +- .../include/roi_pooling_shape_inference.hpp | 107 +++++++++ src/core/src/op/roi_pooling.cpp | 108 +++------ src/core/tests/type_prop/roi_pooling.cpp | 218 +++++++++++------- src/core/tests/visitors/op/roi_pooling.cpp | 2 +- .../intel_cpu/src/nodes/roi_pooling.cpp | 6 +- .../utils/shape_inference/shape_inference.cpp | 7 +- .../roi_pooling_shape_inference_test.cpp | 74 ++++++ 8 files changed, 385 insertions(+), 157 deletions(-) create mode 100644 src/core/shape_inference/include/roi_pooling_shape_inference.hpp create mode 100644 src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_pooling_shape_inference_test.cpp diff --git a/src/core/include/openvino/op/roi_pooling.hpp b/src/core/include/openvino/op/roi_pooling.hpp index b0b04648d7b3ea..57799954a7641f 100644 --- a/src/core/include/openvino/op/roi_pooling.hpp +++ b/src/core/include/openvino/op/roi_pooling.hpp @@ -34,12 +34,30 @@ class OPENVINO_API ROIPooling : public Op { std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; + /// \brief Set the output ROI feature map (pooled_h, pooled_w).
+ /// \param output_size Shape with pooling attributes pooled_h and pooled_w sizes. + void set_output_roi(Shape output_size); + + /// \brief Get the output ROI feature map shape (H x W) + /// \return Shape with pooled_h and pooled_w attributes. + const Shape& get_output_roi() const; + + OPENVINO_DEPRECATED("Use 'get_output_roi' instead. Use of this member can be ambiguous with Node base " + "'get_output_size' which returns the number of outputs.") const Shape& get_output_size() const { return m_output_size; } + + /// \brief Set the spatial scale value. + /// \param scale Scale value to set. + void set_spatial_scale(float scale); float get_spatial_scale() const { return m_spatial_scale; } + + /// \brief Set the method of pooling + /// \param method_name Pooling method name. + void set_method(std::string method_name); const std::string& get_method() const { return m_method; } @@ -47,7 +65,7 @@ class OPENVINO_API ROIPooling : public Op { private: Shape m_output_size{0, 0}; - float m_spatial_scale{0}; + float m_spatial_scale{0.0f}; std::string m_method = "max"; }; } // namespace v0 diff --git a/src/core/shape_inference/include/roi_pooling_shape_inference.hpp b/src/core/shape_inference/include/roi_pooling_shape_inference.hpp new file mode 100644 index 00000000000000..1568ce3cbe960c --- /dev/null +++ b/src/core/shape_inference/include/roi_pooling_shape_inference.hpp @@ -0,0 +1,107 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "compare.hpp" +#include "dimension_util.hpp" +#include "openvino/op/roi_pooling.hpp" + +namespace ov { +namespace op { +namespace pooling { +namespace validate { +template +void rois_input_shape(const TROIPooling* op, const TShape rois_shape) { + if (rois_shape.rank().is_static()) { + NODE_VALIDATION_CHECK(op, + rois_shape.size() == 2, + "Expected a 2D tensor for the ROIs input with box coordinates. Got: ", + rois_shape); + + NODE_VALIDATION_CHECK(op, + rois_shape[1].compatible(5), + "The second dimension of ROIs input should contain batch id and box coordinates. ", + "This dimension is expected to be equal to 5. Got: ", + rois_shape[1]); + } +} + +template +void output_roi_attr(const TROIPooling* op) { + const auto& out_roi = op->get_output_roi(); + + NODE_VALIDATION_CHECK(op, + out_roi.size() == 2, + "The dimension of pooled size is expected to be equal to 2. Got: ", + out_roi.size()); + + NODE_VALIDATION_CHECK(op, + std::none_of(out_roi.cbegin(), out_roi.cend(), cmp::Less(1)), + "Pooled size attributes pooled_h and pooled_w should be positive integers. Got: ", + out_roi[0], + " and: ", + out_roi[1], + " respectively"); +} + +template +void scale_attr(const TROIPooling* op) { + const auto scale = op->get_spatial_scale(); + NODE_VALIDATION_CHECK(op, + std::isnormal(scale) && !std::signbit(scale), + "The spatial scale attribute should be a positive floating point number. Got: ", + scale); +} + +template +void method_attr(const TROIPooling* op) { + const auto& method = op->get_method(); + NODE_VALIDATION_CHECK(op, + method == "max" || method == "bilinear", + "Pooling method attribute should be either \'max\' or \'bilinear\'.
Got: ", + method); +} +} // namespace validate +} // namespace pooling + +namespace v0 { +template +std::vector shape_infer(const ROIPooling* op, const std::vector& input_shapes) { + NODE_VALIDATION_CHECK(op, input_shapes.size() == 2); + using namespace ov::util; + + const auto& feat_shape = input_shapes[0]; + const auto& rois_shape = input_shapes[1]; + const auto& feat_rank = feat_shape.rank(); + + NODE_VALIDATION_CHECK(op, + feat_rank.compatible(4), + "Expected a 4D tensor for the feature maps input. Got: ", + feat_shape); + + pooling::validate::rois_input_shape(op, rois_shape); + pooling::validate::output_roi_attr(op); + pooling::validate::scale_attr(op); + pooling::validate::method_attr(op); + + TShape out_shape; + out_shape.reserve(4); + + out_shape.emplace_back(rois_shape.rank().is_static() ? rois_shape[0] : dim::inf_bound); + out_shape.emplace_back(feat_rank.is_static() ? feat_shape[1] : dim::inf_bound); + std::copy(op->get_output_roi().cbegin(), op->get_output_roi().cend(), std::back_inserter(out_shape)); + + return {out_shape}; +} + +template +void shape_infer(const ROIPooling* op, const std::vector& input_shapes, std::vector& output_shapes) { + output_shapes = shape_infer(op, input_shapes); +} +} // namespace v0 +} // namespace op +} // namespace ov diff --git a/src/core/src/op/roi_pooling.cpp b/src/core/src/op/roi_pooling.cpp index d0baa803933db5..00ee8dacf46447 100644 --- a/src/core/src/op/roi_pooling.cpp +++ b/src/core/src/op/roi_pooling.cpp @@ -2,18 +2,22 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/roi_pooling.hpp" +#include "openvino/op/roi_pooling.hpp" #include "itt.hpp" +#include "openvino/core/validation_util.hpp" +#include "roi_pooling_shape_inference.hpp" using namespace std; -using namespace ngraph; -op::ROIPooling::ROIPooling(const Output& input, - const Output& coords, - const ov::Shape& output_size, - const float spatial_scale, - const string& method) +namespace ov { +namespace op { +namespace v0 { +ROIPooling::ROIPooling(const Output& input, + const Output& coords, + const ov::Shape& output_size, + const float spatial_scale, + const string& method) : Op({input, coords}), m_output_size(output_size), m_spatial_scale(spatial_scale), @@ -21,10 +25,10 @@ op::ROIPooling::ROIPooling(const Output& input, constructor_validate_and_infer_types(); } -void op::ROIPooling::validate_and_infer_types() { +void ROIPooling::validate_and_infer_types() { OV_OP_SCOPE(v0_ROIPooling_validate_and_infer_types); - auto feat_maps_et = get_input_element_type(0); - auto coords_et = get_input_element_type(1); + const auto& feat_maps_et = get_input_element_type(0); + const auto& coords_et = get_input_element_type(1); NODE_VALIDATION_CHECK(this, feat_maps_et.is_real() && coords_et.is_real(), "The data type for input and ROIs is expected to be a floating point type. Got: ", @@ -34,72 +38,16 @@ void op::ROIPooling::validate_and_infer_types() { NODE_VALIDATION_CHECK(this, feat_maps_et == coords_et, - "Type of feature maps (inputs) and rois is expected to be the same. Got: ", + "Type of feature maps (inputs) and ROIs is expected to be the same. Got: ", feat_maps_et, " and: ", coords_et); - NODE_VALIDATION_CHECK(this, - m_output_size.size() == 2, - "The dimension of pooled size is expected to be equal to 2. Got: ", - m_output_size.size()); - - NODE_VALIDATION_CHECK(this, - m_output_size[0] > 0 && m_output_size[1] > 0, - "Pooled size attributes pooled_h and pooled_w should should be " - "non-negative integers. 
Got: ", - m_output_size[0], - " and: ", - m_output_size[1], - "respectively"); - - NODE_VALIDATION_CHECK(this, - m_spatial_scale > 0, - "The spatial scale attribute should be a positive floating point number. Got: ", - m_spatial_scale); - - NODE_VALIDATION_CHECK(this, - m_method == "max" || m_method == "bilinear", - "Pooling method attribute should be either \'max\' or \'bilinear\'. Got: ", - m_method); + const auto output_shapes = shape_infer(this, get_node_input_partial_shapes(*this)); + set_output_type(0, feat_maps_et, output_shapes[0]); const auto& feat_maps_ps = get_input_partial_shape(0); - NODE_VALIDATION_CHECK(this, - feat_maps_ps.rank().compatible(4), - "Expected a 4D tensor for the feature maps input. Got: ", - feat_maps_ps); - const auto& coords_ps = get_input_partial_shape(1); - NODE_VALIDATION_CHECK(this, - coords_ps.rank().compatible(2), - "Expected a 2D tensor for the ROIs input with box coordinates. Got: ", - coords_ps); - - if (coords_ps.rank().is_static()) { - const auto coords_second_dim = coords_ps[1]; - NODE_VALIDATION_CHECK(this, - coords_second_dim.compatible(5), - "The second dimension of ROIs input should contain batch id and box coordinates. ", - "This dimension is expected to be equal to 5. Got: ", - coords_second_dim); - } - - // output shape should be {NUM_ROIS, C, pooled_h, pooled_w} - auto output_shape = ov::PartialShape{{Dimension::dynamic(), - Dimension::dynamic(), - Dimension{static_cast(m_output_size[0])}, - Dimension{static_cast(m_output_size[1])}}}; - - if (coords_ps.rank().is_static()) { - output_shape[0] = coords_ps[0]; - } - - if (feat_maps_ps.rank().is_static()) { - output_shape[1] = feat_maps_ps[1]; - } - - set_output_size(1); - set_output_type(0, feat_maps_et, output_shape); // if channel dimension, C, not known // feature maps input is used by shape specialization pass @@ -114,13 +62,13 @@ void op::ROIPooling::validate_and_infer_types() { } } -shared_ptr op::ROIPooling::clone_with_new_inputs(const OutputVector& new_args) const { +shared_ptr ROIPooling::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v0_ROIPooling_clone_with_new_inputs); check_new_args_count(this, new_args); return make_shared(new_args.at(0), new_args.at(1), m_output_size, m_spatial_scale, m_method); } -bool op::ROIPooling::visit_attributes(AttributeVisitor& visitor) { +bool ROIPooling::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v0_ROIPooling_visit_attributes); visitor.on_attribute("output_size", m_output_size); visitor.on_attribute("pooled_h", m_output_size[0]); @@ -129,3 +77,21 @@ bool op::ROIPooling::visit_attributes(AttributeVisitor& visitor) { visitor.on_attribute("method", m_method); return true; } + +void ROIPooling::set_output_roi(Shape output_size) { + m_output_size = std::move(output_size); +} +const Shape& ROIPooling::get_output_roi() const { + return m_output_size; +} + +void ROIPooling::set_spatial_scale(float scale) { + m_spatial_scale = scale; +} + +void ROIPooling::set_method(std::string method_name) { + m_method = std::move(method_name); +} +} // namespace v0 +} // namespace op +} // namespace ov diff --git a/src/core/tests/type_prop/roi_pooling.cpp b/src/core/tests/type_prop/roi_pooling.cpp index 0fa337a37ea5be..e86b52eef52641 100644 --- a/src/core/tests/type_prop/roi_pooling.cpp +++ b/src/core/tests/type_prop/roi_pooling.cpp @@ -2,109 +2,171 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "common_test_utils/test_assertions.hpp" #include "gtest/gtest.h" -#include "ngraph/ngraph.hpp" +#include 
"openvino/opsets/opset11.hpp" +#include "type_prop.hpp" using namespace std; -using namespace ngraph; - -TEST(type_prop, roi_pooling_basic_shape_inference) { - const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6}); - const auto rois = make_shared(element::f32, Shape{4, 5}); - const auto op = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f); - ASSERT_EQ(op->get_method(), "max"); - ASSERT_EQ(op->get_shape(), (Shape{4, 3, 2, 2})); +using namespace ov; +using namespace ov::opset11; +using namespace testing; + +class TypePropROIPoolingV0 : public TypePropOpTest { +protected: + float spatial_scale = 0.625f; + Shape pooling_roi_2x2{2, 2}; +}; + +TEST_F(TypePropROIPoolingV0, default_ctor) { + const auto feat_maps = make_shared(element::f32, PartialShape{{0, 3}, {1, 3}, {1, 6}, {1, 6}}); + const auto rois = make_shared(element::f32, PartialShape{{2, 4}, {1, 5}}); + + const auto op = make_op(); + op->set_arguments(OutputVector{feat_maps, rois}); + op->set_spatial_scale(spatial_scale); + op->set_method("max"); + op->set_output_roi({3, 4}); + op->validate_and_infer_types(); + + EXPECT_FLOAT_EQ(op->get_spatial_scale(), spatial_scale); + EXPECT_EQ(op->get_output_roi(), Shape({3, 4})); + EXPECT_EQ(op->get_method(), "max"); + EXPECT_EQ(op->get_input_size(), 2); + EXPECT_EQ(op->get_element_type(), element::f32); + EXPECT_EQ(static_cast(op.get())->get_output_size(), 1); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{{2, 4}, {1, 3}, 3, 4})); } -TEST(type_prop, roi_pooling_dynamic_channels_dim) { - const auto feat_maps = make_shared(element::f32, PartialShape{1, Dimension(), 6, 6}); - const auto rois = make_shared(element::f32, Shape{4, 5}); - const auto op = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "max"); - ASSERT_TRUE(op->get_output_partial_shape(0).same_scheme(PartialShape{4, Dimension(), 2, 2})); +TEST_F(TypePropROIPoolingV0, basic_shape_inference) { + const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6}); + const auto rois = make_shared(element::f32, Shape{4, 5}); + const auto op = make_op(feat_maps, rois, pooling_roi_2x2, 0.625f); + + EXPECT_EQ(op->get_element_type(), element::f32); + EXPECT_EQ(op->get_method(), "max"); + EXPECT_EQ(op->get_shape(), (Shape{4, 3, 2, 2})); } -TEST(type_prop, roi_pooling_dynamic_num_rois_dim) { - const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6}); - const auto rois = make_shared(element::f32, PartialShape{Dimension(), 5}); - const auto op = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f); - ASSERT_TRUE(op->get_output_partial_shape(0).same_scheme(PartialShape{Dimension(), 3, 2, 2})); +TEST_F(TypePropROIPoolingV0, dynamic_channels_dim) { + auto feat_shape = PartialShape{1, -1, 6, 6}; + auto rois_shape = PartialShape{4, 5}; + set_shape_labels(feat_shape, 10); + set_shape_labels(rois_shape, 20); + + const auto feat_maps = make_shared(element::f32, feat_shape); + const auto rois = make_shared(element::f32, rois_shape); + const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "max"); + + EXPECT_EQ(op->get_element_type(), element::f32); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{4, -1, 2, 2})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), ElementsAre(20, 11, ov::no_label, ov::no_label)); } -TEST(type_prop, roi_pooling_dynamic_rank_feat_maps) { - const auto feat_maps = make_shared(element::f32, PartialShape::dynamic()); - const auto rois = make_shared(element::f32, Shape{4, 5}); - const auto op = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f); - 
ASSERT_TRUE(op->get_output_partial_shape(0).same_scheme(PartialShape{4, Dimension(), 2, 2})); +TEST_F(TypePropROIPoolingV0, dynamic_num_rois_dim) { + auto feat_shape = PartialShape{1, 3, 6, 6}; + auto rois_shape = PartialShape{-1, 5}; + set_shape_labels(feat_shape, 10); + set_shape_labels(rois_shape, 20); + + const auto feat_maps = make_shared(element::f64, feat_shape); + const auto rois = make_shared(element::f64, rois_shape); + const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "bilinear"); + + EXPECT_EQ(op->get_element_type(), element::f64); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{-1, 3, 2, 2})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), ElementsAre(20, 11, ov::no_label, ov::no_label)); } -TEST(type_prop, roi_pooling_dynamic_rank_rois) { - const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6}); - const auto rois = make_shared(element::f32, PartialShape::dynamic()); - const auto op = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f); - ASSERT_TRUE(op->get_output_partial_shape(0).same_scheme(PartialShape{Dimension(), 3, 2, 2})); +TEST_F(TypePropROIPoolingV0, dynamic_rank_feat_maps) { + const auto feat_maps = make_shared(element::f16, PartialShape::dynamic()); + const auto rois = make_shared(element::f16, Shape{4, 5}); + const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale); + + EXPECT_EQ(op->get_element_type(), element::f16); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{4, -1, 2, 2})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), Each(ov::no_label)); } -TEST(type_prop, roi_pooling_incompatible_input_rank) { - const auto feat_maps = make_shared(element::f32, Shape{1, 3, 2, 6, 6}); - const auto rois = make_shared(element::f32, Shape{3, 5}); - // feat_maps must be of rank 4 - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, dynamic_rank_feat_rois) { + const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape::dynamic()); + const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale); + + EXPECT_EQ(op->get_element_type(), element::f32); + EXPECT_EQ(op->get_output_partial_shape(0), (PartialShape{-1, 3, 2, 2})); + EXPECT_THAT(get_shape_labels(op->get_output_partial_shape(0)), Each(ov::no_label)); } -TEST(type_prop, roi_pooling_incompatible_pooling_shape) { - Shape pool_shape{2, 2, 2}; - const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f32, Shape{3, 5}); - // pool_shape must be of rank 2 {pooled_h, pooled_w} - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, pool_shape, 0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, incompatible_input_rank) { + const auto feat_maps = make_shared(element::f32, Shape{1, 3, 6, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "max"), + NodeValidationFailure, + HasSubstr("Expected a 4D tensor for the feature maps input")); } -TEST(type_prop, roi_pooling_incompatible_rois_second_dim) { - const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f32, Shape{3, 4}); - // the second dim of rois must be 5. 
[batch_id, x_1, y_1, x_2, y_2] - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, incompatible_pooling_shape) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, Shape{2, 2, 2}, spatial_scale, "max"), + NodeValidationFailure, + HasSubstr("The dimension of pooled size is expected to be equal to 2")); +} + +TEST_F(TypePropROIPoolingV0, incompatible_rois_second_dim) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 4}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "max"), + NodeValidationFailure, + HasSubstr("The second dimension of ROIs input should contain batch id and box coordinates. This " + "dimension is expected to be equal to 5")); } -TEST(type_prop, roi_pooling_incompatible_feature_maps_element_type) { - const auto feat_maps = make_shared(element::i32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f32, Shape{3, 5}); - // feat_maps element type must be floating point type - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, incompatible_feature_maps_element_type) { + const auto feat_maps = make_shared(element::i32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "max"), + NodeValidationFailure, + HasSubstr("The data type for input and ROIs is expected to be a floating point type")); } -TEST(type_prop, roi_pooling_incompatible_rois_element_type) { - const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f16, Shape{3, 5}); - // rois element type must be equal to feat_maps element type (floating point type) - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "bilinear"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, incompatible_rois_element_type) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::i16, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "bilinear"), + NodeValidationFailure, + HasSubstr("The data type for input and ROIs is expected to be a floating point type")); } -TEST(type_prop, roi_pooling_invalid_pooling_method) { - const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f16, Shape{3, 5}); - // ROIPooling method is invalid: not max nor bilinear - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, 0.625f, "invalid"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, invalid_pooling_method) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, spatial_scale, "invalid"), + NodeValidationFailure, + HasSubstr("Pooling method attribute should be either \'max\' or \'bilinear\'")); } -TEST(type_prop, roi_pooling_invalid_spatial_scale) { - const auto feat_maps = 
make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f16, Shape{3, 5}); - // ROIPooling spatial scale attribute must be a positive floating point number - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{2, 2}, -0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, invalid_spatial_scale) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, pooling_roi_2x2, -1.0f), + NodeValidationFailure, + HasSubstr("The spatial scale attribute should be a positive floating point number")); } -TEST(type_prop, roi_pooling_invalid_pooled_size) { - const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); - const auto rois = make_shared(element::f16, Shape{3, 5}); - // ROIPooling pooled_h and pooled_w must be non-negative integers - ASSERT_THROW(const auto unused = make_shared(feat_maps, rois, Shape{1, 0}, 0.625f, "max"), - ngraph::NodeValidationFailure); +TEST_F(TypePropROIPoolingV0, invalid_pooled_size) { + const auto feat_maps = make_shared(element::f32, Shape{3, 2, 6, 6}); + const auto rois = make_shared(element::f32, PartialShape{3, 5}); + + OV_EXPECT_THROW(const auto op = make_op(feat_maps, rois, Shape{1, 0}, spatial_scale), + NodeValidationFailure, + HasSubstr("Pooled size attributes pooled_h and pooled_w should be positive integers")); } diff --git a/src/core/tests/visitors/op/roi_pooling.cpp b/src/core/tests/visitors/op/roi_pooling.cpp index 8438a797728eb1..a5b49fe9cca3d2 100644 --- a/src/core/tests/visitors/op/roi_pooling.cpp +++ b/src/core/tests/visitors/op/roi_pooling.cpp @@ -25,7 +25,7 @@ TEST(attributes, roi_pooling_op) { NodeBuilder builder(op, {data, coords}); const auto g_op = ov::as_type_ptr(builder.create()); - EXPECT_EQ(g_op->get_output_size(), op->get_output_size()); + EXPECT_EQ(g_op->get_output_roi(), op->get_output_roi()); EXPECT_EQ(g_op->get_spatial_scale(), op->get_spatial_scale()); EXPECT_EQ(g_op->get_method(), op->get_method()); } diff --git a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp index 3f25a167134715..496307ede5bb6b 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp @@ -393,10 +393,10 @@ ROIPooling::ROIPooling(const std::shared_ptr& op, const GraphConte std::string errorPrefix = "ROIPooling layer with name '" + getName() + "' "; auto roiPooling = ngraph::as_type_ptr(op); - refParams.pooled_h = roiPooling->get_output_size()[0]; - refParams.pooled_w = roiPooling->get_output_size()[1]; + refParams.pooled_h = roiPooling->get_output_roi()[0]; + refParams.pooled_w = roiPooling->get_output_roi()[1]; refParams.spatial_scale = roiPooling->get_spatial_scale(); - std::string m = roiPooling->get_method(); + const auto& m = roiPooling->get_method(); if (m == "max") { algorithm = Algorithm::ROIPoolingMax; } else if (m == "bilinear") { diff --git a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp index f27dbf76453f5e..1961157ae93ddc 100644 --- a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp @@ -62,6 +62,7 @@ #include "reverse_sequence_shape_inference.hpp" #include "reverse_shape_inference.hpp" #include "roi_align_shape_inference.hpp" +#include
"roi_pooling_shape_inference.hpp" #include "roll_shape_inference.hpp" #include "scatter_elements_update_shape_inference.hpp" #include "scatter_nd_base_shape_inference.hpp" @@ -125,9 +126,8 @@ class entryIO : public entryBase { IShapeInferCommon::Result infer(const std::vector& input_shapes, const std::map& constant_data) override { - auto op = static_cast(node.get()); - std::vector output_shapes(op->get_output_size()); - shape_infer(op, input_shapes, output_shapes); + std::vector output_shapes(node->get_output_size()); + shape_infer(static_cast(node.get()), input_shapes, output_shapes); return {std::move(output_shapes), ShapeInferStatus::success}; } }; @@ -597,6 +597,7 @@ const IShapeInferCommonFactory::TRegistry IShapeInferCommonFactory::registry{ _OV_OP_SHAPE_INFER_REG(Reshape, entryIOC), _OV_OP_SHAPE_INFER_REG(ReverseSequence, entryIO), _OV_OP_SHAPE_INFER_REG(ROIAlign, entryIO), + _OV_OP_SHAPE_INFER_REG(ROIPooling, entryIO), _OV_OP_SHAPE_INFER_REG(Roll, entryIOC), _OV_OP_SHAPE_INFER_REG(ScatterElementsUpdate, entryIOC), _OV_OP_SHAPE_INFER_REG(ScatterNDUpdate, entryIO), diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_pooling_shape_inference_test.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_pooling_shape_inference_test.cpp new file mode 100644 index 00000000000000..0999e278c95ae6 --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/roi_pooling_shape_inference_test.cpp @@ -0,0 +1,74 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/test_assertions.hpp" +#include "openvino/opsets/opset11.hpp" +#include "utils.hpp" + +using namespace ov; +using namespace ov::intel_cpu; +using namespace testing; + +class ROIPoolingV0StaticShapeInferenceTest : public OpStaticShapeInferenceTest { +protected: + void SetUp() override { + output_shapes.resize(1); + } +}; + +TEST_F(ROIPoolingV0StaticShapeInferenceTest, default_ctor) { + op = make_op(); + op->set_output_roi({3, 3}); + op->set_method("max"); + op->set_spatial_scale(0.34f); + + input_shapes = ShapeVector{{1, 5, 10, 10}, {2, 5}}; + auto shape_infer = make_shape_inference(op); + shape_inference(op.get(), input_shapes, output_shapes); + + EXPECT_EQ(output_shapes.size(), 1); + EXPECT_EQ(output_shapes.front(), StaticShape({2, 5, 3, 3})); +} + +TEST_F(ROIPoolingV0StaticShapeInferenceTest, inputs_dynamic_rank) { + const auto feat = std::make_shared(element::f64, PartialShape::dynamic()); + const auto rois = std::make_shared(element::f64, PartialShape::dynamic()); + + op = make_op(feat, rois, ov::Shape{5, 5}, 0.9f); + + input_shapes = ShapeVector{{2, 3, 100, 100}, {10, 5}}; + shape_inference(op.get(), input_shapes, output_shapes); + + EXPECT_EQ(output_shapes.size(), 1); + EXPECT_EQ(output_shapes.front(), StaticShape({10, 3, 5, 5})); +} + +TEST_F(ROIPoolingV0StaticShapeInferenceTest, inputs_static_rank) { + const auto feat = std::make_shared(element::f64, PartialShape::dynamic(4)); + const auto rois = std::make_shared(element::f64, PartialShape::dynamic(2)); + + op = make_op(feat, rois, ov::Shape{7, 5}, 1.9f, "max"); + + input_shapes = ShapeVector{{2, 3, 20, 100}, {10, 5}}; + shape_inference(op.get(), input_shapes, output_shapes); + + EXPECT_EQ(output_shapes.size(), 1); + EXPECT_EQ(output_shapes.front(), StaticShape({10, 3, 7, 5})); +} + +TEST_F(ROIPoolingV0StaticShapeInferenceTest, invalid_rois_batch_size) { + const auto feat = std::make_shared(element::f64, PartialShape::dynamic(4)); + const auto rois = 
std::make_shared(element::f64, PartialShape::dynamic()); + + op = make_op(feat, rois, ov::Shape{7, 5}, 1.9f, "max"); + + input_shapes = ShapeVector{{2, 3, 20, 100}, {10, 6}}; + + OV_EXPECT_THROW(shape_inference(op.get(), input_shapes, output_shapes), + NodeValidationFailure, + HasSubstr("The second dimension of ROIs input should contain batch id and box coordinates. This " + "dimension is expected to be equal to 5")); +} From 7d56c75d65f0072ff9fc36c5e747ce35d15d031a Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Tue, 21 Mar 2023 10:28:58 +0100 Subject: [PATCH 007/296] Fix MO Reader for Squeeze without axes (#16398) * Fix MO Reader for Squeeze without axes * Fix style * Update tools/mo/openvino/tools/mo/utils/ir_reader/internal_ops/squeeze.py --- .../utils/ir_reader/internal_ops/squeeze.py | 19 ++++++++++--- .../mo/utils/ir_reader/layer_to_class_test.py | 28 +++++++++++++++++++ 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/tools/mo/openvino/tools/mo/utils/ir_reader/internal_ops/squeeze.py b/tools/mo/openvino/tools/mo/utils/ir_reader/internal_ops/squeeze.py index 67bfc80dea5e69..5e9702e30f8ea0 100644 --- a/tools/mo/openvino/tools/mo/utils/ir_reader/internal_ops/squeeze.py +++ b/tools/mo/openvino/tools/mo/utils/ir_reader/internal_ops/squeeze.py @@ -3,12 +3,23 @@ from openvino.tools.mo.graph.graph import Node from openvino.tools.mo.ops.squeeze import Squeeze +from openvino.tools.mo.front.common.partial_infer.utils import shape_array, is_fully_defined class SqueezeInternal(Squeeze): @staticmethod def infer(node: Node): - axis_value = node.in_port(1).data.get_value() - Squeeze.infer(node) - # preserve initial axis value - node.in_port(1).data.set_value(axis_value) + if node.is_in_port_connected(1): + axis_value = node.in_port(1).data.get_value() + Squeeze.infer(node) + # preserve initial axis value + node.in_port(1).data.set_value(axis_value) + else: + # Squeeze without axes provided + node_name = node.soft_get('name', node.id) + input_shape = node.in_port(0).data.get_shape() + assert is_fully_defined( + input_shape), 'Squeeze dimensions are not defined for op "{}"'.format(node_name) + output_shape = [s for s in shape_array(input_shape).tolist() if s != 1] + node.out_port(0).data.set_shape(shape_array(output_shape)) + diff --git a/tools/mo/unit_tests/mo/utils/ir_reader/layer_to_class_test.py b/tools/mo/unit_tests/mo/utils/ir_reader/layer_to_class_test.py index f86e4514ca7acb..8dd6a17aba63e2 100644 --- a/tools/mo/unit_tests/mo/utils/ir_reader/layer_to_class_test.py +++ b/tools/mo/unit_tests/mo/utils/ir_reader/layer_to_class_test.py @@ -166,6 +166,34 @@ def test_squeeze(self): (flag, resp) = compare_graphs(graph, graph_ref, 'result', check_op_attrs=True) self.assertTrue(flag, resp) + def test_squeeze_no_axes(self): + nodes_attributes = { + 'input': {'kind': 'op', 'type': 'Parameter'}, + 'input_data': {'shape': [2, 1, 3], 'kind': 'data'}, + + 'squeeze': {'kind': 'op', 'type': 'Squeeze'}, + 'squeeze_data': {'shape': [2, 3], 'kind': 'data', 'value': None}, + + 'result': {'kind': 'op', 'type': 'Result'} + } + + edges = [('input', 'input_data'), + ('input_data', 'squeeze'), + ('squeeze', 'squeeze_data'), + ('squeeze_data', 'result'), + ] + + graph = build_graph(nodes_attributes, edges, nodes_with_edges_only=True) + + squeeze_node = Node(graph, 'squeeze') + SqueezeInternal.infer(squeeze_node) + + graph_ref = build_graph(nodes_attributes, edges, nodes_with_edges_only=True) + + # Check that graph wasn't changed after shape infer + (flag, resp) = compare_graphs(graph, graph_ref, 
'result', check_op_attrs=True) + self.assertTrue(flag, resp) + def test_unsqueeze(self): nodes_attributes = { 'input': {'kind': 'op', 'type': 'Parameter'}, From ec0a1e58d16777afa9c9aac598ad4cb938ff3d64 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 21 Mar 2023 13:34:37 +0400 Subject: [PATCH 008/296] Fixed some leftovers for 2.0 dev api (#16421) * Fixed some leftovers for 2.0 dev api * Fixed build issue --- src/inference/dev_api/ie_icore.hpp | 18 ------ .../openvino/runtime/device_id_parser.hpp | 36 +++++++++++ .../dev_api/openvino/runtime/iplugin.hpp | 14 ++++- src/inference/src/core.cpp | 5 +- src/inference/src/dev/core_impl.cpp | 30 +++++----- src/inference/src/dev/core_impl_ie.cpp | 13 ++-- src/inference/src/dev/device_id_parser.cpp | 60 ++++++++++--------- src/inference/src/ie_core.cpp | 9 +-- src/plugins/auto/plugin.cpp | 15 ++--- src/plugins/auto/utils/plugin_config.hpp | 5 +- src/plugins/auto_batch/src/auto_batch.cpp | 5 +- src/plugins/hetero/plugin.cpp | 16 ++--- src/plugins/intel_gpu/src/plugin/plugin.cpp | 3 +- .../src/base/layer_test_utils.cpp | 3 +- 14 files changed, 138 insertions(+), 94 deletions(-) diff --git a/src/inference/dev_api/ie_icore.hpp b/src/inference/dev_api/ie_icore.hpp index 32f08028d3f012..03c298af681f72 100644 --- a/src/inference/dev_api/ie_icore.hpp +++ b/src/inference/dev_api/ie_icore.hpp @@ -209,22 +209,4 @@ class ICore : public ov::ICore { virtual RemoteContext::Ptr GetDefaultContext(const std::string& deviceName) = 0; }; -/** - * @private - */ -class INFERENCE_ENGINE_API_CLASS(DeviceIDParser) { - std::string deviceName; - std::string deviceID; - -public: - explicit DeviceIDParser(const std::string& deviceNameWithID); - - std::string getDeviceID() const; - std::string getDeviceName() const; - - static std::vector getHeteroDevices(std::string fallbackDevice); - static std::vector getMultiDevices(std::string devicesList); - static std::string getBatchDevice(std::string devicesList); -}; - } // namespace InferenceEngine diff --git a/src/inference/dev_api/openvino/runtime/device_id_parser.hpp b/src/inference/dev_api/openvino/runtime/device_id_parser.hpp new file mode 100644 index 00000000000000..ecd95c6a6a7859 --- /dev/null +++ b/src/inference/dev_api/openvino/runtime/device_id_parser.hpp @@ -0,0 +1,36 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief Provides a parser for device names + * @file openvino/runtime/device_id_parser.hpp + */ + +#pragma once + +#include + +#include "openvino/runtime/common.hpp" + +namespace ov { + +/** + * @brief Class that parses a device name and id + */ +class OPENVINO_RUNTIME_API DeviceIDParser { + std::string m_device_name; + std::string m_device_id; + +public: + explicit DeviceIDParser(const std::string& device_name_with_id); + + const std::string& get_device_id() const; + const std::string& get_device_name() const; + + static std::vector get_hetero_devices(const std::string& fallbackDevice); + static std::vector get_multi_devices(const std::string& devicesList); + static std::string get_batch_device(const std::string& devicesList); +}; + +} // namespace ov diff --git a/src/inference/dev_api/openvino/runtime/iplugin.hpp b/src/inference/dev_api/openvino/runtime/iplugin.hpp index 5d752ab5b15d08..a64073b3ebe0d1 100644 --- a/src/inference/dev_api/openvino/runtime/iplugin.hpp +++ b/src/inference/dev_api/openvino/runtime/iplugin.hpp @@ -20,6 +20,7 @@ #include "openvino/runtime/icore.hpp" #include
"openvino/runtime/iremote_context.hpp" #include "openvino/runtime/threading/executor_manager.hpp" +#include "openvino/util/pp.hpp" namespace InferenceEngine { @@ -256,7 +257,11 @@ OPENVINO_RUNTIME_API std::unordered_set get_supported_nodes( std::function&)> transform, std::function)> is_node_supported); -} // namespace ov +/** + * @private + */ +using CreatePluginFunc = void(std::shared_ptr<::ov::IPlugin>&); + /** * @def OV_CREATE_PLUGIN * @brief Defines a name of a function creating plugin instance @@ -266,6 +271,13 @@ OPENVINO_RUNTIME_API std::unordered_set get_supported_nodes( # define OV_CREATE_PLUGIN CreatePluginEngine #endif +/** + * @private + */ +constexpr static const auto create_plugin_function = OV_PP_TOSTRING(OV_CREATE_PLUGIN); + +} // namespace ov + /** * @def OV_DEFINE_PLUGIN_CREATE_FUNCTION(PluginType, version) * @brief Defines the exported `OV_CREATE_PLUGIN` function which is used to create a plugin instance diff --git a/src/inference/src/core.cpp b/src/inference/src/core.cpp index 01454656e7f3ad..9da46ee74fae3e 100644 --- a/src/inference/src/core.cpp +++ b/src/inference/src/core.cpp @@ -9,6 +9,7 @@ #include "dev/converter_utils.hpp" #include "dev/core_impl.hpp" #include "ie_itt.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "so_extension.hpp" #ifdef OPENVINO_STATIC_LIBRARY @@ -252,8 +253,8 @@ void Core::register_plugin(const std::string& plugin, const std::string& device_ void Core::unload_plugin(const std::string& device_name) { OV_CORE_CALL_STATEMENT({ - ie::DeviceIDParser parser(device_name); - std::string devName = parser.getDeviceName(); + ov::DeviceIDParser parser(device_name); + std::string devName = parser.get_device_name(); _impl->unload_plugin(devName); }); diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 44fc79a4987f59..ed39bc67f1f94e 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -28,6 +28,7 @@ #include "openvino/core/preprocess/pre_post_process.hpp" #include "openvino/core/version.hpp" #include "openvino/pass/manager.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "openvino/runtime/icompiled_model.hpp" #include "openvino/runtime/itensor.hpp" #include "openvino/runtime/remote_context.hpp" @@ -276,9 +277,9 @@ ov::Parsed ov::parseDeviceNameIntoConfig(const std::string& deviceName, const An updated_device_name = deviceName.substr(0, pos); parsed_device_priority = deviceName.substr(pos + 1); } else { - InferenceEngine::DeviceIDParser parser(deviceName); - updated_device_name = parser.getDeviceName(); - parsed_device_priority = parser.getDeviceID(); + ov::DeviceIDParser parser(deviceName); + updated_device_name = parser.get_device_name(); + parsed_device_priority = parser.get_device_id(); } // checks and updates device priority @@ -416,8 +417,7 @@ ov::Plugin ov::CoreImpl::get_plugin(const std::string& pluginName) const { } else { so = ov::util::load_shared_object(desc.libraryLocation.c_str()); std::shared_ptr plugin_impl; - reinterpret_cast( - ov::util::get_symbol(so, InferenceEngine::create_plugin_function))(plugin_impl); + reinterpret_cast(ov::util::get_symbol(so, ov::create_plugin_function))(plugin_impl); plugin = Plugin{plugin_impl, so}; } @@ -425,8 +425,8 @@ ov::Plugin ov::CoreImpl::get_plugin(const std::string& pluginName) const { plugin.set_name(deviceName); // Set Core class reference to plugins - std::weak_ptr mutableCore = - std::const_pointer_cast(shared_from_this()); + std::weak_ptr mutableCore = + 
std::const_pointer_cast(std::dynamic_pointer_cast(shared_from_this())); plugin.set_core(mutableCore); } @@ -472,9 +472,9 @@ ov::Plugin ov::CoreImpl::get_plugin(const std::string& pluginName) const { // for each such .0, .1, .# device to make sure plugin can handle different settings for different // device IDs for (auto pluginDesc : pluginRegistry) { - InferenceEngine::DeviceIDParser parser(pluginDesc.first); - if (pluginDesc.first.find(deviceName) != std::string::npos && !parser.getDeviceID().empty()) { - pluginDesc.second.defaultConfig[deviceKey] = parser.getDeviceID(); + ov::DeviceIDParser parser(pluginDesc.first); + if (pluginDesc.first.find(deviceName) != std::string::npos && !parser.get_device_id().empty()) { + pluginDesc.second.defaultConfig[deviceKey] = parser.get_device_id(); plugin.set_property(pluginDesc.second.defaultConfig); } } @@ -795,7 +795,7 @@ void ov::CoreImpl::apply_auto_batching(const std::shared_ptr& m if (pos == std::string::npos) return; // BATCH device is already configured via the config deviceNameWithBatchSize = deviceName.substr(pos + 1); - deviceNameWithoutBatch = InferenceEngine::DeviceIDParser::getBatchDevice(deviceNameWithBatchSize); + deviceNameWithoutBatch = ov::DeviceIDParser::get_batch_device(deviceNameWithBatchSize); // when user sets the BATCH device explicitly, we may check the dims less strictly // as the result is being checked by the user strictly_check_dims = false; @@ -982,8 +982,8 @@ void ov::CoreImpl::set_property_for_device(const ov::AnyMap& configMap, const st return; } - InferenceEngine::DeviceIDParser parser(deviceName); - std::string clearDeviceName = parser.getDeviceName(); + ov::DeviceIDParser parser(deviceName); + std::string clearDeviceName = parser.get_device_name(); std::vector> created_plugins; { @@ -1065,8 +1065,8 @@ void ov::CoreImpl::set_property_for_device(const ov::AnyMap& configMap, const st const std::string deviceKey = supportsConfigDeviceID ? 
CONFIG_KEY_INTERNAL(CONFIG_DEVICE_ID) : CONFIG_KEY(DEVICE_ID); - if (!parser.getDeviceID().empty()) { - configCopy[deviceKey] = parser.getDeviceID(); + if (!parser.get_device_id().empty()) { + configCopy[deviceKey] = parser.get_device_id(); } } plugin.second.set_property(configCopy); diff --git a/src/inference/src/dev/core_impl_ie.cpp b/src/inference/src/dev/core_impl_ie.cpp index cbf60f265ba209..a04d46081040a7 100644 --- a/src/inference/src/dev/core_impl_ie.cpp +++ b/src/inference/src/dev/core_impl_ie.cpp @@ -18,6 +18,7 @@ #include "ngraph/op/constant.hpp" #include "ngraph/pass/constant_folding.hpp" #include "openvino/itt.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "openvino/runtime/icompiled_model.hpp" #include "openvino/runtime/itensor.hpp" #include "openvino/util/common_util.hpp" @@ -231,25 +232,25 @@ std::map ov::CoreImpl::GetVersions(const if (deviceName.find("HETERO") == 0) { auto pos = deviceName.find_first_of(":"); if (pos != std::string::npos) { - deviceNames = InferenceEngine::DeviceIDParser::getHeteroDevices(deviceName.substr(pos + 1)); + deviceNames = ov::DeviceIDParser::get_hetero_devices(deviceName.substr(pos + 1)); } deviceNames.push_back("HETERO"); } else if (deviceName.find("MULTI") == 0) { auto pos = deviceName.find_first_of(":"); if (pos != std::string::npos) { - deviceNames = InferenceEngine::DeviceIDParser::getMultiDevices(deviceName.substr(pos + 1)); + deviceNames = ov::DeviceIDParser::get_multi_devices(deviceName.substr(pos + 1)); } deviceNames.push_back("MULTI"); } else if (deviceName.find("AUTO") == 0) { auto pos = deviceName.find_first_of(":"); if (pos != std::string::npos) { - deviceNames = InferenceEngine::DeviceIDParser::getMultiDevices(deviceName.substr(pos + 1)); + deviceNames = ov::DeviceIDParser::get_multi_devices(deviceName.substr(pos + 1)); } deviceNames.emplace_back("AUTO"); } else if (deviceName.find("BATCH") == 0) { auto pos = deviceName.find_first_of(":"); if (pos != std::string::npos) { - deviceNames = {InferenceEngine::DeviceIDParser::getBatchDevice(deviceName.substr(pos + 1))}; + deviceNames = {ov::DeviceIDParser::get_batch_device(deviceName.substr(pos + 1))}; } deviceNames.push_back("BATCH"); } else { @@ -258,8 +259,8 @@ std::map ov::CoreImpl::GetVersions(const } for (auto&& deviceName_ : deviceNames) { - ie::DeviceIDParser parser(deviceName_); - std::string deviceNameLocal = parser.getDeviceName(); + ov::DeviceIDParser parser(deviceName_); + std::string deviceNameLocal = parser.get_device_name(); ov::Plugin cppPlugin = get_plugin(deviceNameLocal); diff --git a/src/inference/src/dev/device_id_parser.cpp b/src/inference/src/dev/device_id_parser.cpp index 73309ee1cc9fc8..ba64a1cf08b831 100644 --- a/src/inference/src/dev/device_id_parser.cpp +++ b/src/inference/src/dev/device_id_parser.cpp @@ -2,47 +2,51 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ie_icore.hpp" +#include "openvino/runtime/device_id_parser.hpp" -namespace InferenceEngine { +#include + +namespace ov { DeviceIDParser::DeviceIDParser(const std::string& deviceNameWithID) { - deviceName = deviceNameWithID; + m_device_name = deviceNameWithID; - auto pos = deviceName.find('.'); + auto pos = m_device_name.find('.'); if (pos != std::string::npos) { - deviceName = deviceNameWithID.substr(0, pos); - deviceID = deviceNameWithID.substr(pos + 1, deviceNameWithID.size()); + m_device_name = deviceNameWithID.substr(0, pos); + m_device_id = deviceNameWithID.substr(pos + 1, deviceNameWithID.size()); } } -std::string DeviceIDParser::getDeviceID() const { - return 
deviceID; +const std::string& DeviceIDParser::get_device_id() const { + return m_device_id; } -std::string DeviceIDParser::getDeviceName() const { - return deviceName; +const std::string& DeviceIDParser::get_device_name() const { + return m_device_name; } -std::vector DeviceIDParser::getHeteroDevices(std::string fallbackDevice) { +std::vector DeviceIDParser::get_hetero_devices(const std::string& fallbackDevice) { std::vector deviceNames; + std::string fallback_dev = fallbackDevice; std::string cdevice; char delimiter = ','; size_t pos = 0; - while ((pos = fallbackDevice.find(delimiter)) != std::string::npos) { - deviceNames.push_back(fallbackDevice.substr(0, pos)); - fallbackDevice.erase(0, pos + 1); + while ((pos = fallback_dev.find(delimiter)) != std::string::npos) { + deviceNames.push_back(fallback_dev.substr(0, pos)); + fallback_dev.erase(0, pos + 1); } - if (!fallbackDevice.empty()) - deviceNames.push_back(fallbackDevice); + if (!fallback_dev.empty()) + deviceNames.push_back(fallback_dev); return deviceNames; } -std::vector DeviceIDParser::getMultiDevices(std::string devicesList) { +std::vector DeviceIDParser::get_multi_devices(const std::string& devicesList) { + std::string dev_list = devicesList; std::set deviceNames; auto trim_request_info = [](const std::string& device_with_requests) { auto opening_bracket = device_with_requests.find_first_of('('); @@ -53,37 +57,37 @@ std::vector DeviceIDParser::getMultiDevices(std::string devicesList size_t pos = 0; // in addition to the list of devices, every device can have a #requests in the brackets e.g. "CPU(100)" // we skip the #requests info here - while ((pos = devicesList.find(delimiter)) != std::string::npos) { - auto d = devicesList.substr(0, pos); + while ((pos = dev_list.find(delimiter)) != std::string::npos) { + auto d = dev_list.substr(0, pos); if (d.find("BATCH") == 0) { deviceNames.insert("BATCH"); auto p = d.find_first_of(":"); if (p != std::string::npos) - deviceNames.insert(DeviceIDParser::getBatchDevice(d.substr(p + 1))); + deviceNames.insert(DeviceIDParser::get_batch_device(d.substr(p + 1))); } else { deviceNames.insert(trim_request_info(d)); } - devicesList.erase(0, pos + 1); + dev_list.erase(0, pos + 1); } - if (!devicesList.empty()) { - if (devicesList.find("BATCH") == 0) { + if (!dev_list.empty()) { + if (dev_list.find("BATCH") == 0) { deviceNames.insert("BATCH"); - auto p = devicesList.find_first_of(":"); + auto p = dev_list.find_first_of(":"); if (p != std::string::npos) - deviceNames.insert(DeviceIDParser::getBatchDevice(devicesList.substr(p + 1))); + deviceNames.insert(DeviceIDParser::get_batch_device(dev_list.substr(p + 1))); } else { - deviceNames.insert(trim_request_info(devicesList)); + deviceNames.insert(trim_request_info(dev_list)); } } return std::vector(deviceNames.begin(), deviceNames.end()); } -std::string DeviceIDParser::getBatchDevice(std::string device) { +std::string DeviceIDParser::get_batch_device(const std::string& device) { auto trim_request_info = [](const std::string& device_with_requests) { auto opening_bracket = device_with_requests.find_first_of('('); return device_with_requests.substr(0, opening_bracket); }; return trim_request_info(device); } -} // namespace InferenceEngine +} // namespace ov diff --git a/src/inference/src/ie_core.cpp b/src/inference/src/ie_core.cpp index 799e284f58ba38..cc138a0f13d17a 100644 --- a/src/inference/src/ie_core.cpp +++ b/src/inference/src/ie_core.cpp @@ -40,6 +40,7 @@ #include "openvino/op/result.hpp" #include "openvino/runtime/compiled_model.hpp" #include 
"openvino/runtime/core.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "openvino/util/common_util.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" @@ -251,8 +252,8 @@ ExecutableNetwork Core::ImportNetwork(std::istream& networkModel, } std::string deviceName_ = context->getDeviceName(); - DeviceIDParser device(deviceName_); - std::string deviceName = device.getDeviceName(); + ov::DeviceIDParser device(deviceName_); + std::string deviceName = device.get_device_name(); auto parsed = ov::parseDeviceNameIntoConfig(deviceName, ov::any_copy(config)); auto exec = _impl->get_plugin(deviceName) @@ -350,8 +351,8 @@ void Core::RegisterPlugins(const std::string& xmlConfigFile) { } void Core::UnregisterPlugin(const std::string& deviceName_) { - DeviceIDParser parser(deviceName_); - std::string deviceName = parser.getDeviceName(); + ov::DeviceIDParser parser(deviceName_); + std::string deviceName = parser.get_device_name(); _impl->unload_plugin(deviceName); } diff --git a/src/plugins/auto/plugin.cpp b/src/plugins/auto/plugin.cpp index 641de18db3ce38..aa05129cb46f10 100644 --- a/src/plugins/auto/plugin.cpp +++ b/src/plugins/auto/plugin.cpp @@ -17,6 +17,7 @@ #include #include #include "openvino/runtime/auto/properties.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "plugin.hpp" #include #include @@ -189,8 +190,8 @@ std::vector MultiDeviceInferencePlugin::ParseMetaDevices(cons } } - DeviceIDParser parsed{deviceName}; - std::string deviceid = parsed.getDeviceID(); + ov::DeviceIDParser parsed{deviceName}; + std::string deviceid = parsed.get_device_id(); std::vector sameTypeDevices; // if AUTO:GPU case, replace GPU with GPU.0 and GPU.1 // Disable AUTO:MYRIAD here because of below test case @@ -212,19 +213,19 @@ std::vector MultiDeviceInferencePlugin::ParseMetaDevices(cons } for (auto&& deviceNameWithID : sameTypeDevices) { - DeviceIDParser newParsed{deviceNameWithID}; + ov::DeviceIDParser newParsed{deviceNameWithID}; std::string defaultDeviceID = ""; std::string tempDeviceID = ""; - if (newParsed.getDeviceID().empty()) { + if (newParsed.get_device_id().empty()) { defaultDeviceID = getDefaultDeviceID(deviceNameWithID); tempDeviceID = defaultDeviceID; } else { - tempDeviceID = newParsed.getDeviceID(); + tempDeviceID = newParsed.get_device_id(); } std::string fullDeviceName = ""; std::string uniqueName = ""; - if (newParsed.getDeviceName() == "GPU") { + if (newParsed.get_device_name() == "GPU") { auto supportedMetrics = GetCore()->GetMetric(deviceNameWithID, METRIC_KEY(SUPPORTED_METRICS)).as>(); if (std::find(supportedMetrics.begin(), supportedMetrics.end(), METRIC_KEY(FULL_DEVICE_NAME)) != supportedMetrics.end()) { fullDeviceName = GetCore()->GetMetric(deviceNameWithID, METRIC_KEY(FULL_DEVICE_NAME)).as(); @@ -232,7 +233,7 @@ std::vector MultiDeviceInferencePlugin::ParseMetaDevices(cons } if (fullDeviceName.empty()) { - uniqueName = newParsed.getDeviceName() + "_" + tempDeviceID; + uniqueName = newParsed.get_device_name() + "_" + tempDeviceID; } else { uniqueName = fullDeviceName + "_" + tempDeviceID; } diff --git a/src/plugins/auto/utils/plugin_config.hpp b/src/plugins/auto/utils/plugin_config.hpp index 6677916f429c4e..f0221d471920d4 100644 --- a/src/plugins/auto/utils/plugin_config.hpp +++ b/src/plugins/auto/utils/plugin_config.hpp @@ -10,6 +10,7 @@ #include "ie_icore.hpp" #include "openvino/runtime/auto/properties.hpp" #include "log.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include #include #include @@ -199,7 +200,7 @@ class 
PluginConfig { if (realDevName.empty()) { return false; } - realDevName = DeviceIDParser(realDevName).getDeviceName(); + realDevName = ov::DeviceIDParser(realDevName).get_device_name(); std::string::size_type realEndPos = 0; if ((realEndPos = realDevName.find('(')) != std::string::npos) { realDevName = realDevName.substr(0, realEndPos); @@ -239,4 +240,4 @@ class PluginConfig { BaseValidator::Ptr device_property_validator; static const std::set _availableDevices; }; -} // namespace MultiDevicePlugin \ No newline at end of file +} // namespace MultiDevicePlugin diff --git a/src/plugins/auto_batch/src/auto_batch.cpp b/src/plugins/auto_batch/src/auto_batch.cpp index 5a35ee4385b77c..8dbaea000ecaad 100644 --- a/src/plugins/auto_batch/src/auto_batch.cpp +++ b/src/plugins/auto_batch/src/auto_batch.cpp @@ -19,6 +19,7 @@ #include "ie_ngraph_utils.hpp" #include "ie_performance_hints.hpp" #include "openvino/pass/manager.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "openvino/runtime/intel_gpu/properties.hpp" #include "transformations/common_optimizations/dimension_tracking.hpp" #include "transformations/init_node_info.hpp" @@ -692,8 +693,8 @@ DeviceInformation AutoBatchInferencePlugin::ParseBatchDevice(const std::string& DeviceInformation AutoBatchInferencePlugin::ParseMetaDevice(const std::string& devicesBatchCfg, const std::map& config) const { auto getDeviceConfig = [&](const DeviceName& deviceWithID) { - DeviceIDParser deviceParser(deviceWithID); - std::string deviceName = deviceParser.getDeviceName(); + ov::DeviceIDParser deviceParser(deviceWithID); + std::string deviceName = deviceParser.get_device_name(); std::map tconfig = mergeConfigs(_config, config); // passthrough the cache dir to core->loadnetwork when underlying device does not support cache dir auto deviceConfig = GetCore()->GetSupportedConfig(deviceWithID, tconfig); diff --git a/src/plugins/hetero/plugin.cpp b/src/plugins/hetero/plugin.cpp index 9152f7d8161818..10edbe72fe78c9 100644 --- a/src/plugins/hetero/plugin.cpp +++ b/src/plugins/hetero/plugin.cpp @@ -4,6 +4,7 @@ // clang-format off #include "ie_metric_helpers.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "plugin.hpp" #include #include @@ -95,7 +96,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr Engine::ImportNetwork( Engine::DeviceMetaInformationMap Engine::GetDevicePlugins(const std::string& targetFallback, const Configs& localConfig) const { - auto fallbackDevices = InferenceEngine::DeviceIDParser::getHeteroDevices(targetFallback); + auto fallbackDevices = ov::DeviceIDParser::get_hetero_devices(targetFallback); Engine::DeviceMetaInformationMap metaDevices; for (auto&& deviceName : fallbackDevices) { auto itPlugin = metaDevices.find(deviceName); @@ -140,7 +141,7 @@ QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const Configs } // WARNING: Here is devices with user set priority - auto fallbackDevices = InferenceEngine::DeviceIDParser::getHeteroDevices(fallbackDevicesStr); + auto fallbackDevices = ov::DeviceIDParser::get_hetero_devices(fallbackDevicesStr); for (auto&& deviceName : fallbackDevices) { for (auto&& layerQueryResult : queryResults[deviceName].supportedLayersMap) { @@ -187,17 +188,18 @@ Parameter Engine::GetMetric(const std::string& name, const std::mapGetMetric(parser.getDeviceName(), METRIC_KEY(SUPPORTED_METRICS)).as>(); + auto supportedMetricKeys = GetCore() + ->GetMetric(parser.get_device_name(), METRIC_KEY(SUPPORTED_METRICS)) + .as>(); auto it = std::find(supportedMetricKeys.begin(), 
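            // Use the DEVICE_ARCHITECTURE metric when the device reports it;
            // otherwise fall back to the parsed device name below.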
supportedMetricKeys.end(), METRIC_KEY(DEVICE_ARCHITECTURE)); auto arch = (it != supportedMetricKeys.end()) ? GetCore()->GetMetric(device, METRIC_KEY(DEVICE_ARCHITECTURE)).as() - : parser.getDeviceName(); + : parser.get_device_name(); resArch += " " + arch; } return resArch; diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index e68a9094f221dc..ddf75aefaafed5 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -32,6 +32,7 @@ #include "ie_plugin_config.hpp" #include "gpu/gpu_config.hpp" #include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" +#include "openvino/runtime/device_id_parser.hpp" #include "ie_icore.hpp" #include "dimension_tracker.hpp" @@ -207,7 +208,7 @@ IExecutableNetworkInternal::Ptr Plugin::LoadExeNetworkImpl(const InferenceEngine check_inputs(_networkInputs); auto context_impl = get_context_impl(context); - auto device_id = InferenceEngine::DeviceIDParser{context_impl->get_device_name()}.getDeviceID(); + auto device_id = ov::DeviceIDParser{context_impl->get_device_name()}.get_device_id(); OPENVINO_ASSERT(m_configs_map.find(device_id) != m_configs_map.end(), "[GPU] LoadExeNetworkImpl: Couldn't find config for GPU with id ", device_id); diff --git a/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp b/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp index ea7befd88f3426..66486df59a1ef1 100644 --- a/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp +++ b/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp @@ -9,6 +9,7 @@ #include +#include "openvino/runtime/device_id_parser.hpp" #include #include #include "shared_test_classes/base/layer_test_utils.hpp" @@ -121,7 +122,7 @@ void LayerTestsCommon::QueryNetwork() { ASSERT_EQ(res.second, ctx->getDeviceName()); } catch (...) { // otherwise, compare with originally used device name - ASSERT_EQ(InferenceEngine::DeviceIDParser(res.second).getDeviceName(), targetDevice); + ASSERT_EQ(ov::DeviceIDParser(res.second).get_device_name(), targetDevice); } actual.insert(res.first); } From 5af4a8e8d64edb71c85107324cbd516dfc1407d7 Mon Sep 17 00:00:00 2001 From: "Wang, Yang" Date: Tue, 21 Mar 2023 17:46:44 +0800 Subject: [PATCH 009/296] Take VPUX out of AUTO default candidate device list (#16037) * 1. Add device blacklist for AUTO plugin. 2. Update the logic to parse out the device candidate list from the inputting config MULTI_DEVICE_PRIORITIES. 3. Update the corresponding mock test cases. 4. Ignore the GTEST warning for the test cases. Signed-off-by: Wang, Yang * Update. * Update. * Update. * Add description about blacklist. * Apply suggestions from code review Update. Co-authored-by: yanlan song * Update. * Apply suggestions from code review Updated. Co-authored-by: yanlan song Co-authored-by: River Li * Update test case. * Update test case. * Update test case. * Update. * Update. 
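As an aside, a minimal usage sketch of the behavior this change targets. The
snippet is illustrative only and is not part of the patch: the API calls are
plain OpenVINO 2.0, "model.xml" is a placeholder path, and the expected
candidate lists are assumptions read off the updated unit tests below,
assuming CPU, GPU.0, GPU.1 and VPUX are the available devices.

    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        auto model = core.read_model("model.xml");  // placeholder model
        // No device priority: the blocklist applies, VPUX (and GNA) are
        // dropped, so AUTO's candidate list resolves to "CPU,GPU.0,GPU.1".
        auto compiled_default = core.compile_model(model, "AUTO");
        // An explicit priority list bypasses the blocklist, so VPUX stays in.
        auto compiled_explicit =
            core.compile_model(model, "AUTO", ov::device::priorities("CPU,GPU,VPUX"));
        // A "-" entry only removes a device; with no valid device left in the
        // priority string the blocklist applies again, giving "CPU,GPU.1".
        auto compiled_deny =
            core.compile_model(model, "AUTO", ov::device::priorities("-GPU.0"));
        return 0;
    }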
---------

Signed-off-by: Wang, Yang
Co-authored-by: yanlan song
Co-authored-by: River Li
Co-authored-by: Shen, Wanglei
---
 src/plugins/auto/plugin.cpp              | 139 ++++++++++++-----
 src/plugins/auto/plugin_config.cpp       |   4 +
 src/plugins/auto/utils/plugin_config.hpp |  15 +--
 src/tests/unit/auto/get_device_list.cpp  | 117 ++++++++++++-------
 4 files changed, 163 insertions(+), 112 deletions(-)

diff --git a/src/plugins/auto/plugin.cpp b/src/plugins/auto/plugin.cpp
index aa05129cb46f10..165f5c3db6cf3c 100644
--- a/src/plugins/auto/plugin.cpp
+++ b/src/plugins/auto/plugin.cpp
@@ -850,83 +850,92 @@ std::string MultiDeviceInferencePlugin::GetDeviceList(const std::map<std::string, std::string>& config) const {
     auto deviceList = GetCore()->GetAvailableDevices();
     auto deviceListConfig = config.find(ov::device::priorities.name());
-    if (deviceListConfig->second.empty()) {
-        for (auto&& device : deviceList) {
-            // filter out the supported devices
-            if (!_pluginConfig.isSupportedDevice(device))
-                continue;
-            allDevices += device + ",";
-        }
-    } else {
+    for (auto&& device : deviceList) {
+        // keep only the supported devices
+        if (!_pluginConfig.isSupportedDevice(device))
+            continue;
+        allDevices += device + ",";
+    }
+    std::vector<std::string> devicesMerged;
+    if (deviceListConfig != config.end() && !deviceListConfig->second.empty()) {
         auto priorities = deviceListConfig->second;
         // parsing the string and splitting the comma-separated tokens
-        std::vector<std::string> deviceVec = _pluginConfig.ParsePrioritiesDevices(priorities);
-        std::vector<std::string> devicesToBeDeleted;
-        auto updateDeviceVec = [&](const std::string& delPattern = "") {
-            auto iter = deviceVec.begin();
-            while (iter != deviceVec.end()) {
-                if (delPattern.empty()) {
-                    if ((*iter).find("-") == 0) {
-                        devicesToBeDeleted.push_back((*iter).erase(0, 1));
-                        iter = deviceVec.erase(iter);
-                    } else {
-                        iter++;
-                    }
-                } else {
-                    if ((*iter).find(delPattern) != std::string::npos)
-                        iter = deviceVec.erase(iter);
-                    else
-                        iter++;
-                }
-            }
+        std::vector<std::string> devicesToBeMerged = _pluginConfig.ParsePrioritiesDevices(priorities);
+        std::vector<std::string> devicesToBeDeleted(devicesToBeMerged.size());
+        const auto& iterDel = std::copy_if(devicesToBeMerged.begin(),
+                                           devicesToBeMerged.end(),
+                                           devicesToBeDeleted.begin(),
+                                           [](const std::string& item) {
+                                               return item.front() == '-';
+                                           });
+        devicesToBeDeleted.resize(std::distance(devicesToBeDeleted.begin(), iterDel));
+        const auto& iterMerge =
+            std::remove_if(devicesToBeMerged.begin(), devicesToBeMerged.end(), [](const std::string& item) {
+                return item.front() == '-';
+            });
+        devicesToBeMerged.resize(std::distance(devicesToBeMerged.begin(), iterMerge));
+        for (auto&& device : devicesToBeDeleted)
+            LOG_INFO_TAG("remove %s from device candidate list", device.c_str());
+        auto isAnyDev = [](std::string& device, const std::vector<std::string>& devices) {
+            auto iter = std::find_if(devices.begin(), devices.end(), [device](const std::string& devItem) {
+                return devItem.find(device) != std::string::npos;
+            });
+            return iter != devices.end();
         };
-        updateDeviceVec();
-        if (devicesToBeDeleted.size() == 0) {
-            allDevices = deviceListConfig->second;
+        auto deviceWithDefaultID = [](std::string& device) {
+            // AUTO assumes the default device ID is "0" for a single device.
+            return device.find(".") == std::string::npos ? device + ".0" : device;
+        };
+        if (devicesToBeMerged.empty()) {
+            for (auto&& device : deviceList) {
+                if (isAnyDev(device, devicesToBeDeleted) || !_pluginConfig.isSupportedDevice(device))
+                    continue;
+                devicesMerged.push_back(device);
+            }
         } else {
-            auto deviceNeedToMerge = [&](const std::string& devicename) {
-                for (auto&& iter : devicesToBeDeleted) {
-                    if (iter.find(devicename) != std::string::npos)
-                        return true;
-                }
-                return false;
-            };
-            auto mergeDeviceList = [&]() {
-                std::vector<std::string> mergedList;
-                auto prevSize = mergedList.size();
-                for (auto&& iter : deviceVec) {
-                    for (auto&& viter : deviceList) {
-                        if (viter.find(iter) != std::string::npos && deviceNeedToMerge(iter))
-                            mergedList.push_back(std::move(viter));
+            for (auto&& device : devicesToBeMerged) {
+                if (!isAnyDev(device, deviceList)) {
+                    DeviceIDParser parsed{device};
+                    auto iter = std::find(devicesMerged.begin(), devicesMerged.end(), parsed.getDeviceName());
+                    if (iter != devicesMerged.end() && parsed.getDeviceName() != device && parsed.getDeviceID() == "0")
+                        // The device has the default device ID (eg. GPU.0) and its
+                        // wide name (eg. GPU) is already in the device candidate list.
+                        continue;
+                    // Add the user-specified device to the candidate list
+                    devicesMerged.push_back(device);
+                } else {
+                    // Update the device name if a supported device with this id exists
+                    for (auto&& item : deviceList) {
+                        auto realDevice = deviceWithDefaultID(item);
+                        if (isAnyDev(realDevice, devicesToBeDeleted) || item.find(device) == std::string::npos)
+                            continue;
+                        auto iter = std::find(devicesMerged.begin(), devicesMerged.end(), deviceWithDefaultID(item));
+                        // Remove the device with the default device id from the candidate device list (eg. GPU.0)
+                        // if its wide name is a single device (eg. GPU).
+                        DeviceIDParser parsed{item};
+                        if (parsed.getDeviceName() == item && iter != devicesMerged.end())
+                            devicesMerged.erase(iter);
+                        // continue if the target device is already in the candidate device list.
+                        if (std::find(devicesMerged.begin(), devicesMerged.end(), item) != devicesMerged.end())
+                            continue;
+                        devicesMerged.push_back(item);
                     }
-                    // if virtual devices or mock devices
-                    if (mergedList.size() == prevSize)
-                        mergedList.push_back(std::move(iter));
-                    prevSize = mergedList.size();
                 }
-                return mergedList;
-            };
-
-            deviceVec = deviceVec.size() == 0 ? deviceList : mergeDeviceList();
-            for (auto& iter : devicesToBeDeleted) {
-                LOG_INFO_TAG("remove %s from device candidate list", iter.c_str());
-                updateDeviceVec(iter);
-            }
-            for (auto&& device : deviceVec) {
-                if (!_pluginConfig.isSupportedDevice(device))
-                    continue;
-                allDevices += device + ",";
-            }
+            }
         }
     }
-
-    // remove the last ',' if exist
-    if (allDevices.back() == ',')
-        allDevices.pop_back();
-
+    if (devicesMerged.size()) {
+        allDevices.clear();
+        std::for_each(devicesMerged.begin(), devicesMerged.end(), [&allDevices](const std::string& device) {
+            allDevices += device + ",";
+        });
+    }
     if (allDevices.empty()) {
         IE_THROW() << "Please, check environment due to no supported devices can be used";
     }
+    // remove the last ',' if it exists
+    if (allDevices.back() == ',')
+        allDevices.pop_back();
     return allDevices;
 }

diff --git a/src/plugins/auto/plugin_config.cpp b/src/plugins/auto/plugin_config.cpp
index b344585a06c096..7fffa536a5b566 100644
--- a/src/plugins/auto/plugin_config.cpp
+++ b/src/plugins/auto/plugin_config.cpp
@@ -5,6 +5,10 @@ namespace MultiDevicePlugin {
 const std::set<std::string> PluginConfig::_availableDevices = {"AUTO", "CPU", "GPU", "TEMPLATE", "NVIDIA", "VPUX", "MULTI", "HETERO", "mock"};
+// AUTO will enable the blocklist if
+// 1. No device priority is passed to AUTO/MULTI (eg. core.compile_model(model, "AUTO", configs);).
+// 2. No valid device is parsed out of the device priority (eg. core.compile_model(model, "AUTO:-CPU,-GPU", configs);).
+const std::set<std::string> PluginConfig::_deviceBlocklist = {"VPUX", "GNA"};

 PluginConfig::PluginConfig() {
     set_default();

diff --git a/src/plugins/auto/utils/plugin_config.hpp b/src/plugins/auto/utils/plugin_config.hpp
index f0221d471920d4..243e1800199c19 100644
--- a/src/plugins/auto/utils/plugin_config.hpp
+++ b/src/plugins/auto/utils/plugin_config.hpp
@@ -205,7 +205,7 @@ class PluginConfig {
         if ((realEndPos = realDevName.find('(')) != std::string::npos) {
             realDevName = realDevName.substr(0, realEndPos);
         }
-        if (_availableDevices.end() == std::find(_availableDevices.begin(), _availableDevices.end(), realDevName)) {
+        if (_deviceBlocklist.end() != std::find(_deviceBlocklist.begin(), _deviceBlocklist.end(), realDevName)) {
             return false;
         }
         return true;
@@ -217,17 +217,13 @@ class PluginConfig {
         std::string::size_type endpos = 0;
         while ((endpos = priorities.find(separator, pos)) != std::string::npos) {
             auto subStr = priorities.substr(pos, endpos - pos);
-            if (!isSupportedDevice(subStr)) {
-                IE_THROW() << "Unavailable device name: " << subStr;
-            }
-            devices.push_back(subStr);
+            if (!subStr.empty())
+                devices.push_back(subStr);
             pos = endpos + 1;
         }
         auto subStr = priorities.substr(pos, priorities.length() - pos);
-        if (!isSupportedDevice(subStr)) {
-            IE_THROW() << "Unavailable device name: " << subStr;
-        }
-        devices.push_back(subStr);
+        if (!subStr.empty())
+            devices.push_back(subStr);
         return devices;
     }

@@ -239,5 +235,6 @@ class PluginConfig {
     std::map property_validators;
     BaseValidator::Ptr device_property_validator;
     static const std::set<std::string> _availableDevices;
+    static const std::set<std::string> _deviceBlocklist;
 };
 }  // namespace MultiDevicePlugin

diff --git a/src/tests/unit/auto/get_device_list.cpp b/src/tests/unit/auto/get_device_list.cpp
index 73e0a8938e3131..27456ffcd99368 100644
--- a/src/tests/unit/auto/get_device_list.cpp
+++ b/src/tests/unit/auto/get_device_list.cpp
@@ -23,6 +23,7 @@
 using ::testing::Property;
 using ::testing::Eq;
 using ::testing::AnyNumber;
 using ::testing::ReturnRef;
+using ::testing::NiceMock;
 using ::testing::AtLeast;
 using ::testing::InvokeWithoutArgs;
 using Config = std::map<std::string, std::string>;
@@ -33,24 +34,35 @@
 const char igpuFullDeviceName[] = "Intel(R) Gen9 HD Graphics (iGPU)";
 const char dgpuFullDeviceName[] = "Intel(R) Iris(R) Xe MAX Graphics (dGPU)";
 // const char myriadFullDeviceName[] = "Intel Movidius Myriad X VPU";
 // const char vpuxFullDeviceName[] = "";
-const std::vector<std::string> availableDevs = {"CPU", "GPU.0", "GPU.1", "VPUX", "UNSUPPORTED_DEVICE"};
+const std::vector<std::string> availableDevs = {"CPU", "GPU", "VPUX"};
+const std::vector<std::string> availableDevsWithId = {"CPU", "GPU.0", "GPU.1", "VPUX"};
+using Params = std::tuple<std::string, std::string>;
 using ConfigParams = std::tuple<
-    std::string,                 // Priority devices
-    std::string                  // expect metaDevices
+    std::vector<std::string>,    // Available devices retrieved from Core
+    Params                       // Params {devicePriority, expect metaDevices}
 >;
 class GetDeviceListTest : public ::testing::TestWithParam<ConfigParams> {
 public:
-    std::shared_ptr<MockICore> core;
-    std::shared_ptr<MockMultiDeviceInferencePlugin> plugin;
+    std::shared_ptr<NiceMock<MockICore>> core;
+    std::shared_ptr<NiceMock<MockMultiDeviceInferencePlugin>> plugin;

 public:
     static std::string getTestCaseName(testing::TestParamInfo<ConfigParams> obj) {
+        Params priorityAndMetaDev;
         std::string priorityDevices;
         std::string metaDevices;
-        std::tie(priorityDevices, metaDevices) = obj.param;
+        std::vector<std::string> availableDevices;
+        std::tie(availableDevices, priorityAndMetaDev) = obj.param;
+        std::tie(priorityDevices, metaDevices) = priorityAndMetaDev;
         std::ostringstream result;
         result << "priorityDevices_" << priorityDevices;
         result << "_expectedDevices" << metaDevices;
+        result << "_availableDevicesList";
+        std::string devicesStr;
+        for (auto&& device : availableDevices) {
+            devicesStr += "_" + device;
+        }
+        result << devicesStr;
         return result.str();
     }

@@ -61,15 +73,12 @@ class GetDeviceListTest : public ::testing::TestWithParam<ConfigParams> {
     void SetUp() override {
         // prepare mockicore and cnnNetwork for loading
-        core = std::shared_ptr<MockICore>(new MockICore());
-        auto* origin_plugin = new MockMultiDeviceInferencePlugin();
-        plugin = std::shared_ptr<MockMultiDeviceInferencePlugin>(origin_plugin);
+        core = std::shared_ptr<NiceMock<MockICore>>(new NiceMock<MockICore>());
+        auto* origin_plugin = new NiceMock<MockMultiDeviceInferencePlugin>();
+        plugin = std::shared_ptr<NiceMock<MockMultiDeviceInferencePlugin>>(origin_plugin);
         // replace core with mock Icore
         plugin->SetCore(core);
-
-        ON_CALL(*core, GetAvailableDevices()).WillByDefault(Return(availableDevs));
-
         ON_CALL(*plugin, GetDeviceList).WillByDefault([this](const std::map<std::string, std::string>& config) {
             return plugin->MultiDeviceInferencePlugin::GetDeviceList(config);
@@ -79,40 +88,72 @@ class GetDeviceListTest : public ::testing::TestWithParam<ConfigParams> {

 TEST_P(GetDeviceListTest, GetDeviceListTestWithExcludeList) {
     // get Parameter
+    Params priorityAndMetaDev;
     std::string priorityDevices;
     std::string metaDevices;
-    std::tie(priorityDevices, metaDevices) = this->GetParam();
+    std::vector<std::string> availableDevs;
+    std::tie(availableDevs, priorityAndMetaDev) = this->GetParam();
+    std::tie(priorityDevices, metaDevices) = priorityAndMetaDev;

-    //EXPECT_CALL(*plugin, GetDeviceList(_)).Times(1);
+    ON_CALL(*core, GetAvailableDevices()).WillByDefault(Return(availableDevs));
     EXPECT_CALL(*core, GetAvailableDevices()).Times(1);
     auto result = plugin->GetDeviceList({{ov::device::priorities.name(), priorityDevices}});
     EXPECT_EQ(result, metaDevices);
 }
-
-// ConfigParams details
-// example
-// ConfigParams {devicePriority, expect metaDevices, ifThrowException}
-
-const std::vector<ConfigParams> testConfigs = {
-    //
-    ConfigParams {"CPU,GPU,VPUX",
-                  "CPU,GPU,VPUX"},
-    ConfigParams {"VPUX,GPU,CPU,-GPU.0",
-                  "VPUX,GPU.1,CPU"},
-    ConfigParams {"-GPU.0,GPU,CPU",
-                  "GPU.1,CPU"},
-    ConfigParams {"-GPU.0,GPU",
-                  "GPU.1"},
-    ConfigParams {"-GPU.0", "CPU,GPU.1,VPUX"},
-    ConfigParams {"-GPU.0,-GPU.1", "CPU,VPUX"},
-    ConfigParams {"-GPU.0,-CPU", "GPU.1,VPUX"}
-};
-
-INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, GetDeviceListTest,
-                         ::testing::ValuesIn(testConfigs),
-                         GetDeviceListTest::getTestCaseName);
+const std::vector<Params> testConfigsWithId = {Params{" ", " "},
+                                               Params{"", "CPU,GPU.0,GPU.1"},
+                                               Params{"CPU, ", "CPU, "},
+                                               Params{" ,CPU", " ,CPU"},
+                                               Params{"CPU,", "CPU"},
+                                               Params{"CPU,,GPU", "CPU,GPU.0,GPU.1"},
+                                               Params{"CPU, ,GPU", "CPU, ,GPU.0,GPU.1"},
+                                               Params{"CPU,GPU,GPU.1", "CPU,GPU.0,GPU.1"},
+                                               Params{"CPU,GPU,VPUX,INVALID_DEVICE", "CPU,GPU.0,GPU.1,VPUX,INVALID_DEVICE"},
+                                               Params{"VPUX,GPU,CPU,-GPU.0", "VPUX,GPU.1,CPU"},
+                                               Params{"-GPU.0,GPU,CPU", "GPU.1,CPU"},
+                                               Params{"-GPU.0,GPU", "GPU.1"},
+                                               Params{"-GPU,GPU.0", "GPU.0"},
+                                               Params{"-GPU.0", "CPU,GPU.1"},
+                                               Params{"-GPU.0,-GPU.1", "CPU"},
+                                               Params{"-GPU.0,-GPU.1,INVALID_DEVICE", "INVALID_DEVICE"},
+                                               Params{"-GPU.0,-GPU.1,-INVALID_DEVICE", "CPU"},
+                                               Params{"-GPU.0,-CPU", "GPU.1"}};
+
+const std::vector<Params> testConfigs = {Params{" ", " "},
+                                         Params{"", "CPU,GPU"},
+                                         Params{"GPU", "GPU"},
+                                         Params{"GPU.0", "GPU.0"},
+                                         Params{"GPU,GPU.0", "GPU"},
+                                         Params{"CPU", "CPU"},
+                                         Params{" ,CPU", " ,CPU"},
+                                         Params{" ,GPU", " ,GPU"},
+                                         Params{"GPU, ", "GPU, "},
+                                         Params{"CPU,GPU", "CPU,GPU"},
+                                         Params{"CPU,-GPU", "CPU"},
+                                         Params{"CPU,-GPU,GPU.0", "CPU,GPU.0"},
+                                         Params{"CPU,-GPU,GPU.1", "CPU,GPU.1"},
+                                         Params{"CPU,GPU,-GPU.0", "CPU"},
+                                         Params{"CPU,GPU,-GPU.1", "CPU,GPU"},
+                                         Params{"CPU,GPU.0,GPU", "CPU,GPU"},
+                                         Params{"CPU,GPU,GPU.0", "CPU,GPU"},
+                                         Params{"CPU,GPU,GPU.1", "CPU,GPU,GPU.1"},
+                                         Params{"CPU,GPU.1,GPU", "CPU,GPU.1,GPU"},
+                                         Params{"CPU,VPUX", "CPU,VPUX"},
+                                         Params{"CPU,-VPUX", "CPU"},
+                                         Params{"CPU,-INVALID_DEVICE", "CPU"},
+                                         Params{"CPU,GPU,VPUX", "CPU,GPU,VPUX"}};
+
+INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests_GetDeviceListWithID,
+                         GetDeviceListTest,
+                         ::testing::Combine(::testing::Values(availableDevsWithId),
+                                            ::testing::ValuesIn(testConfigsWithId)),
+                         GetDeviceListTest::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests_GetDeviceList,
+                         GetDeviceListTest,
+                         ::testing::Combine(::testing::Values(availableDevs), ::testing::ValuesIn(testConfigs)),
+                         GetDeviceListTest::getTestCaseName);

 //toDo need add test for ParseMetaDevices(_, config) to check device config of
 //return metaDevices

From 5d6cd626bc91595e0b1d12181d604216de43fca1 Mon Sep 17 00:00:00 2001
From: "Min, Byungil"
Date: Tue, 21 Mar 2023 18:55:06 +0900
Subject: [PATCH 010/296] Fix unit test on dGPU (#16295)

* Resolve failed cases and queue-type issue

+ Resolved out_of_order queue-type issue
+ Added get_test_default_config for setting default config of onednn
+ Cleared failed case

Signed-off-by: Min, Byungil
Co-authored-by: tuxedcat
---
 .../dynamic_execution/memory_realloc_test.cpp | 2 +-
 .../tests/fusions/activation_fusion_test.cpp | 2 +-
 .../tests/fusions/concatenate_fusion_test.cpp | 10 +-
 .../tests/fusions/convolution_fusion_test.cpp | 4 +-
 .../tests/fusions/fusion_test_common.hpp | 7 +-
 .../tests/fusions/gemm_fusion_test.cpp | 1 -
 .../tests/fusions/lrn_fusion_test.cpp | 2 +-
 .../tests/fusions/pooling_fusion_test.cpp | 13 +-
 .../graph_manipulation_gpu_test.cpp | 6 +-
 .../intel_gpu/tests/passes/handle_reshape.cpp | 2 +-
 .../passes/prepare_buffer_fusing_test.cpp | 4 +-
 .../passes/prepare_primitive_fusing_test.cpp | 18 +-
 .../remove_redundant_reorders_tests.cpp | 2 +-
 .../tests/passes/reorder_inputs_test.cpp | 10 +-
 .../passes/select_preferred_formats_test.cpp | 2 +-
 .../passes/test_module_fusing_reorder.cpp | 33 +++-
 .../tests/shape_infer/broadcast_si_test.cpp | 7 +-
 .../test_cases/activation_simple_gpu_test.cpp | 60 +++---
 .../adaptive_avg_pooling_gpu_test.cpp | 2 +-
 .../adaptive_max_pooling_gpu_test.cpp | 4 +-
 .../test_cases/add_reorders_gpu_test.cpp | 4 +-
 .../tests/test_cases/arg_max_gpu_test.cpp | 24 +--
 .../tests/test_cases/barriers_test.cpp | 8 +-
 .../test_cases/batch_to_space_gpu_test.cpp | 28 +--
 .../binary_convolution_gpu_test.cpp | 6 +-
 .../tests/test_cases/border_gpu_test.cpp | 46 ++---
 .../tests/test_cases/broadcast_gpu_test.cpp | 7 +-
 .../tests/test_cases/bucketize_gpu_test.cpp | 2 +-
 .../tests/test_cases/cl_mem_input_test.cpp | 4 +-
 .../tests/test_cases/command_queue_test.cpp | 44 ++++-
 .../test_cases/concatenation_gpu_test.cpp | 38 ++--
 .../tests/test_cases/condition_gpu_test.cpp | 18 +-
 .../test_cases/convert_color_gpu_test.cpp | 20 +-
 .../tests/test_cases/convolution_gpu_test.cpp | 187 +++++++++---------
 .../tests/test_cases/crop_gpu_test.cpp | 58 +++---
 .../tests/test_cases/ctc_loss_gpu_test.cpp | 2 +-
 .../tests/test_cases/cum_sum_gpu_test.cpp | 6 +-
 .../test_cases/custom_gpu_primitive_test.cpp | 12 +-
 .../test_cases/deconvolution_gpu_test.cpp | 77 ++++----
 .../test_cases/depth_concatenate_gpu_test.cpp | 38 ++--
 .../test_cases/depth_to_space_gpu_test.cpp | 16 +-
 .../test_cases/detection_output_test.cpp | 26 +--
 .../tests/test_cases/dft_gpu_test.cpp | 6 +-
 .../tests/test_cases/eltwise_gpu_test.cpp | 126 ++++++------
 .../test_cases/embedding_bag_gpu_test.cpp | 34 ++--
 .../test_cases/empty_tensor_gpu_test.cpp | 2 +-
 ...al_detectron_detection_output_gpu_test.cpp | 6 +-
 ...nerate_proposals_single_image_gpu_test.cpp | 4 +-
 ...etectron_prior_grid_generator_gpu_test.cpp | 2 +-
 ...tectron_roi_feature_extractor_gpu_test.cpp | 6 +-
 ...erimental_detectron_topk_rois_gpu_test.cpp | 6 +-
 .../extract_image_patches_gpu_test.cpp | 14 +-
 .../intel_gpu/tests/test_cases/eye.cpp | 2 +-
 .../test_cases/fully_connected_gpu_test.cpp | 52 ++---
 .../test_cases/gather_elements_gpu_test.cpp | 4 +-
 .../tests/test_cases/gather_gpu_test.cpp | 54 ++---
 .../tests/test_cases/gather_nd_gpu_test.cpp | 2 +-
 .../tests/test_cases/gather_tree_gpu_test.cpp | 2 +-
 .../tests/test_cases/gemm_gpu_test.cpp | 16 +-
 .../generate_proposals_gpu_test.cpp | 13 +-
 .../tests/test_cases/grid_sample_gpu_test.cpp | 2 +-
 .../tests/test_cases/hash_key_gpu_test.cpp | 18 +-
 .../tests/test_cases/loop_gpu_test.cpp | 6 +-
 .../tests/test_cases/lrn_gpu_test.cpp | 8 +-
 .../test_cases/lstm_dynamic_gpu_test.cpp | 14 +-
 .../tests/test_cases/lstm_gpu_test.cpp | 18 +-
 .../tests/test_cases/matrix_nms_gpu_test.cpp | 2 +-
 .../tests/test_cases/memory_test.cpp | 22 ++-
 .../test_cases/multiclass_nms_gpu_test.cpp | 4 +-
 .../test_cases/multiple_streams_gpu_test.cpp | 2 +-
 .../tests/test_cases/mvn_gpu_test.cpp | 38 ++--
 .../test_cases/non_max_suppression_test.cpp | 18 +-
 .../tests/test_cases/non_zero_gpu_test.cpp | 10 +-
 .../tests/test_cases/normalizel2_gpu_test.cpp | 2 +-
 .../tests/test_cases/one_hot_gpu_test.cpp | 26 +--
 .../tests/test_cases/permute_gpu_test.cpp | 62 +++---
 .../tests/test_cases/pooling_gpu_test.cpp | 108 +++++-----
 .../tests/test_cases/prior_box_gpu_test.cpp | 2 +-
 .../propagate_constants_gpu_test.cpp | 2 +-
 .../test_cases/pyramid_roi_align_gpu_test.cpp | 2 +-
 .../tests/test_cases/quantize_gpu_test.cpp | 20 +-
 .../test_cases/random_uniform_gpu_test.cpp | 2 +-
 .../tests/test_cases/range_gpu_test.cpp | 14 +-
 .../tests/test_cases/reduce_gpu_test.cpp | 70 +++----
 .../tests/test_cases/region_yolo_gpu_test.cpp | 2 +-
 .../test_cases/removing_output_node_test.cpp | 4 +-
 .../tests/test_cases/reorder_gpu_test.cpp | 151 +++++++-------
 .../tests/test_cases/reorg_yolo_gpu_test.cpp | 2 +-
 .../tests/test_cases/resample_gpu_test.cpp | 57 +++---
 .../tests/test_cases/reshape_gpu_test.cpp | 22 +--
 .../tests/test_cases/reverse_gpu_test.cpp | 2 +-
 .../test_cases/reverse_sequence_gpu_test.cpp | 26 +--
 .../tests/test_cases/roi_align_gpu_test.cpp | 3 +-
 .../tests/test_cases/roi_pooling_gpu_test.cpp | 2 +-
 .../tests/test_cases/roll_gpu_test.cpp | 2 +-
 .../scatter_elements_update_gpu_test.cpp | 4 +-
 .../test_cases/scatter_nd_update_gpu_test.cpp | 72 +++----
 .../test_cases/scatter_update_gpu_test.cpp | 32 +--
 .../tests/test_cases/select_gpu_test.cpp | 82 ++++----
 .../test_cases/set_output_memory_gpu_test.cpp | 14 +-
 .../tests/test_cases/shape_of_gpu_test.cpp | 10 +-
 .../test_cases/shuffle_channels_test.cpp | 18 +-
 .../intel_gpu/tests/test_cases/slice.cpp | 2 +-
 .../tests/test_cases/softmax_gpu_test.cpp | 22 +--
 .../test_cases/space_to_batch_gpu_test.cpp | 28 +--
 .../test_cases/space_to_depth_gpu_test.cpp | 36 ++--
 .../spatial_concatenate_gpu_test.cpp | 22 +--
 .../tests/test_cases/split_gpu_test.cpp | 12 +-
 .../tests/test_cases/streams_test.cpp | 10 +-
 .../test_cases/strided_slice_gpu_test.cpp | 64 +++---
 .../test_device_mem_usage_estimation.cpp | 8 +-
 .../tests/test_cases/tile_gpu_test.cpp | 16 +-
 .../test_cases/trim_to_outputs_gpu_test.cpp | 6 +-
 .../intel_gpu/tests/test_cases/variable.cpp | 6 +-
 .../intel_gpu/tests/test_utils/test_utils.cpp | 35 +++-
 .../intel_gpu/tests/test_utils/test_utils.h | 9 +
 116 files changed, 1270 insertions(+), 1196 deletions(-)

diff --git a/src/plugins/intel_gpu/tests/dynamic_execution/memory_realloc_test.cpp b/src/plugins/intel_gpu/tests/dynamic_execution/memory_realloc_test.cpp
index 891c4d7913ba03..682b9dc68c5646 100644
---
a/src/plugins/intel_gpu/tests/fusions/concatenate_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/concatenate_fusion_test.cpp @@ -42,12 +42,14 @@ class ConcatOneDNNFusingTest : public ::BaseFusingTest { ov::intel_gpu::ImplementationDesc cldnn_impl = { p.input_format, "", impl_types::ocl }; // for onednn fusing test, topology_non_fused means cldnn, topology_fused is onednn - ExecutionConfig cldnn_cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ExecutionConfig cldnn_cfg = get_test_default_config(engine, + {ov::intel_gpu::queue_type(QueueTypes::in_order), ov::intel_gpu::optimize_data(true), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "concat", cldnn_impl } })}; - ExecutionConfig onednn_cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "concat", cldnn_impl } })}); + ExecutionConfig onednn_cfg = get_test_default_config(engine, + {ov::intel_gpu::queue_type(QueueTypes::in_order), ov::intel_gpu::optimize_data(true), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "concat", onednn_impl } })}; + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "concat", onednn_impl } })}); network network_fused_cldnn(this->engine, this->topology_non_fused, cldnn_cfg); network network_fused_onednn(this->engine, this->topology_fused, onednn_cfg); diff --git a/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp index d88a740f441c80..1f4cef03037989 100644 --- a/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp @@ -208,7 +208,7 @@ class ConvFusingForceKernelTest : public BaseFusingTest public: void execute(bc_force_kernel_params& p) { auto input_prim = get_mem(get_input_layout(p)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { p.input_format, p.kernel_name }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_prim", conv_impl } })); @@ -4178,8 +4178,6 @@ class PermuteOptimizingTestOnednn : public BaseFusingTestengine, this->topology_non_fused, cfg_not_fused); network network_fused(this->engine, this->topology_fused, cfg_fused); diff --git a/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp b/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp index 78a5781e93bf85..e50df802bb5fd4 100644 --- a/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp +++ b/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp @@ -31,13 +31,12 @@ class BaseFusingTest : public ::testing::TestWithParam { static const int max_random = 200; void SetUp() override { + cfg_fused = get_test_default_config(engine); + cfg_not_fused = get_test_default_config(engine); + cfg_fused.set_property(ov::intel_gpu::optimize_data(true)); cfg_not_fused.set_property(ov::intel_gpu::optimize_data(false)); cfg_not_fused.set_property(ov::intel_gpu::allow_static_input_reorder(true)); - if (engine.get_device_info().supports_immad) { - cfg_fused.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - cfg_not_fused.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - } } void compare(network& not_fused, network& fused, T& p, bool count_reorder = false) { diff --git 
a/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp index 5e7ab52c861e09..34b35f26c054ef 100644 --- a/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp @@ -288,7 +288,6 @@ TEST_P(gemm_2in_add, eltwise_postop) { if (engine.get_device_info().supports_immad) { ov::intel_gpu::ImplementationDesc gemmv_impl = { cldnn::format::type::any, "", impl_types::onednn }; cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "gemm_prim", gemmv_impl } })); - cfg_fused.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); } auto add_data_layout = get_output_layout(p); diff --git a/src/plugins/intel_gpu/tests/fusions/lrn_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/lrn_fusion_test.cpp index b4d0a522ae1fed..9b35647f3c6f89 100644 --- a/src/plugins/intel_gpu/tests/fusions/lrn_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/lrn_fusion_test.cpp @@ -35,7 +35,7 @@ class LrnFusingTest : public ::BaseFusingTest { void execute(lrn_test_params& p) { auto input_prim = get_mem(get_input_layout(p)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc lrn_impl = { p.input_format, p.kernel_name }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "lrn_norm", lrn_impl } })); diff --git a/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp index ca58be33337166..7c99523050cea8 100644 --- a/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/pooling_fusion_test.cpp @@ -35,8 +35,9 @@ class PoolingFusingTest : public ::BaseFusingTest { void execute(pooling_test_params& p) { if (engine.get_device_info().supports_immad) p.expected_fused_primitives = p.expected_fused_primitives_onednn; + auto input_prim = get_mem(get_input_layout(p)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); if (!p.kernel_name.empty()) { ov::intel_gpu::ImplementationDesc impl = { p.input_format, p.kernel_name }; @@ -540,12 +541,14 @@ class PoolingOneDNNFusingTest : public ::BaseFusingTest { ov::intel_gpu::ImplementationDesc onednn_impl = { p.input_format, "", impl_types::onednn }; ov::intel_gpu::ImplementationDesc cldnn_impl = { p.input_format, "", impl_types::ocl }; - ExecutionConfig cldnn_cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ExecutionConfig cldnn_cfg = get_test_default_config(engine, + {ov::intel_gpu::queue_type(QueueTypes::in_order), ov::intel_gpu::optimize_data(true), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "pooling", cldnn_impl } })}; - ExecutionConfig onednn_cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "pooling", cldnn_impl } })}); + ExecutionConfig onednn_cfg = get_test_default_config(engine, + {ov::intel_gpu::queue_type(QueueTypes::in_order), ov::intel_gpu::optimize_data(true), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "pooling", onednn_impl } })}; + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "pooling", onednn_impl } })}); // for onednn fusing test, topology_non_fused means cldnn, topology_fused 
is onednn network network_fused_cldnn(this->engine, this->topology_non_fused, cldnn_cfg); diff --git a/src/plugins/intel_gpu/tests/module_tests/graph_manipulation_gpu_test.cpp b/src/plugins/intel_gpu/tests/module_tests/graph_manipulation_gpu_test.cpp index 52adccb6f97a71..7cad52b8497f73 100644 --- a/src/plugins/intel_gpu/tests/module_tests/graph_manipulation_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/module_tests/graph_manipulation_gpu_test.cpp @@ -28,7 +28,7 @@ using namespace ::tests; in similar way as it is done in tests utilizing clDNN API */ TEST(basic, test1) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } }); @@ -67,9 +67,9 @@ TEST(basic, test1) { // Thus, a single method from program like add_intermediate might be tested separately. TEST(add_intermediate_gpu, test1) { - ExecutionConfig config; topology topology; auto& engine = get_test_engine(); + ExecutionConfig config = get_test_default_config(engine); auto input = engine.allocate_memory({ data_types::f32, format::bfyx, {2, 2, 2, 2} }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, {2, 2, 2, 2} }); @@ -124,9 +124,9 @@ TEST(add_intermediate_gpu, test1) // Disabled for now as it produces wrong results TEST(add_intermediate_gpu, test2) { - ExecutionConfig config; topology topology; auto& engine = get_test_engine(); + ExecutionConfig config = get_test_default_config(engine); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); diff --git a/src/plugins/intel_gpu/tests/passes/handle_reshape.cpp b/src/plugins/intel_gpu/tests/passes/handle_reshape.cpp index d1d9b9a592ed54..7634b0347112d8 100644 --- a/src/plugins/intel_gpu/tests/passes/handle_reshape.cpp +++ b/src/plugins/intel_gpu/tests/passes/handle_reshape.cpp @@ -35,7 +35,7 @@ TEST(handle_reshape, dont_remove_reshape_that_changes_rank) { topology.add(reshape("reshape", input_info("e1"), false, {1}, {1})); topology.add(eltwise("e2", input_info("reshape"), input_info("data1"), eltwise_mode::sum)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); auto prog = program::build_program(engine, topology, config, false, true); diff --git a/src/plugins/intel_gpu/tests/passes/prepare_buffer_fusing_test.cpp b/src/plugins/intel_gpu/tests/passes/prepare_buffer_fusing_test.cpp index eab4f99a3c7217..0a5358d19fdccc 100644 --- a/src/plugins/intel_gpu/tests/passes/prepare_buffer_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/passes/prepare_buffer_fusing_test.cpp @@ -34,7 +34,7 @@ TEST(prepare_buffer_fusing, optimize_reshape) { topology.add(permute("permute2", input_info("reshape"), {0, 3, 2, 1})); topology.add(reorder("reorder", input_info("permute2"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -76,7 +76,7 @@ TEST(prepare_buffer_fusing, static_node_after_optimized_out_dyn_reshape) { topology.add(fully_connected("fc", input_info("reshape"), "weights", "", {}, 2)); 
topology.add(reorder("reorder", input_info("fc"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); ASSERT_NE(prog, nullptr); diff --git a/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp b/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp index b4175a32ed778d..6a265633151451 100644 --- a/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/passes/prepare_primitive_fusing_test.cpp @@ -35,7 +35,7 @@ TEST(prepare_primitive_fusing, fuse_activation_to_fc_dyn) { topology.add(activation("act", input_info("fc"), activation_func::relu)); topology.add(reorder("reorder", input_info("act"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -61,7 +61,7 @@ TEST(prepare_primitive_fusing, dont_fuse_incompatible_eltwise) { topology.add(eltwise("eltw", { input_info("input"), input_info("reduce") }, eltwise_mode::sum)); topology.add(reorder("reorder", input_info("eltw"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -87,7 +87,7 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_legal) { topology.add(eltwise("eltw", { input_info("fc"), input_info("extra_input") }, eltwise_mode::sum)); topology.add(reorder("reorder", input_info("eltw"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -129,7 +129,7 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal) { topology.add(eltwise("eltw", { input_info("fc"), input_info("extra_input")}, eltwise_mode::sum)); topology.add(reorder("reorder", input_info("eltw"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -185,7 +185,7 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal_const) { topology.add(eltwise("eltw", { input_info("fc"), input_info("extra_input") }, eltwise_mode::sum)); topology.add(reorder("reorder", input_info("eltw"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -239,7 +239,7 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_legal_scalar_const_broadca topology.add(eltwise("eltw", { input_info("fc"), input_info("extra_input") }, eltwise_mode::sum)); topology.add(reorder("reorder", input_info("eltw"), 
format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -296,7 +296,7 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal_1) { topology.add(activation("act_fc2", input_info("eltw"), activation_func::relu)); topology.add(reorder("reorder", input_info("act_fc2"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -365,7 +365,7 @@ TEST(prepare_primitive_fusing, fuse_eltwise_to_fc_dyn_illegal_2) { topology.add(activation("act_fc3", input_info("eltw"), activation_func::relu)); topology.add(reorder("reorder", input_info("act_fc3"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); @@ -428,7 +428,7 @@ TEST(prepare_primitive_fusing, dont_remove_only_dep_reshape) { topology.add(reshape("reshape2", input_info("reshape1"), true, output_pattern, ov::PartialShape::dynamic(4))); topology.add(gemm("gemm", { input_info("reshape2"), input_info("input2") }, data_types::f32, false, false)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto prog = program::build_program(engine, topology, config, false, true); diff --git a/src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp b/src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp index b2c62a5506b302..0d679659d6671d 100644 --- a/src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp +++ b/src/plugins/intel_gpu/tests/passes/remove_redundant_reorders_tests.cpp @@ -45,7 +45,7 @@ TEST(remove_redundant_reorders, remove_dep_dynamic) { topology.add(reorder("reorder", input_info("conv"), format::any, data_types::f32)); topology.add(softmax("softmax", input_info("reorder"), 1)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/passes/reorder_inputs_test.cpp b/src/plugins/intel_gpu/tests/passes/reorder_inputs_test.cpp index eeace41001b5f8..7619c70c730ca2 100644 --- a/src/plugins/intel_gpu/tests/passes/reorder_inputs_test.cpp +++ b/src/plugins/intel_gpu/tests/passes/reorder_inputs_test.cpp @@ -42,7 +42,7 @@ TEST(reorder_inputs, propagation) { topology.add(pooling("pool", input_info("conv1"), pooling_mode::max, { 1, 1 }, { 1, 1 })); topology.add(convolution("conv2", input_info("pool"), { "weights" })); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto prog = program::build_program(engine, topology, config); @@ -79,7 +79,7 @@ 
TEST(reorder_inputs, impl_forcing_basic_format) { ov::intel_gpu::ImplementationDesc pool_impl = { format::yxfb, "" }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"pool", pool_impl} })); network network(engine, topology, config); @@ -117,7 +117,7 @@ TEST(reorder_inputs, impl_forcing_not_existing) { ov::intel_gpu::ImplementationDesc pool_impl = { format::any, "NOT_EXISTING" }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"pool", pool_impl} })); ASSERT_ANY_THROW(network network(engine, topology, config)); @@ -133,7 +133,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) { ov::intel_gpu::ImplementationDesc actv_impl = { format::yxfb, "activation_ref" }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"actv", actv_impl} })); network network(engine, topology, config); @@ -189,7 +189,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) { // for (auto impl : possible_impls) { // SCOPED_TRACE(to_string(impl)); // -// ExecutionConfig config; +// ExecutionConfig config = get_test_default_config(engine); // config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv", impl} })); // // network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/passes/select_preferred_formats_test.cpp b/src/plugins/intel_gpu/tests/passes/select_preferred_formats_test.cpp index 17ce61f257166e..90346e82de1e1e 100644 --- a/src/plugins/intel_gpu/tests/passes/select_preferred_formats_test.cpp +++ b/src/plugins/intel_gpu/tests/passes/select_preferred_formats_test.cpp @@ -33,7 +33,7 @@ TEST(test_select_preferred_formats, setting_target_conv_format) { topology.add(reorder("reorder", input_info("input"), format::b_fs_yx_fsv16, data_types::f16)); topology.add(convolution("conv1", input_info("reorder"), { "weights" })); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, std::string(""), impl_types::onednn }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv1", impl} })); diff --git a/src/plugins/intel_gpu/tests/passes/test_module_fusing_reorder.cpp b/src/plugins/intel_gpu/tests/passes/test_module_fusing_reorder.cpp index 7f294eaea71de1..4de2436e999c4b 100644 --- a/src/plugins/intel_gpu/tests/passes/test_module_fusing_reorder.cpp +++ b/src/plugins/intel_gpu/tests/passes/test_module_fusing_reorder.cpp @@ -63,7 +63,7 @@ TEST(test_can_fuse_reorder, reorder_for_mixed_type_convolution_fsv32_onednn) topology.add(cldnn::convolution("conv", { input_info("reorder_input") }, { "weights" }, { "bias"}, 1, {1, 1}, {0, 0}, {1, 1}, {1, 32, 2, 2}, data_types::f32, false)); topology.add(reorder("reorder_conv", input_info("conv"), reorder_layout)); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); + ExecutionConfig cfg = get_test_default_config(engine); program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); 
lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true); @@ -100,7 +100,7 @@ TEST(test_can_fuse_reorder, reorder_for_mixed_type_convolution_fsv32_cldnn) topology.add(cldnn::convolution("conv", { input_info("reorder_input") }, { "weights" }, { "bias"}, 1, {1, 1}, {0, 0}, {1, 1}, {1, 32, 2, 2}, data_types::f32, false)); topology.add(reorder("reorder_conv", input_info("conv"), reorder_layout)); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); + ExecutionConfig cfg = get_test_default_config(engine); program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, false); @@ -172,7 +172,7 @@ TEST_P(test_fused_reorder_deep_depth, no_removal_for_deep_depth_conv) topology.add(cldnn::convolution("conv", { input_info("reorder_input") }, { "weights" })); topology.add(reorder("reorder_conv", input_info("conv"), reorder_layout)); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); + ExecutionConfig cfg = get_test_default_config(engine); program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true); @@ -223,7 +223,13 @@ TEST_P(test_can_fuse_reorder_cldnn, reorder_for_firstconv_cldnn) topology.add(cldnn::convolution("conv2", { input_info("reorder_input") }, { "weights" }, { "bias"}, 1, {1, 1}, {0, 0}, {1, 1}, p.out_shape, p.input_data_type, false)); topology.add(reorder("reorder_conv", input_info("conv2"), reorder_layout)); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + if (engine.get_device_info().supports_immad) { + // Enable this test for out_of_order queue-type if Onednn supports out_of_order + return; + } + program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, false); @@ -269,7 +275,7 @@ TEST_P(test_can_fuse_reorder_onednn, reorder_for_firstconv_onednn) topology.add(cldnn::convolution("conv", { input_info("reorder_input") }, { "weights" })); topology.add(reorder("reorder_result", input_info("conv"), reorder_layout)); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); + ExecutionConfig cfg = get_test_default_config(engine); program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true); @@ -326,7 +332,12 @@ TEST_P(can_fuse_reorder, surface_input_reorder) { topology.add(input_layout_prim, weights_data_prim, surface_input_reorder_prim, conv_input_reorder_prim, conv_prim); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + if (engine.get_device_info().supports_immad) { + // Enable this test for out_of_order queue-type if Onednn supports out_of_order + return; + } program::ptr prog = program::build_program(engine, topology, cfg, false, true); 
layout_optimizer lo = layout_optimizer(); program_wrapper::apply_opt_pass(*prog, lo); @@ -384,7 +395,13 @@ TEST_P(can_fuse_reorder, surface_input_reorder_batched) { surface_input_reorder_prim1, surface_input_reorder_prim2, conv_input_reorder_prim, concat, conv_prim); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + if (engine.get_device_info().supports_immad) { + // Enable this test for out_of_order queue-type if Onednn supports out_of_order + return; + } + program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); program_wrapper::apply_opt_pass(*prog, lo); @@ -437,7 +454,7 @@ TEST_P(test_can_fuse_reorder_onednn_errata, errata_case_for_conv) { topology.add(convolution("conv", { input_info("reorder_conv") }, { "weights" })); topology.add(reorder("reorder_result", input_info("conv"), p.conv_layout)); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); + ExecutionConfig cfg = get_test_default_config(engine); program::ptr prog = program::build_program(engine, topology, cfg, false, true); layout_optimizer lo = layout_optimizer(); lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true); diff --git a/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp b/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp index bd41b1cf042de6..32f9e8e2f9c425 100644 --- a/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp +++ b/src/plugins/intel_gpu/tests/shape_infer/broadcast_si_test.cpp @@ -104,10 +104,9 @@ TEST_P(broadcast_test_two_inputs_blocked_format, shape_infer) { broadcast("output", input_info("data"), input_info("target_shape"), p.axes_mapping_data, p.mode) ); - ExecutionConfig config { - ov::intel_gpu::optimize_data(true), - ov::intel_gpu::allow_new_shape_infer(true) - }; + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); std::vector input_data(p.data_layout.get_linear_size(), 1); diff --git a/src/plugins/intel_gpu/tests/test_cases/activation_simple_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/activation_simple_gpu_test.cpp index 0d444a85902695..ce292cfc5c2eb6 100644 --- a/src/plugins/intel_gpu/tests/test_cases/activation_simple_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/activation_simple_gpu_test.cpp @@ -36,7 +36,7 @@ TEST(activation_f32_fw_gpu, dynamic) { topology topology(input_layout("input", in_layout)); topology.add(activation("activation", input_info("input"), func)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -121,7 +121,7 @@ TEST(activation_f32_fw_gpu, not_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::negation)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -165,7 +165,7 @@ TEST(activation_f32_fw_gpu, erf_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", 
input_info("input"), activation_func::erf)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -211,7 +211,7 @@ TEST(activation_f32_fw_gpu, hard_sigmoid_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::hard_sigmoid, params)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -257,7 +257,7 @@ TEST(activation_f32_fw_gpu, reciprocal_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::reciprocal)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -304,7 +304,7 @@ TEST(activation_f32_fw_gpu, selu_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::selu, params)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -351,7 +351,7 @@ TEST(activation_f32_fw_gpu, softplus_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::softplus)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -397,7 +397,7 @@ TEST(activation_f32_fw_gpu, softsign_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::softsign)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -433,7 +433,7 @@ TEST(activation_f16_fw_gpu, softsign_basic_yxfb) { topology topology(input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::softsign)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -478,7 +478,7 @@ TEST(activation_f32_fw_gpu, sign_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("not", input_info("input"), activation_func::sign)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -516,7 +516,7 @@ TEST(activation_f32_fw_gpu, pow_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("pow", input_info("input"), activation_func::pow, { 2.0f, 0.0f })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); 
network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -552,7 +552,7 @@ TEST(activation_f16_fw_gpu, pow_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("pow", input_info("input"), activation_func::pow, { FLOAT16(3.0f), FLOAT16(0.0f) })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -609,7 +609,7 @@ TEST(activation_f32_fw_gpu, relu_basic_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("relu", input_info("input"), activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -685,7 +685,7 @@ TEST(activation_f32_fw_gpu, relu_basic_bfzyx) { topology topology( input_layout("input", input->get_layout()), activation("relu", input_info("input"), activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0, 0 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -782,7 +782,7 @@ TEST(activation_f32_fw_gpu, basic_yxfb_all_functions) topology.add(activation("activation", input_info("input"), "input_params", func)); } - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -932,7 +932,7 @@ TEST(activation_f16_fw_gpu, basic_bfyx_all_functions) topology.add(activation("activation", input_info("input"), "input_params", func)); } - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -1010,7 +1010,7 @@ TEST(activation_f32_fw_gpu, basic_yxfb_asin_acos_log_atan) topology topology(input_layout("input", input->get_layout())); topology.add(activation("activation", input_info("input"), func)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -1096,7 +1096,7 @@ TEST(activation_f32_fw_gpu, relu_basic_acosh_yxfb) { input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), input->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })), activation("relu", input_info("reorder"), activation_func::acosh, {0.5f, 0.f}, padding{ { 0, 0, 0, 0 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.begin()->first, "relu"); @@ -1162,7 +1162,7 @@ TEST(activation_f32_fw_gpu, relu_basic_input_padding_yxfb) { input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), input->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })), activation("relu", input_info("reorder"), 
activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.begin()->first, "relu"); @@ -1249,7 +1249,7 @@ TEST(activation_f32_fw_gpu, relu_basic_input_padding_bfzyx) { input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), input->get_layout().with_padding(padding{ { 0, 0, 2, 1, 0 }, 0 })), activation("relu", input_info("reorder"), activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 0, 0, 0 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.begin()->first, "relu"); @@ -1322,7 +1322,7 @@ TEST(activation_f32_fw_gpu, relu_basic_output_padding_yxfb) { topology topology( input_layout("input", input->get_layout()), activation("relu", input_info("input"), activation_func::relu_negative_slope, { 0.5f, 0.f }, padding{ { 0, 0, 3, 3 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -1365,7 +1365,7 @@ TEST(activation_f32_fw_gpu, basic_yxfb_floor_ceil) topology topology(input_layout("input", input->get_layout())); topology.add(activation("activation", input_info("input"), func)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -1429,7 +1429,7 @@ TEST(activation_i8_fw_gpu, basic_yxfb_all_funcs) topology.add(input_layout("input", input->get_layout())); topology.add(activation("activation", input_info("input"), func)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1487,7 +1487,7 @@ TEST(activation_i32_fw_gpu, basic_yxfb_i32_funcs) { topology.add(input_layout("input", input->get_layout())); topology.add(activation("activation", input_info("input"), func, params)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1553,7 +1553,7 @@ TEST(activation_f32_fw_gpu, b_fs_yx_fsv16_prelu) { cldnn::reorder("out", input_info("actv"), cldnn::format::bfyx, cldnn::data_types::f32) ); - cldnn::network net(eng, topo); + cldnn::network net(eng, topo, get_test_default_config(eng)); set_values(in_mem, flatten_4d(format::bfyx, in_data)); net.set_input_data("in", in_mem); @@ -1693,7 +1693,8 @@ struct activation_random_test : testing::TestWithParam{"activation"})}; + ExecutionConfig config = get_test_default_config(engine, + ov::intel_gpu::custom_outputs(std::vector{"activation"})); cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test); @@ -1714,10 +1715,9 @@ struct activation_random_test : testing::TestWithParam{"activation_blocked", "res_to_input_format"}), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"activation_blocked", {input_format, "activation_ref"}}}) - }; + ExecutionConfig config_opt = 
get_test_default_config(engine, + {ov::intel_gpu::custom_outputs(std::vector{"activation_blocked", "res_to_input_format"}), + ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"activation_blocked", {input_format, "activation_ref"}}})}); network net_opt(engine, topo_opt, config_opt); diff --git a/src/plugins/intel_gpu/tests/test_cases/adaptive_avg_pooling_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/adaptive_avg_pooling_gpu_test.cpp index 57b627a76d5cbb..55e12ebc384e02 100644 --- a/src/plugins/intel_gpu/tests/test_cases/adaptive_avg_pooling_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/adaptive_avg_pooling_gpu_test.cpp @@ -135,7 +135,7 @@ struct adaptive_avg_pooling_test topology.add(adaptive_pooling("adaptive_avg_pooling_blocked", input_info("input_reordered"), params.outputTensor)); topology.add(reorder("adaptive_avg_pooling", input_info("adaptive_avg_pooling_blocked"), plain_layout, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/adaptive_max_pooling_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/adaptive_max_pooling_gpu_test.cpp index 2c582dd4b57b66..faa9520af558dd 100644 --- a/src/plugins/intel_gpu/tests/test_cases/adaptive_max_pooling_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/adaptive_max_pooling_gpu_test.cpp @@ -162,7 +162,7 @@ struct adaptive_max_pooling_test result_id = reorder_result_id; } - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(input_data_id, input_mem); @@ -192,7 +192,7 @@ struct adaptive_max_pooling_test cldnn::topology reorder_topology; reorder_topology.add(input_layout("indices", indices_layout)); reorder_topology.add(reorder("plane_indices", input_info("indices"), plain_layout, data_types::i32)); - cldnn::network reorder_net{engine, reorder_topology}; + cldnn::network reorder_net{engine, reorder_topology, get_test_default_config(engine)}; reorder_net.set_input_data("indices", indices_mem); const auto second_output_result = reorder_net.execute(); const auto plane_indices_mem = second_output_result.at("plane_indices").get_memory(); diff --git a/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp index bf962234217131..dee1ac04ec4705 100644 --- a/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/add_reorders_gpu_test.cpp @@ -24,7 +24,7 @@ add_reorders optimization pass. 
//concatenation of incompatible convolutions TEST(add_reorders_gpu, two_convolutions_and_concatenation) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(false)); auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); @@ -123,7 +123,7 @@ void test_add_reorders_gpu_basic_reshape_and_tile(bool is_caching_test) { set_values(input, input_vec); tile_ref(input, output_ref, 2, 4); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/arg_max_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/arg_max_gpu_test.cpp index 5192ecf819b01b..752ab7270cbe92 100644 --- a/src/plugins/intel_gpu/tests/test_cases/arg_max_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/arg_max_gpu_test.cpp @@ -83,7 +83,7 @@ TYPED_TEST(argmax_gpu_test, base) { /*b1f3*/ 4.f, 0.5f, 8.f, 8.2f}; set_values(input, this->getTypedVector(input_vec)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -127,7 +127,7 @@ TEST(arg_max_gpu_min_axis_batch_bfzyx, i32) { set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -194,7 +194,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb, f32) { set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -259,7 +259,7 @@ TEST(arg_max_gpu_min_axis_batch_yxfb, f32) { set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -316,7 +316,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, f32) { set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -360,7 +360,7 @@ TEST(top_k_layer_tests, second_output) { /*b1f3*/ 4.f, 0.5f, 8.f, 8.2f}; set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -454,7 +454,7 @@ TEST(top_k_layer_tests, second_output2) { set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -539,7 +539,7 @@ TEST(top_k_layer_tests, multiple_outputs) { set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); @@ -601,7 +601,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_values) { set_values(input, input_vec); - 
network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -658,7 +658,7 @@ TEST(arg_max_gpu_min_axis_y_yxfb_topk_2, sort_by_indices) { set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -701,7 +701,7 @@ void test_top_k_layer_tests_sort_probabilities_by_indices(bool is_caching_test) set_values(input, input_vec); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -851,7 +851,7 @@ void test_top_k_layer_md_sync(bool is_caching_test) { true)); topology.add(mutable_data("arg_max.1", { input_info("arg_max.0") }, shared_memory)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input1", input1); auto outputs = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/barriers_test.cpp b/src/plugins/intel_gpu/tests/test_cases/barriers_test.cpp index 73fc50647464bf..a739d6397216e4 100644 --- a/src/plugins/intel_gpu/tests/test_cases/barriers_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/barriers_test.cpp @@ -41,7 +41,13 @@ TEST(DISABLED_oooq_test, simple) { tpl.add(reorder("r8", input_info("c6"), concat_layout, std::vector{ 8 })); tpl.add(concatenation("c9", { input_info("r7"), input_info("r8") }, 2)); - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + ExecutionConfig cfg = get_test_default_config(*eng); + cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + if (eng->get_device_info().supports_immad) { + // Onednn currently does NOT support out_of_order queue-type + return; + } + network net{ *eng, tpl, cfg }; net.set_input_data("in", input_mem); diff --git a/src/plugins/intel_gpu/tests/test_cases/batch_to_space_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/batch_to_space_gpu_test.cpp index 3edf37b85ee59e..36eb362a7034bb 100644 --- a/src/plugins/intel_gpu/tests/test_cases/batch_to_space_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/batch_to_space_gpu_test.cpp @@ -38,7 +38,7 @@ TEST(batch_to_space_fp16_gpu, i8111_bs1222_cb0000_ce0000) { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {1,2,2,2}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -85,7 +85,7 @@ TEST(batch_to_space_fp16_gpu, i4321_bs1212_cb0000_ce0000) { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {1,6,2,2}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -135,7 +135,7 @@ TEST(batch_to_space_fp16_gpu, i4321_bs1212_cb0010_ce0101) { tensor(format::bfyx, {0,0,1,0}, 0), tensor(format::bfyx, {0,1,0,1}, 0), tensor(format::bfyx, {1,5,1,1}, 1))); - network 
network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -182,7 +182,7 @@ TEST(batch_to_space_fp16_gpu, i62121_bs12311_cb02000_ce00110) { tensor(format::bfzyx, {0,2,0,0,0}, 0), tensor(format::bfzyx, {0,0,1,1,0}, 0), tensor(format::bfzyx, {1,2,2,1,1}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -231,7 +231,7 @@ TEST(batch_to_space_fp16_gpu, i1212112_bs112321_cb02000_ce00110) { tensor(format::bfwzyx, {0,0,1,0,0,0}, 0), tensor(format::bfwzyx, {0,0,0,2,0,0}, 0), tensor(format::bfwzyx, {1,1,3,1,2,2}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -281,7 +281,7 @@ TEST(batch_to_space_fp16_gpu, i21611_bs1112_cb0000_ce0000_b_fs_yx_fsv16) { tensor(format::bfyx, {1,16,1,2}, 1))); topology.add(reorder("bts_to_bfyx", input_info("batch_to_space"), format::bfyx, data_types::f16)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -332,7 +332,7 @@ TEST(batch_to_space_fp16_gpu, i2812_bs1112_cb0000_ce0000_b_fs_yx_fsv16) { tensor(format::bfyx, {1,6,1,4}, 1))); topology.add(reorder("bts_to_bfyx", input_info("batch_to_space"), format::bfyx, data_types::f16)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -377,7 +377,7 @@ TEST(batch_to_space_fp32_gpu, i8111_bs1222_cb0000_ce0000) { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {1,2,2,2}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -424,7 +424,7 @@ TEST(batch_to_space_fp32_gpu, i4321_bs1212_cb0000_ce0000) { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {1,6,2,2}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -474,7 +474,7 @@ TEST(batch_to_space_fp32_gpu, i4321_bs1212_cb0010_ce0101) { tensor(format::bfyx, {0,0,1,0}, 0), tensor(format::bfyx, {0,1,0,1}, 0), tensor(format::bfyx, {1,5,1,1}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -521,7 +521,7 @@ TEST(batch_to_space_fp32_gpu, i62121_bs12311_cb02000_ce00110) { tensor(format::bfzyx, {0,2,0,0,0}, 0), tensor(format::bfzyx, {0,0,1,1,0}, 0), tensor(format::bfzyx, {1,2,2,1,1}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -570,7 +570,7 @@ TEST(batch_to_space_fp32_gpu, i1212112_bs112321_cb02000_ce00110) { tensor(format::bfwzyx, {0,0,1,0,0,0}, 0), tensor(format::bfwzyx, {0,0,0,2,0,0}, 0), tensor(format::bfwzyx, {1,1,3,1,2,2}, 1))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -624,7 +624,7 @@ TEST(batch_to_space_fp32_gpu, i21621_bs1112_cb0201_ce0810_b_fs_yx_fsv16) { tensor(format::bfyx, {1,6,1,1}, 1))); topology.add(reorder("bts_to_bfyx", input_info("batch_to_space"), format::bfyx, 
data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input); @@ -677,7 +677,7 @@ void test_batch_to_space_fp32_gpu_i41021_bs1221_cb0201_ce0810_b_fs_yx_fsv16(bool tensor(format::bfyx, {1,8,3,1}, 1))); topology.add(reorder("bts_to_bfyx", input_info("batch_to_space"), format::bfyx, data_types::f32)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/binary_convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/binary_convolution_gpu_test.cpp index 71e198f022e7b4..e4e6549db79022 100644 --- a/src/plugins/intel_gpu/tests/test_cases/binary_convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/binary_convolution_gpu_test.cpp @@ -185,7 +185,7 @@ TEST_P(binary_convolution_test, conv) { if(engine.get_device_info().supports_immad) return; - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); topology topology_bin; @@ -382,7 +382,7 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel) { padding{ { 0,0,0,0 }, 0 }) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -468,7 +468,7 @@ TEST(binary_convolution, basic_convolution_1x1_single_packed_channel_fp16) { padding{ { 0,0,0,0 }, 0 }) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/border_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/border_gpu_test.cpp index 83a9bc19940be3..2298f1e83397e1 100644 --- a/src/plugins/intel_gpu/tests/test_cases/border_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/border_gpu_test.cpp @@ -87,7 +87,7 @@ class border_test : public ::testing::TestWithParam> { pad_mode, pad_value), reorder("output", input_info("border"), cldnn::format::bfyx, T_dt)); - cldnn::network::ptr target_network = get_network(engine, target_topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr target_network = get_network(engine, target_topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); target_network->set_input_data("input", input); auto target_output = target_network->execute().at("output").get_memory(); cldnn::mem_lock target_output_ptr(target_output, get_test_stream()); @@ -102,7 +102,7 @@ class border_test : public ::testing::TestWithParam> { pad_mode, pad_value)); - cldnn::network base_network(engine, base_topology); + cldnn::network base_network(engine, base_topology, get_test_default_config(engine)); base_network.set_input_data("input", input); auto base_output = base_network.execute().at("border").get_memory(); cldnn::mem_lock base_output_ptr(base_output, get_test_stream()); @@ -233,7 +233,7 @@ TEST(border_gpu, bsv16fsv16_without_reorder) { ov::CoordinateDiff(cd_rb.begin(), cd_rb.end()), pad_mode, pad_value)); - 
cldnn::network target_network(engine, target_topology); + cldnn::network target_network(engine, target_topology, get_test_default_config(engine)); target_network.set_input_data("input", input_b16f16); auto target_output = target_network.execute().at("border").get_memory(); cldnn::mem_lock target_output_ptr(target_output, get_test_stream()); @@ -247,7 +247,7 @@ TEST(border_gpu, bsv16fsv16_without_reorder) { ov::CoordinateDiff(cd_rb.begin(), cd_rb.end()), pad_mode, pad_value)); - cldnn::network base_network(engine, base_topology); + cldnn::network base_network(engine, base_topology, get_test_default_config(engine)); base_network.set_input_data("input", input); auto base_output = base_network.execute().at("border").get_memory(); cldnn::mem_lock base_output_ptr(base_output, get_test_stream()); @@ -290,7 +290,7 @@ TEST(border_gpu, zyx_bsv16fsv16) { pad_mode, pad_value), reorder("output", input_info("border"), cldnn::format::bfzyx, T_dt)); - cldnn::network target_network(engine, target_topology); + cldnn::network target_network(engine, target_topology, get_test_default_config(engine)); target_network.set_input_data("input", input); auto target_output = target_network.execute().at("output").get_memory(); cldnn::mem_lock target_output_ptr(target_output, get_test_stream()); @@ -304,7 +304,7 @@ TEST(border_gpu, zyx_bsv16fsv16) { ov::CoordinateDiff(cd_rb.begin(), cd_rb.end()), pad_mode, pad_value)); - cldnn::network base_network(engine, base_topology); + cldnn::network base_network(engine, base_topology, get_test_default_config(engine)); base_network.set_input_data("input", input); auto base_output = base_network.execute().at("border").get_memory(); cldnn::mem_lock base_output_ptr(base_output, get_test_stream()); @@ -364,7 +364,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant) { }; set_values(input, input_data); - cldnn::network network(engine, topology); + cldnn::network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -441,7 +441,7 @@ TEST(border_gpu, basic_fsv16_0x0x1x2_0x0x3x4_border_constant) { }; set_values(input, input_data); - cldnn::network network(engine, topology); + cldnn::network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -541,7 +541,7 @@ TEST(border_gpu, basic_bfzyx_0x0x1x01_0x0x0x0x3_border_constant) { }; set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -647,7 +647,7 @@ TEST(border_gpu, basic_bfwzyx_0x0x0x1x0x1_0x0x0x1x0x1_border_constant) { }; set_values(input, input_data); - cldnn::network network(engine, topology); + cldnn::network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -725,7 +725,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant_non_constant) { }; set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -798,7 +798,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror) { }; set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); 
auto outputs = network.execute(); @@ -862,7 +862,7 @@ TEST(border_gpu, basic_bfzyx_0x0x0x0x1_0x0x0x0x1_border_mirror) { std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -940,7 +940,7 @@ TEST(border_gpu, basic_bfzyxw_0x0x0x0x1_0x0x0x0x1_border_mirror) { std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1026,7 +1026,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror_101) { }; set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1103,7 +1103,7 @@ TEST(border_gpu, basic_bfzyx_0x0x0x0x1_0x0x0x0x1_border_mirror_101) { }; set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1197,7 +1197,7 @@ TEST(border_gpu, basic_bfwzyx_0x0x0x0x1x1_0x0x0x0x1x1_border_mirror_101) { }; set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1276,7 +1276,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_edge) { }; set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1336,7 +1336,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_constant) { std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1401,7 +1401,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror) { std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1464,7 +1464,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror_101) { std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1527,7 +1527,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_edge) { std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); set_values(input, input_data); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1594,7 +1594,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_constant_dynamic) { 
std::vector input_data = generate_rnd_real_input(sizes, -8.0f, 8.0f); set_values(input, input_data); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/broadcast_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/broadcast_gpu_test.cpp index 0ed3d621325b36..ba447364c2c343 100644 --- a/src/plugins/intel_gpu/tests/test_cases/broadcast_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/broadcast_gpu_test.cpp @@ -64,7 +64,8 @@ void start_broadcast_test(format cldnn_format, data_types cldnn_data_type, std:: set_values(input, input_data); - network network(engine, topology); + ExecutionConfig cfg = get_test_default_config(engine); + network network(engine, topology, cfg); network.set_input_data("input", input); auto outputs = network.execute(); @@ -140,7 +141,7 @@ void start_broadcast_test_dynamic(format input_format, set_values(target_shape_mem, target_shape_data); } - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); set_values(input, input_data); @@ -215,7 +216,7 @@ void start_broadcast_test_5d(format cldnn_format, data_types cldnn_data_type, st set_values(input, input_data); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/bucketize_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/bucketize_gpu_test.cpp index bfaa6383fd6e5c..34ab112ab3da5c 100644 --- a/src/plugins/intel_gpu/tests/test_cases/bucketize_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/bucketize_gpu_test.cpp @@ -59,7 +59,7 @@ struct bucketize_test : testing::TestWithParam> { topology.add( reorder("plane_bucketize_left_bound", input_info("bucketize_left_bound"), format::bfyx, type_to_data_type::value)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("buckets", buckets); diff --git a/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp b/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp index 68b90c33a13f55..c7f574371f3f25 100644 --- a/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/cl_mem_input_test.cpp @@ -127,7 +127,7 @@ void start_cl_mem_check_2_inputs(bool is_caching_test) { topology.add(input2); topology.add(reorder("reorder", input_info("input"), input_info("input2"), output_layout)); - cldnn::network::ptr network = get_network(*engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(*engine, topology, get_test_default_config(*engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input_memory); network->set_input_data("input2", input_memory2); @@ -249,7 +249,7 @@ TEST(cl_mem_check, check_input) { 
topology.add(input); topology.add(reorder("reorder", input_info("input"), output_layout)); - network network(*engine, topology); + network network(*engine, topology, get_test_default_config(*engine)); network.set_input_data("input", input_memory); auto outputs = network.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/command_queue_test.cpp b/src/plugins/intel_gpu/tests/test_cases/command_queue_test.cpp index 809aab070f9cf4..4d0b83b87fb231 100644 --- a/src/plugins/intel_gpu/tests/test_cases/command_queue_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/command_queue_test.cpp @@ -57,31 +57,55 @@ void exexute_network(cldnn::engine& engine, const ExecutionConfig& cfg, bool is_ } // namespace TEST(command_queue_test, test_priority_hints) { - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::out_of_order), - ov::intel_gpu::hint::queue_priority(ov::hint::Priority::LOW)}; auto engine = engine::create(engine_types::ocl, runtime_types::ocl); + ExecutionConfig cfg = get_test_default_config(*engine, + {ov::intel_gpu::queue_type(QueueTypes::out_of_order), + ov::intel_gpu::hint::queue_priority(ov::hint::Priority::LOW)}); + if (engine->get_device_info().supports_immad) { + // Onednn currently does NOT support out_of_order queue-type + return; + } + exexute_network(*engine, cfg); } TEST(command_queue_test, test_throttle_hints) { - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::out_of_order), - ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::HIGH)}; auto engine = engine::create(engine_types::ocl, runtime_types::ocl); + ExecutionConfig cfg = get_test_default_config(*engine, + {ov::intel_gpu::queue_type(QueueTypes::out_of_order), + ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::HIGH)}); + if (engine->get_device_info().supports_immad) { + // Onednn currently does NOT support out_of_order queue-type + return; + } + exexute_network(*engine, cfg); } TEST(command_queue_test, test_priority_and_throttle_hints) { - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::out_of_order), - ov::intel_gpu::hint::queue_priority(ov::hint::Priority::HIGH), - ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::LOW)}; auto engine = engine::create(engine_types::ocl, runtime_types::ocl); + ExecutionConfig cfg = get_test_default_config(*engine, + {ov::intel_gpu::queue_type(QueueTypes::out_of_order), + ov::intel_gpu::hint::queue_priority(ov::hint::Priority::HIGH), + ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::LOW)}); + if (engine->get_device_info().supports_immad) { + // Onednn currently does NOT support out_of_order queue-type + return; + } + exexute_network(*engine, cfg); } TEST(export_import_command_queue_test, test_priority_and_throttle_hints) { - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::out_of_order), - ov::intel_gpu::hint::queue_priority(ov::hint::Priority::HIGH), - ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::LOW)}; auto engine = engine::create(engine_types::ocl, runtime_types::ocl); + ExecutionConfig cfg = get_test_default_config(*engine, + {ov::intel_gpu::queue_type(QueueTypes::out_of_order), + ov::intel_gpu::hint::queue_priority(ov::hint::Priority::HIGH), + ov::intel_gpu::hint::queue_throttle(ov::intel_gpu::hint::ThrottleLevel::LOW)}); + if (engine->get_device_info().supports_immad) { + // Onednn currently does NOT support out_of_order queue-type + return; + } + exexute_network(*engine, cfg, true); } diff --git 
a/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp index 62aa7d2fb82962..d3e0c90cd7ec06 100644 --- a/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp @@ -59,7 +59,7 @@ TEST(concat_gpu, mixed_input_types) { padding{ { 0,0,0,0 }, 0 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input0", input0); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -326,7 +326,7 @@ TEST(concat_gpu, mixed_input_types_5d) { padding{ { 0,0,0,0 }, 0 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input0", input0); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -399,7 +399,7 @@ TEST(concat_gpu, i8_optimization_with_pool) { data_types::i8, padding{{0, 0, 0, 0}, 0}), reorder("reorder", input_info("concat"), reorder_layout)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input0", input0); @@ -501,7 +501,7 @@ TEST(concat_gpu, i8_optimization_with_conv) { data("weights", weights), convolution("conv", input_info("concat"), { "weights" }, { 2, 1 }), reorder("output", input_info("conv"), reorder_layout)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input0", input0); @@ -602,7 +602,7 @@ TEST(concat_gpu, i8_optimization_with_pool_conv) { data("weights", weights), convolution("conv", input_info("concat"), {"weights"}, {1, 1}, {0, 1}), reorder("output", input_info("conv"), reorder_layout) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input0", input0); @@ -775,7 +775,7 @@ struct concat_gpu_4d : public concat_gpu { topology.add(concatenation("concat", input_ids, 1)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -861,7 +861,7 @@ struct concat_gpu_4d_axis3 : public concat_axis3_gpu { topology.add(concatenation("concat", input_ids, 3)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -1025,7 +1025,7 @@ struct concat_id_conv_gpu_4d : public concat_gpu { topology.add(data("weights", weights_mem)); topology.add(convolution("conv", input_info("concat"), { "weights" })); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto conv_forcing = ov::intel_gpu::ImplementationDesc{ fmt, std::string() }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {primitive_id("conv"), conv_forcing} })); @@ -1198,13 +1198,13 @@ struct 
concat_gpu_4d_implicit : public concat_gpu { auto input = generate_input(); // implicit concat - ExecutionConfig config1; + ExecutionConfig config1 = get_test_default_config(get_test_engine()); config1.set_property(ov::intel_gpu::optimize_data(true)); auto out_mem1 = run_concat_network(input, fmt, config1); cldnn::mem_lock out_ptr1(out_mem1, get_test_stream()); // explicit concat - ExecutionConfig config2; + ExecutionConfig config2 = get_test_default_config(get_test_engine()); config2.set_property(ov::intel_gpu::optimize_data(false)); auto out_mem2 = run_concat_network(input, fmt, config2); cldnn::mem_lock out_ptr2(out_mem2, get_test_stream()); @@ -1285,9 +1285,9 @@ TEST(concat_gpu_onednn, basic_input_types) { ov::intel_gpu::ImplementationDesc impl = { format::bfyx, std::string(""), impl_types::onednn }; - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), - ov::intel_gpu::custom_outputs(std::vector{ "concat" }), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"concat", impl} })}; + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{ "concat" })); + cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"concat", impl} })); network network(engine, topology, cfg); network.set_input_data("input0", input0); network.set_input_data("input1", input1); @@ -1425,19 +1425,17 @@ struct concat_gpu_4d_implicit_onednn : public concat_gpu { auto input = generate_input(); // implicit concat - ExecutionConfig config1; + ExecutionConfig config1 = get_test_default_config(engine); config1.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc impl = { fmt, std::string(""), impl_types::onednn }; config1.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv", impl} })); - config1.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); auto out_mem1 = run_concat_network(input, fmt, config1); cldnn::mem_lock out_ptr1(out_mem1, stream); // explicit concat - ExecutionConfig config2; + ExecutionConfig config2 = get_test_default_config(engine); config2.set_property(ov::intel_gpu::optimize_data(false)); - config2.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); auto out_mem2 = run_concat_network(input, fmt, config2); cldnn::mem_lock out_ptr2(out_mem2, stream); @@ -1594,19 +1592,17 @@ struct concat_gpu_4d_explict : public concat_gpu { auto input = generate_input(); // implicit concat when batch size is 1. 
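In the onednn concat fixtures in this file, the explicit config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)) calls are deleted without a replacement. That stays correct only if the shared default config already selects an in-order queue on onednn-capable devices, which matches the sketch earlier in this patch; under that assumption the migrated setup reduces to:

    // Assumed post-migration pattern for the onednn concat tests: start from
    // the shared default config and override only test-specific properties.
    ExecutionConfig config1 = get_test_default_config(engine);
    config1.set_property(ov::intel_gpu::optimize_data(true));
    // "impl" is the onednn ImplementationDesc built earlier in the fixture.
    config1.set_property(ov::intel_gpu::force_implementations(
        ov::intel_gpu::ImplForcingMap{{"conv", impl}}));
    // No explicit queue_type(QueueTypes::in_order) is needed any more.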
- ExecutionConfig config1; + ExecutionConfig config1 = get_test_default_config(engine); config1.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc impl = { fmt, std::string(""), impl_types::onednn }; config1.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"conv", impl}})); - config1.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); auto out_mem1 = run_concat_network(input, fmt, config1); cldnn::mem_lock out_ptr1(out_mem1, stream); // explicit concat - ExecutionConfig config2; + ExecutionConfig config2 = get_test_default_config(engine); config2.set_property(ov::intel_gpu::optimize_data(false)); - config2.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); auto out_mem2 = run_concat_network(input, fmt, config2); cldnn::mem_lock out_ptr2(out_mem2, stream); diff --git a/src/plugins/intel_gpu/tests/test_cases/condition_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/condition_gpu_test.cpp index 1583987ce8bee2..aaa32a4dc3ea51 100644 --- a/src/plugins/intel_gpu/tests/test_cases/condition_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/condition_gpu_test.cpp @@ -87,7 +87,7 @@ std::pair, std::vector> get_values_to_compare(const cl TEST(DISABLED_condition_gpu, basic_equal_comp) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); @@ -139,7 +139,7 @@ TEST(DISABLED_condition_gpu, basic_equal_comp) { TEST(DISABLED_condition_gpu, basic_range_equal_comp) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input0 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); @@ -212,7 +212,7 @@ TEST(DISABLED_condition_gpu, basic_range_equal_comp) { TEST(DISABLED_condition_gpu, generic_test_true_false) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 5, 2, 5, 1 } }); std::vector input_data(50); @@ -321,7 +321,7 @@ TEST(DISABLED_condition_gpu, basic_stacked_ifs) { */ auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); @@ -391,7 +391,7 @@ TEST(DISABLED_condition_gpu, basic_nested_ifs) { */ auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); @@ -473,7 +473,7 @@ TEST(DISABLED_condition_gpu, basic_nested_ifs) { TEST(DISABLED_condition_gpu, negative_compare_wrong_layout) { auto& engine = 
get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 5, 1 } }); @@ -497,7 +497,7 @@ TEST(DISABLED_condition_gpu, negative_compare_wrong_layout) { TEST(DISABLED_condition_gpu, negative_too_big_offset) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 3, 1 } }); @@ -521,7 +521,7 @@ TEST(DISABLED_condition_gpu, negative_too_big_offset) { TEST(DISABLED_condition_gpu, negative_not_same_layouts) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); @@ -552,7 +552,7 @@ TEST(DISABLED_condition_gpu, negative_not_same_layouts) { TEST(DISABLED_condition_gpu, negative_same_names_within_different_networks) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 4, 1 } }); auto compare = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); diff --git a/src/plugins/intel_gpu/tests/test_cases/convert_color_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/convert_color_gpu_test.cpp index 5c1dbb41576ccb..77cd01b1b492a4 100644 --- a/src/plugins/intel_gpu/tests/test_cases/convert_color_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/convert_color_gpu_test.cpp @@ -81,7 +81,7 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp32) { topology.add(convert_color("convert_color", { input_info("input_y"), input_info("input_uv") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::buffer, output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input_y", input_y); network.set_input_data("input_uv", input_uv); @@ -120,7 +120,7 @@ TEST(convert_color, nv12_to_bgr_two_planes_buffer_fp32) { topology.add(convert_color("convert_color", { input_info("input_y"), input_info("input_uv") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::BGR, cldnn::convert_color::memory_type::buffer, output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input_y", input_y); network.set_input_data("input_uv", input_uv); @@ -160,7 +160,7 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_u8) { topology.add(convert_color("convert_color", { input_info("input_y"), input_info("input_uv") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::buffer, output_layout)); - network network(engine, topology); + network 
network(engine, topology, get_test_default_config(engine)); network.set_input_data("input_y", input_y); network.set_input_data("input_uv", input_uv); @@ -200,7 +200,7 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp16) { topology.add(convert_color("convert_color", { input_info("input_y"), input_info("input_uv") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::buffer, output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input_y", input_y); network.set_input_data("input_uv", input_uv); @@ -238,7 +238,7 @@ TEST(convert_color, nv12_to_rgb_single_plane_buffer_fp32) { topology.add(convert_color("convert_color", { input_info("input") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::buffer, output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -274,7 +274,7 @@ TEST(convert_color, nv12_to_rgb_single_plane_buffer_u8) { topology.add(convert_color("convert_color", { input_info("input") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::buffer, output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -350,7 +350,7 @@ TEST(convert_color, nv12_to_rgb_two_planes_surface_u8) { topology.add(convert_color("convert_color", { input_info("input"), input_info("input2") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::image, output_layout)); - network network(*engine, topology); + network network(*engine, topology, get_test_default_config(*engine)); network.set_input_data("input", input_memory); network.set_input_data("input2", input_memory2); @@ -414,7 +414,7 @@ TEST(convert_color, nv12_to_rgb_single_plane_surface_u8) { topology.add(convert_color("convert_color", { input_info("input") }, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::image, output_layout)); - network network(*engine, topology); + network network(*engine, topology, get_test_default_config(*engine)); network.set_input_data("input", input_memory); auto outputs = network.execute(); @@ -507,7 +507,7 @@ TEST(convert_color, i420_to_rgb_three_planes_buffer_fp32) { topology.add(convert_color("convert_color", { input_info("input_y"), input_info("input_u"), input_info("input_v") }, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::buffer, output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input_y", input_y); network.set_input_data("input_u", input_u); network.set_input_data("input_v", input_v); @@ -593,7 +593,7 @@ void test_convert_color_i420_to_rgb_three_planes_surface_u8(bool is_caching_test topology.add(convert_color("convert_color", { input_info("input"), input_info("input2"), input_info("input3") }, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB, cldnn::convert_color::memory_type::image, output_layout)); - 
cldnn::network::ptr network = get_network(*engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(*engine, topology, get_test_default_config(*engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input_memory); network->set_input_data("input2", input_memory2); diff --git a/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp index c8607728374de4..88b267e8796c1b 100644 --- a/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp @@ -339,7 +339,7 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1_ { 1, 4, 4, 4 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("trans", trans); @@ -470,7 +470,7 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1) { 1, 4, 4, 4 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("trans", trans); @@ -633,7 +633,7 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution) { { 1, 4, 4, 4 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("trans", trans); @@ -696,7 +696,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_no_bias) { data("weights", weights), convolution("conv", input_info("input"), { "weights" }, { 2, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -773,7 +773,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_int8_no_bias) { convolution("conv", input_info("to_int"), { "weights" }, {2, 1 }), reorder("output", input_info("conv"), { data_types::f32, format::bfyx, { 1, 1, 3, 2 } })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -825,7 +825,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_no_bias) { data("weights", weights), convolution("conv", input_info("input"), { "weights" }, { 2, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -961,7 +961,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D) { data("biases", biases), convolution("conv", input_info("input"), { "weights" }, { "biases" }, {1, 1, 1}, {0, 0, 0}, {1, 1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1091,7 +1091,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_group2) { data("biases", biases), convolution("conv", input_info("input"), { "weights" }, { "biases" }, 2, {1, 1, 1}, {0, 0, 0}, {1, 1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1140,7 +1140,7 @@ TEST(convolution_f32_fw_gpu, 
with_output_size_same_input) { convolution::create_with_output_size("conv2", input_info("input"), { "weights2" }, { 1, 64, 320, 320 }, { 1, 1 }, { 3, 3 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1181,7 +1181,7 @@ TEST(convolution_f32_fw_gpu, three_convolutions_same_weights) { convolution("conv3", input_info("conv2"), { "weights" }) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -1252,7 +1252,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution) { data("biases", biases), convolution( "conv", input_info("input"), { "weights" }, { "biases" }, {2, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1310,7 +1310,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout) { input_layout("weights", weights->get_layout()), input_layout("biases", biases->get_layout()), convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 1 }, { 0, 0 })); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -1371,7 +1371,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout_non_ input_layout("weights", weights->get_layout()), data("biases", biases), convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 1 }, { 0, 0 })); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(false)); network network(engine, topology, config, true); network.set_input_data("input", input); @@ -1464,7 +1464,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_padding) { padding{ { 0, 0, 0, 0 }, 0 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1566,7 +1566,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding) { padding{ { 0, 0, 0, 0 }, 0 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1662,7 +1662,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding) { { 3, 2 }, padding{ { 0, 0, 0, 0 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1769,7 +1769,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding_with_pad) { padding{ { 0, 0, 0, 0 }, 0 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1878,7 +1878,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding_with_pad) { { 3, 2 }, padding{ { 0, 0, 0, 0 }, 0 })); - network 
network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1976,7 +1976,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_and_output_padding) { padding{ { 0,0,x_pad,y_pad }, 0 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2077,7 +2077,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad_random) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 2 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2147,7 +2147,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad_random) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 2 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2205,7 +2205,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 2 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2259,7 +2259,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 2 } ) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2311,7 +2311,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz2x1x2x1_in1x2x1_nopad) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 5, 5 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2370,7 +2370,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz3x2x2x1_in2x2x1_nopad) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 5, 5 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2426,7 +2426,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2x1x3_wstr2x2_in2x2x1x1_nopad) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 2 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2482,7 +2482,7 @@ TEST(convolution_f32_fw_gpu, wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 2 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2547,7 +2547,7 @@ TEST(convolution_f32_fw_gpu, offsets_wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { padding{ { 0, 0, 1, 1 }, 0 }) ); - network network(engine, topology); + network network(engine, topology, 
get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2627,7 +2627,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2) { { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2725,7 +2725,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2) { { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2789,7 +2789,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2) { { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2847,7 +2847,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2_bfyx) { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2904,7 +2904,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group2) { { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2999,7 +2999,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3088,7 +3088,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3187,7 +3187,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) { { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3282,7 +3282,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3377,7 +3377,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) { 1, 1, 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3458,7 +3458,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x2x1_nopad_split2) { { 1, 1, 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3545,7 +3545,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_filter_1x3x2x1x1_no { 1, 1, 1, 1 }) ); - network network(engine, topology); + network network(engine, 
topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3624,7 +3624,7 @@ TEST(convolution_gpu, trivial_convolution_relu) { ) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3702,7 +3702,7 @@ TEST(convolution_gpu, relu_with_negative_slope) { ) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -3751,7 +3751,7 @@ TEST(convolution_gpu, DISABLED_two_1x1_kernels_after_each_other) { conv_2 ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -3930,7 +3930,7 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32) ) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -4005,7 +4005,7 @@ TEST(convolution_f32_fw_gpu, byte_activation) { { 0, 0, 0 } } }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); set_values(input, { 1, 2, -3, 4, -5, @@ -4080,7 +4080,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_symmetric) { convolution("conv", input_info("input"), { "weights" }, { "biases" }, { 2, 2 }, {0, 0}, { 1, 1 }, tensor{ 1, 2, 3, 2 }), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -4154,7 +4154,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weight_an { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -4225,7 +4225,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -4310,7 +4310,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -4411,7 +4411,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_activatio { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, 
data_types::f32, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -4482,7 +4482,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_asymmetric_weights_p { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -4680,7 +4680,7 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16) reorder("output", input_info("conv"), { data_types::f32, output_format, output_size }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -5033,7 +5033,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) } - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); config.set_property(ov::intel_gpu::optimize_data(true)); @@ -5134,7 +5134,7 @@ TEST(convolution_f16_fsv_gpu, convolution_f16_fsv_gpu_padding) { topology.add(conv_fsv); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "convolution_gpu_bfyx_to_fs_byx_fsv32" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); config.set_property(ov::intel_gpu::optimize_data(true)); @@ -5341,7 +5341,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) } } - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "convolution_gpu_bfyx_to_fs_byx_fsv32" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); config.set_property(ov::intel_gpu::optimize_data(true)); @@ -5415,7 +5415,7 @@ TEST(convolution_f32_fw_gpu, convolution_int8_b_fs_yx_fsv4_to_bfyx) { padding{ { 0, 0, output_padding, output_padding }, 0 }), reorder("output", input_info("conv"), { data_types::f32, format::bfyx, { batch_num, input_f, input_size_x, input_size_y } })); - ExecutionConfig config_ref; + ExecutionConfig config_ref = get_test_default_config(engine); network network_ref(engine, topology_ref, config_ref); network_ref.set_input_data("input", input); @@ -5437,7 +5437,7 @@ TEST(convolution_f32_fw_gpu, convolution_int8_b_fs_yx_fsv4_to_bfyx) { padding{ { 0, 0, output_padding, output_padding }, 0 }), reorder("output", input_info("conv"), { data_types::f32,format::bfyx, { batch_num, input_f, input_size_x, input_size_y } })); - ExecutionConfig config_act; + ExecutionConfig config_act = get_test_default_config(engine); config_act.set_property(ov::intel_gpu::optimize_data(true)); @@ -5577,7 +5577,7 @@ TEST(convolution_gpu, bfyx_iyxo_5x5_fp16) } - ExecutionConfig config; + ExecutionConfig config = 
get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::bfyx, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -5804,7 +5804,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) topology.add(reorder("reorder_bfzyx", input_info("conv_bsv16_fsv16"), format::bfzyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfzyx" })); network network(engine, topology, config); @@ -5941,7 +5941,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) topology.add(reorder("reorder_bfzyx", input_info("conv_bsv16_fsv16"), format::bfzyx, data_types::f16)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfzyx" })); network network(engine, topology, config); @@ -6077,7 +6077,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) topology.add(reorder("reorder_bfzyx", input_info("scale"), format::bfzyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfzyx" })); network network(engine, topology, config); @@ -6238,7 +6238,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) topology.add(reorder("reorder_bfyx", input_info("conv_bsv16_fsv16"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfyx" })); ov::intel_gpu::ImplementationDesc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; @@ -6378,7 +6378,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) topology.add(reorder("reorder_bfyx", input_info("conv_bsv16_fsv16"), format::bfyx, data_types::f16)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfyx" })); ov::intel_gpu::ImplementationDesc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; @@ -6516,7 +6516,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) topology.add(reorder("reorder_bfyx", input_info("scale"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv_bsv16_fsv16", "reorder_bfyx" })); ov::intel_gpu::ImplementationDesc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; @@ -6654,7 +6654,7 @@ TEST_P(convolution_depthwise_gpu, depthwise_conv_fs_b_yx_fsv32) topology.add(conv_fsv); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { format::fs_b_yx_fsv32, "" }; 
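// Note: every hunk in this series swaps a default-constructed ExecutionConfig
// (or the two-argument `network network(engine, topology);` constructor) for a
// config returned by the shared test helper, so per-device defaults are set in
// one place. The helper's real body lives in the tests' common utilities and is
// not shown in this patch; the sketch below is a hypothetical reading of what
// it does, inferred from the queue_type lines this patch deletes elsewhere.
ExecutionConfig get_test_default_config(cldnn::engine& engine) {
    ExecutionConfig config;
    // Assumption: onednn-capable devices (immad) want an in-order queue, which
    // would explain why the explicit
    // config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
    // calls are removed from the onednn test cases later in this patch.
    if (engine.get_device_info().supports_immad)
        config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
    return config;
}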
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); @@ -6797,7 +6797,7 @@ TEST_P(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16) topology.add(conv_fsv); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); @@ -6928,7 +6928,7 @@ TEST_P(convolution_depthwise_gpu_fsv16_xy, depthwise_conv_b_fs_yx_fsv16) topology.add(conv_fsv); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16_depthwise" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); @@ -7018,7 +7018,7 @@ TEST(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16_in_feature_pa convolution("conv", input_info("input_reordered"), { "weights" }, { "bias" }, num_groups, stride, pad, dilation, output_size, data_types::f32, true), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl } })); @@ -7131,7 +7131,7 @@ TEST_P(convolution_depthwise_gpu_bfyx, depthwise_conv_bfyx) topology.add(conv_fsv); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { format::bfyx, "" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl } })); @@ -7452,7 +7452,7 @@ TEST_P(convolution_grouped_gpu, base) { if (has_comp) topology.add(data(comp_prim_name[0], comp)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv", "out" })); ov::intel_gpu::ImplementationDesc conv_impl = { input_data_format, impl_name }; @@ -7613,7 +7613,7 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { conv_fsv.output_paddings = {padding({ 0, 0, output_padding, output_padding }, 0.f)}; topology.add(conv_fsv); } - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { input_data_format, impl_name }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); @@ -7721,7 +7721,7 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_padding) topology.add(reorder_bfyx); // format 8 to 3 -> after fusing, removed // Exec ref network (non-fusing) - ExecutionConfig config_ref; + ExecutionConfig config_ref = get_test_default_config(engine); config_ref.set_property(ov::intel_gpu::optimize_data(false)); config_ref.set_property(ov::intel_gpu::allow_static_input_reorder(true)); @@ -7733,7 +7733,7 
@@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_padding) cldnn::mem_lock ref_out_ptr(ref_out_mem, get_test_stream()); // Exec target network (fusing: conv+reorder) - ExecutionConfig config_target; + ExecutionConfig config_target = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16" }; config_target.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); config_target.set_property(ov::intel_gpu::optimize_data(true)); @@ -7817,7 +7817,7 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_different_type) topology.add(reorder_bfyx); // format 8 to 3 -> after fusing, removed // Exec ref network (non-fusing) - ExecutionConfig config_ref; + ExecutionConfig config_ref = get_test_default_config(engine); config_ref.set_property(ov::intel_gpu::optimize_data(false)); config_ref.set_property(ov::intel_gpu::allow_static_input_reorder(true)); @@ -7829,7 +7829,7 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_different_type) cldnn::mem_lock ref_out_ptr(ref_out_mem, get_test_stream()); // Exec target network (fusing: conv+reorder) - ExecutionConfig config_target; + ExecutionConfig config_target = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16" }; config_target.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); config_target.set_property(ov::intel_gpu::optimize_data(true)); @@ -7935,10 +7935,10 @@ class convolution_test_base { auto topo = build_topology(engine); - ExecutionConfig config{ - ov::intel_gpu::optimize_data(true), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", { input_format(), "" } } }) - }; + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", { input_format(), "" } } })); + auto prog = program::build_program(engine, topo, config); cldnn::network net(prog, 0); @@ -8295,10 +8295,10 @@ class convolution_random_test_fsv4_input : public convolution_random_test_base { auto topo = this->build_topology(engine); - ExecutionConfig config{ - ov::intel_gpu::optimize_data(true), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", { this->input_format(), "" } } }) - }; + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", { this->input_format(), "" } } })); + auto prog = program::build_program(engine, topo, config); cldnn::network net(prog, 0); @@ -8691,7 +8691,7 @@ class convolution_test : public tests::generic_test { for (cldnn::data_types data_type : data_types) { for (cldnn::format input_format : input_formats) { for (cldnn::format weights_format : weights_formats) { - ExecutionConfig network_build_config; + ExecutionConfig network_build_config = get_test_default_config(get_test_engine()); if (input_format == cldnn::format::bfyx) { network_build_config.set_property(ov::intel_gpu::optimize_data(true)); } @@ -9066,9 +9066,8 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { topology.add(conv_fsv); } topology.add(reorder("reorder_bfyx", input_info("conv_fsv"), format::bfyx, data_types::f32)); - ExecutionConfig
config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); config.set_property(ov::intel_gpu::custom_outputs(std::vector{"conv_fsv","reorder_bfyx"})); network network(engine, topology, config); @@ -9134,19 +9133,17 @@ TEST(convolution_gpu_onednn, padding_for_cldnn_kernel_after_onednn) { topology topology_test(input, weights, input_reorder, conv1, conv2, output_reorder); topology topology_ref(input, weights, input_reorder, conv1, conv2, output_reorder); - ExecutionConfig config_test; + ExecutionConfig config_test = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv1_impl_test = { format::byxf, "", impl_types::onednn }; ov::intel_gpu::ImplementationDesc conv2_impl_test = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16", impl_types::ocl }; config_test.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv1", conv1_impl_test }, { "conv2", conv2_impl_test } })); config_test.set_property(ov::intel_gpu::optimize_data(true)); - config_test.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); - ExecutionConfig config_ref; + ExecutionConfig config_ref = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv1_impl_ref = { format::bfyx, "", impl_types::ocl }; ov::intel_gpu::ImplementationDesc conv2_impl_ref = { format::bfyx, "", impl_types::ocl }; config_ref.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv1", conv1_impl_ref }, { "conv2", conv2_impl_ref } })); config_ref.set_property(ov::intel_gpu::optimize_data(true)); - config_ref.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); network network_test(engine, topology_test, config_test); network network_ref(engine, topology_ref, config_ref); @@ -9228,11 +9225,10 @@ TEST(convolution_gpu_onednn, quantized_onednn_convolution_u8s8f32_asymmetric_act { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::byxf, "", impl_types::onednn }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl }})); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); network network(engine, topology, config); network.set_input_data("input", input); @@ -9319,11 +9315,10 @@ TEST(convolution_gpu_onednn, quantized_onednn_convolution_u8s8f32_asymmetric_act { 2, 2 }, { 0, 0 }, { 1, 1 }, tensor{ 1, 2, 3, 2 }, false), reorder("out", input_info("conv"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc conv_impl = { format::byxf, "", impl_types::onednn }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl }})); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); network network(engine, topology, config); network.set_input_data("input", input); @@ -9421,7 +9416,7 @@ void test_convolution_f32_gpu_convolution_gpu_bfyx_f16_depthwise_x_bloxk_size_1( topology.add(conv_fsv); - ExecutionConfig config; + ExecutionConfig config = 
get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16_depthwise" }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv_fsv", conv_impl } })); @@ -9502,7 +9497,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_no_bias_swap_xy) { data("weights", weights), convolution("conv", input_info("input"), { "weights" }, { 1, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/crop_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/crop_gpu_test.cpp index d0492aa55b19e8..0d828c66506fd1 100644 --- a/src/plugins/intel_gpu/tests/test_cases/crop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/crop_gpu_test.cpp @@ -57,7 +57,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -103,7 +103,7 @@ TEST(crop_gpu, basic_in2x2x2x3_crop_all) { input_vec.push_back(static_cast(i)); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -152,7 +152,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_all) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -200,7 +200,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_all) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -248,7 +248,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all_bfyx) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -297,7 +297,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_all_bfyx) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -346,7 +346,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_all_bfyx) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -395,7 +395,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all_fyxb) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network 
network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -442,7 +442,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_all_fyxb) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -489,7 +489,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_all_fyxb) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -553,7 +553,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_offsets) { -14.f, -15.f, -16.f, -17.f }; set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -618,7 +618,7 @@ TEST(crop_gpu, basic_i32_in2x3x2x2_crop_offsets) { -14, -15, -16, -17 }; set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -683,7 +683,7 @@ TEST(crop_gpu, basic_i64_in2x3x2x2_crop_offsets) { -14, -15, -16, -17 }; set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -759,7 +759,7 @@ TEST(crop_gpu, basic_in1x4x1x1_split) { std::vector out1 = { -1.f, 2.f,-3.f }; std::vector out2 = { 4.f, }; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); @@ -807,7 +807,7 @@ TEST(crop_gpu, basic_in1x4x1x1_crop_pad) { std::vector input_vec = { -1.f, 2.f, -3.f, 4.f }; std::vector out1 = { -1.f, 2.f,-3.f }; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -875,7 +875,7 @@ TEST(crop_gpu, basic_i32_in1x4x1x1_split) { std::vector out1 = { -1, 2,-3 }; std::vector out2 = { 4, }; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); @@ -950,7 +950,7 @@ TEST(crop_gpu, basic_i64_in1x4x1x1_split) { std::vector out1 = { -1, 2,-3 }; std::vector out2 = { 4, }; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); @@ -1028,10 +1028,10 @@ TEST(crop_gpu, basic_in1x4x1x1_split_w_relu) { std::vector out2 = { 4.f, }; set_values(input, input_vec); - ExecutionConfig cfg{ - ov::intel_gpu::enable_memory_pool(false), - ov::intel_gpu::optimize_data(true) - }; + ExecutionConfig cfg = get_test_default_config(*engine); + cfg.set_property(ov::intel_gpu::enable_memory_pool(false)); + cfg.set_property(ov::intel_gpu::optimize_data(true)); + network 
network(*engine, topology, cfg); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1081,7 +1081,7 @@ TEST(crop_gpu, basic_in3x1x2x2x1_crop_all_bfzyx) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1137,7 +1137,7 @@ TEST(crop_gpu, basic_in3x1x3x2x2x1_crop_all_bfwzyx) { VF input_vec = flatten_6d(format::bfwzyx, input_rnd); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1212,7 +1212,7 @@ TEST_P(crop_gpu, pad_test) { res.insert(res.end(), res_data.begin(), res_data.end()); } set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1289,7 +1289,7 @@ TEST(crop_gpu, dynamic_i32_in2x3x2x2_crop_offsets) { 4, -5, 8, 8, -14, -15, -16, -17 }; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1348,7 +1348,7 @@ TEST(crop_gpu, dynamic_in1x4x1x1_split) { std::vector out1 = { -1.0f, 2.0f }; std::vector out2 = { -3.0f, 4.0f }; set_values(input_mem, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); @@ -1417,7 +1417,7 @@ TEST(crop_gpu, dynamic_in1x4x1x1_varaidic_split) { set_values(axis_mem, {1}); set_values(splits_length_mem, splits_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); @@ -1470,7 +1470,7 @@ TEST(crop_gpu, static_split_batch) { set_values(input_mem, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::custom_outputs(topology.get_primitives_ids())); @@ -1525,7 +1525,7 @@ TEST(crop_gpu, optimized_out_crop) { topology.add(crop("crop2", { input_info("crop1") }, tensor(5, 4, 1, 1), { tensor(0, 0, 0, 0) }, padding({0, 0, 0, 0}, {0, 0, 0, 0}))); topology.add(reorder("reorder_out", input_info("crop2"), layout{ ov::PartialShape{5, 4, 1, 1}, data_types::f32, format::bfyx })); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/ctc_loss_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/ctc_loss_gpu_test.cpp index c63e5ec4e1e250..cc1b6b5999ae6d 100644 --- a/src/plugins/intel_gpu/tests/test_cases/ctc_loss_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/ctc_loss_gpu_test.cpp @@ -104,7 +104,7 @@ struct 
ctc_loss_gpu_test : public testing::TestWithParam { - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(std::get<0>(input), std::get<1>(input)); diff --git a/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp index df45d0b8c802bf..1d6ef0d2c76b4e 100644 --- a/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp @@ -185,7 +185,7 @@ class cum_sum_gpu : public ::testing::TestWithParam { topology.add(input_layout("Input0", input->get_layout())); topology.add(cum_sum("cum_sum", input_info("Input0"), axis, exclusive, reverse)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input); @@ -282,7 +282,7 @@ TEST(cum_sum_gpu_f16, DISABLED_basic_1d) { topology.add(input_layout("Input0", input->get_layout())); topology.add(cum_sum("cum_sum", input_info("Input0"))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input); @@ -317,7 +317,7 @@ TEST(cum_sum_gpu_fp32, dynamic) { topology.add(input_layout("input", in_layout)); topology.add(cum_sum("cum_sum", input_info("input"))); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/custom_gpu_primitive_test.cpp b/src/plugins/intel_gpu/tests/test_cases/custom_gpu_primitive_test.cpp index de8634e02d6390..7661adc7737231 100644 --- a/src/plugins/intel_gpu/tests/test_cases/custom_gpu_primitive_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/custom_gpu_primitive_test.cpp @@ -82,7 +82,7 @@ TEST(custom_gpu_primitive_f32, add_basic_in2x2x2x2) { 15.f, 17.f, 8.f, 10.f, -2.f, 6.5f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -188,7 +188,7 @@ void add_basic_in2x2x2x2_with_reorder() 15.f, 17.f, 8.f, 10.f, -2.f, 6.f, 0.f, -2.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -288,7 +288,7 @@ TEST(custom_gpu_primitive_f32, eltwise_add_basic_in2x2x2x2) { 15.f, 17.f, 8.f, 10.f, -2.f, 6.5f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -381,7 +381,7 @@ TEST(custom_gpu_primitive_f32, add_eltwise_basic_in2x2x2x2) { 15.f, 17.f, 8.f, 10.f, -2.f, 6.5f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -483,7 +483,7 @@ TEST(custom_gpu_primitive_f32, two_kernels_with_same_entry_point_basic_in2x2x2x2 4.f, -0.5f, 8.f, 8.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs =
network.execute(); @@ -547,7 +547,7 @@ void test_custom_gpu_primitive_u8_add_basic_in2x2x2x2(bool is_caching_test) { 2, 60, 0, 20 }); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", input2); diff --git a/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp index 4c1a6431488d60..a218640354ae95 100644 --- a/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp @@ -175,7 +175,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x1_nopad) { reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -235,7 +235,7 @@ TYPED_TEST(deconvolution_basic, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) { reorder("plane_output", input_info("deconv"), format::bfyx, data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -297,7 +297,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x1_nopad_bfyx) { // Fil reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -359,7 +359,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x1_pad1) { reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -410,7 +410,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x1_stride2_nopad) { reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -475,7 +475,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x1_stride4_pad2) { reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -537,7 +537,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x2_stride2_pad1) { reorder("plane_output", input_info("deconv"), format::yxfb, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -584,7 +584,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1) { // f1: 17 - 13 auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = 
get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); @@ -665,7 +665,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1) { reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -726,7 +726,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1_input_p deconvolution("deconv", input_info("reorder"), { "weights" }, { "biases" }, { 2, 2 }, { 1, 1 }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -776,7 +776,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1_input_padd // f1: 17 - 13 auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); @@ -858,7 +858,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pad1) reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -907,7 +907,7 @@ TYPED_TEST(deconvolution_basic, basic_f16_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pa auto weights = engine.allocate_memory({ data_types::f32, format::oiyx,{ 1, 1, 2, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); set_values(input, { FLOAT16(8.f), FLOAT16(0.5f), @@ -996,7 +996,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2 reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1041,7 +1041,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group2 reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1118,7 +1118,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group1 deconvolution("deconv", input_info("reordered_input"), { "weights" }, { "bias" }, 16, { 2, 2 }, { 1, 1 }), reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1208,7 +1208,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group1 deconvolution("deconv", 
input_info("reordered_input"), { "weights" }, { "bias" }, 16, { 2, 2 }, { 1, 1 }), reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1264,7 +1264,7 @@ TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_group2 reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1337,7 +1337,7 @@ TYPED_TEST(deconvolution_basic_3d, basic3D_wsiz2x2x1_in1x1x2x2x1_nopad) { reorder("plane_output", input_info("deconv"), format::bfzyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1489,7 +1489,7 @@ TYPED_TEST(deconvolution_basic_3d, basic3D_wsiz3x3x3_in1x1x4x4x4_nopad) { reorder("plane_output", input_info("deconv"), format::bfzyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1583,7 +1583,7 @@ TYPED_TEST(deconvolution_basic_3d, basic3D_wsiz2x2x2_in1x1x2x2x2_stride2_nopad) reorder("plane_output", input_info("deconv"), format::bfzyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1656,7 +1656,7 @@ TYPED_TEST(deconvolution_basic_3d, basic3D_wsiz2x2x2_in1x1x2x2x2_stride2_pad1) { reorder("plane_output", input_info("deconv"), format::bfzyx, cldnn::data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1719,7 +1719,7 @@ TYPED_TEST(deconvolution_basic, basic_f16_k9x9_s2x2_pad4x4) { reorder("plane_output", input_info("deconv"), format::bfyx, cldnn::data_types::f16) ); - network network_ref(engine, topology_ref); + network network_ref(engine, topology_ref, get_test_default_config(engine)); network_ref.set_input_data("input", input); auto outputs_ref = network_ref.execute(); @@ -1739,7 +1739,7 @@ TYPED_TEST(deconvolution_basic, basic_f16_k9x9_s2x2_pad4x4) { reorder("out", input_info("deconv_act"), format::bfyx, data_types::f16) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network_act(engine, topology_act, config); network_act.set_input_data("input_act", input); @@ -1797,7 +1797,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_b_fs_yx_fsv16_stride2_pad reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -1868,7 +1868,7 
@@ TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in2x2x1x2_b_fs_yx_fsv16_stride2_pad reorder("out", input_info("deconv"), format::bfyx, data_types::f16) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -1917,7 +1917,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_b_fs_yx_fsv16_stride2_pad reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -1965,7 +1965,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_b_fs_yx_fsv16_stride2_pad reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -2011,7 +2011,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad_b_fs_yx_fsv16_dw) { reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -2065,7 +2065,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_pad1_b_fs_yx_fsv16_dw) { reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -2107,7 +2107,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride2_nopad_b_fs_yx_fsv reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -2163,7 +2163,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2_b_fs_yx_fsv1 reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; 
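// The deconvolution cases around this hunk all share one idiom: build a config
// from the test defaults, force the "deconv" primitive onto a blocked layout
// (an empty kernel name "" lets the plugin pick any kernel for that format),
// and compare against a defaults-only reference run. A condensed, hypothetical
// sketch of that idiom -- the helper name and the primitive ids
// "input"/"deconv"/"out" with fp32 output are assumptions taken from the
// surrounding hunks:
void check_forced_vs_default(cldnn::engine& engine, topology& topo, memory::ptr input) {
    // Reference run: test defaults only, no forcing.
    network net_ref(engine, topo, get_test_default_config(engine));
    net_ref.set_input_data("input", input);
    auto ref_mem = net_ref.execute().at("out").get_memory();
    // Forced run: require a b_fs_yx_fsv16 implementation of "deconv".
    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::force_implementations(
        ov::intel_gpu::ImplForcingMap{ { "deconv", { format::b_fs_yx_fsv16, "" } } }));
    network net_fsv(engine, topo, config);
    net_fsv.set_input_data("input", input);
    auto fsv_mem = net_fsv.execute().at("out").get_memory();
    // Both runs must agree element-wise.
    cldnn::mem_lock<float> ref_ptr(ref_mem, get_test_stream());
    cldnn::mem_lock<float> fsv_ptr(fsv_mem, get_test_stream());
    ASSERT_EQ(ref_ptr.size(), fsv_ptr.size());
    for (size_t i = 0; i < ref_ptr.size(); ++i)
        ASSERT_FLOAT_EQ(ref_ptr[i], fsv_ptr[i]);
}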
config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -2219,7 +2219,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2_b_fs_yx_fsv1 reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -2304,7 +2304,7 @@ TEST(deconvolution_f32_fw_gpu, bs_fs_zyx_bsv16_fsv16_wsiz2x2x2_in1x1x2x2x2_strid reorder("out", input_info("deconv"), format::bfzyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc impl = { format::bs_fs_zyx_bsv16_fsv16, "" }; config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", impl} })); @@ -2357,7 +2357,7 @@ void test_deconvolution_f16_fw_gpu_basic_wsiz2x2_in1x2x2x2_fs_b_yx_fsv32_stride1 reorder("out", input_info("deconv"), format::bfyx, data_types::f32) ); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -2714,7 +2714,7 @@ class deconvolution_random_test : public testing::TestWithParam @@ -2971,8 +2971,9 @@ TEST(deconvolution_f32_fw_gpu_onednn, basic_wsiz2x2_in2x2x1x1_stride2_nopad) { ov::intel_gpu::ImplementationDesc conv_impl = { format::yxfb, "", impl_types::onednn }; - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", conv_impl} })}; + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"deconv", conv_impl} })); network network(engine, topology, cfg); network.set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/depth_concatenate_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/depth_concatenate_gpu_test.cpp index 71b1a4100995fc..01ca5a07dad41d 100644 --- a/src/plugins/intel_gpu/tests/test_cases/depth_concatenate_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/depth_concatenate_gpu_test.cpp @@ -65,7 +65,7 @@ TEST(depth_concatenate_f32_gpu, test01) { topology.add(input_layout("input2", input2->get_layout())); topology.add(concatenation("depth1", { input_info("input1"), input_info("input2") }, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -127,7 +127,7 @@ void concat_basic_with_reorder() { topology.add(concatenation("depth1", { input_info("to_int1"), input_info("to_int2") }, 1)); topology.add(reorder("to_float", input_info("depth1"), {data_types::f32, format::yxfb, {2, 5, 1, 1}})); - network network(engine, topology); + network network(engine, topology, 
get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -203,7 +203,7 @@ TEST(depth_concatenate_f32_gpu, test02) { topology.add(input_layout("input3", input3->get_layout())); topology.add(concatenation("depth1", { input_info("input1"), input_info("input2"), input_info("input3") }, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -253,7 +253,7 @@ TEST(concatenate_f32_gpu, test_concatenation_of_pool_and_unpool) { topology.add(data("weights", weights)); topology.add(convolution("conv", input_info("concat1"), {"weights"})); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input1", input1); @@ -288,7 +288,7 @@ TEST(depth_concatenate_f32_gpu, test03_cascade_concat_opt) { topology.add(concatenation("depth3", { input_info("relu4"), input_info("depth2") }, 1)); topology.add(activation("relu5", input_info("depth3"), activation_func::relu)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -340,7 +340,7 @@ TEST(depth_concatenate_f32_gpu, test04_fused_relu) { topology.add(concatenation("depth1", { input_info("input1"), input_info("input2") }, 1)); topology.add(activation("relu1", input_info("depth1"), activation_func::relu)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -394,7 +394,7 @@ TEST(depth_concatenate_f32_gpu, test05_different_formats) { topology.add(concatenation("depth1", { input_info("reshape1"), input_info("reshape2") }, 1)); topology.add(reorder("output", input_info("depth1"), format::bfyx, data_types::f32)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -453,7 +453,7 @@ TEST(depth_concatenate_f32_gpu, test06_padded_input) { topology.add(concatenation("depth2", { input_info("depth1"), input_info("conv") }, 1)); topology.add(reorder("output", input_info("depth2"), format::bfyx, data_types::f32)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv", ov::intel_gpu::ImplementationDesc{format::fs_b_yx_fsv32, ""} } })); network network(engine, topology, config); @@ -529,7 +529,7 @@ TEST(depth_concatenate_f32_gpu, test07_padded_output) { topology.add(convolution("conv", input_info("depth1"), { "weights" }, {1, 1}, {1, 1})); topology.add(reorder("output", input_info("conv"), format::bfyx, data_types::f32)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv", 
ov::intel_gpu::ImplementationDesc{format::fs_b_yx_fsv32, ""} } })); network network(engine, topology, config); @@ -589,7 +589,7 @@ TEST(depth_concatenate_f32_gpu, test07_concat_is_output) { topology.add(activation("actv2", input_info("input2"), activation_func::linear, { 0.5f, 0.0f })); topology.add(concatenation("depth1", { input_info("actv1"), input_info("actv2") }, 1)); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -620,7 +620,7 @@ TEST(depth_concatenate_f32_gpu, test07_concat_is_output) { TEST(depth_concatenate_f32_gpu, concat_with_different_format_inputs) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); const int in1_f = 2, in2_f = 1; const int b = 2, x = 2, y = 4; auto input1 = engine.allocate_memory({ data_types::f32, format::yxfb,{ b, in1_f, y, x } }); @@ -704,7 +704,7 @@ TEST(depth_concatenate_f32_gpu, concat_with_different_format_inputs) { TEST(depth_concatenate_f32_gpu, concat_with_reshape_input) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); auto input1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2,4,1,2 } }); std::vector values = { @@ -742,7 +742,7 @@ TEST(depth_concatenate_f32_gpu, concat_with_reshape_input) { TEST(depth_concatenate_i32_gpu, optimize_data01) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); auto input = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 1, 1}}); topology topology; @@ -769,7 +769,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data01) { TEST(depth_concatenate_i32_gpu, optimize_data02) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); auto input2 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); auto input3 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); @@ -836,7 +836,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data02) { TEST(depth_concatenate_i32_gpu, optimize_data03) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); topology topology; @@ -876,7 +876,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data03) { TEST(depth_concatenate_i32_gpu, optimize_data04) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); topology topology; @@ -916,7 +916,7 @@ TEST(depth_concatenate_i32_gpu, optimize_data04) { TEST(depth_concatenate_i32_gpu, optimize_data05) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); auto input1 = engine.allocate_memory({data_types::i32, format::bfyx, {1, 1, 2, 2}}); topology topology; @@ -990,7 +990,7 @@ void test_depth_concatenate_f32_gpu_basic_bfwzyx_along_w(bool is_caching_test) { set_values(input1, input_data); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); 
config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1042,7 +1042,7 @@ static network::ptr setup_depth_concatatenate_network(const std::vectorset_input_data("Input0", input1); diff --git a/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp b/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp index 41654f46b62ff1..d3af0d731c66d6 100644 --- a/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/detection_output_test.cpp @@ -147,7 +147,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -182,7 +182,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output_1", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k)); topology.add(detection_output("detection_output_2", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -224,7 +224,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -272,7 +272,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -314,7 +314,7 @@ class detection_output_test : public ::testing::Test { 
topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -367,7 +367,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -430,7 +430,7 @@ class detection_output_test : public ::testing::Test { prior_coordinates_offset, prior_is_normalized, input_width, input_height, decrease_label_id )); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -480,7 +480,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -541,7 +541,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -589,7 +589,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold)); 
- cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -640,7 +640,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location"), input_info("input_confidence"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -686,7 +686,7 @@ class detection_output_test : public ::testing::Test { topology.add(detection_output("detection_output", input_info("input_location_padded"), input_info("input_confidence_padded"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); @@ -749,7 +749,7 @@ class detection_output_test : public ::testing::Test { prior_is_normalized, this->img_size, this->img_size )); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input_location", input_location); network->set_input_data("input_confidence", input_confidence); diff --git a/src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp index 8ccb5509d4042f..e1a7f04658af8c 100644 --- a/src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/dft_gpu_test.cpp @@ -118,7 +118,7 @@ struct dft_gpu_test : public testing::TestWithParam { // It's simpler to use "bfwzyx" format for all cases, as input and output can have different ranks topology.add(reorder("out", input_info("dft"), format::bfwzyx, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); const auto outputs = network->execute(); @@ -2054,7 +2054,7 @@ TEST(dft_gpu_test, irdft_output_shape) { topology.add(dft("dft", input_info("reorder_input"), p.axes, p.signal_size, p.output_shape, type.direction, type.mode)); { - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); const auto outputs = network.execute(); 
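The hunks above and below all follow the same recipe: build an ExecutionConfig from the shared get_test_default_config(engine) helper, layer any test-specific properties on top, and hand the result either to the network constructor directly or to the get_network() helper when the test also exercises model caching. A condensed sketch of that recipe is shown here; it assumes a topology containing a primitive named "deconv", an engine obtained from get_test_engine(), and an is_caching_test flag supplied by the test, and the bodies of the helpers themselves are not part of this patch:

    auto& engine = get_test_engine();
    ExecutionConfig config = get_test_default_config(engine);

    // Test-specific properties are layered on top of the shared defaults,
    // e.g. enabling graph optimizations and pinning "deconv" to a blocked
    // layout (an empty kernel name lets the plugin pick the kernel).
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::force_implementations(
        ov::intel_gpu::ImplForcingMap{ {"deconv", ov::intel_gpu::ImplementationDesc{ format::b_fs_yx_fsv16, "" }} }));

    // Caching tests route through get_network(), which decides based on
    // is_caching_test whether to round-trip the compiled network through
    // the cache before returning it.
    cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
    network->set_input_data("input", input);
    const auto outputs = network->execute();

The same layering appears with other properties in this patch, such as ov::intel_gpu::allow_new_shape_infer(true) for the dynamic-shape eltwise tests and ov::intel_gpu::queue_type(QueueTypes::in_order) for the onednn deconvolution test.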
@@ -2069,7 +2069,7 @@ TEST(dft_gpu_test, irdft_output_shape) { topology.add(reorder("out", input_info("dft"), format::bfwzyx, data_type)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); const auto outputs = network.execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp index 4bbe8ae9413c96..e3e9d06c3ce1a7 100644 --- a/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp @@ -102,7 +102,7 @@ void generic_eltwise_test(cldnn::format test_input_fmt, int input_b, int input_f topology.add(activation("out", out_id, activation_func::relu, { slope, 0.0f })); out_id = "out"; } - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); auto outputs = network.execute(); @@ -240,7 +240,7 @@ void generic_eltwise_bool_test(cldnn::format test_input_fmt, int input_b, int in topology.add(reorder("reorder1", input_info("input1"), input1->get_layout().with_padding(padding{{ 0, 0, input_padding_x, input_padding_y }, 0 }))); topology.add(eltwise("eltwise", { input_info("reorder1"), input_info("input2") }, mode, DEFAULT_BROADCAST_SPEC, padding{ { 0, 0, output_padding_x, output_padding_y }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); auto outputs = network.execute(); @@ -343,7 +343,7 @@ TEST(eltwise_gpu_f32, equal_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::eq)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -413,7 +413,7 @@ TEST(eltwise_gpu_f32, not_equal_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::ne)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -483,7 +483,7 @@ TEST(eltwise_gpu_f32, less_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::lt)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -553,7 +553,7 @@ TEST(eltwise_gpu_f32, less_equal_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::le)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -623,7 +623,7 @@ TEST(eltwise_gpu_f32, greater_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); 
topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::gt)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -693,7 +693,7 @@ TEST(eltwise_gpu_f32, greater_equal_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::ge)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -763,7 +763,7 @@ TEST(eltwise_gpu_f32, logicalAND_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::logic_and)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -849,7 +849,7 @@ TEST(eltwise_gpu_f32, logicalAND_in3_float_out1_int) { topology.add(input_layout("input3", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2"), input_info("input3") }, eltwise_mode::logic_and)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -920,7 +920,7 @@ TEST(eltwise_gpu_f32, logicalOR_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::logic_or)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -1006,7 +1006,7 @@ TEST(eltwise_gpu_f32, logicalOR_in3_float_out1_int) { topology.add(input_layout("input3", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2"), input_info("input3") }, eltwise_mode::logic_or)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -1077,7 +1077,7 @@ TEST(eltwise_gpu_f32, logicalXOR_in2_float_out1_int) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise("eltwise", { input_info("input"), input_info("input2") }, eltwise_mode::logic_xor)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input1); network.set_input_data("input2", input2); @@ -1128,7 +1128,7 @@ TEST(eltwise_gpu_f32, isfinite_in1_float_out1_int) { topology.add(input_layout("input", input->get_layout())); topology.add(eltwise("eltwise", {input_info("input")}, eltwise_mode::is_finite)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); const auto outputs = network.execute(); @@ -1183,7 +1183,7 @@ TEST(eltwise_gpu_f32, isinf_in1_float_out1_int) { topology.add(input_layout("input", input->get_layout())); topology.add(eltwise("eltwise", {input_info("input")}, eltwise_mode::is_inf, coefficients, 
data_types::i8)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); const auto outputs = network.execute(); @@ -1227,7 +1227,7 @@ TEST(eltwise_gpu_f32, isnan_in1_float_out1_int) { topology.add(input_layout("input", input->get_layout())); topology.add(eltwise("eltwise", {input_info("input")}, eltwise_mode::is_nan)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); const auto outputs = network.execute(); @@ -1270,7 +1270,7 @@ TEST(eltwise_gpu_f32, dynamic_kernel_no_broadcast) { 15.f, 17.f, 8.f, 10.f, -2.f, 6.5f, -0.5f, -2.5f }); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input1", input1); @@ -1326,7 +1326,7 @@ TEST(eltwise_gpu_f32, dynamic_kernel_broadcast) { set_values(input2, { 0.5f, -0.5f }); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input1", input1); @@ -1403,7 +1403,7 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2) { 15.f, 17.f, 8.f, 10.f, -2.f, 6.5f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1459,7 +1459,7 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_channel) { -2.f, 6.5f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1528,7 +1528,7 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_x) { -0.5f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1591,7 +1591,7 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_y) { 4.f, -0.5f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1656,7 +1656,7 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_batch) { 4.f, -0.5f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1715,7 +1715,7 @@ TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_multiple_dims) { 1.f, 2.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1772,7 +1772,7 @@ TEST(eltwise_gpu_f32, pow_in2x2x2x2_broadcast_all) { set_values(input2, { 2.0f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1849,7 +1849,7 @@ TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_same_dim) { -4.f, 0.5f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); 
network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1925,7 +1925,7 @@ TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_diff_dim) { -4.f, 0.5f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2003,7 +2003,7 @@ TEST(eltwise_gpu_f32, max_basic_in4x4x4x4) { 15.f, 17.f, 8.f, 10.f, 6.f, 8.f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2074,7 +2074,7 @@ TEST(eltwise_gpu_f32, sub_basic_in4x4x4x4) { 15.f, 17.f, 8.f, 8.5f, 6.f, 8.f, -0.5f, 10.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2145,7 +2145,7 @@ TEST(eltwise_gpu_int, basic_in4x4x4x4) { 6.f, 8.f, 0.f, 10.f }; set_values(input2, input_2_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); auto outputs = network.execute(); @@ -2223,7 +2223,7 @@ TEST(eltwise_gpu_f32_int, basic_in4x4x4x4) { 6.f, 8.f, 0.f, 10.f }; set_values(input2, input_2_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); auto outputs = network.execute(); @@ -2304,7 +2304,7 @@ TEST(eltwise_gpu_f32, prod_basic_in4x4x4x4) { 2.5f, 7.f, 17.f, 8.f, 2.5f, 4.f, 10.f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2378,7 +2378,7 @@ TEST(eltwise_gpu_f32, max_basic_in4x4x4x4_input_padding) { 15.f, 17.f, 8.f, 10.f, 6.f, 8.f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2450,7 +2450,7 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients) { 15.f, 17.f, 8.f, 10.f, -2.f, 6.5f, -0.5f, -2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2566,7 +2566,7 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients_3inputs) { 6.f, 0.f, 2.f, 0.f, 5.f, 1.f, 1.f, 1.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2656,7 +2656,7 @@ TEST(eltwise_gpu_f32, max_3inputs_in4x4x4x4_input_padding) { 15.f, 3.f, 9.f, 1.f, -1.f, 6.f, 0.5f, 8.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2746,7 +2746,7 @@ TEST(eltwise_gpu_f32, stride_test_2x2) { 15, 31, 47, 63, 16, 32, 48, 64 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", 
input2); @@ -2814,7 +2814,7 @@ TEST(eltwise_gpu_f32, broadcast_test_in4x4x2x2) { 0.5f, 2.5f, 0.5f, 2.5f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2882,7 +2882,7 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_basic) golden_topology.add(input_layout("input2", input2->get_layout())); golden_topology.add(eltwise("eltwise", input_info("input1"), input_info("input2"), eltwise_mode::sum)); - network golden_network(engine, golden_topology); + network golden_network(engine, golden_topology, get_test_default_config(engine)); golden_network.set_input_data("input1", input1); golden_network.set_input_data("input2", input2); @@ -2899,7 +2899,7 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_basic) FSV32_topology.add(eltwise("eltwise", input_info("reorder1"), input_info("reorder2"), eltwise_mode::sum)); FSV32_topology.add(reorder("reorderOutput", input_info("eltwise"), layout(data_types::f16, format::bfyx, input_tensor))); - network FSV32_network(engine, FSV32_topology); + network FSV32_network(engine, FSV32_topology, get_test_default_config(engine)); FSV32_network.set_input_data("input1", input1); FSV32_network.set_input_data("input2", input2); @@ -2949,7 +2949,7 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_broadcast) ref_topology.add(input_layout("input2", input2->get_layout())); ref_topology.add(eltwise("eltwise", input_info("input1"), input_info("input2"), eltwise_mode::prod)); - network ref_network(engine, ref_topology); + network ref_network(engine, ref_topology, get_test_default_config(engine)); ref_network.set_input_data("input1", input1); ref_network.set_input_data("input2", input2); @@ -2965,7 +2965,7 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_broadcast) fsv32_topology.add(eltwise("eltwise", input_info("reorder1"), input_info("reorder2"), eltwise_mode::prod)); fsv32_topology.add(reorder("reorder_bfyx", input_info("eltwise"), layout(data_types::f16, format::bfyx, input1_tensor))); - network fsv32_network(engine, fsv32_topology); + network fsv32_network(engine, fsv32_topology, get_test_default_config(engine)); fsv32_network.set_input_data("input1", input1); fsv32_network.set_input_data("input2", input2); @@ -3013,7 +3013,7 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_broadcast_bfyx) ref_topology.add(input_layout("input2", input2->get_layout())); ref_topology.add(eltwise("eltwise", input_info("input1"), input_info("input2"), eltwise_mode::prod)); - network ref_network(engine, ref_topology); + network ref_network(engine, ref_topology, get_test_default_config(engine)); ref_network.set_input_data("input1", input1); ref_network.set_input_data("input2", input2); @@ -3028,7 +3028,7 @@ TEST(eltwise_gpu_f16, fs_b_yx_fsv32_broadcast_bfyx) fsv32_topology.add(eltwise("eltwise", input_info("reorder1"), input_info("input2"), eltwise_mode::prod)); fsv32_topology.add(reorder("reorder_bfyx", input_info("eltwise"), layout(data_types::f16, format::bfyx, input1_tensor))); - network fsv32_network(engine, fsv32_topology); + network fsv32_network(engine, fsv32_topology, get_test_default_config(engine)); fsv32_network.set_input_data("input1", input1); fsv32_network.set_input_data("input2", input2); @@ -3067,7 +3067,7 @@ TEST(eltwise_gpu_f32, broadcast_test_in4x4x2x2x2) { set_values(input2, { 0.5f, 2.5f, 0.5f, 2.5f, 1.f, 2.f, 3.f, 4.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); 
network.set_input_data("input2", input2); @@ -3124,7 +3124,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_basic) golden_topology.add(input_layout("input2", input2->get_layout())); golden_topology.add(eltwise("eltwise", input_info("input1"), input_info("input2"), eltwise_mode::sum)); - network golden_network(engine, golden_topology); + network golden_network(engine, golden_topology, get_test_default_config(engine)); golden_network.set_input_data("input1", input1); golden_network.set_input_data("input2", input2); @@ -3141,7 +3141,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_basic) FS_B_YX_FSV32_OUTPUT_topology.add(eltwise("eltwise", input_info("reorder1"), input_info("reorder2"), eltwise_mode::sum)); FS_B_YX_FSV32_OUTPUT_topology.add(reorder("reorderOutput", input_info("eltwise"), layout(data_types::f16, format::bfyx, input_tensor))); - network FS_B_YX_FSV32_OUTPUT_network(engine, FS_B_YX_FSV32_OUTPUT_topology); + network FS_B_YX_FSV32_OUTPUT_network(engine, FS_B_YX_FSV32_OUTPUT_topology, get_test_default_config(engine)); FS_B_YX_FSV32_OUTPUT_network.set_input_data("input1", input1); FS_B_YX_FSV32_OUTPUT_network.set_input_data("input2", input2); @@ -3158,7 +3158,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_basic) BYXF_OUTPUT_topology.add(eltwise("eltwise", input_info("reorder1"), input_info("reorder2"), eltwise_mode::sum)); BYXF_OUTPUT_topology.add(reorder("reorderOutput", input_info("eltwise"), layout(data_types::f16, format::bfyx, input_tensor))); - network BYXF_OUTPUT_network(engine, BYXF_OUTPUT_topology); + network BYXF_OUTPUT_network(engine, BYXF_OUTPUT_topology, get_test_default_config(engine)); BYXF_OUTPUT_network.set_input_data("input1", input1); BYXF_OUTPUT_network.set_input_data("input2", input2); @@ -3204,7 +3204,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_output_padding) { golden_topology.add(input_layout("input2", input2->get_layout())); golden_topology.add(eltwise("eltwise", input_info("input1"), input_info("input2"), eltwise_mode::sum, DEFAULT_BROADCAST_SPEC, padding{ {0,0,5,10} , 0 })); - network golden_network(engine, golden_topology); + network golden_network(engine, golden_topology, get_test_default_config(engine)); golden_network.set_input_data("input1", input1); golden_network.set_input_data("input2", input2); @@ -3222,7 +3222,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_output_padding) { FS_B_YX_FSV32_OUTPUT_topology.add(reorder("reorderOutput", input_info("eltwise"), layout(data_types::f16, format::bfyx, input_tensor, padding{ {0,0,5,10} , 0 }))); - network FS_B_YX_FSV32_OUTPUT_network(engine, FS_B_YX_FSV32_OUTPUT_topology); + network FS_B_YX_FSV32_OUTPUT_network(engine, FS_B_YX_FSV32_OUTPUT_topology, get_test_default_config(engine)); FS_B_YX_FSV32_OUTPUT_network.set_input_data("input1", input1); FS_B_YX_FSV32_OUTPUT_network.set_input_data("input2", input2); @@ -3240,7 +3240,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_output_padding) { BYXF_OUTPUT_topology.add(reorder("reorderOutput", input_info("eltwise"), layout(data_types::f16, format::bfyx, input_tensor, padding{ {0,0,5,10} , 0 }))); - network BYXF_OUTPUT_network(engine, BYXF_OUTPUT_topology); + network BYXF_OUTPUT_network(engine, BYXF_OUTPUT_topology, get_test_default_config(engine)); BYXF_OUTPUT_network.set_input_data("input1", input1); BYXF_OUTPUT_network.set_input_data("input2", input2); @@ -3289,7 +3289,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_input_padding) golden_topology.add(reorder("reorder2", input_info("input2"), layout(data_types::f16, format::bfyx, input_tensor, 
padding{ {0,0,5,7},0.0f }))); golden_topology.add(eltwise("eltwise", input_info("input1"), input_info("input2"), eltwise_mode::sum)); - network golden_network(engine, golden_topology); + network golden_network(engine, golden_topology, get_test_default_config(engine)); golden_network.set_input_data("input1", input1); golden_network.set_input_data("input2", input2); @@ -3306,7 +3306,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_input_padding) FS_B_YX_FSV32_OUTPUT_topology.add(eltwise("eltwise", input_info("reorder1"), input_info("reorder2"), eltwise_mode::sum)); FS_B_YX_FSV32_OUTPUT_topology.add(reorder("reorderOutput", input_info("eltwise"), layout(data_types::f16, format::bfyx, input_tensor))); - network FS_B_YX_FSV32_OUTPUT_network(engine, FS_B_YX_FSV32_OUTPUT_topology); + network FS_B_YX_FSV32_OUTPUT_network(engine, FS_B_YX_FSV32_OUTPUT_topology, get_test_default_config(engine)); FS_B_YX_FSV32_OUTPUT_network.set_input_data("input1", input1); FS_B_YX_FSV32_OUTPUT_network.set_input_data("input2", input2); @@ -3323,7 +3323,7 @@ TEST(eltwise_gpu_f16, bfyx_and_fs_b_yx_fsv32_input_padding) BYXF_OUTPUT_topology.add(eltwise("eltwise", input_info("reorder1"), input_info("reorder2"), eltwise_mode::sum)); BYXF_OUTPUT_topology.add(reorder("reorderOutput", input_info("eltwise"), layout(data_types::f16, format::bfyx, input_tensor))); - network BYXF_OUTPUT_network(engine, BYXF_OUTPUT_topology); + network BYXF_OUTPUT_network(engine, BYXF_OUTPUT_topology, get_test_default_config(engine)); BYXF_OUTPUT_network.set_input_data("input1", input1); BYXF_OUTPUT_network.set_input_data("input2", input2); @@ -3455,7 +3455,7 @@ TEST(eltwise_gpu, b_fs_yx_fsv4_wo_callib) { eltw, actv); // Network processing - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); network.set_input_data("input3", input3); @@ -3511,7 +3511,7 @@ TEST(eltwise_gpu, b_fs_yx_fsv4_wo_callib) { { in_B, in_F, in_X, in_Y }))); // Network processing - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); network.set_input_data("input3", input3); @@ -3671,7 +3671,7 @@ struct eltwise_same_input_test : testing::TestWithParam{"eltwise"})); cldnn::network net(engine, topo, config); @@ -3835,7 +3835,7 @@ TEST_P(eltwise_test, fsv16) { topology.add(reorder("out", input_info("eltwise"), fmt_pln, data_types::f32)); primitive_id out_id = "out"; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -3941,7 +3941,7 @@ TEST_P(eltwise_test_6d, bfwzyx) { topology.add(reorder("out", input_info("eltwise"), format::bfwzyx, data_types::f32)); primitive_id out_id = "out"; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -4026,7 +4026,7 @@ TEST_P(eltwise_test_mixed_precision, fsv16) { topology.add(reorder("out", input_info("eltwise"), fmt_pln, data_types::f32)); primitive_id out_id = "out"; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -4131,7 +4131,7 @@ TEST_P(eltwise_test_mixed_layout, 
mixed_layout) { topology.add(reorder("out", input_info("eltwise"), format::bfyx, data_types::f32)); primitive_id out_id = "out"; - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -4278,7 +4278,7 @@ struct eltwise_random_test : testing::TestWithParam auto prim = eltwise("eltwise", { input_info("input1"), input_info("input2") }, params.mode); topo.add(prim); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::custom_outputs(std::vector{"eltwise"})); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"eltwise", {params.in_format, "generic_eltwise_ref"}} })); @@ -4295,7 +4295,7 @@ struct eltwise_random_test : testing::TestWithParam auto prim_opt = eltwise("eltwise_opt", { input_info("input1"), input_info("input2") }, params.mode); topo_opt.add(prim_opt); - ExecutionConfig config_opt; + ExecutionConfig config_opt = get_test_default_config(engine); config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector{"eltwise_opt"})); cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test); diff --git a/src/plugins/intel_gpu/tests/test_cases/embedding_bag_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/embedding_bag_gpu_test.cpp index a90f9623973582..9220efbdd1bc3b 100644 --- a/src/plugins/intel_gpu/tests/test_cases/embedding_bag_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/embedding_bag_gpu_test.cpp @@ -53,7 +53,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_basic) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -106,7 +106,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_basic_without_weights) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -187,7 +187,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_dim2) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -310,7 +310,7 @@ TEST(embedding_bag_fp16_gpu, packed_sum_dim3) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -404,7 +404,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic) { topology.add( embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 0) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", 
emb_table); network.set_input_data("Input1", indices); @@ -469,7 +469,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic_first_empty) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -534,7 +534,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_basic_last_empty) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -592,7 +592,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_without_weights_and_def_index) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -707,7 +707,7 @@ TEST(embedding_bag_fp16_gpu, offsets_sum_dim3) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 0) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -803,7 +803,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 0) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -868,7 +868,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_first_empty) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -933,7 +933,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_basic_last_empty) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -991,7 +991,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_without_weights_and_def_index) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -1106,7 +1106,7 @@ TEST(embedding_bag_fp16_gpu, segments_sum_dim3) { embedding_bag("embedding_bag", { input_info("Input0"), 
input_info("Input1"), input_info("Input2"), input_info("Input3") }, type, output_shape, 0) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -1199,7 +1199,7 @@ TEST(embedding_bag_fp32_gpu, packed_sum_basic) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -1309,7 +1309,7 @@ TEST(embedding_bag_fp32_gpu, packed_sum_dim3) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", emb_table); network.set_input_data("Input1", indices); @@ -1394,7 +1394,7 @@ void test_embedding_bag_fp32_gpu_extended5_6(bool is_caching_test) { embedding_bag("embedding_bag", { input_info("Input0"), input_info("Input1"), input_info("Input2") }, type, output_shape) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", emb_table); network->set_input_data("Input1", indices); diff --git a/src/plugins/intel_gpu/tests/test_cases/empty_tensor_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/empty_tensor_gpu_test.cpp index 7f7b0370c1daa0..ee39a7b6cd4bc4 100644 --- a/src/plugins/intel_gpu/tests/test_cases/empty_tensor_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/empty_tensor_gpu_test.cpp @@ -44,7 +44,7 @@ TEST_P(test_empty_tensor, concat_two_inputs) { topology.add(gather_nonzero("gather_nonzero", input_info("nonzero_input"), input_info("count_nonzero"))); topology.add(concatenation("concat", { input_info("gather_nonzero"), input_info("concat_data") }, p.concat_axis)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_detection_output_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_detection_output_gpu_test.cpp index 660e3bdf34202f..46c13f3ac9a8f9 100644 --- a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_detection_output_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_detection_output_gpu_test.cpp @@ -143,7 +143,7 @@ struct experimental_detectron_detection_output_test const primitive_id eddo_id = "experimental_detectron_detection_output"; topology.add(reorder(eddo_id, input_info(b_eddo_primitive) /*b_eddo_id*/, format::bfyx, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(input_boxes_id, input_boxes); network->set_input_data(input_deltas_id, input_deltas); @@ 
-159,7 +159,7 @@ struct experimental_detectron_detection_output_test cldnn::topology reorder_score_topology; reorder_score_topology.add(input_layout(b_output_scores_id, output_scores_layout)); reorder_score_topology.add(reorder(output_scores_id, input_info(b_output_scores_id), format::bfyx, data_type)); - cldnn::network reorder_score_net{engine, reorder_score_topology}; + cldnn::network reorder_score_net{engine, reorder_score_topology, get_test_default_config(engine)}; reorder_score_net.set_input_data(b_output_scores_id, b_output_scores); const auto score_result = reorder_score_net.execute(); const auto output_scores = score_result.at(output_scores_id).get_memory(); @@ -170,7 +170,7 @@ struct experimental_detectron_detection_output_test cldnn::topology reorder_classes_topology; reorder_classes_topology.add(input_layout(b_output_classes_id, output_classes_layout)); reorder_classes_topology.add(reorder(output_classes_id, input_info(b_output_classes_id), format::bfyx, data_types::i32)); - cldnn::network reorder_classes_net{engine, reorder_classes_topology}; + cldnn::network reorder_classes_net{engine, reorder_classes_topology, get_test_default_config(engine)}; reorder_classes_net.set_input_data(b_output_classes_id, b_output_classes); const auto classes_result = reorder_classes_net.execute(); const auto output_classes = classes_result.at(output_classes_id).get_memory(); diff --git a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_generate_proposals_single_image_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_generate_proposals_single_image_gpu_test.cpp index 21b1ce49594976..7a9470b3ff36e0 100644 --- a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_generate_proposals_single_image_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_generate_proposals_single_image_gpu_test.cpp @@ -241,7 +241,7 @@ struct experimental_detectron_generate_proposals_single_image_test const primitive_id reorder_result_id = edgpsi_id + "Reordered"; topology.add(reorder(reorder_result_id, input_info(edgpsi_primitive), format::bfyx, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(input_im_info_id, input_im_info); network->set_input_data(input_anchors_id, input_anchors); @@ -258,7 +258,7 @@ struct experimental_detectron_generate_proposals_single_image_test cldnn::topology reorder_topology; reorder_topology.add(input_layout("scores", rois_scores_layout)); reorder_topology.add(reorder("plane_scores", input_info("scores"), format::bfyx, data_type)); - cldnn::network reorder_net{engine, reorder_topology}; + cldnn::network reorder_net{engine, reorder_topology, get_test_default_config(engine)}; reorder_net.set_input_data("scores", output_roi_scores); const auto second_output_result = reorder_net.execute(); const auto plane_data_mem = second_output_result.at("plane_scores").get_memory(); diff --git a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_prior_grid_generator_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_prior_grid_generator_gpu_test.cpp index 37a37e7bc5d796..3a52028e650139 100644 --- a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_prior_grid_generator_gpu_test.cpp +++ 
b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_prior_grid_generator_gpu_test.cpp @@ -62,7 +62,7 @@ struct experimental_detectron_prior_grid_generator_test params.imageShape.first, params.imageShape.second)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), params.is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), params.is_caching_test); network->set_input_data(priors_id, prior_input); diff --git a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_roi_feature_extractor_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_roi_feature_extractor_gpu_test.cpp index 19f2f56d8490bc..85d45ad22ae615 100644 --- a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_roi_feature_extractor_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_roi_feature_extractor_gpu_test.cpp @@ -53,7 +53,7 @@ void test_experimental_detectron_roi_feature_extractor_gpu_fp32_one_level(bool i topology.add(activation(activation_abs_id, feature_extractor_id, activation_func::abs)); topology.add(mutable_data(second_output_r_id, {feature_extractor_id}, second_output)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(input_rois_id, roi_input); network->set_input_data(input_level_1_id, level_1); @@ -150,7 +150,7 @@ TEST(experimental_detectron_roi_feature_extractor_gpu_fp32, two_levels) { topology.add(activation(activation_abs_id, feature_extractor_id, activation_func::abs)); topology.add(mutable_data(second_output_r_id, {feature_extractor_id}, second_output)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data(input_rois_id, roi_input); network.set_input_data(input_level_1_id, level_1); @@ -246,7 +246,7 @@ TEST(experimental_detectron_roi_feature_extractor_gpu_fp32, multiple_feature_ext topology.add(activation(activation_abs_second_instance_id, input_info(feature_extractor_second_instance_id), activation_func::abs)); topology.add(mutable_data(second_output_r_second_instance_id, { input_info(feature_extractor_second_instance_id) }, second_output_second_instance)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data(input_rois_first_instance_id, roi_input_first_instance); network.set_input_data(input_rois_second_instance_id, roi_input_second_instance); diff --git a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_topk_rois_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_topk_rois_gpu_test.cpp index ed809b82b013de..2dd3ce2b417bcb 100644 --- a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_topk_rois_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_topk_rois_gpu_test.cpp @@ -80,7 +80,7 @@ TYPED_TEST(experimental_detectron_topk_rois_gpu_test, check_set_indices_layer) { rois_num)); topology.add(reorder("plane_output", experimental_detectron_topk_rois_id, format::bfyx, this->data_type)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data(input_rois_id, 
roi_input); network.set_input_data(input_indices_id, roi_indices); @@ -118,7 +118,7 @@ TYPED_TEST(experimental_detectron_topk_rois_gpu_test, check_set_indices_layer_mo rois_num)); topology.add(reorder("plane_output", input_info(experimental_detectron_topk_rois_id), format::bfyx, this->data_type)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data(input_rois_id, roi_input); network.set_input_data(input_indices_id, roi_indices); @@ -159,7 +159,7 @@ TEST(experimental_detectron_topk_rois_gpu_test, export_import) { rois_num)); topology.add(reorder("plane_output", input_info(experimental_detectron_topk_rois_id), format::bfyx, test_data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), true); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), true); network->set_input_data(input_rois_id, roi_input); network->set_input_data(input_indices_id, roi_indices); diff --git a/src/plugins/intel_gpu/tests/test_cases/extract_image_patches_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/extract_image_patches_gpu_test.cpp index acee53d74a4e22..9a6e022efc0697 100644 --- a/src/plugins/intel_gpu/tests/test_cases/extract_image_patches_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/extract_image_patches_gpu_test.cpp @@ -41,7 +41,7 @@ TEST(extract_image_patches_gpu, basic) { topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input); auto outputs = network.execute(); @@ -115,7 +115,7 @@ TEST(extract_image_patches_gpu, basic2) { topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input); auto outputs = network.execute(); @@ -179,7 +179,7 @@ TEST(extract_image_patches_gpu, basic3) { topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input); auto outputs = network.execute(); @@ -274,7 +274,7 @@ TEST(extract_image_patches_gpu, basic3_same_lower) { topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input); auto outputs = network.execute(); @@ -369,7 +369,7 @@ TEST(extract_image_patches_gpu, basic3_enough_space) { topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); 
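All of these hunks apply one recipe: wherever a test default-constructed an ExecutionConfig (or passed none at all), it now asks the shared test utility for an engine-aware default. The helper's real definition is not part of this patch; the sketch below is only a plausible reading of what it centralizes, inferred from the per-test queue_type configs this patch deletes further down (see the GemmBaseTest hunk):

    // Hypothetical sketch, not the actual test_utils implementation.
    inline ExecutionConfig get_test_default_config_sketch(cldnn::engine& engine) {
        ExecutionConfig cfg;
    #ifdef ENABLE_ONEDNN_FOR_GPU
        // Assumption: oneDNN builds need an in-order queue, which individual
        // tests used to request by hand (see the removed lines in GemmBaseTest).
        cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
    #endif
        (void)engine;  // the real helper may also consult engine properties
        return cfg;
    }

Construction sites then shrink to the one-line form repeated through this file: network network(engine, topology, get_test_default_config(engine));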
network.set_input_data("Input0", input); auto outputs = network.execute(); @@ -443,7 +443,7 @@ TEST(extract_image_patches_gpu, basic4) { topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input); auto outputs = network.execute(); @@ -518,7 +518,7 @@ void test_extract_image_patches_gpu_basic5(bool is_caching_test) { topology.add(input_layout("Input0", input->get_layout())); topology.add(extract_image_patches("extract_image_patches", input_info("Input0"), sizes, strides, rates, auto_pad, output_shape)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input); auto outputs = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/eye.cpp b/src/plugins/intel_gpu/tests/test_cases/eye.cpp index ad316f4fb28d31..22ee147e12c747 100644 --- a/src/plugins/intel_gpu/tests/test_cases/eye.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/eye.cpp @@ -85,7 +85,7 @@ class EyeTest : public ::testing::TestWithParam::value)); } - cldnn::network::ptr network = get_network(engine_, tp, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine_, tp, get_test_default_config(engine_), get_test_stream_ptr(), is_caching_test); auto outputs = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp index 588b9a75fc8eff..e3722341a997c3 100644 --- a/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp @@ -88,7 +88,7 @@ void generic_fully_connected_test(cldnn::format test_input_fmt, cldnn::format te topology.add(activation("out", input_info(out_id), activation_func::relu, { slope, 0.0f })); out_id = "out"; } - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -209,7 +209,7 @@ TEST(fully_connected_gpu, no_biases) { topology.add(w_data); topology.add(fc); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -269,7 +269,7 @@ TEST(fully_connected_gpu, no_biases_int8) { topology.add(fc); topology.add(ri); topology.add(rf); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -328,7 +328,7 @@ TEST(fully_connected_gpu, xb_f32_batch_1) { fully_connected("fc_prim", input_info("input"), "weights", "bias") ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -388,7 +388,7 @@ TEST(fully_connected_gpu, xb_f32_batch_2) { fully_connected("fc_prim", input_info("input"), "weights", "bias") ); - network network(engine, 
topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -450,7 +450,7 @@ TEST(fully_connected_gpu, x_f32) { fully_connected("fc_prim", input_info("input"), "weights", "bias") ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -511,7 +511,7 @@ TEST(fully_connected_gpu, xb_f32_batch_1_relu) { activation("out", input_info("fc_prim"), activation_func::relu) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -573,7 +573,7 @@ TEST(fully_connected_gpu, xb_f32_batch_2_relu) { activation("out", input_info("fc_prim"), activation_func::relu) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -636,7 +636,7 @@ TEST(fully_connected_gpu, x_f32_relu) { activation("out", input_info("fc_prim"), activation_func::relu) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -696,7 +696,7 @@ TEST(fully_connected_gpu, x_f32_relu_with_negative_slope) { activation("out", input_info("fc_prim"), activation_func::relu_negative_slope, { 0.1f }) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input_prim); auto outputs = network.execute(); @@ -799,7 +799,7 @@ TEST(fully_connected_gpu, b_fs_yx_fsv4) topology.add(reorder_gold, reorder_imad); // Network build - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -868,7 +868,7 @@ TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b12) { ); // Set data optimization to allow weights reordering to optimal format - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -944,7 +944,7 @@ TEST(fully_connected_gpu, DISABLED_fs_byx_fsv32_b34) ); // Set data optimization to allow weights reordering to optimal format - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -1006,7 +1006,7 @@ struct fully_connected_random_test : ::testing::TestWithParam("bias", format::bfyx, std::move(bias_data)); auto fc = net.add_fully_connected("fc_prim", input, weights, bias, ov::intel_gpu::ImplementationDesc{ output_format, kernel }); - net.run(ExecutionConfig(ov::intel_gpu::optimize_data(true)), is_caching_test); + net.run(get_test_default_config(eng, ov::intel_gpu::optimize_data(true)), is_caching_test); } }; @@ -1129,7 +1129,9 @@ struct fully_connected_random_test_3d : ::testing::TestWithParam("bias", format::bfyx, std::move(bias_data)); auto fc = net.add_fully_connected_3d("fc_prim", input, weights, bias, ov::intel_gpu::ImplementationDesc{ output_format, kernel }, 3); - net.run(ExecutionConfig(ov::intel_gpu::optimize_data(true)), is_caching_test); + 
ExecutionConfig config = get_test_default_config(eng); + config.set_property(ov::intel_gpu::optimize_data(true)); + net.run(config, is_caching_test); } }; @@ -1393,7 +1395,7 @@ class fully_connected_quantized_test : public ::testing::Test { topo.add(reorder("output", input_info("quantization_prim"), format::bfyx, output_data_type())); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, topo, config); @@ -1686,9 +1688,8 @@ TEST(fully_connected_onednn_gpu, no_biases_int8) { ov::intel_gpu::ImplementationDesc fc_impl = { format::bfyx, "", impl_types::onednn }; - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl} }) - }; + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl} })); network network(engine, topology, cfg); network.set_input_data("input", input_prim); @@ -1738,7 +1739,8 @@ TEST(fully_connected_3d_onednn_gpu, no_biases_int8) { topology.add(rf); ov::intel_gpu::ImplementationDesc fc_impl = { format::bfyx, "", impl_types::onednn }; - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "fc_prim", fc_impl } })}; + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl} })); network network(engine, topology, cfg); network.set_input_data("input", input_prim); @@ -1778,7 +1780,7 @@ TEST(fully_connected_gpu, dynamic) { fully_connected("fc", input_info("input"), "weights") }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1828,7 +1830,7 @@ TEST(fully_connected_gpu, dynamic_multi_inference_same_shape) { fully_connected("fc", input_info("input"), "weights") }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1908,7 +1910,7 @@ TEST(fully_connected_gpu, dynamic_multi_inference_different_shape) { fully_connected("fc", input_info("input"), "weights") }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1998,7 +2000,7 @@ TEST(fully_connected_gpu, dynamic_multi_inference_multiple_shapes) { fully_connected("fc", input_info("input"), "weights") }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -2133,7 +2135,7 @@ struct dynamic_fully_connected_gpu : ::testing::TestWithParamget_layout().format, output_tensor, axis) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = 
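Tests that need more than the defaults layer properties on top of them rather than starting from scratch. The patch uses two spellings of this, both visible in the fully_connected hunks above: the expanded set_property form, and a one-call overload of the helper that accepts the extra property directly:

    // Expanded form (fully_connected_random_test_3d above):
    ExecutionConfig config = get_test_default_config(eng);
    config.set_property(ov::intel_gpu::optimize_data(true));
    net.run(config, is_caching_test);

    // One-call form (fully_connected_random_test above):
    net.run(get_test_default_config(eng, ov::intel_gpu::optimize_data(true)), is_caching_test);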
get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputData", input0); network->set_input_data("InputIndices", input1); @@ -1294,7 +1294,7 @@ TEST(gather_elements_gpu, dynamic) { topology.add(input_layout("InputIndices", in1_dyn_layout)); topology.add(gather_elements("gather_elements", input_info("InputData"), input_info("InputIndices"), axis)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/gather_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/gather_gpu_test.cpp index 9f421837ca46ae..421802e60aa9de 100644 --- a/src/plugins/intel_gpu/tests/test_cases/gather_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/gather_gpu_test.cpp @@ -90,7 +90,7 @@ class gather8_test : public ::testing::TestWithParam { batch_dim, true)); reorder_topo.add(reorder("reorder2", input_info("gather"), format::type::bfwzyx, T_dat_dt)); - network reorder_network(engine, reorder_topo); + network reorder_network(engine, reorder_topo, get_test_default_config(engine)); reorder_network.set_input_data("input0", input0); reorder_network.set_input_data("input1", input1); auto reorder_output = reorder_network.execute().at("reorder2").get_memory(); @@ -101,7 +101,7 @@ class gather8_test : public ::testing::TestWithParam { planar_topo.add(input_layout("input1", input1->get_layout())); planar_topo.add( gather("gather", input_info("input0"), input_info("input1"), axis, ov::Shape(shape_out.begin(), shape_out.end()), batch_dim, true)); - network planar_network(engine, planar_topo); + network planar_network(engine, planar_topo, get_test_default_config(engine)); planar_network.set_input_data("input0", input0); planar_network.set_input_data("input1", input1); auto planar_output = planar_network.execute().at("gather").get_memory(); @@ -358,7 +358,7 @@ TEST(gather8_gpu_fp16, d323_axisY_bdim_m1) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 3, 3, 2}, batch_dim, negative_indexes) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -465,7 +465,7 @@ TEST(gather7_gpu_fp16, d222_axisX_bdim_m1) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2, 2, 2}, batch_dim) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -576,7 +576,7 @@ TEST(gather7_gpu_fp16, d323_axisY_bdim_m1) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 3, 3, 2}, batch_dim) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -680,7 +680,7 @@ TEST(gather7_gpu_fp16, d44_axisY_bdim1) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{4, 3, 4, 1}, batch_dim) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); 
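Dynamic-shape tests are the common case for layered properties: they keep the shared defaults but must additionally opt in to the new shape-inference path. The fragment below matches the dynamic gather_elements and gather tests in these hunks:

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));  // required for dynamic layouts
    network network(engine, topology, config);
    network.set_input_data("input1", input1);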
network.set_input_data("InputText", input2); @@ -755,7 +755,7 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim_m1) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 1, 1}, batch_dim) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -818,7 +818,7 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim1) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 1, 1}, batch_dim) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -880,7 +880,7 @@ TEST(gather7_gpu_fp16, d32_axisF_bdim0) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 3, 2, 1}, batch_dim) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -948,7 +948,7 @@ TEST(gather_gpu_fp16, d14_axisB) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{1, 4, 2, 1}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1010,7 +1010,7 @@ TEST(gather_gpu_fp16, d222_axisB) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1071,7 +1071,7 @@ TEST(gather_gpu_fp16, d22_axisY) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1132,7 +1132,7 @@ TEST(gather_gpu_fp16, d22_axisF) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1190,7 +1190,7 @@ TEST(gather_gpu_fp32, d14_axisB) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{1, 4, 2, 1}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1251,7 +1251,7 @@ TEST(gather_gpu_fp32, d222_axisB) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1312,7 +1312,7 @@ TEST(gather_gpu_fp32, d22_axisY) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network 
network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1373,7 +1373,7 @@ TEST(gather_gpu_fp32, d22_axisF) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1434,7 +1434,7 @@ TEST(gather_gpu_int32, d22_axisF) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1492,7 +1492,7 @@ TEST(gather_gpu_int32, d14_axisB) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{1, 4, 2, 1}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1553,7 +1553,7 @@ TEST(gather_gpu_int32, d222_axisB) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1614,7 +1614,7 @@ TEST(gather_gpu_int32, d22_axisY) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 2, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1678,7 +1678,7 @@ TEST(gather_gpu_fp32, d41_axisB) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{4, 1, 2, 3}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1741,7 +1741,7 @@ TEST(gather_gpu_fp32, d41_axisF) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 4, 1, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1800,7 +1800,7 @@ TEST(gather_gpu_fp32, d2_axisX) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{2, 2, 1, 2}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1850,7 +1850,7 @@ TEST(gather_gpu_fp32, 322_axisF) { gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 2, 1}) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1889,7 +1889,7 @@ TEST(gather_gpu_fp32, dynamic_322_axisF) { 
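The other construction path in these tests is get_network(...), which takes the same config plus a stream pointer and an is_caching_test flag. Its definition is outside this patch; below is a hedged sketch of what it plausibly does, with the caching branch reduced to a comment (the GemmGPUTest hunk further below performs that serialization round-trip by hand via membuf and BinaryOutputBuffer):

    // Illustrative only; the real helper lives in test_utils.
    cldnn::network::ptr get_network_sketch(cldnn::engine& engine,
                                           cldnn::topology& topology,
                                           const ExecutionConfig& config,
                                           std::shared_ptr<cldnn::stream> stream,
                                           bool is_caching_test) {
        (void)stream;  // the real helper attaches this stream to the imported network
        if (!is_caching_test)
            return std::make_shared<cldnn::network>(engine, topology, config);
        // Caching path (assumption): serialize the built program to an in-memory
        // buffer and re-import it, so the test exercises model-cache export/import.
        // The exact BinaryOutputBuffer/BinaryInputBuffer plumbing is elided here.
        return std::make_shared<cldnn::network>(engine, topology, config);
    }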
topology.add(input_layout("input2", in2_layout)); topology.add(gather("gather", input_info("input1"), input_info("input2"), axis, ov::Shape{})); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input1", input1); @@ -1938,7 +1938,7 @@ void test_gather_gpu_u8_322_axisF(bool is_caching_test) { topology.add( gather("gather", input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 2, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputDictionary", input1); network->set_input_data("InputText", input2); diff --git a/src/plugins/intel_gpu/tests/test_cases/gather_nd_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/gather_nd_gpu_test.cpp index 564ff44725b8b6..0dc3f499d2aca5 100644 --- a/src/plugins/intel_gpu/tests/test_cases/gather_nd_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/gather_nd_gpu_test.cpp @@ -39,7 +39,7 @@ inline void DoTestBase(engine& engine, topology.add(input_layout("InputIndices", input1->get_layout())); topology.add(gather_nd_inst); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputData", input0); network->set_input_data("InputIndices", input1); diff --git a/src/plugins/intel_gpu/tests/test_cases/gather_tree_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/gather_tree_gpu_test.cpp index 7cbe4e9968dce8..8c7e9fa22e6280 100644 --- a/src/plugins/intel_gpu/tests/test_cases/gather_tree_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/gather_tree_gpu_test.cpp @@ -213,7 +213,7 @@ struct gather_tree_test const primitive_id reorder_result_id = result_id + "_reordered"; topology.add(reorder(reorder_result_id, input_info(result_id), plain_layout, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(step_id, step_input); network->set_input_data(parent_id, parent_input); diff --git a/src/plugins/intel_gpu/tests/test_cases/gemm_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/gemm_gpu_test.cpp index ccfe63bda67aea..d2d31bd2b5dfd1 100644 --- a/src/plugins/intel_gpu/tests/test_cases/gemm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/gemm_gpu_test.cpp @@ -129,7 +129,7 @@ class GemmGPUTest : public ::testing::TestWithParam { std::cout << "cached" << std::endl; membuf mem_buf; { - cldnn::network _network(engine, tp); + cldnn::network _network(engine, tp, get_test_default_config(engine)); process_program(_network.get_program()); std::ostream out_mem(&mem_buf); BinaryOutputBuffer ob = BinaryOutputBuffer(out_mem); @@ -141,7 +141,7 @@ class GemmGPUTest : public ::testing::TestWithParam { network = std::make_shared(ib, get_test_stream_ptr(), engine); } } else { - network = std::make_shared(engine, tp); + network = std::make_shared(engine, tp, 
get_test_default_config(engine)); process_program(network->get_program()); } @@ -292,7 +292,7 @@ void test_basic_bfyx_t2_inplace_crop_with_pad(bool is_caching_test) { gemm("output", { input_info("crop.1"), input_info("input2") }, data_types::f32, false, true) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -343,7 +343,7 @@ TEST(gemm_gpu, dynamic) { gemm("gemm", { input_info("input1"), input_info("input2") }, data_types::f32, false, true, 1.0f, 0.0f, 4, 2) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -412,7 +412,7 @@ TEST(gemm_gpu, dynamic_multi_inference_same_shape) { gemm("gemm", { input_info("input1"), input_info("input2") }, data_types::f32, false, false, 1.0f, 0.0f, 4, 2) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -501,7 +501,7 @@ TEST(gemm_gpu, dynamic_multi_inference_different_shape) { gemm("gemm", { input_info("input1"), input_info("input2") }, data_types::f32, false, false, 1.0f, 0.0f, 4, 2) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1311,11 +1311,11 @@ class GemmBaseTest : public ::testing::TestWithParam { #ifdef ENABLE_ONEDNN_FOR_GPU ov::intel_gpu::ImplementationDesc gemm_impl = { format::bfyx, "", impl_types::onednn }; - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::in_order)); #else ov::intel_gpu::ImplementationDesc gemm_impl = { format::bfyx, p.kernel_name }; - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); #endif + + ExecutionConfig cfg = get_test_default_config(engine); cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm_bfyx", gemm_impl} })); cldnn::network::ptr network = get_network(engine, topology, cfg, get_test_stream_ptr(), is_caching_test); diff --git a/src/plugins/intel_gpu/tests/test_cases/generate_proposals_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/generate_proposals_gpu_test.cpp index 1ad55e645d5a50..5435c6b96ddad4 100644 --- a/src/plugins/intel_gpu/tests/test_cases/generate_proposals_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/generate_proposals_gpu_test.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/runtime/execution_config.hpp" #include "test_utils.h" #include @@ -289,7 +290,7 @@ struct generate_proposals_test const auto rois_num_type = type_to_data_type::value; auto& engine = get_test_engine(); - + std::shared_ptr stream = get_test_stream_ptr(); const primitive_id input_im_info_id = "InputImInfo"; const auto input_im_info = engine.allocate_memory({data_type, format::bfyx, tensor{batch(num_batches), feature(3)}}); set_values(input_im_info, getValues(im_info)); @@ -355,7 +356,7 @@ struct generate_proposals_test const primitive_id reorder_result_id
= generate_proposals_id + "Reordered"; topology.add(reorder(reorder_result_id, input_info(generate_proposals_id), format::bfyx, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), stream, is_caching_test); network->set_input_data(input_im_info_id, input_im_info); network->set_input_data(input_anchors_id, input_anchors); @@ -366,7 +367,7 @@ struct generate_proposals_test const auto rois = outputs.at(reorder_result_id).get_memory(); - const cldnn::mem_lock rois_ptr(rois, get_test_stream()); + const cldnn::mem_lock rois_ptr(rois, *stream); ASSERT_EQ(rois_ptr.size(), num_batches * param.post_nms_count * 4); const auto get_plane_data = [&](const memory::ptr& mem, const data_types data_type, const layout& from_layout) { @@ -376,7 +377,7 @@ struct generate_proposals_test cldnn::topology reorder_topology; reorder_topology.add(input_layout("data", from_layout)); reorder_topology.add(reorder("plane_data", input_info("data"), format::bfyx, data_type)); - cldnn::network reorder_net{engine, reorder_topology}; + cldnn::network reorder_net{engine, reorder_topology, get_test_default_config(engine)}; reorder_net.set_input_data("data", mem); const auto second_output_result = reorder_net.execute(); const auto plane_data_mem = second_output_result.at("plane_data").get_memory(); @@ -384,11 +385,11 @@ struct generate_proposals_test }; const cldnn::mem_lock roi_scores_ptr( - get_plane_data(output_roi_scores, data_type, rois_scores_layout), get_test_stream()); + get_plane_data(output_roi_scores, data_type, rois_scores_layout), *stream); ASSERT_EQ(roi_scores_ptr.size(), num_batches * param.post_nms_count); const cldnn::mem_lock rois_num_ptr( - get_plane_data(output_rois_num, rois_num_type, rois_num_layout), get_test_stream()); + get_plane_data(output_rois_num, rois_num_type, rois_num_layout), *stream); ASSERT_EQ(rois_num_ptr.size(), num_batches); const auto& expected_rois = param.expected_rois; diff --git a/src/plugins/intel_gpu/tests/test_cases/grid_sample_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/grid_sample_gpu_test.cpp index 1473961df8f593..122299d498dfd0 100644 --- a/src/plugins/intel_gpu/tests/test_cases/grid_sample_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/grid_sample_gpu_test.cpp @@ -75,7 +75,7 @@ struct grid_sample_gpu_test : public testing::TestWithParamset_input_data("data", data); network->set_input_data("grid", grid); const auto outputs = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/hash_key_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/hash_key_gpu_test.cpp index 154ca0f08eb019..04f5d34b531638 100644 --- a/src/plugins/intel_gpu/tests/test_cases/hash_key_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/hash_key_gpu_test.cpp @@ -35,7 +35,7 @@ TEST(check_hash_value, eltwise_basic) { topology.add(input_layout("input2", input2->get_layout())); topology.add(eltwise(key_prim_id, { input_info("input"), input_info("input2") }, eltwise_mode::sum)); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -65,7 +65,7 @@ TEST(check_hash_value, fc_basic) { fully_connected(key_prim_id, input_info("input"), "weights", "bias") ); - auto prog = 
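The generate_proposals hunks above make a second, subtler change: the test now fetches the stream pointer once, hands it to get_network, and locks result buffers against that same stream instead of calling get_test_stream() each time, so reads synchronize with the queue the network actually ran on. In outline (the shared_ptr and mem_lock template arguments here are assumptions; float matches this test's f32 data):

    std::shared_ptr<cldnn::stream> stream = get_test_stream_ptr();
    cldnn::network::ptr network = get_network(engine, topology,
                                              get_test_default_config(engine),
                                              stream, is_caching_test);
    // ... execute and fetch the "rois" output memory ...
    cldnn::mem_lock<float> rois_ptr(rois, *stream);  // lock on the stream the network used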
program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -96,7 +96,7 @@ TEST(check_hash_value, gather_basic) { gather(key_prim_id, input_info("InputDictionary"), input_info("InputText"), axis, ov::Shape{3, 2, 3, 3, 2}, batch_dim, negative_indexes) ); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -122,7 +122,7 @@ TEST(check_hash_value, gemm_basic) { topology.add(crop("crop.1", input_info("input"), { 1, 1, 4, 3 }, { 0, 1, 0, 0 })); topology.add(gemm(key_prim_id, { input_info("crop.1"), input_info("input2") }, data_types::f32, false, true)); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -145,7 +145,7 @@ TEST(check_hash_value, permute_basic) { input_layout("input", input->get_layout()), permute(key_prim_id, input_info("input"), { 0, 1, 2, 3 })); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -174,7 +174,7 @@ TEST(check_hash_value, reorder_basic) { input_layout("input", input->get_layout()), reorder(key_prim_id, input_info("input"), output_layout)); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -200,7 +200,7 @@ TEST(check_hash_value, reshape_basic) { topology.add(reorder("reorder", input_info("input"), padded_input_layout)); topology.add(reshape(key_prim_id, input_info("reorder"), tensor( 1, 1, 4, 1 ), cldnn::reshape::reshape_mode::base, padding({0, 0, 2, 2}))); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -227,7 +227,7 @@ TEST(check_hash_value, conv_basic) { data("biases", biases), convolution(key_prim_id, input_info("input"), { "weights" }, { "biases" }, {1, 1, 1}, {0, 0, 0}, {1, 1, 1})); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); @@ -260,7 +260,7 @@ TEST(check_hash_value, quantize_basic) { quantize(key_prim_id, input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 256, data_types::u8) ); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, 
topology, get_test_default_config(engine)); network net(prog, 0); const auto prim_inst = net.get_primitive(key_prim_id); const auto primitve = prim_inst->desc(); diff --git a/src/plugins/intel_gpu/tests/test_cases/loop_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/loop_gpu_test.cpp index a86c94b7119b61..94fb17104275e0 100644 --- a/src/plugins/intel_gpu/tests/test_cases/loop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/loop_gpu_test.cpp @@ -73,7 +73,7 @@ void test_loop_gpu_basic_no_concat(bool is_caching_test) input_primitive_maps, output_primitive_maps, back_edges, 8) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input_mem); network->set_input_data("trip_count", trip_count_mem); @@ -174,7 +174,7 @@ void test_loop_gpu_basic_concat(bool is_caching_test) input_primitive_maps, output_primitive_maps, back_edges, trip_count) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input_mem); network->set_input_data("trip_count", trip_count_mem); network->set_input_data("initial_condition", initial_condition_mem); @@ -314,7 +314,7 @@ void test_loop_gpu_basic_concat_nested(bool is_caching_test) ///////////////////////////////// // network execution ///////////////////////////////// - cldnn::network::ptr network = get_network(engine, main_topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, main_topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input_mem); network->set_input_data("trip_count", trip_count_mem); network->set_input_data("initial_condition", initial_condition_mem); diff --git a/src/plugins/intel_gpu/tests/test_cases/lrn_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/lrn_gpu_test.cpp index e35384a068fb03..fcbe4eaa7d3e76 100644 --- a/src/plugins/intel_gpu/tests/test_cases/lrn_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/lrn_gpu_test.cpp @@ -37,7 +37,7 @@ void test_fp32_basic(bool is_caching_test) { float beta = 1.f; topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -89,7 +89,7 @@ void test_fp32_basic2(bool is_caching_test) { float beta = 1.f; topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -141,7 +141,7 @@ void test_fp16_basic1(bool is_caching_test) { float beta = 1.f; topology.add(lrn("lrn", input_info("input"), size, k, alpha, 
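The check_hash_value tests follow a slightly different skeleton: they build the program explicitly with the shared config, wrap it in a network without executing it, and read back the primitive descriptor whose hash is under test. The common core, taken directly from the hunks above:

    auto prog = program::build_program(engine, topology, get_test_default_config(engine));
    network net(prog, 0);  // the literal 0 matches the tests above
    const auto prim_inst = net.get_primitive(key_prim_id);
    const auto primitive = prim_inst->desc();  // descriptor whose hash the test asserts on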
beta, cldnn::lrn_norm_region_across_channel)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -193,7 +193,7 @@ void test_fp32_basic3(bool is_caching_test) { float beta = 0.75f; topology.add(lrn("lrn", input_info("input"), size, k, alpha, beta, cldnn::lrn_norm_region_across_channel)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/lstm_dynamic_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/lstm_dynamic_gpu_test.cpp index b9535c6a1a251e..5b7495ee5f1c51 100644 --- a/src/plugins/intel_gpu/tests/test_cases/lstm_dynamic_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/lstm_dynamic_gpu_test.cpp @@ -245,7 +245,7 @@ struct lstm_dynamic_input_layer_test : public ::testing::Test "weights", bias_id)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -407,7 +407,7 @@ struct lstm_dynamic_single_layer_test : public ::testing::Test initial_hidden_id, initial_cell_id)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -888,7 +888,7 @@ TEST(lstm_dynamic_negative, wrong_weights_size) { "dyn_len", "weights", "recurrent")); - ASSERT_ANY_THROW(network network(engine, topology)); + ASSERT_ANY_THROW(network network(engine, topology, get_test_default_config(engine))); } TEST(lstm_dynamic_negative, wrong_recurrent_size_0) { @@ -913,7 +913,7 @@ TEST(lstm_dynamic_negative, wrong_recurrent_size_0) { "dyn_len", "weights", "recurrent")); - ASSERT_ANY_THROW(network network(engine, topology)); + ASSERT_ANY_THROW(network network(engine, topology, get_test_default_config(engine))); } TEST(lstm_dynamic_negative, wrong_recurrent_size_1) { @@ -938,7 +938,7 @@ TEST(lstm_dynamic_negative, wrong_recurrent_size_1) { "dyn_len", "weights", "recurrent")); - ASSERT_ANY_THROW(network network(engine, topology)); + ASSERT_ANY_THROW(network network(engine, topology, get_test_default_config(engine))); } TEST(lstm_dynamic_negative, wrong_dynamic_length_size_0) { @@ -963,7 +963,7 @@ TEST(lstm_dynamic_negative, wrong_dynamic_length_size_0) { "dyn_len", "weights", "recurrent")); - ASSERT_ANY_THROW(network network(engine, topology)); + ASSERT_ANY_THROW(network network(engine, topology, get_test_default_config(engine))); } TEST(lstm_dynamic_negative, wrong_dynamic_length_size_1) { @@ -988,5 +988,5 @@ TEST(lstm_dynamic_negative, wrong_dynamic_length_size_1) { "dyn_len", "weights", "recurrent")); - ASSERT_ANY_THROW(network network(engine, topology)); + ASSERT_ANY_THROW(network network(engine, topology, get_test_default_config(engine))); } diff --git a/src/plugins/intel_gpu/tests/test_cases/lstm_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/lstm_gpu_test.cpp index 50c9555a59413d..2d62d73290d746 100644 --- 
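For the negative lstm_dynamic tests the expected failure happens during network construction itself, so the config argument has to move inside the assertion macro; nothing else about those tests changes:

    ASSERT_ANY_THROW(network network(engine, topology, get_test_default_config(engine)));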
a/src/plugins/intel_gpu/tests/test_cases/lstm_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/lstm_gpu_test.cpp @@ -244,7 +244,7 @@ void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size, topology.add(lstm_gemm("lstm_gemm", input_info("input"), "weights", "recurrent", hasBias ? "biases" : "", hasHidden ? "hidden" : "")); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); if (hasHidden) { network->set_input_data("hidden", hidden); @@ -307,7 +307,7 @@ void generic_lstm_elt_gpu_test(int /* sequence_len */, int direction, int batch_ } topology.add(lstm_elt("lstm_elt", input_info("tempGEMM"), hasCell ? "cell" : "", clip_threshold, input_forget)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("tempGEMM", tempGEMM); if (hasCell) { network->set_input_data("cell", cell); @@ -430,7 +430,7 @@ void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_siz generate_lstm_topology(topology, input, hidden, cell, weights, recurrent, biases, sequence_len, hasBias, hasInitialHidden, hasInitialCell); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); if (hasInitialHidden) network->set_input_data("hidden", hidden); if (hasInitialCell) network->set_input_data("cell", cell); @@ -596,7 +596,7 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc prev_lstm_id = lstm_id; } - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); for (int i = 0; i < layers; ++i) { std::string sid = get_string_id(i); @@ -722,7 +722,7 @@ void lstm_gpu_output_test(const lstm_output_selection& output_selection, int dir topology.add(crop("crop:last_cell", input_info("lstm"), cell_tensor, tensor{0, concatenation_len - 1, 0, 0})); } - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("hidden", hidden); network->set_input_data("cell", cell); @@ -886,7 +886,7 @@ void lstm_gpu_format_test(const cldnn::format& format, int directions, bool is_c topology.add(crop("crop:last_cell", input_info("lstm"), cell_tensor, tensor{0, concatenation_len - 1, 0, 0})); } - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); std::map outputs; @@ -1053,7 
+1053,7 @@ void lstm_gpu_users_test(bool is_caching_test = false) { std::vector output_ids_offsets { input_info("lstm"), input_info("hidden") }; topology.add(concatenation("concatenation", output_ids_offsets, 1)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); std::map outputs; @@ -1212,7 +1212,7 @@ void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int directio prev_node_id = output_crop_id; } - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); for (int i = 0; i < layers; ++i) { std::string sid = get_string_id(i); @@ -1555,7 +1555,7 @@ void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size, } // Creating network out of the above designed topology - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); for (size_t layer = 0; layer < layers; layer++) { std::string sid = get_string_id(layer); diff --git a/src/plugins/intel_gpu/tests/test_cases/matrix_nms_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/matrix_nms_gpu_test.cpp index 7fe4a65e0de3d0..0adeae76c21f6d 100644 --- a/src/plugins/intel_gpu/tests/test_cases/matrix_nms_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/matrix_nms_gpu_test.cpp @@ -107,7 +107,7 @@ struct matrix_nms_gpu_test : public testing::TestWithParamset_input_data("boxes", boxes); network->set_input_data("scores", scores); diff --git a/src/plugins/intel_gpu/tests/test_cases/memory_test.cpp b/src/plugins/intel_gpu/tests/test_cases/memory_test.cpp index 511caddca2e83a..0d8c2df6c67a0a 100644 --- a/src/plugins/intel_gpu/tests/test_cases/memory_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/memory_test.cpp @@ -77,7 +77,7 @@ TEST(memory_pool, basic_non_padded_relu_pipe) { std::vector input_vec = { -1.f, 2.f, -3.f, 4.f }; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(*engine, topology, config); @@ -109,7 +109,7 @@ TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) { topology.add(activation("relu4", input_info("relu3"), activation_func::relu)); topology.add(activation("relu5", input_info("relu4"), activation_func::relu)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(*engine, topology, config); @@ -144,7 +144,7 @@ TEST(memory_pool, multi_outputs_network) { topology.add(activation("relu6", input_info("relu5"), activation_func::relu)); topology.add(activation("relu7", input_info("relu6"), activation_func::relu)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(*engine, topology, config); @@ -182,7 +182,7 @@ TEST(memory_pool, oooq) { 
topology.add(concatenation("concat2", { input_info("relu4"), input_info("relu5") }, 1)); topology.add(activation("relu6", input_info("concat2"), activation_func::relu)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(*engine, topology, config); @@ -227,7 +227,7 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) { topology.add(concatenation("concat2", { input_info("relu4"), input_info("relu5") }, 1)); topology.add(activation("relu6", input_info("concat2"), activation_func::linear, { 1.0f, 0.5f })); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network_first(*engine, topology, config); @@ -302,7 +302,7 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice_weights) { convolution("conv", input_info("input"), { "weights" }, { 1, 1, 1, 2 }), softmax("softmax", input_info("conv"))); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network_first(*engine, topology, config); @@ -388,7 +388,7 @@ TEST(memory_pool, shared_mem_pool_diff_batches) { convolution("conv", input_info("input"), { "weights" }, { 2, 1 }), softmax("softmax", input_info("conv"))); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network_first(*engine, topo, config); @@ -421,7 +421,7 @@ TEST(memory_pool, shared_dep_two_output) { topo.add(cldnn::concatenation("result_1_0", { input_info("constant_0_0") }, 0)); topo.add(cldnn::concatenation("result_2_0", { input_info("constant_0_0") }, 0)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(*engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(*engine, topo, config); @@ -462,7 +462,9 @@ TEST(memory_pool, non_opt_intermidate_opt_after) { data_memory ); - ExecutionConfig config(ov::intel_gpu::optimize_data(false)); + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(false)); + network network(engine, topology, config); network.set_input_data("input1", input_memory1); network.set_input_data("input2", input_memory2); @@ -510,7 +512,7 @@ TEST(memory_pool, add_mem_dep_test) { actv3, actv4 ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input1", input_memory1); diff --git a/src/plugins/intel_gpu/tests/test_cases/multiclass_nms_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/multiclass_nms_gpu_test.cpp index 9ac247cf60dff0..d5fe3121996c65 100644 --- a/src/plugins/intel_gpu/tests/test_cases/multiclass_nms_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/multiclass_nms_gpu_test.cpp @@ -170,7 +170,7 @@ struct multiclass_nms_test : public ::testing::TestWithParam(task_config); auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); auto input1_dyn_layout = layout{ ov::PartialShape::dynamic(3), data_types::f16,format::bfyx }; diff --git a/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp 
index 4e8abcd2def262..491be53d4b4e49 100644
--- a/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp
@@ -122,7 +122,7 @@ void test_mvn_test_across_channels_outside_sqrt_bfyx(bool is_caching_test) {
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), false, 1e-10f, false, true));
 
-    cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
+    cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
 
     network->set_input_data("input", input);
 
@@ -156,7 +156,7 @@ void test_mvn_test_across_channels_inside_sqrt_bfyx(bool is_caching_test) {
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), false, 1e-10f, true, true));
 
-    cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
+    cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
 
     network->set_input_data("input", input);
 
@@ -195,7 +195,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_normalize_variance
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, false, true));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -222,7 +222,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_normalize_variance)
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, true, true));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -249,7 +249,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_outside_sqrt_bfyx_normalize_variance
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, false, true));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -276,7 +276,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_inside_sqrt_bfyx_normalize_variance_
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, true, true));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -305,7 +305,7 @@ TEST(mvn_gpu_test, dynamic_across_channels_inside_sqrt_bfyx_normalize_variance_f
     topology.add(input_layout("input", in_layout));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, true, true));
 
-    ExecutionConfig config;
+    ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
     network network(engine, topology, config);
     network.set_input_data("input", input);
@@ -338,7 +338,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_outside_sqrt_bfyx) {
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), false, 1e-10f, false, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -365,7 +365,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_inside_sqrt__bfyx) {
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), false, 1e-10f, true, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -392,7 +392,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_outside_sqrt_bfyx_fp16) {
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), false, 1e-10f, false, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -419,7 +419,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_inside_sqrt_bfyx_fp16) {
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), false, 1e-10f, true, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -446,7 +446,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_outside_sqrt_bfyx_normalize_variance
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, false, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -473,7 +473,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_inside_sqrt_bfyx_normalize_variance)
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, true, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -500,7 +500,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_outside_sqrt_bfyx_normalize_variance
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, false, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -527,7 +527,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_inside_sqrt_bfyx_normalize_variance_
     topology.add(input_layout("input", input->get_layout()));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, true, false));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     network.set_input_data("input", input);
 
@@ -556,7 +556,7 @@ TEST(mvn_gpu_test, dynamic_within_channels_inside_sqrt_bfyx_normalize_variance_f
     topology.add(input_layout("input", in_layout));
     topology.add(mvn("mvn", input_info("input"), true, 1e-10f, true, false));
 
-    ExecutionConfig config;
+    ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
     network network(engine, topology, config);
     network.set_input_data("input", input);
@@ -663,7 +663,7 @@ struct mvn_random_test : ::testing::TestWithParam {
         prim.output_paddings = {output_pad};
         topo.add(prim);
 
-        cldnn::network::ptr net = get_network(eng, topo, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
+        cldnn::network::ptr net = get_network(eng, topo, get_test_default_config(eng), get_test_stream_ptr(), is_caching_test);
 
         net->set_input_data("input", input);
@@ -852,7 +852,7 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam {
         auto prim = mvn("mvn", input_info("input"), params.normalize_variance, 1e-10f, false, params.across_channels);
         prim.output_paddings = {output_pad};
         topo.add(prim);
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"mvn"}));
         config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"mvn", {format::type::bfyx, "mvn_gpu_bfyx_opt"}} }));
 
@@ -869,7 +869,7 @@ struct mvn_random_test_bsv32 : ::testing::TestWithParam {
         auto prim_opt = mvn("mvn_opt", input_info("input_to_target_layout"), params.normalize_variance, 1e-10f, false, params.across_channels);
         prim_opt.output_paddings = {output_pad};
         topo_opt.add(prim_opt);
-        ExecutionConfig config_opt;
+        ExecutionConfig config_opt = get_test_default_config(engine);
         config_opt.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"mvn_opt", "input_to_target_layout"}));
         config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"mvn_opt", {params.input_format, "mvn_gpu_b_fs_yx_fsv16_imad"}} }));
diff --git a/src/plugins/intel_gpu/tests/test_cases/non_max_suppression_test.cpp b/src/plugins/intel_gpu/tests/test_cases/non_max_suppression_test.cpp
index ffc9ace5e39049..245db179d50feb 100644
--- a/src/plugins/intel_gpu/tests/test_cases/non_max_suppression_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/non_max_suppression_test.cpp
@@ -125,7 +125,7 @@ struct non_max_suppression_basic : public testing::Test {
         topo.add(non_max_suppression("nms", input_info("reformat_boxes"), input_info("reformat_scores"), 6, false, true));
         topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::optimize_data(true));
 
         cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
@@ -186,7 +186,7 @@ struct non_max_suppression_basic : public testing::Test {
                                        "num_per_class"));
         topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::optimize_data(true));
 
         cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
@@ -257,7 +257,7 @@ struct non_max_suppression_basic : public testing::Test {
         topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32));
         topo.add(reorder("plane_scores", input_info("selected_scores"), format::bfyx, this->data_type));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::optimize_data(true));
 
         cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
@@ -317,7 +317,7 @@ struct non_max_suppression_basic : public testing::Test {
         second_output_topology.add(input_layout("num_outputs", this->valid_outputs_layout));
         second_output_topology.add(reorder("plane_scores", input_info("selected_scores"), format::bfyx, this->data_type));
         second_output_topology.add(reorder("plane_num", input_info("num_outputs"), format::bfyx, cldnn::data_types::i32));
-        network second_output_net{engine, second_output_topology};
+        network second_output_net{engine, second_output_topology, get_test_default_config(engine)};
         second_output_net.set_input_data("selected_scores", selected_scores_mem);
         second_output_net.set_input_data("num_outputs", valid_outputs_mem);
         auto second_output_result = second_output_net.execute();
@@ -375,7 +375,7 @@ struct non_max_suppression_basic : public testing::Test {
         topo.add(reorder("plane_scores", input_info("nms", 1), format::bfyx, this->data_type));
         topo.add(reorder("plane_outputs", input_info("nms", 2), format::bfyx, cldnn::data_types::i32));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::optimize_data(true));
         config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
 
@@ -437,7 +437,7 @@ struct non_max_suppression_basic : public testing::Test {
         second_output_topology.add(input_layout("num_outputs", valid_outputs_mem->get_layout()));
         second_output_topology.add(reorder("plane_scores", input_info("selected_scores"), format::bfyx, this->data_type));
         second_output_topology.add(reorder("plane_num", input_info("num_outputs"), format::bfyx, cldnn::data_types::i32));
-        network second_output_net{engine, second_output_topology};
+        network second_output_net{engine, second_output_topology, get_test_default_config(engine)};
         second_output_net.set_input_data("selected_scores", selected_scores_mem);
         second_output_net.set_input_data("num_outputs", valid_outputs_mem);
         auto second_output_result = second_output_net.execute();
@@ -485,7 +485,7 @@ struct non_max_suppression_basic : public testing::Test {
                                        "iou_threshold"));
         topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::optimize_data(true));
 
         cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
@@ -542,7 +542,7 @@ struct non_max_suppression_basic : public testing::Test {
                                        "score_threshold"));
         topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::optimize_data(true));
 
         cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
@@ -603,7 +603,7 @@ struct non_max_suppression_basic : public testing::Test {
                                        "soft_nms_sigma"));
         topo.add(reorder("plane_nms", input_info("nms"), format::bfyx, cldnn::data_types::i32));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::optimize_data(true));
 
         cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test);
diff --git a/src/plugins/intel_gpu/tests/test_cases/non_zero_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/non_zero_gpu_test.cpp
index e1ef46d7f9a9ea..1c9122ca4171c3 100644
--- a/src/plugins/intel_gpu/tests/test_cases/non_zero_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/non_zero_gpu_test.cpp
@@ -60,7 +60,7 @@ void test_count_non_zero(layout in_layout, std::vector in_data) {
     topology.add(count_nonzero("count_nonzero", input_info("InputData"))
     );
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("InputData", input_mem);
     auto outputs = network.execute();
     auto output = outputs.at("count_nonzero").get_memory();
@@ -132,7 +132,7 @@ TEST(test_count_non_zero, dynamic_2d_f32_bfyx) {
     topology.add(input_layout("InputData", in_dyn_layout));
     topology.add(count_nonzero("count_nonzero", input_info("InputData")));
 
-    ExecutionConfig config;
+    ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
 
     std::vector input_shapes = {171, 531, 168, 169, 174, 172, 168, 167, 1169, 16, 677};
@@ -180,7 +180,7 @@ void test_gather_non_zero(layout in_layout, std::vector in_data) {
         gather_nonzero("gather_nonzero", input_info("InputData"), input_info("OutputShape"))
     );
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("InputData", input_mem);
 
     auto outputs = network.execute();
@@ -290,7 +290,7 @@ TEST(non_zero_gpu, dynamic) {
     topology.add(count_nonzero("count_nonzero", input_info("InputData")));
     topology.add(gather_nonzero("gather_nonzero", input_info("InputData"), input_info("count_nonzero")));
 
-    ExecutionConfig config;
+    ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::optimize_data(true));
     config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
     network network(engine, topology, config);
@@ -335,7 +335,7 @@ void test_non_zero(layout in_layout, std::vector in_data) {
     topology.add(count_nonzero("count_nonzero", input_info("InputData")));
     topology.add(gather_nonzero("gather_nonzero", input_info("InputData"), input_info("count_nonzero")));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("InputData", input_mem);
 
     auto outputs = network.execute();
diff --git a/src/plugins/intel_gpu/tests/test_cases/normalizel2_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/normalizel2_gpu_test.cpp
index 713ab875b66e8e..b9b6f620a43842 100644
--- a/src/plugins/intel_gpu/tests/test_cases/normalizel2_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/normalizel2_gpu_test.cpp
@@ -83,7 +83,7 @@ struct normalize_basic : public testing::Test {
         topology.add(normalize("normalize2", input_info("reordered_Input0"), "reordered_Input1", this->across_spatial));
         topology.add(reorder("plane_normalize2", input_info("normalize2"), format::bfyx, this->output_data_type));
 
-        cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
+        cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
 
         network->set_input_data("Input0", input);
 
diff --git a/src/plugins/intel_gpu/tests/test_cases/one_hot_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/one_hot_gpu_test.cpp
index eebb0af3aeae81..9d1c4698802313 100644
--- a/src/plugins/intel_gpu/tests/test_cases/one_hot_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/one_hot_gpu_test.cpp
@@ -84,7 +84,7 @@ void generic_one_hot_test_int(cldnn::format test_input_fmt, int input_b, int inp
     topology.add(input_layout("input", input->get_layout()));
     topology.add(one_hot("output", input_info("input"), shape, one_hot_axis, one_hot_limit));
 
-    cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
+    cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
     network->set_input_data("input", input);
     auto outputs = network->execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -183,7 +183,7 @@ TEST(one_hot_gpu_i32, bfzyx_ax4) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -242,7 +242,7 @@ TEST(one_hot_gpu_i64, bfzyx_ax4) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -301,7 +301,7 @@ TEST(one_hot_gpu_i32_to_f32, bfyx_ax4) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -354,7 +354,7 @@ TEST(one_hot_gpu_i64_to_f32, bfyx_ax4) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -405,7 +405,7 @@ TEST(one_hot_gpu_i32, bfzyx_ax0) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -460,7 +460,7 @@ TEST(one_hot_gpu_i64, bfzyx_ax0) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -515,7 +515,7 @@ TEST(one_hot_gpu_i32, bfzyx_ax1) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -570,7 +570,7 @@ TEST(one_hot_gpu_i64, bfzyx_ax1) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -625,7 +625,7 @@ TEST(one_hot_gpu_i32, bfzyx_ax2) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -680,7 +680,7 @@ TEST(one_hot_gpu_i64, bfzyx_ax2) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -735,7 +735,7 @@ TEST(one_hot_gpu_i32, bfzyx_ax3) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
@@ -790,7 +790,7 @@ TEST(one_hot_gpu_i64, bfzyx_ax3) {
 
     set_values(input, input_rnd_vec);
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     ASSERT_EQ(outputs.size(), size_t(1));
diff --git a/src/plugins/intel_gpu/tests/test_cases/permute_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/permute_gpu_test.cpp
index 54439b2816ea1a..0b7b285b467a63 100644
--- a/src/plugins/intel_gpu/tests/test_cases/permute_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/permute_gpu_test.cpp
@@ -58,7 +58,7 @@ TEST(permute_gpu_f32, output_ordering_test)
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), perm));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
     auto outputs = network.execute();
     auto output = outputs.at("permute");
@@ -113,7 +113,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_2_3)
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), { 0, 1, 2, 3 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -172,7 +172,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2)
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), { 0, 1, 3, 2 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -219,7 +219,7 @@ TEST(permute_gpu_f32, basic_yxfb_permute_1_0_2_3)
         input_layout("input", input_mem->get_layout()),
         permute("permute", input_info("input"), { 1, 0, 2, 3 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
    network.set_input_data("input", input_mem);
 
     auto outputs = network.execute();
@@ -281,7 +281,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2_input_padding)
         reorder("reorder", input_info("input"), input->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 })),
         permute("permute", input_info("reorder"), { 0, 1, 3, 2 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -338,7 +338,7 @@ TEST(permute_gpu_f32, basic_yxfb_permute_batch_with_feature)
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), { 1, 0, 2, 3 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -393,7 +393,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_batch_with_feature)
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), { 1, 0, 2, 3 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -453,7 +453,7 @@ void permute_test_with_reorder()
         permute("permute", input_info("reorder"), { 0, 1, 3, 2 }),
         reorder("reorder_out", input_info("permute"), { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -552,7 +552,7 @@ TEST(permute_fuse_reorder_gpu_f32, basic_b_fs_yx_fsv4_permute_1_8_16_1)
         reorder("reorder2", input_info("permute"), format::bfyx, data_types::f32),
         permute("out", input_info("reorder2"), { 0, 3, 1, 2}));
 
-    ExecutionConfig config;
+    ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::optimize_data(false));
     config.set_property(ov::intel_gpu::allow_static_input_reorder(true));
 
@@ -567,7 +567,7 @@ TEST(permute_fuse_reorder_gpu_f32, basic_b_fs_yx_fsv4_permute_1_8_16_1)
         reorder("reorder2", input_info("permute"), format::bfyx, data_types::f32), // to be fused to previous permute
         permute("out", input_info("reorder2"), { 0, 3, 1, 2})); // return to original value
 
-    ExecutionConfig config_fused;
+    ExecutionConfig config_fused = get_test_default_config(engine);
     config_fused.set_property(ov::intel_gpu::optimize_data(true));
     network fused(engine, topology_fused, config_fused);
     fused.set_input_data("input", input);
@@ -602,7 +602,7 @@ TEST(fc_permute_crop_gpu, basic_permute_yxfb)
         permute("permute", input_info("input"), { 1, 0, 2, 3 }) // yxfb {5, 1, 1, 512} --- without permute fix yxfb {1, 5, 512, 1}
     );
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input_mem);
 
     auto outputs = network.execute();
@@ -637,7 +637,7 @@ TEST(fc_permute_crop_gpu, basic_0)
         crop("crop", input_info("permute"), { 1, 1, 1, 512 }, { 4, 0, 0 ,0 }) // without permute fix it will fail "Tensor pitches didn't set correctly"
     );
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input_mem);
 
     auto outputs = network.execute();
@@ -667,7 +667,7 @@ TEST(fc_permute_gpu, basic_permute_bfyx)
         permute("permute", input_info("input"), { 1, 0, 2, 3 })
     );
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input_mem);
 
     auto outputs = network.execute();
@@ -727,7 +727,7 @@ TEST(permute_gpu_f32, permute_bfwzyx)
         permute("permute", input_info("input"), permute_order)
     );
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input_mem);
 
     auto outputs = network.execute();
@@ -819,7 +819,7 @@ TEST(permute_gpu_f32, 6D_reshape_permute_reshape)
         reorder("output_4d", input_info("reshape_6_to_4"), { data_types::f32, format::bfyx, cldnn::tensor(batch(b), feature(f), spatial(x, y)) })
     );
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input_mem);
 
     auto outputs = network.execute();
@@ -870,7 +870,7 @@ TEST(permute_gpu_f32, basic_bfzyx_permute_0_4_1_2_3)
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), { 0, 4, 1, 2, 3 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -937,7 +937,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfyx_0_2_3_1) {
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), { 0, 2, 3, 1 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -993,7 +993,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfyx_0_2_3_1) {
         input_layout("input", input->get_layout()),
         permute("permute", input_info("input"), { 0, 2, 3, 1 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
network.set_input_data("input", input); auto outputs = network.execute(); @@ -1049,7 +1049,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfyx_0_2_3_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1099,7 +1099,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfyx_0_2_3_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1149,7 +1149,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfzyx_0_2_3_4_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 4, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1211,7 +1211,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfzyx_0_2_3_4_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 4, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1265,7 +1265,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfzyx_0_2_3_4_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 4, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1319,7 +1319,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfzyx_0_2_3_4_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 4, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1373,7 +1373,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, normal_bfwzyx_0_2_3_4_5_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 4, 5, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1445,7 +1445,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, f_remainder_bfwzyx_0_2_3_4_5_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 4, 5, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1505,7 +1505,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, x_remainder_bfwzyx_0_2_3_4_5_1) { input_layout("input", input->get_layout()), permute("permute", input_info("input"), { 0, 2, 3, 4, 5, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1565,7 +1565,7 @@ TEST(permute_gpu_f32_tile_8x8_4x4, xf_remainder_bfwzyx_0_2_3_4_5_1) { input_layout("input", 
         permute("permute", input_info("input"), { 0, 2, 3, 4, 5, 1 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
     network.set_input_data("input", input);
 
     auto outputs = network.execute();
@@ -1686,7 +1686,7 @@ void TiledPermuteTest::run_test(const std::vector& si
     );
 
     // run with permute_ref
-    ov::intel_gpu::ExecutionConfig config_ref;
+    ov::intel_gpu::ExecutionConfig config_ref = get_test_default_config(engine);
     ov::intel_gpu::ImplementationDesc permute_ref = { format_fsv, "permute_ref" };
     config_ref.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"output", permute_ref} }));
 
@@ -1697,7 +1697,7 @@ void TiledPermuteTest::run_test(const std::vector& si
     cldnn::mem_lock output_ref_ptr(output_ref, get_test_stream());
 
     // run with optimized kernel, e.g. permute_tile_8x8_4x4_fsv16
-    ExecutionConfig config_tile;
+    ExecutionConfig config_tile = get_test_default_config(engine);
     ov::intel_gpu::ImplementationDesc permute_tile_opt = { format_fsv, permute_opt };
     config_tile.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"output", permute_tile_opt} }));
 
@@ -1872,7 +1872,7 @@ TEST(permute_gpu_f32_dynamic, bfyx_0_2_3_1) {
         input_layout("input", input_layout_dynamic),
         permute("permute", input_info("input"), { 0, 2, 3, 1 }));
 
-    ExecutionConfig config;
+    ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
     network network(engine, topology, config);
     network.set_input_data("input", input);
diff --git a/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp
index 42312c5bfd91ca..e1a348624dd912 100644
--- a/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/pooling_gpu_test.cpp
@@ -210,7 +210,7 @@ TEST(pooling_forward_gpu, basic_max_byxf_f32_wsiz3x3_wstr1x1_i1x3x3x8_nopad) {
     topology topology;
     topology.add(input_layout("input_prim", input_prim->get_layout()));
     topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, { 3, 3 }, { 1, 1 }));
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     set_values(input_prim, { 0.5f, -0.5f, -0.5f, -0.5f, 0.5f, -0.5f, -0.5f, -0.5f,
         1.0f, 0.0f, 0.0f, 0.0f, 0.5f, -0.5f, -0.5f, -0.5f,
         2.0f, 0.0f, 0.0f, 0.0f, 0.5f, -0.5f, -0.5f, -0.5f,
@@ -256,7 +256,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz3x3_wstr1x1_i3x3x1x1_nopad) {
     topology.add(input_layout("input_prim", input_prim->get_layout()));
     topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, { 3, 3 }, { 1, 1 }));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f });
     network.set_input_data("input_prim", input_prim);
@@ -297,12 +297,9 @@ TEST(pooling_forward_gpu, basic_max_pooling_int8) {
         reorder("reorder2", input_info("pool1"), out_layout)
     );
 
-    network network(
-        engine,
-        topology,
-        ExecutionConfig{
-            ov::intel_gpu::custom_outputs(std::vector<std::string>{ "reorder2" })
-        });
+    ExecutionConfig cfg = get_test_default_config(engine);
+    cfg.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{ "reorder2" }));
+    network network(engine, topology, cfg);
 
     network.set_input_data("input", input_memory);
 
@@ -349,12 +346,9 @@ TEST(pooling_forward_gpu, basic_avg_pooling_int8) {
reorder("reorder2", input_info("pool1"), out_layout) ); - network network( - engine, - topology, - ExecutionConfig{ - ov::intel_gpu::custom_outputs(std::vector{ "reorder2" }) - }); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{ "reorder2" })); + network network(engine, topology, cfg); network.set_input_data("input", input_memory); @@ -390,7 +384,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr1x1_i3x3x1x1_nopad) { topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, { 2, 2 }, { 1, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f }); network.set_input_data("input_prim", input_prim); @@ -434,7 +428,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr2x2_i4x4x1x1_nopad) { topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, { 2, 2 }, { 2, 2 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { -0.25f, 1.00f, 0.50f, 0.25f, 2.00f, 1.50f, -0.50f, -0.75f, 0.00f, -1.00f, 0.50f, 0.25f, 0.50f, -2.00f, -1.50f, -2.50f }); network.set_input_data("input_prim", input_prim); @@ -488,7 +482,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr1x1_i3x3x2x2_nopad) { topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, { 2, 2 }, { 1, 1 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { -0.5f, 0.5f, -1.5f, 0.0f, 0.5f, 0.0f, -0.5f, 0.5f, 0.0f, -0.5f, 0.0f, -0.5f, 1.0f, -2.0f, 0.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -2.0f, 1.0f, 1.5f, 0.0f, -1.0f, -0.5f, -2.0f, 0.5f, -0.5f, -1.0f, 1.0f, -0.5f, -0.5f, 1.5f, -0.5f, 0.0f }); network.set_input_data("input_prim", input_prim); @@ -538,7 +532,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_f32_wsiz2x2_wstr2x2_i2x2x1x1_zeropad) topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, {2, 2}, {2, 2}, {1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.50f, -0.50f, -1.00f, 0.50f }); network.set_input_data("input_prim", input_prim); @@ -583,7 +577,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_f32_wsiz2x2_wstr2x2_i3x3x1x1_zeropad) topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, {2, 2}, {2, 2}, {1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.50f, -1.00f, -0.50f, @@ -632,7 +626,7 @@ TEST(pooling_forward_gpu, basic_avg_yxfb_f32_wsiz2x2_wstr1x1_i3x3x1x1_nopad) { topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::average, {2, 2}, {1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 4.0f, -1.0f, 3.5f }); 
network.set_input_data("input_prim", input_prim); @@ -677,7 +671,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_f32_wsiz2x2_wstr2x2_i2x2x1x1_zeropad) topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::average, {2, 2}, {2, 2}, {1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f }); network.set_input_data("input_prim", input_prim); @@ -722,7 +716,7 @@ TEST(pooling_forward_gpu, offsets_avg_bfyx_f32_wsiz3x3_wstr3x3_i1x1x3x3_zeropad) topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::average, { 3, 3 }, { 3, 3 }, {1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); std::vector input_vec = { 1.5f, -0.5f, -1.0f, 0.5f, 0.1f, 0.2f, 0.9f, 1.1f, 2.2f }; set_values(input_prim, input_vec); @@ -770,7 +764,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_f32_wsiz2x2_wstr2x2_i3x3x1x1_zeropad) topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::average, {2, 2}, {2, 2}, {1, 1})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.5f, -0.5f, 2.5f, -1.0f, 0.5f, 3.0f, 0.5f, 0.0f, -8.0f }); network.set_input_data("input_prim", input_prim); @@ -825,7 +819,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_out topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, {1, 1}, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{{0, 0, 2, 2}, 0})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f }); network.set_input_data("input_prim", input_prim); @@ -886,7 +880,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_out topology.add(input_layout("input_prim", input_prim->get_layout())); topology.add(pooling("pool_prim", input_info("input_prim"), pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{{0, 0, 1, 1}, 0})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.50f, -1.00f, -0.50f, @@ -957,7 +951,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inp topology.add(reorder("reorder", input_info("input_prim"), input_prim->get_layout().with_padding(padding{ {0,0,1,2}, 0 }))); topology.add(pooling("pool_prim", input_info("reorder"), pooling_mode::average, {2, 2}, {2, 2}, {1, 1}, {1, 1}, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{{0, 0, 2, 2}, 0})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.5f, -0.5f, -1.0f, 0.5f }); network.set_input_data("input_prim", input_prim); @@ -1020,7 +1014,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inp topology.add(reorder("reorder", input_info("input_prim"), input_prim->get_layout().with_padding(padding{ { 0, 0, 1, 2 }, 0 }))); 
topology.add(pooling("pool_prim", input_info("reorder"), pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{{0, 0, 1, 1}, 0})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.50f, -1.00f, -0.50f, @@ -1091,7 +1085,7 @@ TEST(pooling_forward_gpu, avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inpad2x1_ou topology.add(reorder("reorder", input_info("input_prim"), input_prim->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); topology.add(pooling("pool_prim", input_info("reorder"), pooling_mode::average, { 2, 2 }, { 2, 2 }, { 0, 0 }, { 0, 0 }, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{ { 0, 0, 2, 2 }, 0 })); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.f, 2.f, 3.f, 4.f, 5.f, 1.5f, -0.5f, 6.f, @@ -1159,7 +1153,7 @@ TEST(pooling_forward_gpu, max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_ou topology.add(reorder("reorder", input_info("input_prim"), input_prim->get_layout().with_padding(padding{ { 0, 0, 2, 1 }, 0 }))); topology.add(pooling("pool_prim", input_info("reorder"), pooling_mode::max, { 2, 2}, { 2, 2}, {1, 1}, {1, 1}, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{{0, 0, 1, 1}, 0})); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { 1.f, 2.f, 3.f, 4.f, 5.f, @@ -1426,7 +1420,7 @@ TEST(pooling_forward_gpu, b_fs_yx_fsv4) pool); // Network processing - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); //network_exe(network, vGoldOutput, "pool_GOLD"); auto outputs = network.execute(); @@ -1475,7 +1469,7 @@ TEST(pooling_forward_gpu, b_fs_yx_fsv4) format::bfyx, { in_B, in_F, in_X, in_Y }))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); //network_exe(network, vTestOutput, "reorder_UnSwizzelled"); auto outputs = network.execute(); @@ -1529,7 +1523,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_3x3_input_2x2_pool_1x1_stride_2x2_ou topology.add(pooling("avg_pooling", input_info("reorder_input"), pooling_mode::average, { 2, 2 }, { 1, 1 })); topology.add(reorder("reorder_after_pooling", input_info("avg_pooling"), layout(data_types::f16, format::bfyx, { 1, 1, 2, 2 }))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { FLOAT16(-0.5f), FLOAT16(1.0f), FLOAT16(0.5f), FLOAT16(2.0f), FLOAT16(1.5f), FLOAT16(-0.5f), FLOAT16(4.0f), FLOAT16(-1.0f), FLOAT16(3.5f) }); network.set_input_data("input", input_prim); @@ -1581,7 +1575,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_3x3_input_2x2_pool_2x2_stride) topology.add(pooling("avg_pooling", input_info("reorder_input"), pooling_mode::average, { 2, 2 }, { 2, 2 })); topology.add(reorder("reorder_after_pooling", input_info("avg_pooling"), layout(data_types::f16, format::bfyx, { 1, 1, 3, 3 }))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); set_values(input_prim, { FLOAT16(-0.5f), FLOAT16(1.0f), FLOAT16(0.5f), FLOAT16(2.0f), FLOAT16(1.5f), FLOAT16(-0.5f), FLOAT16(4.0f), FLOAT16(-1.0f), FLOAT16(3.5f) }); network.set_input_data("input", input_prim); @@ -1647,7 +1641,7 @@ 
@@ -1647,7 +1641,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_2x2x3x3_input_2x2_pool_2x2_stride)
     topology.add(pooling("avg_pooling", input_info("reorder_input"), pooling_mode::average, { 2, 2 }, { 2, 2 }));
     topology.add(reorder("reorder_after_pooling", input_info("avg_pooling"), layout(data_types::f16, format::bfyx, { batch_count, features_count, out_y, out_x })));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     set_values(input_prim, {
         FLOAT16(-0.5f), FLOAT16(1.0f), FLOAT16(0.5f), FLOAT16(2.0f), FLOAT16(1.5f), FLOAT16(-0.5f), FLOAT16(4.0f), FLOAT16(-1.0f), FLOAT16(3.5f), //B0F0
         FLOAT16(-0.5f), FLOAT16(1.0f), FLOAT16(0.5f), FLOAT16(2.0f), FLOAT16(1.5f), FLOAT16(-0.5f), FLOAT16(4.0f), FLOAT16(-1.0f), FLOAT16(3.5f), //B0F1
         FLOAT16(-0.5f), FLOAT16(1.0f), FLOAT16(0.5f), FLOAT16(2.0f), FLOAT16(1.5f), FLOAT16(-0.5f), FLOAT16(4.0f), FLOAT16(-1.0f), FLOAT16(3.5f), //B1F0
@@ -1718,7 +1712,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x3x3_input_2x2_pool_2x2_stride_2x
     topology.add(pooling("pool_prim", input_info("reorder_input"), pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{{0, 0, 1, 1}, 0}));
     topology.add(reorder("reorder_pooling", input_info("pool_prim"), layout(data_types::f16, format::bfyx, { 1,1,4,4 }, padding{ { 0, 0, 1, 1 }, 0 })));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     set_values(input_prim, {
         FLOAT16(1.50f), FLOAT16(-1.00f), FLOAT16(-0.50f),
@@ -1791,7 +1785,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_max_1x1x5x5_input_2x2_pool_2x2_stride_2x
     topology.add(pooling("pool_prim", input_info("reorder_input"), pooling_mode::max, {2, 2}, {2, 2}, {1, 1}, {1, 1}, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{{0, 0, 1, 1}, 0}));
     topology.add(reorder("reorder_pooling", input_info("pool_prim"), layout(data_types::f16, format::bfyx, input_tensor, padding{ { 0, 0, 1, 1 }, 0 })));
 
-    network network(engine, topology);
+    network network(engine, topology, get_test_default_config(engine));
 
     set_values(input_prim, {
         FLOAT16(1.f), FLOAT16(2.f), FLOAT16(3.f), FLOAT16(4.f), FLOAT16(5.f),
@@ -1867,7 +1861,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3
     golden_topology.add(reorder("reorder_input", input_info("input"), input_prim->get_layout().with_padding(padding{ {0,0,x_in_pad,y_in_pad},0 })));
     golden_topology.add(pooling("golden_pooling", input_info("reorder_input"), pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, { 0, 0 }, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));
 
-    network golden_network(engine, golden_topology);
+    network golden_network(engine, golden_topology, get_test_default_config(engine));
     golden_network.set_input_data("input", input_prim);
 
     auto outputs = golden_network.execute();
@@ -1885,7 +1879,7 @@ TEST(pooling_forward_gpu, fs_b_yx_fsv32_avg_65x5x6x7_input_3x3_pool_4x4_stride_3
     golden_topology.add(pooling("fsv32_pooling", input_info("reorder_input"), pooling_mode::average, { pool_size, pool_size }, { stride_size, stride_size }, { 0, 0 }, { 0, 0 }, ov::op::PadType::EXPLICIT, ov::op::RoundingType::FLOOR, padding{ { 0, 0, x_out_pad, y_out_pad }, 0 }));
     golden_topology.add(reorder("reorder_pooling", input_info("fsv32_pooling"), layout(data_types::f16, format::bfyx, input_tensor, padding{ { 0,0,x_out_pad,y_out_pad },0 })));
 
-    network fsv32_network(engine, golden_topology);
+    network fsv32_network(engine, golden_topology, get_test_default_config(engine));
     fsv32_network.set_input_data("input", input_prim);
 
     auto outputs = fsv32_network.execute();
@@ -1936,7 +1930,8 @@ class pooling_test_base {
     virtual void run_expect(const VVVVVF& expected, bool is_caching_test) {
         auto& eng = get_test_engine();
         auto topo = build_topology(eng);
-        ExecutionConfig config(ov::intel_gpu::optimize_data(true));
+        ExecutionConfig config = get_test_default_config(eng);
+        config.set_property(ov::intel_gpu::optimize_data(true));
 
         cldnn::network::ptr net = get_network(eng, topo, config, get_test_stream_ptr(), is_caching_test);
 
@@ -2314,7 +2309,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride)
         golden_topology.add(pooling("golden_pooling", input_info("reorder_input"), pooling_mode::max, {pool_size, pool_size},
                                     {stride_size, stride_size},{y_in_pad, x_in_pad}));
 
-        network golden_network(engine, golden_topology);
+        network golden_network(engine, golden_topology, get_test_default_config(engine));
         golden_network.set_input_data("input", input_prim);
 
         auto outputs = golden_network.execute();
@@ -2336,7 +2331,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x8x8_input_2x2_pool_2x2_stride)
         tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor)));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"bsv16_fsv16_pooling", "reorder_pooling"}));
         network bsv16_fsv16_network(engine, tested_topology, config);
         bsv16_fsv16_network.set_input_data("input", input_prim);
@@ -2399,7 +2394,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x2x2_input_4x4_pool_1x1_stride_1x
                                     pooling("golden_pooling", input_info("reorder_input"), pooling_mode::max, {pool_size, pool_size},
                                             {stride_size, stride_size}, {y_in_pad, x_in_pad}));
 
-        network golden_network(engine, golden_topology);
+        network golden_network(engine, golden_topology, get_test_default_config(engine));
         golden_network.set_input_data("input", input_prim);
 
         auto outputs = golden_network.execute();
@@ -2420,7 +2415,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x2x2_input_4x4_pool_1x1_stride_1x
                                     {stride_size, stride_size}, {y_in_pad, x_in_pad}));
         tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor)));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"bsv16_fsv16_pooling", "reorder_pooling"}));
         network bsv16_fsv16_network(engine, tested_topology, config);
         bsv16_fsv16_network.set_input_data("input", input_prim);
@@ -2481,7 +2476,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x3_stride)
         golden_topology.add(pooling("golden_pooling", input_info("reorder_input"), pooling_mode::average, {pool_size, pool_size},
                                     {stride_size, stride_size}, {y_in_pad, x_in_pad}));
 
-        network golden_network(engine, golden_topology);
+        network golden_network(engine, golden_topology, get_test_default_config(engine));
         golden_network.set_input_data("input", input_prim);
 
        auto outputs = golden_network.execute();
@@ -2504,7 +2499,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x3_stride)
         tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor)));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"bsv16_fsv16_pooling", "reorder_pooling"}));
         network bsv16_fsv16_network(engine, tested_topology, config);
         bsv16_fsv16_network.set_input_data("input", input_prim);
@@ -2566,7 +2561,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x1_stride)
         golden_topology.add(pooling("golden_pooling", input_info("reorder_input"), pooling_mode::average, {pool_size, pool_size},
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
 
-        network golden_network(engine, golden_topology);
+        network golden_network(engine, golden_topology, get_test_default_config(engine));
         golden_network.set_input_data("input", input_prim);
 
         auto outputs = golden_network.execute();
@@ -2587,7 +2582,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_avg_16x16x20x20_input_5x5_pool_3x1_stride)
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
         tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor)));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"bsv16_fsv16_pooling", "reorder_pooling"}));
         network bsv16_fsv16_network(engine, tested_topology, config);
         bsv16_fsv16_network.set_input_data("input", input_prim);
@@ -2649,7 +2644,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x20x20_input_5x5_pool_3x1_stride)
         golden_topology.add(pooling("golden_pooling", input_info("reorder_input"), pooling_mode::max, {pool_size, pool_size},
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
 
-        network golden_network(engine, golden_topology);
+        network golden_network(engine, golden_topology, get_test_default_config(engine));
         golden_network.set_input_data("input", input_prim);
 
         auto outputs = golden_network.execute();
@@ -2672,7 +2667,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_16x16x20x20_input_5x5_pool_3x1_stride)
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
         tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor)));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"bsv16_fsv16_pooling", "reorder_pooling"}));
         network bsv16_fsv16_network(engine, tested_topology, config);
         bsv16_fsv16_network.set_input_data("input", input_prim);
@@ -2734,7 +2729,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x32x20x20_input_5x5_pool_3x1_stride)
         golden_topology.add(pooling("golden_pooling", input_info("reorder_input"), pooling_mode::max, {pool_size, pool_size},
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
 
-        network golden_network(engine, golden_topology);
+        network golden_network(engine, golden_topology, get_test_default_config(engine));
         golden_network.set_input_data("input", input_prim);
 
         auto outputs = golden_network.execute();
@@ -2757,7 +2752,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x32x20x20_input_5x5_pool_3x1_stride)
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
         tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor)));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"bsv16_fsv16_pooling", "reorder_pooling"}));
         network bsv16_fsv16_network(engine, tested_topology, config);
         bsv16_fsv16_network.set_input_data("input", input_prim);
@@ -2823,7 +2818,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x16x20x20_input_5x5_pool_3x1_stride)
         golden_topology.add(pooling("golden_pooling", input_info("reorder_input"), pooling_mode::max, {pool_size, pool_size},
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
 
-        network golden_network(engine, golden_topology);
+        network golden_network(engine, golden_topology, get_test_default_config(engine));
         golden_network.set_input_data("input", input_prim);
 
         auto outputs = golden_network.execute();
@@ -2846,7 +2841,7 @@ TEST(pooling_forward_gpu, bsv16_fsv16_max_32x16x20x20_input_5x5_pool_3x1_stride)
                                     {stride_size_y, stride_size_x}, {y_in_pad, x_in_pad}));
         tested_topology.add(reorder("reorder_pooling", input_info("bsv16_fsv16_pooling"), layout(data_types::f32, format::bfyx, input_tensor)));
 
-        ExecutionConfig config;
+        ExecutionConfig config = get_test_default_config(engine);
         config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{"bsv16_fsv16_pooling", "reorder_pooling"}));
         network bsv16_fsv16_network(engine, tested_topology, config);
         bsv16_fsv16_network.set_input_data("input", input_prim);
@@ -3231,10 +3226,9 @@ TEST(pooling_forward_gpu_onednn, basic_max_pooling_int8) {
     );
 
     ov::intel_gpu::ImplementationDesc impl = {format::bfyx, std::string(""), impl_types::onednn};
-    ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order),
-                        ov::intel_gpu::custom_outputs(std::vector<std::string>{ "reorder2" }),
-                        ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"pool1", impl}}),
-    };
+    ExecutionConfig cfg = get_test_default_config(engine);
+    cfg.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{ "reorder2" }));
+    cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"pool1", impl}}));
 
     network network(
         engine,
diff --git a/src/plugins/intel_gpu/tests/test_cases/prior_box_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/prior_box_gpu_test.cpp
index 6fb279797239a4..3ce2af6391f06a 100644
--- a/src/plugins/intel_gpu/tests/test_cases/prior_box_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/prior_box_gpu_test.cpp
@@ -90,7 +90,7 @@ class PriorBoxGPUTest : public ::testing::TestWithParam
void test_copy_dependecies_from_nodes(bool is_caching_test) {
     auto& engine = get_test_engine();
 
-    ExecutionConfig config;
+    ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::optimize_data(true));
 
     auto input = engine.allocate_memory({ data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
diff --git a/src/plugins/intel_gpu/tests/test_cases/pyramid_roi_align_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/pyramid_roi_align_gpu_test.cpp
index 484bd9caf64139..51f047aaa70f64 100644
--- a/src/plugins/intel_gpu/tests/test_cases/pyramid_roi_align_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/pyramid_roi_align_gpu_test.cpp
@@ -100,7 +100,7 @@ struct pyramid_roi_align_typed_test : testing::Test {
         { P2_scale, P3_scale, P4_scale, P5_scale },
         starting_level));
 
-    cldnn::network::ptr net = get_network(engine, topo, ExecutionConfig(), get_test_stream_ptr(), is_caching_test);
+    cldnn::network::ptr net = get_network(engine, topo, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
 
     net->set_input_data("rois", rois_mem);
 
diff --git a/src/plugins/intel_gpu/tests/test_cases/quantize_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/quantize_gpu_test.cpp
index fdea2522d96bb7..b3e5a2d3a1a85f 100644
--- a/src/plugins/intel_gpu/tests/test_cases/quantize_gpu_test.cpp
a/src/plugins/intel_gpu/tests/test_cases/quantize_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/quantize_gpu_test.cpp @@ -84,7 +84,7 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1) { quantize("quantize", input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 2, data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -148,7 +148,7 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8) { quantize("quantize", input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 2, data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -212,7 +212,7 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_1_ch8_binary_pack) reorder("reorder", input_info("quantize"), layout{data_types::f32, format::bfyx, tensor{1,8,2,2}}) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -292,7 +292,7 @@ TEST(quantize_gpu, quantize_levels_2_output_broadcast_inputs_2) { quantize("quantize", input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 2, data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -381,7 +381,7 @@ TEST(quantize_gpu, quantize_levels_3) { quantize("quantize", input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 3, data_types::f32) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -472,7 +472,7 @@ TEST(quantize_gpu, quantize_levels_256_2d_unsigned) { quantize("quantize", input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 256, data_types::u8) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -564,7 +564,7 @@ TEST(quantize_gpu, quantize_levels_256_3d_unsigned) { reorder("out", input_info("quantize"), format::bfzyx, data_types::u8) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -658,7 +658,7 @@ TEST(quantize_gpu, dynamic) { quantize("quantize", input_info("input"), input_info("input_low"), input_info("input_high"), input_info("output_low"), input_info("output_high"), 2, data_types::f32) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -837,7 +837,7 @@ struct quantize_random_test : 
testing::TestWithParam{"quantize"})); cldnn::network::ptr net = get_network(engine, topo, config, get_test_stream_ptr(), is_caching_test); @@ -877,7 +877,7 @@ struct quantize_random_test : testing::TestWithParamget_layout())); topology.add(input_layout("min_val", min_val->get_layout())); topology.add(input_layout("max_val", max_val->get_layout())); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr net = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); diff --git a/src/plugins/intel_gpu/tests/test_cases/range_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/range_gpu_test.cpp index 8a405850392e4c..96edafa0e6dc75 100644 --- a/src/plugins/intel_gpu/tests/test_cases/range_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/range_gpu_test.cpp @@ -44,9 +44,11 @@ struct RangeArgs { step.addTo(topology); topology.add(range { "range", { input_info(start.name), input_info(stop.name), input_info(step.name) }, { dt, format::bfyx, tensor{batch(outLen)} } }); - ExecutionConfig config(ov::intel_gpu::allow_new_shape_infer(use_new_shape_infer)); + auto& engine = get_test_engine(); + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(use_new_shape_infer)); - network network { tests::get_test_engine(), topology, config }; + network network { engine, topology, config }; start.setData(network); stop.setData(network); @@ -207,7 +209,7 @@ TEST(range_gpu_test, range_with_select) { set_values(input0, {start_val}); set_values(input2, {step_val}); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network { tests::get_test_engine(), topology, config }; @@ -243,7 +245,7 @@ TEST(range_gpu_test, constant_folding) { topology.add(data("input2", input2)); topology.add(range{ "range", { input_info("input0"), input_info("input1"), input_info("input2") }, data_types::i32}); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -281,7 +283,7 @@ TEST(range_gpu_test, dynamic_all) { topology.add(input_layout("input2", dynamic_input_layout)); topology.add(range{ "range", { input_info("input0"), input_info("input1"), input_info("input2") }, data_types::i32}); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -327,7 +329,7 @@ TEST(range_gpu_test, dynamic_stop) { topology.add(data("input2", input2)); topology.add(range{ "range", { input_info("input0"), input_info("input1"), input_info("input2") }, data_types::i32}); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/reduce_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reduce_gpu_test.cpp index a40f8885e35f76..bf7e6199bb73af 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reduce_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reduce_gpu_test.cpp @@ -526,7 +526,7 @@ class ReduceTestBase : public ::testing::TestWithParamget_layout())); topology.add(red); - 
ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc reduce_impl = {input_format, kernel_name}; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"reduce", reduce_impl}})); @@ -780,7 +780,7 @@ void test_common_bfyx(bool is_caching_test) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {0}, 0)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -814,7 +814,7 @@ TEST(reduce_gpu, common_bfyx_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {3, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -844,7 +844,7 @@ TEST(reduce_gpu, regr_bfyx_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, { 0, 3 }, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -874,7 +874,7 @@ TEST(reduce_gpu, common_bfzyx) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {0}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -904,7 +904,7 @@ TEST(reduce_gpu, common_bfzyx_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {0}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -934,7 +934,7 @@ TEST(reduce_gpu, common_bfwzyx) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {2, 3, 4, 5}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -964,7 +964,7 @@ TEST(reduce_gpu, common_bfwzyx_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {1, 2, 3}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -995,7 +995,7 @@ TEST(reduce_gpu, common_bfwzyx_max_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::max, {0, 1}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1025,7 +1025,7 @@ TEST(reduce_gpu, common_bfwzyx_min) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::min, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, 
get_test_default_config(engine)); network.set_input_data("input", input); @@ -1055,7 +1055,7 @@ TEST(reduce_gpu, common_bfwzyx_min_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::min, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1085,7 +1085,7 @@ TEST(reduce_gpu, common_bfwzyx_mean) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::mean, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1115,7 +1115,7 @@ TEST(reduce_gpu, common_bfwzyx_mean_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::mean, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1145,7 +1145,7 @@ TEST(reduce_gpu, common_bfwzyx_prod) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::prod, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1175,7 +1175,7 @@ TEST(reduce_gpu, common_bfwzyx_prod_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::prod, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1206,7 +1206,7 @@ TEST(reduce_gpu, common_bfwzyx_sum_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum, {0, 1}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1236,7 +1236,7 @@ TEST(reduce_gpu, common_bfwzyx_logical_and) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::logical_and, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1266,7 +1266,7 @@ TEST(reduce_gpu, common_bfwzyx_logical_and_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::logical_and, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1296,7 +1296,7 @@ TEST(reduce_gpu, common_bfwzyx_logical_or) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::logical_or, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1326,7 +1326,7 @@ TEST(reduce_gpu, common_bfwzyx_logical_or_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::logical_or, {1, 2}, 1)); - network network(engine, 
topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1356,7 +1356,7 @@ TEST(reduce_gpu, common_bfwzyx_sum_square) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum_square, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1386,7 +1386,7 @@ TEST(reduce_gpu, common_bfwzyx_sum_square_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::sum_square, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1416,7 +1416,7 @@ TEST(reduce_gpu, common_bfwzyx_l1) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::l1, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1446,7 +1446,7 @@ TEST(reduce_gpu, common_bfwzyx_l1_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::l1, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1476,7 +1476,7 @@ TEST(reduce_gpu, common_bfwzyx_l2) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::l2, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1506,7 +1506,7 @@ TEST(reduce_gpu, common_bfwzyx_l2_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::l2, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1536,7 +1536,7 @@ TEST(reduce_gpu, common_bfwzyx_log_sum) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::log_sum, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1566,7 +1566,7 @@ TEST(reduce_gpu, common_bfwzyx_log_sum_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::log_sum, {1, 2}, 1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1596,7 +1596,7 @@ TEST(reduce_gpu, common_bfwzyx_log_sum_exp) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::log_sum_exp, {1, 2}, 0)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1626,7 +1626,7 @@ TEST(reduce_gpu, common_bfwzyx_log_sum_exp_keepdims) { topology.add(input_layout("input", input->get_layout())); topology.add(reduce("reduce", input_info("input"), reduce_mode::log_sum_exp, {1, 2}, 
1)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -1658,7 +1658,7 @@ TEST(reduce_gpu, dynamic) { topology.add(input_layout("input", in_dyn_layout)); topology.add(reduce("reduce", input_info("input"), reduce_mode::prod, {1, 2}, 1)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -1715,7 +1715,7 @@ TEST(reduce_gpu, b_fs_yx_fsv16_min_dynamic) { topology.add(reorder("reorder", input_info("input"), used_layout)); topology.add(reduce("reduce", input_info("reorder"), reduce_mode::min, {1}, 0)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1770,7 +1770,7 @@ TEST(reduce_gpu, b_fs_yx_fsv16_max_dynamic) { topology.add(reorder("reorder", input_info("input"), used_layout)); topology.add(reduce("reduce", input_info("reorder"), reduce_mode::max, {1}, 0)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1891,7 +1891,7 @@ class ReduceXYWithBigTensorTestBase : public ::testing::TestWithParamget_layout())); topology.add(red); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc reduce_impl = {input_format, kernel_name}; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"reduce", reduce_impl}})); @@ -2045,7 +2045,7 @@ class ReduceOnednnTestBase : public ::testing::TestWithParamget_layout())); topology.add(red); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); ov::intel_gpu::ImplementationDesc reduce_impl = {input_format, kernel_name, impl_types::onednn}; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"reduce", reduce_impl}})); diff --git a/src/plugins/intel_gpu/tests/test_cases/region_yolo_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/region_yolo_gpu_test.cpp index 49144fffdba540..e1e0c7ff6b86a6 100644 --- a/src/plugins/intel_gpu/tests/test_cases/region_yolo_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/region_yolo_gpu_test.cpp @@ -180,7 +180,7 @@ void runRegionTest(region_yolo_test_params& params, bool is_caching_test = false params.regionNum, static_cast(params.mask.size()), params.softMax)); topology.add(reorder("reorder_post", input_info("region_yolo"), format::bfyx, params.dataType)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputData", inputPrim); diff --git a/src/plugins/intel_gpu/tests/test_cases/removing_output_node_test.cpp b/src/plugins/intel_gpu/tests/test_cases/removing_output_node_test.cpp index 254c076e54e25f..0cdb5526aa4376 100644 --- a/src/plugins/intel_gpu/tests/test_cases/removing_output_node_test.cpp +++ 
b/src/plugins/intel_gpu/tests/test_cases/removing_output_node_test.cpp @@ -63,7 +63,7 @@ void test_multiple_outputs(bool is_caching_test) { std::vector<float> out_vec = { 0.0f, 3.0f, 1.0f, 4.0f, 2.0f, 5.0f }; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{ "shuffle_channels", "reshape", "strided_slice" })); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -131,7 +131,7 @@ void test_output_node_optimization(bool is_caching_test) { topology.add(convolution("conv", input_info("input"), { "weights" }, { 2, 1 })); topology.add(activation("relu", input_info("conv"), activation_func::relu)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); // check that the output node keeps the same name after the output node is deleted due to the ReLU optimization diff --git a/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp index 5619d415928ef4..09438ac7c17145 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reorder_gpu_test.cpp @@ -44,6 +44,14 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, int32_t b_in, int32_t f_in, int32_t x_in, int32_t y_in, int32_t z_in, int32_t w_in, bool is_caching_test) { auto& engine = get_test_engine(); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + if (engine.get_device_info().supports_immad) { + // oneDNN currently does not support out_of_order queues, so skip this test + return; + } + + auto stream = std::shared_ptr(engine.create_stream(cfg)); tensor ts; if (input_format.dimension() == 4) { @@ -60,7 +68,7 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, layout output_layout(output_data_type, output_format, ts); if (input_data_type == data_types::i8) { - mem_lock input_ptr{input, get_test_stream()}; + mem_lock input_ptr{input, *stream}; unsigned char i = 1; for (auto it = input_ptr.begin(); it != input_ptr.end(); ++it) { @@ -70,7 +78,7 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, } } } else { - mem_lock input_ptr{input, get_test_stream()}; + mem_lock input_ptr{input, *stream}; float i = 1.f; for (auto it = input_ptr.begin(); it != input_ptr.end(); ++it) { @@ -84,11 +92,11 @@ static void compare_bfyx2blocked_with_ref(const std::string& kernel_name, reorder("reorder", input_info("input"), output_layout)); // run on the reference (reorder_data) kernel - ov::intel_gpu::ExecutionConfig config_ref; + ov::intel_gpu::ExecutionConfig config_ref = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc reorder_ref = { output_format, "reorder_data" }; config_ref.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", reorder_ref} })); - cldnn::network::ptr network_ref = get_network(engine, topology, config_ref, get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network_ref = get_network(engine, topology, config_ref, stream, is_caching_test); network_ref->set_input_data("input", input); @@ -97,11 +105,11 @@ static void
compare_bfyx2blocked_with_ref(const std::string& kernel_name, e1->wait(); // run on optimized kernel - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); ov::intel_gpu::ImplementationDesc reorder_optimized = { output_format, kernel_name }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"reorder", reorder_optimized} })); - cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, config, stream, is_caching_test); network->set_input_data("input", input); @@ -268,7 +276,7 @@ TEST(reorder_gpu_optimization, bfyx_to_fsv16_without_f_remainder) { input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -355,7 +363,7 @@ TEST(reorder_gpu_f32, basic) { input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -447,7 +455,7 @@ TEST(reorder_gpu_f32, basic_subtract) { input_layout("subtract", subtract->get_layout()), reorder("reorder", input_info("input"), output_layout, "subtract")); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("subtract", subtract); @@ -529,7 +537,7 @@ TEST(reorder_gpu_f32, basic_subtract_value) { topology topology; topology.add(input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout, subtract_val)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -627,7 +635,7 @@ TEST(reorder_gpu_f16, basic_subtract_f32_output_f32) { topology.add(data("subtract", subtract)); topology.add(reorder("reorder", input_info("input"), output_layout, "subtract")); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -715,7 +723,7 @@ TEST(reorder_gpu_f16, basic_subtract_value) { topology.add(input_layout("input", input->get_layout())); topology.add(reorder("reorder", input_info("input"), output_layout, subtract_val)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -788,12 +796,9 @@ TEST(reorder_gpu, basic_convert_f16_f32_f16) { topology.add(reorder("reorder_f16_f32", input_info("input"), interm_layout)); topology.add(reorder("reorder_f32_f16", input_info("reorder_f16_f32"), output_layout)); - network network( - engine, - topology, - ExecutionConfig{ - ov::intel_gpu::custom_outputs(std::vector{"reorder_f16_f32", "reorder_f32_f16"}) - }); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{"reorder_f16_f32", "reorder_f32_f16"})); + network network(engine, topology, cfg); network.set_input_data("input", input); @@ -859,12 +864,9 
@@ TEST(reorder_gpu, basic_convert_int8) { reorder("reorder2", input_info("reorder_input"), in_layout) ); - network network( - engine, - topology, - ExecutionConfig{ - ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "reorder2"}) - }); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "reorder2"})); + network network(engine, topology, cfg); network.set_input_data("input", input_memory); @@ -906,12 +908,9 @@ TEST(reorder_gpu, basic_convert_uint8) { reorder("reorder2", input_info("reorder_input"), in_layout) ); - network network( - engine, - topology, - ExecutionConfig{ - ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "reorder2" }) - }); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "reorder2" })); + network network(engine, topology, cfg); network.set_input_data("input", input_memory); @@ -988,12 +987,9 @@ TEST(reorder_gpu, basic_convert_uint8rgbabyxf_to_fp32_bfyx) { ) ); - network network( - engine, - topology, - ExecutionConfig{ - ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "crop" }) - }); + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "crop" })); + network network(engine, topology, cfg); network.set_input_data("input", input_memory); @@ -1091,7 +1087,7 @@ TEST(reorder_gpu_f32, basic_yxfb_to_bfyx_input_padding) reorder("reorder", input_info("input"), input->get_layout().format, input->get_layout().data_type, "", reorder_mean_mode::subtract, padding{ { 0, 0, 1, 2 }, 0 }), reorder("reorder2", input_info("reorder"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1170,7 +1166,7 @@ TEST(reorder_gpu_f32, basic_bfyx_to_yxfb_input_padding) reorder("reorder", input_info("input"), input->get_layout().format, input->get_layout().data_type, "", reorder_mean_mode::subtract, padding{ { 0, 0, 2, 1 }, 0 }), reorder("reorder2", input_info("reorder"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1229,7 +1225,7 @@ TEST(reorder_gpu_f32, basic_bfyx_to_bfzyx) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), format::bfzyx, data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1291,7 +1287,7 @@ TEST(reorder_gpu_f32, dynamic_bfyx_to_bfzyx) { input_layout("input", in_layout), reorder("reorder", input_info("input"), format::bfzyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -1360,7 +1356,7 @@ TEST(reorder_gpu_f32, basic_yxfb_to_bfzyx) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), format::bfzyx, data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto 
outputs = network.execute(); @@ -1436,7 +1432,7 @@ TEST(reorder_gpu_f32, basic_bfzyx_to_bfyx) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), format::bfyx, data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1496,7 +1492,7 @@ TEST(reorder_gpu_opt, basic_remove_redundant) reorder("r2", input_info("r1"), format::yxfb, data_types::f32) }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, tpl, config); @@ -1525,7 +1521,7 @@ TEST(reorder_gpu_opt, remove_redundant_activation_fuse) eltwise("output", { input_info("relu"), input_info("scale_data") }, eltwise_mode::prod) }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, tpl, config); @@ -1549,7 +1545,7 @@ TEST(reorder_gpu_opt, basic_remove_redundant_output_due_to_implicit_reorders) reorder("r1", input_info("conv"), format::bfyx, data_types::f32) // optimize_data should add a conversion from yxfb to bfyx, and 'conv' should output data in bfyx as well (IE case) }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); // we need to check that r1 is successfully optimized and that we can still query for r1's output, which should point to conv's output (note: conv cannot be marked as output in this case) config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{ "r1" })); @@ -1578,7 +1574,7 @@ TEST(reorder_gpu_opt, basic_remove_redundant_due_to_implicit_reorders) softmax("output", input_info("r1")) }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, tpl, config); @@ -1603,7 +1599,7 @@ TEST(reorder_gpu_opt, non_trivial_remove_redundant) reorder("r1", input_info("in"), format::bfyx, data_types::f32) }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); @@ -1641,7 +1637,7 @@ TEST(reorder_gpu_opt, mean_mul) }; float answers[] = { 0.5f, 5.0f, -15.0f, 17.2f, 6.0f, -21.0f }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, tpl, config); net.set_input_data("in", in); @@ -1676,7 +1672,7 @@ TEST(reorder_gpu_opt, mean_div) }; float answers[] = { 2.0f, 1.0f, -1.0f, 0.5f, 4.0f, -2.0f }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, tpl, config); net.set_input_data("in", in); @@ -1707,7 +1703,7 @@ TEST(reorder_gpu_opt, mean_mul_val) }; float answers[] = { 2.0f, 4.0f, 1.5f, 2.0f, 50.0f, 600.0f }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, tpl, config); net.set_input_data("in", in); @@ -1737,7 +1733,7 @@ TEST(reorder_gpu_opt, mean_mul_val_float_to_int) }; char answers[] = { 0, 2, 1, 2, 25, 127 }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network net(engine, tpl, config);
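Every hunk in this file follows the same recipe: a default-constructed ExecutionConfig becomes get_test_default_config(engine). The helper's definition is not part of this diff; purely as a hedged sketch, assuming it derives the queue type from the device under test (the supports_immad checks added elsewhere in this patch suggest oneDNN-capable devices need an in-order queue), it could look roughly like:

inline ExecutionConfig get_test_default_config(cldnn::engine& engine) {
    ExecutionConfig cfg;
    // Assumption: oneDNN-capable devices (supports_immad) want an
    // in-order queue by default, so the config depends on the engine.
    if (engine.get_device_info().supports_immad)
        cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
    return cfg;
}

Starting every test from an engine-aware default rather than ExecutionConfig() is presumably what removes the per-test queue-type boilerplate visible in the deleted brace-initializer constructors.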
net.set_input_data("in", in); @@ -1769,7 +1765,7 @@ TEST(reorder_gpu_i32, basic) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1810,7 +1806,7 @@ TEST(reorder_gpu_i64, basic) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1836,7 +1832,7 @@ TEST(reorder_gpu_binary, binary_output) { auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 2, 2, 2 } }); @@ -1855,7 +1851,7 @@ TEST(reorder_gpu_binary, binary_output) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1884,7 +1880,7 @@ TEST(reorder_gpu_binary, binary_input) { auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); auto input = engine.allocate_memory({ data_types::bin, format::b_fs_yx_32fp,{ 2, 2, 2, 2 } }); @@ -1906,7 +1902,7 @@ TEST(reorder_gpu_binary, binary_input) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -1976,7 +1972,7 @@ TEST(reorder_gpu_f32, bfwzyx_bfyx_chain) reorder("reorder3", input_info("reshape3"), format::bfyx, data_types::f32, sub_bfyx), reorder("out_reorder", input_info("reorder3"), format::bfwzyx, data_types::f32) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -2021,7 +2017,7 @@ TEST(reorder_gpu_f32, bfzyx_to_bsv16_fsv16) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2103,7 +2099,7 @@ TEST(reorder_gpu_f32, bfzyx_to_bsv16_fsv16_padded) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout.with_padding(padding({ 0, 0, x_pad, y_pad, 0 }, 0.f)))); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2178,7 +2174,7 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_allowed) reorder(reorder_name, input_info("first_activation"), format::bfyx, data_types::f32), activation("second_activation", input_info(reorder_name), 
activation_func::abs)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -2225,7 +2221,7 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_not_allowed) reorder(reorder_name, input_info("input"), format::bfyx, data_types::f32), convolution("convolution", input_info(reorder_name), {"weights"}, { 1, 1 }, { 1, 1 }, { 1, 1 })); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -2281,7 +2277,7 @@ TEST(reorder_gpu_f32, b_fs_yx_fsv16_to_bfyx_opt_padded) reorder(reorder_name, input_info("input"), format::bfyx, data_types::f32), activation("activation", input_info(reorder_name), activation_func::abs)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -2317,7 +2313,7 @@ TEST(reorder_gpu, any_format) { topo.add(input_layout("in", input->get_layout())); topo.add(reorder("out", input_info("in"), format::any, data_types::f32)); - network net(engine, topo); + network net(engine, topo, get_test_default_config(engine)); auto data = generate_random_1d(input->count(), -1, 1); set_values(input, data); @@ -2350,7 +2346,7 @@ TEST(reorder_image2d_rgba_to_bfyx_gpu, basic) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2400,7 +2396,7 @@ TEST(reorder_bfyx_to_image2d_rgba_gpu, basic) input_layout("input", input->get_layout()), reorder("reorder", input_info("input"), output_layout)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -2571,7 +2567,7 @@ class ReorderTest : public ::testing::TestWithParam { public: cldnn::engine& engine = get_test_engine(); cldnn::topology topology_test; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); static const int min_random = -200; static const int max_random = 200; std::vector executed_prims; @@ -2721,10 +2717,9 @@ TEST_P(testing_removal_reorder, removal_no_padded_reorder) { ); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, std::string(""), impl_types::ocl }; - ExecutionConfig config{ov::intel_gpu::queue_type(QueueTypes::in_order), - ov::intel_gpu::optimize_data(true), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv_output", impl} }) - }; + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv_output", impl} })); setup_with_build_ops(config); @@ -2751,10 +2746,9 @@ TEST_P(testing_removal_reorder, removal_padded_reorder) { ); ov::intel_gpu::ImplementationDesc impl = { format::b_fs_yx_fsv16, std::string(""), impl_types::ocl }; - ExecutionConfig config{ov::intel_gpu::queue_type(QueueTypes::in_order), - ov::intel_gpu::optimize_data(true), - 
ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv_output", impl} }) - }; + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::optimize_data(true)); + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv_output", impl} })); setup_with_build_ops(config); @@ -2925,10 +2919,9 @@ TEST(reorder_onednn_gpu, basic_convert_int8) { ); ov::intel_gpu::ImplementationDesc impl = { format::bfyx, std::string(""), impl_types::onednn }; - ExecutionConfig cfg{ov::intel_gpu::queue_type(QueueTypes::in_order), - ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "reorder2"}), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{ "reorder_input", impl }}), - }; + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{ "reorder_input", "reorder2"})); + cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{ "reorder_input", impl }})); network network( engine, diff --git a/src/plugins/intel_gpu/tests/test_cases/reorg_yolo_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reorg_yolo_gpu_test.cpp index 245892c57c274f..edaecf60a34508 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reorg_yolo_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reorg_yolo_gpu_test.cpp @@ -320,7 +320,7 @@ struct reorg_yolo_test topology.add(reorg_yolo("reorg_yolo", input_info("input_reordered"), params.stride)); topology.add(reorder("reorg_yolo_reordered", input_info("reorg_yolo"), plain_format, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); const auto result = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/resample_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/resample_gpu_test.cpp index 6c25f8ea09fd9d..2a239fc285d099 100644 --- a/src/plugins/intel_gpu/tests/test_cases/resample_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/resample_gpu_test.cpp @@ -47,7 +47,7 @@ void test_basic_in2x3x2x2_nearest(bool is_caching_test) { 12.f, 9.f, -17.f, }); - cldnn::network::ptr net = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("input", input); @@ -117,7 +117,7 @@ TEST(resample_gpu, basic_in2x3x2x2_bilinear) { 3.f, 4.f, }); - cldnn::network net{ engine, topology }; + cldnn::network net{ engine, topology, get_test_default_config(engine) }; net.set_input_data("input", input); auto outputs = net.execute(); @@ -168,7 +168,7 @@ TEST(resample_gpu, nearest_asymmetric) { 3.f, 4.f, }); - cldnn::network net{ engine, topology }; + cldnn::network net{ engine, topology, get_test_default_config(engine) }; net.set_input_data("input", input); auto outputs = net.execute(); @@ -219,7 +219,7 @@ TEST(resample_gpu, nearest_asymmetric_i8) { 3, 4, }); - cldnn::network net{ engine, topology }; + cldnn::network net{ engine, topology, get_test_default_config(engine) }; net.set_input_data("input", input); auto outputs = net.execute(); @@ -270,7 +270,7 @@ TEST(resample_gpu, bilinear_asymmetric) { 3.f, 4.f, }); - cldnn::network net{ engine, 
topology }; + cldnn::network net{ engine, topology, get_test_default_config(engine) }; net.set_input_data("input", input); auto outputs = net.execute(); @@ -471,7 +471,8 @@ struct resample_random_test : testing::TestWithParam{"resample"})); config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample", {params.in_format, "resample_ref"}} })); @@ -649,7 +650,7 @@ struct caffe_resample_random_test : testing::TestWithParam{"resample_opt"})); config_opt.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample_opt", {params.in_format, "resample_opt"}} })); @@ -725,7 +726,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest1) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -815,7 +816,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest2) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -905,7 +906,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest3) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -995,7 +996,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest4) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1085,7 +1086,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_nearest5) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1175,7 +1176,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode1) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1245,7 +1246,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode2) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1309,7 +1310,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode3) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1379,7 +1380,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode4) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); 
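These resample hunks, like the reorder ones above, retire the brace-initializer form of ExecutionConfig in favor of layering properties on the engine-aware default. An illustrative fragment contrasting the two styles, reusing only properties that appear in the surrounding hunks (not a copy of any single test):

// Old style: properties fixed at construction, engine ignored.
ExecutionConfig old_cfg{ov::intel_gpu::allow_new_shape_infer(true)};
// New style: engine-aware defaults first, per-test overrides after.
ExecutionConfig cfg = get_test_default_config(engine);
cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));

The set_property form also lets later hunks add force_implementations or custom_outputs one property at a time instead of rebuilding the whole constructor argument list.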
config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1449,7 +1450,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_coord_transform_mode5) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1519,7 +1520,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_cubic) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1587,7 +1588,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_cubic2) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 1; @@ -1640,7 +1641,7 @@ TEST(resample_gpu, interpolate_in2x2x3x2_linear) { // Sample Type: Nearest auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 2; @@ -1867,7 +1868,7 @@ TEST(resample_gpu, interpolate_in1x1x2x4_linear_scale) { // Sample Type: Linear auto& engine = get_test_engine(); - ov::intel_gpu::ExecutionConfig config; + ov::intel_gpu::ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); int b = 1; @@ -2027,7 +2028,7 @@ struct resample_opt_random_test : testing::TestWithParam{"resample"})); network net(engine, topo, config); @@ -2046,7 +2047,7 @@ struct resample_opt_random_test : testing::TestWithParam{"resample_opt", "res_to_bfyx"})); cldnn::network::ptr net_opt = get_network(engine, topo_opt, config_opt, get_test_stream_ptr(), is_caching_test); @@ -2138,10 +2139,10 @@ struct resample_opt_random_test_ext : resample_opt_random_test topo_opt.add(prim_opt); topo_opt.add(reorder("res_to_bfyx", input_info("resample_opt"), origin_format, params.input_type)); - ExecutionConfig cfg{ov::enable_profiling(true), - ov::intel_gpu::custom_outputs(std::vector{"res_to_bfyx"}), - ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample_opt", {working_format, kernel}} }) - }; + ExecutionConfig cfg = get_test_default_config(engine); + cfg.set_property(ov::enable_profiling(true)); + cfg.set_property(ov::intel_gpu::custom_outputs(std::vector{"res_to_bfyx"})); + cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"resample_opt", {working_format, kernel}} })); network net_opt(engine, topo_opt, cfg); diff --git a/src/plugins/intel_gpu/tests/test_cases/reshape_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reshape_gpu_test.cpp index 9825d0492a8759..acb659a11a3dcf 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reshape_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reshape_gpu_test.cpp @@ -66,7 +66,7 @@ void generic_reshape_test(format fmt, tensor const& input_size, tensor const& re } tpl.add(reshape("reshape", reshape_input, reshape_size, cldnn::reshape::reshape_mode::base, output_padd)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::custom_outputs(std::vector{reshape_input, 
"reshape"})); cldnn::network::ptr net = get_network(engine, tpl, config, get_test_stream_ptr(), is_caching_test); @@ -459,7 +459,7 @@ void test_multiple_users_with_reorder(bool is_caching_test) { std::vector out2 = {0.f, 2.f, 0.f, 4.0f}; set_values(input, input_vec); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -502,7 +502,7 @@ void test_calc_output_shape(bool is_caching_test) { set_values(input, {-1.f, 2.f, -3.f, 4.f}); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -574,7 +574,7 @@ void test_basic_bfwzyx(bool is_caching_test) { set_values(input, input_data); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -630,7 +630,7 @@ void test_shrink_chain_partial(bool is_caching_test) { std::vector out = {5.f, 12.f, 15.f, 32.0f}; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -675,7 +675,7 @@ void test_shrink_chain_full(bool is_caching_test) { std::vector out = {5.f, 12.f, 15.f, 32.0f}; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -715,7 +715,7 @@ void test_shrink_chain_out(bool is_caching_test) { std::vector out = {0.f, 2.f, 0.f, 4.0f}; set_values(input, input_vec); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -758,7 +758,7 @@ TEST(reshape_gpu_f32, basic_runtime_static_shape) { set_values(input, input_data); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -806,7 +806,7 @@ TEST(reshape_gpu_f32, basic_runtime_dynamic_shape) { set_values(input, input_data); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -857,7 +857,7 @@ TEST(reshape_gpu_f32, basic_runtime_dynamic_shape_with_const) { set_values(input, input_data); - 
ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -914,7 +914,7 @@ TEST(reshape_gpu_f32, basic_runtime_dynamic_shape_with_const_optimized_out) { set_values(input, input_data); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/reverse_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reverse_gpu_test.cpp index 497366d046c9eb..cb512fcd839463 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reverse_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reverse_gpu_test.cpp @@ -76,7 +76,7 @@ struct reverse_gpu_test : public ::testing::TestWithParam tp.add(reverse(reverse_id, input_info(reverse_input_id), input_info(axes_id), mode)); } - cldnn::network::ptr network = get_network(engine, tp, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, tp, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(reverse_input_id, reverse_input); network->set_input_data(axes_id, reverse_axes); auto result = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/reverse_sequence_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/reverse_sequence_gpu_test.cpp index e689368a6f51e7..d3491b814dd864 100644 --- a/src/plugins/intel_gpu/tests/test_cases/reverse_sequence_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/reverse_sequence_gpu_test.cpp @@ -36,7 +36,7 @@ void test_fp32_d2_2_ba1_sa0(bool is_caching_test) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("seq_lengths", seq_lengths); @@ -85,7 +85,7 @@ void test_fp32_d3_3_3_ba0_sa1(bool is_caching_test) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("seq_lengths", seq_lengths); @@ -135,7 +135,7 @@ TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba2_sa0) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -181,7 +181,7 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa3) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, 
get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -228,7 +228,7 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa2) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -275,7 +275,7 @@ TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba2_sa0) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -320,7 +320,7 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_ba1_sa0) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -362,7 +362,7 @@ TEST(reverese_sequence_gpu_test, fp16x2_d2_2_ba1_sa0) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -406,7 +406,7 @@ TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba0_sa1) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -452,7 +452,7 @@ TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba2_sa0) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -498,7 +498,7 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa3) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -545,7 +545,7 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa2) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("seq_lengths", seq_lengths); @@ -592,7 +592,7 @@ TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba2_sa0) { reverse_sequence("reverse_sequence", input_info("input"), input_info("seq_lengths"), seq_axis, batch_axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); 
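The reverse_sequence hunks above, like most of this patch, apply one mechanical substitution. Condensed to its essence (identifiers exactly as they appear in the hunks; this is an illustrative summary, not a hunk itself):

    // Before: each test silently relied on a default-constructed config.
    network network(engine, topology);

    // After: the shared test baseline is passed explicitly, so every test
    // runs under the same ExecutionConfig produced by the test utilities.
    network network(engine, topology, get_test_default_config(engine));
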
network.set_input_data("seq_lengths", seq_lengths); diff --git a/src/plugins/intel_gpu/tests/test_cases/roi_align_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/roi_align_gpu_test.cpp index c0682d49ebfb8a..5afbe017c9d35f 100644 --- a/src/plugins/intel_gpu/tests/test_cases/roi_align_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/roi_align_gpu_test.cpp @@ -69,6 +69,7 @@ struct roi_align_test : public testing::Test { roi_align::AlignedMode aligned_mode, bool is_caching_test) const { auto& engine = get_test_engine(); + auto stream = get_test_stream_ptr(get_test_default_config(engine)); auto input = get_memory(engine, input_lt, input_data); auto coords = get_memory(engine, coords_lt, coords_data); @@ -91,7 +92,7 @@ struct roi_align_test : public testing::Test { aligned_mode)); topology.add(reorder("out", input_info("roi_align"), plain_format, device_data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), stream, is_caching_test); network->set_input_data("input", input); network->set_input_data("coords", coords); diff --git a/src/plugins/intel_gpu/tests/test_cases/roi_pooling_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/roi_pooling_gpu_test.cpp index 282fcf99e5d7ce..1d9e6098e2c67d 100644 --- a/src/plugins/intel_gpu/tests/test_cases/roi_pooling_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/roi_pooling_gpu_test.cpp @@ -185,7 +185,7 @@ struct roi_pooling_gpu_test : public testing::TestWithParam::value)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); for (auto& input : inputs) { network->set_input_data(input.first, input.second); diff --git a/src/plugins/intel_gpu/tests/test_cases/roll_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/roll_gpu_test.cpp index e8a33f30615be1..515a0a2ef3d3a0 100644 --- a/src/plugins/intel_gpu/tests/test_cases/roll_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/roll_gpu_test.cpp @@ -54,7 +54,7 @@ struct roll_test : testing::TestWithParam> { topology.add(roll("roll", input_info("reordered_input"), tensor(input_format, p.shift))); topology.add(reorder("reordered_roll", input_info("roll"), plane_format, type_to_data_type::value)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); const auto outputs = network->execute(); diff --git a/src/plugins/intel_gpu/tests/test_cases/scatter_elements_update_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/scatter_elements_update_gpu_test.cpp index 1ca64dd52552e2..961d10d5190cb0 100644 --- a/src/plugins/intel_gpu/tests/test_cases/scatter_elements_update_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/scatter_elements_update_gpu_test.cpp @@ -71,7 +71,7 @@ void test_d2411_axisF(bool is_caching_test) { scatter_elements_update("scatter_elements_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), axis) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), 
is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputData", input1); network->set_input_data("InputIndices", input2); @@ -296,7 +296,7 @@ struct scatter_elements_update_gpu_formats_test ); topology.add(reorder("ScatterEelementsUpdatePlain", input_info("ScatterEelementsUpdate"), plain_format, data_type)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Data", data); network->set_input_data("Indices", indices); diff --git a/src/plugins/intel_gpu/tests/test_cases/scatter_nd_update_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/scatter_nd_update_gpu_test.cpp index 7bcba7be9c08d7..89d418109ba1a1 100644 --- a/src/plugins/intel_gpu/tests/test_cases/scatter_nd_update_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/scatter_nd_update_gpu_test.cpp @@ -146,7 +146,7 @@ struct scatter_nd_update_random_test : testing::TestWithParamset_input_data("InputData", input1); network->set_input_data("InputIndices", input2); @@ -216,7 +216,7 @@ struct scatter_nd_update_random_test : testing::TestWithParamset_input_data("InputData", input1); network->set_input_data("InputIndices", input2); @@ -570,7 +570,7 @@ TEST(scatter_nd_update_gpu_fp16_test15, data5_indice3_update5) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 3) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -654,7 +654,7 @@ TEST(scatter_nd_update_gpu_fp16_test14, data5_indice2_update3) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -718,7 +718,7 @@ TEST(scatter_nd_update_gpu_fp16_test13, data4_indice2_update2) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -789,7 +789,7 @@ TEST(scatter_nd_update_gpu_fp16_test12, data3_indice3_update1) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -919,7 +919,7 @@ TEST(scatter_nd_update_gpu_fp16_test11, data6_indice1_update6) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1015,7 +1015,7 @@ TEST(scatter_nd_update_gpu_fp16_test10, data5_indice1_update5) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, 
topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1093,7 +1093,7 @@ TEST(scatter_nd_update_gpu_fp16_test9, data4_indice1_update4) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1191,7 +1191,7 @@ TEST(scatter_nd_update_gpu_fp16_test8, data6_indice2_update5) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1259,7 +1259,7 @@ TEST(scatter_nd_update_gpu_fp16_test7, data5_indice2_update4) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1325,7 +1325,7 @@ TEST(scatter_nd_update_gpu_fp16_test6, data4_indice2_update3) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1390,7 +1390,7 @@ TEST(scatter_nd_update_gpu_fp16_test5, data3_indice2_update2) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1445,7 +1445,7 @@ TEST(scatter_nd_update_gpu_fp16_test4, data2_indice2_update1) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1520,7 +1520,7 @@ TEST(scatter_nd_update_gpu_fp16_test3, data3_indice1_update3) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1575,7 +1575,7 @@ TEST(scatter_nd_update_gpu_fp16_test2, data2_indice1_update2) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1624,7 +1624,7 @@ TEST(scatter_nd_update_gpu_fp16_test1, data1_indice1_update1) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1719,7 +1719,7 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2311) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 
2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -1858,7 +1858,7 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2211) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2008,7 +2008,7 @@ TEST(scatter_nd_update_gpu_fp16, d6661_i2111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2129,7 +2129,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2411) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2232,7 +2232,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2311) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2341,7 +2341,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2211) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2458,7 +2458,7 @@ TEST(scatter_nd_update_gpu_fp16, d3232_i2111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2592,7 +2592,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i25111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2760,7 +2760,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i24111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -2931,7 +2931,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i23111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -3114,7 +3114,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i22111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network 
network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -3315,7 +3315,7 @@ TEST(scatter_nd_update_gpu_fp16, d32323_i21111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -3475,7 +3475,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i261111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -3628,7 +3628,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i251111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -3784,7 +3784,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i241111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -3947,7 +3947,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i231111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -4121,7 +4121,7 @@ TEST(scatter_nd_update_gpu_fp16, d222222_i221111) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputData", input1); @@ -4319,7 +4319,7 @@ void test_d222222_i211111(bool is_caching_test) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputData", input1); network->set_input_data("InputIndices", input2); @@ -4431,7 +4431,7 @@ TEST(scatter_nd_update_gpu, dynamic) { scatter_nd_update("scatter_nd_update", input_info("InputData"), input_info("InputIndices"), input_info("InputUpdates"), 2) ); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/scatter_update_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/scatter_update_gpu_test.cpp index fedc3f54996785..1eb3c62d09ccef 100644 --- a/src/plugins/intel_gpu/tests/test_cases/scatter_update_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/scatter_update_gpu_test.cpp @@ -95,7 +95,7 @@ void test_d2411_axisB(bool is_caching_test) { ); 
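Where a test needs non-default behaviour, the patch keeps the same shape: start from the shared baseline, then layer per-test properties on top instead of starting from an empty ExecutionConfig. The dynamic scatter_nd_update test above reduces to the following (all calls taken verbatim from the hunk):

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); // enable the dynamic-shape path
    network network(engine, topology, config);
    network.set_input_data("InputData", input1);
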
topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f16)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputDictionary", input1); network->set_input_data("InputText", input2); @@ -176,7 +176,7 @@ TEST(scatter_update_gpu_fp32, d8111_axisB) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); @@ -270,7 +270,7 @@ TEST(scatter_update_gpu_fp16, d4311_axisB) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f16)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -397,7 +397,7 @@ TEST(scatter_update_gpu_fp16, d2521_axisF) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f16)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -510,7 +510,7 @@ TEST(scatter_update_gpu_fp16, d2241_axisY) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f16)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -671,7 +671,7 @@ TEST(scatter_update_gpu_fp16, d8x2x20x1_axisB) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f16)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -797,7 +797,7 @@ TEST(scatter_update_gpu_fp32, d2214_axisX) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -899,7 +899,7 @@ TEST(scatter_update_gpu_int32, d6211_axisB) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::i32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -998,7 +998,7 @@ TEST(scatter_update_gpu_int32, d3151_axisY) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::i32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1082,7 +1082,7 @@ TEST(scatter_update_gpu_fp32, d24111_axisF_bfzyx) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f32)); - network network(engine, topology); + network 
network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1188,7 +1188,7 @@ TEST(scatter_update_gpu_int32, d121251_bfwzyx_axisB) { scatter_update("scatter_update", input_info("InputDictionary"), input_info("TextReordered"), input_info("InputUpdates"), axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1279,7 +1279,7 @@ TEST(scatter_update_gpu_fp32, d21511_bfzyx_axisX) { ); topology.add(reorder("out", input_info("scatter_update"), plain_3d_format, data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); @@ -1385,7 +1385,7 @@ TEST(scatter_update_gpu_fp32, d1252_axisY_bfwzyx) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::f32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1475,7 +1475,7 @@ TEST(scatter_update_gpu_int32, d2115_axisX_bfwzyx) { ); topology.add(reorder("out", input_info("scatter_update"), plain_2d_format, data_types::i32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("InputDictionary", input1); network.set_input_data("InputText", input2); @@ -1569,7 +1569,7 @@ void test_d21214_bfzyx_axisX_bfwzyx(bool is_caching_test) { ); topology.add(reorder("out", input_info("scatter_update"), plain_3d_format, data_types::f16)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("InputDictionary", input1); network->set_input_data("InputText", input2); @@ -1656,7 +1656,7 @@ TEST(scatter_update_gpu_fp32, dynamic) { ); topology.add(reorder("out", input_info("scatter_update"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/select_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/select_gpu_test.cpp index fab61b847f7889..ec67d67a268ffd 100644 --- a/src/plugins/intel_gpu/tests/test_cases/select_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/select_gpu_test.cpp @@ -45,7 +45,7 @@ void test_select_basic(bool is_caching_test) { 0.f, 1.f, 0.f, 1.f, 1.f, 0.f, 1.f, 0.f }); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", input2); @@ -103,7 +103,7 @@ TEST(select_gpu_f32, select_basic_negative) { -0.f, -1.f, -0.f, -1.f, -1.f, -0.f, -1.f, -0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); 
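Tests parameterised by is_caching_test do not construct network directly; they go through the shared get_network helper, which lets the same test body run with and without the caching path. The config argument changes in exactly the same way (signature as used throughout this patch):

    cldnn::network::ptr network = get_network(engine,
                                              topology,
                                              get_test_default_config(engine),
                                              get_test_stream_ptr(),
                                              is_caching_test);
    network->set_input_data("input", input);
    auto outputs = network->execute();
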
network.set_input_data("input2", input2); @@ -180,7 +180,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_mask_2x2x1x2) { 0.f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -256,7 +256,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_mask_1x1x1x1) { 0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -336,7 +336,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x2x2x1) { -0.f, -0.5f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -422,7 +422,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in2_2x2x1x2) { 1.f, 0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -504,7 +504,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in1_2x2x2x1_bcast_in2_2x2x1 1.f, 0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -578,7 +578,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_mask_2x1x2x2_in1_1x2x2x2_in 0.f, 1.f, }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -654,7 +654,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x1x2x2_in1_2x2x -0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -730,7 +730,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in2_1x1x1x1) { 1.f, 0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -810,7 +810,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_in2_2x2x2x1) { -1.5f, -0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -896,7 +896,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in1_2x2x1x2) { 1.f, 0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -972,7 +972,7 @@ TEST(select_gpu_f32, select_basic_bfyx_2x2x2x2_bcast_in1_1x1x1x1) { 1.f, 0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -1052,7 +1052,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_in1_2x2x2x1) { -1.5f, -0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", 
input1); network.set_input_data("input2", input2); @@ -1132,7 +1132,7 @@ TEST(select_gpu_f32, select_basic_comma_byxf_2x2x2x2_bcast_mask_2x1x2x2_in1_2x2x -0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -1195,7 +1195,7 @@ TEST(select_gpu_f32, select_basic_comma) { -0.f, -0.1f, -0.f, -0.5f, -0.7f, -0.f, -1.5f, -0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1230,7 +1230,7 @@ TEST(select_gpu_f32, select_basic_error_input_sizes) { topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", input_info("mask"), input_info("input"), input_info("input2"))); - EXPECT_ANY_THROW(network(engine, topology)); + EXPECT_ANY_THROW(network(engine, topology, get_test_default_config(engine))); } TEST(select_gpu_f32, select_basic_error_mask_sizes) { @@ -1246,7 +1246,7 @@ TEST(select_gpu_f32, select_basic_error_mask_sizes) { topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", input_info("mask"), input_info("input"), input_info("input2"))); - EXPECT_ANY_THROW(network(engine, topology)); + EXPECT_ANY_THROW(network(engine, topology, get_test_default_config(engine))); } TEST(select_gpu_f32, select_basic_error_input_types) { @@ -1261,7 +1261,7 @@ TEST(select_gpu_f32, select_basic_error_input_types) { topology.add(input_layout("input2", input2->get_layout())); topology.add(input_layout("mask", mask->get_layout())); topology.add(cldnn::select("select", input_info("mask"), input_info("input"), input_info("input2"))); - EXPECT_ANY_THROW(network(engine, topology)); + EXPECT_ANY_THROW(network(engine, topology, get_test_default_config(engine))); } TEST(select_gpu_f32, select_basic_byxf) { @@ -1296,7 +1296,7 @@ TEST(select_gpu_f32, select_basic_byxf) { 0.f, 1.f, 0.f, 1.f, 1.f, 0.f, 1.f, 0.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1350,7 +1350,7 @@ TEST(select_gpu_f32, select_basic_mask_f16) { 0, 1, 0, 1, 1, 0, 1, 0 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1404,7 +1404,7 @@ TEST(select_gpu_f32, select_basic_mask_i8) { 0, 1, 0, 1, 1, 0, 1, 0 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1458,7 +1458,7 @@ TEST(select_gpu_f32, select_basic_mask_u8) { 0, 211, 0, 255, 199, 0, 160, 0 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1505,7 +1505,7 @@ TEST(select_gpu_f32, select_basic_1x1x2x2) { 0.f, 0.f, 1.f, 1.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1554,7 +1554,7 @@ TEST(select_gpu_f32, select_basic_bfyx_1x1x2x2) { 1.f, 1.f }); - network network(engine, topology); + network network(engine, 
topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1604,7 +1604,7 @@ TEST(select_gpu_f32, select_basic_byxf_1x1x2x2) { 1.f, 1.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1656,7 +1656,7 @@ void test_f16_select_basic_1x1x2x2(bool is_caching_test) { 1, 1 }); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", input2); @@ -1710,7 +1710,7 @@ TEST(select_gpu_f16, select_basic_mask_f32_1x1x2x2) { 1.5f, 0.4f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1760,7 +1760,7 @@ TEST(select_gpu_f16, select_basic_mask_i8_1x1x2x2) { 1, 1 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1810,7 +1810,7 @@ TEST(select_gpu_f16, select_basic_mask_u8_1x1x2x2) { 128, 255 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1862,7 +1862,7 @@ void test_i8_select_basic_1x1x2x2(bool is_caching_test) { 3, 5 }); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", input2); @@ -1916,7 +1916,7 @@ TEST(select_gpu_i8, select_basic_mask_f32_1x1x2x2) { 1.5f, 0.4f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -1966,7 +1966,7 @@ TEST(select_gpu_i8, select_basic_mask_f16_1x1x2x2) { 3, 5 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2016,7 +2016,7 @@ TEST(select_gpu_i8, select_basic_mask_u8_1x1x2x2) { 128, 255 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2068,7 +2068,7 @@ void test_u8_select_basic_1x1x2x2(bool is_caching_test) { 128, 255 }); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", input2); @@ -2122,7 +2122,7 @@ TEST(select_gpu_u8, select_basic_mask_f32_1x1x2x2) { 1.5f, 0.4f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); 
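The negative tests in select_gpu_test get the same treatment: even where the test only asserts that network construction throws, the shared config is now passed, keeping construction uniform across positive and negative tests (verbatim from the select_basic_error hunks above):

    EXPECT_ANY_THROW(network(engine, topology, get_test_default_config(engine)));
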
network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2172,7 +2172,7 @@ TEST(select_gpu_u8, select_basic_mask_f16_1x1x2x2) { 1, 1 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2222,7 +2222,7 @@ TEST(select_gpu_u8, select_basic_mask_i8_1x1x2x2) { 1, 1 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2269,7 +2269,7 @@ TEST(select_gpu_fp32, select_numpy_broadcast_mask_u8_1x1x3) { 1, 0, 1 }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("input2", input2); @@ -2332,7 +2332,7 @@ TEST(select_gpu_f32, select_different_formats) { 1.f, 1.f }); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input1", input1); network.set_input_data("input2", input2); @@ -2419,7 +2419,7 @@ TEST(select_gpu_f32, dynamic) { topology.add(input_layout("mask", mask_layout)); topology.add(cldnn::select("select", input_info("mask"), input_info("input1"), input_info("input2"))); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/set_output_memory_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/set_output_memory_gpu_test.cpp index 2e6c9f55bc7561..a1adf941f132f0 100644 --- a/src/plugins/intel_gpu/tests/test_cases/set_output_memory_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/set_output_memory_gpu_test.cpp @@ -46,7 +46,7 @@ void test_basic(bool is_caching_test) { reorder("reorder", input_info("Input"), input_data->get_layout()) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input_data); network->set_output_memory("reorder", output_mem); @@ -94,7 +94,7 @@ TEST(set_output_memory_gpu, basic_const) { reorder("reorder_const", input_info("Const"), input_data->get_layout()) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input_data); network.set_output_memory("reorder_dyn", output_mem); @@ -143,7 +143,7 @@ TEST(set_output_memory_gpu, basic_mutable) { reorder("reorder_mutable", input_info("Mutable"), input_data->get_layout()) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input", input_data); network.set_output_memory("reorder_dyn", output_mem); @@ -196,7 +196,7 @@ TEST(set_output_memory_gpu, top_k1) { }; set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_output_memory("reorder", output_mem); @@ -242,7 +242,7 @@ TEST(set_output_memory_gpu, top_k2) { }; set_values(input, input_vec); - network network(engine, topology); + network network(engine, 
topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_output_memory("reorder", second_output_mem); @@ -322,7 +322,7 @@ TEST(set_output_memory_gpu, basic_opt) { primitive_id outputID = "reorder3"; topology.add(reorder(outputID, input_info("concat"), ol)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -374,7 +374,7 @@ TEST(set_output_memory_gpu, mutable_output_data) { /*b1f3*/4.f, 0.5f, 8.f, 8.2f }; set_values(input, input_vec); - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network network(prog, 0); network.set_input_data("Add_1396", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/shape_of_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/shape_of_gpu_test.cpp index 2cbbb93a890e46..c041372e0d7ab7 100644 --- a/src/plugins/intel_gpu/tests/test_cases/shape_of_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/shape_of_gpu_test.cpp @@ -25,7 +25,7 @@ TEST(shape_of_gpu, bfyx) { topology.add(input_layout("input", input->get_layout())); topology.add(shape_of("shape_of", input_info("input"), 4, data_types::i32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -50,7 +50,7 @@ TEST(shape_of_gpu, bfyx_i64) { topology.add(input_layout("input", input->get_layout())); topology.add(shape_of("shape_of", input_info("input"), 4, data_types::i64)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -75,7 +75,7 @@ TEST(shape_of_gpu, yxfb) { topology.add(input_layout("input", input->get_layout())); topology.add(shape_of("shape_of", input_info("input"), 4, data_types::i32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -100,7 +100,7 @@ TEST(shape_of_gpu, bfzyx) { topology.add(input_layout("input", input->get_layout())); topology.add(shape_of("shape_of", input_info("input"), 5, data_types::i32)); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -129,7 +129,7 @@ TEST(shape_of_gpu, dynamic) { topology.add(input_layout("input", in_layout)); topology.add(shape_of("shape_of", input_info("input"), 5, data_types::i32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); diff --git a/src/plugins/intel_gpu/tests/test_cases/shuffle_channels_test.cpp b/src/plugins/intel_gpu/tests/test_cases/shuffle_channels_test.cpp index f46864d0a8cd1a..7614bf9ae92bae 100644 --- a/src/plugins/intel_gpu/tests/test_cases/shuffle_channels_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/shuffle_channels_test.cpp @@ -35,7 +35,7 @@ void test_d1_15_2_2_ax1_g5(bool is_caching_test) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), 
get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input0); @@ -81,7 +81,7 @@ TEST(shuffle_channels_fp32_gpu, d1_15_2_2_axm3_g5) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); @@ -123,7 +123,7 @@ TEST(shuffle_channels_fp32_gpu, d15_2_2_ax0_g5) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); @@ -165,7 +165,7 @@ TEST(shuffle_channels_fp32_gpu, d15_2_2_axm4_g5) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); @@ -204,7 +204,7 @@ TEST(shuffle_channels_fp32_gpu, d2_2_6_axm2_g3) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); @@ -242,7 +242,7 @@ TEST(shuffle_channels_fp32_gpu, d2_6_2_axm3_g3) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); @@ -280,7 +280,7 @@ TEST(shuffle_channels_fp32_gpu, d2_2_6_axm2_g2) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); @@ -318,7 +318,7 @@ TEST(shuffle_channels_fp32_gpu, d2_6_2_axm3_g2) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); @@ -354,7 +354,7 @@ TEST(shuffle_channels_fp32_gpu, d6_axm0_g2) { shuffle_channels("shuffle_channels", input_info("Input0"), group, axis) ); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("Input0", input0); diff --git a/src/plugins/intel_gpu/tests/test_cases/slice.cpp b/src/plugins/intel_gpu/tests/test_cases/slice.cpp index 9e06a840b5e645..3dccacfc01637b 100644 --- a/src/plugins/intel_gpu/tests/test_cases/slice.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/slice.cpp @@ -45,7 +45,7 @@ class SliceTest : public ::testing::Test { } topology.add(slice("slice", inputs, tensor{output_shape_})); - cldnn::network::ptr network = get_network(engine_, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine_, topology, get_test_default_config(engine_), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/softmax_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/softmax_gpu_test.cpp index 35658f33ba37b1..e45969be2f041d 100644 --- a/src/plugins/intel_gpu/tests/test_cases/softmax_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/softmax_gpu_test.cpp @@ -73,7 +73,7 @@ class softmax_gpu_xb_f32_test_fixture: public ::testing::Test { topology.add(input_layout("input", 
input->get_layout())); topology.add(softmax("softmax", input_info("input"), 3)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -108,7 +108,7 @@ class softmax_gpu_xb_f32_test_fixture: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(softmax("softmax", input_info("input"), 3)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -165,7 +165,7 @@ class softmax_gpu_xb_f32_test_fixture: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(softmax("softmax", input_info("input"), 3)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -238,7 +238,7 @@ TEST(softmax_gpu_bfyx_f32, normalize_y) { 0.993307149f //b=1, f=2, x=1 }; - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -318,7 +318,7 @@ TEST(softmax_gpu_bfyx_f32, normalize_f) { 0.977054322f //b=1, y=1, x=1 }; - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -403,7 +403,7 @@ TEST(softmax_gpu_bfzyx_f32, normalize_z) { 0.880797f, 0.952574f, }; - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -486,7 +486,7 @@ TEST(softmax_gpu_bfyx_f32, normalize_b) { 0.977054322f //f=1, y=1, x=1 }; - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); auto outputs = network.execute(); @@ -946,9 +946,9 @@ struct softmax_gpu_formats_test set_values(input, params.input); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(false)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); const auto outputs = network->execute(); @@ -1048,7 +1048,7 @@ TEST(softmax_gpu_bfyx_f32, normalize_f_dynamic) { 0.977054322f //b=1, y=1, x=1 }; - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -1153,7 +1153,7 @@ TEST(softmax_gpu_bfyx_f32, bf_opt_normalize_f_dynamic) { 0.719294981f //b=1, y=0, x=0 }; - ExecutionConfig config; +
ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/space_to_batch_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/space_to_batch_gpu_test.cpp index 0b6580c2d0bf20..ddec5b3c84a4c3 100644 --- a/src/plugins/intel_gpu/tests/test_cases/space_to_batch_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/space_to_batch_gpu_test.cpp @@ -40,7 +40,7 @@ class space_to_batch_fp16_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {8,1,1,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -84,7 +84,7 @@ class space_to_batch_fp16_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,2,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {4,1,3,2}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -130,7 +130,7 @@ class space_to_batch_fp16_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,1,0}, 0), tensor(format::bfyx, {0,1,0,0}, 0), tensor(format::bfyx, {16,1,2,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -176,7 +176,7 @@ class space_to_batch_fp16_gpu: public ::testing::Test { tensor(format::bfzyx, {0,0,0,1,0}, 0), tensor(format::bfzyx, {0,0,0,0,0}, 0), tensor(format::bfzyx, {8,1,1,2,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -224,7 +224,7 @@ class space_to_batch_fp16_gpu: public ::testing::Test { tensor(format::bfwzyx, {0,1,0,1,0,0}, 0), tensor(format::bfwzyx, {0,0,0,0,0,0}, 0), tensor(format::bfwzyx, {16,1,2,2,1,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -277,7 +277,7 @@ class space_to_batch_fp16_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,0,1}, 0), tensor(format::bfyx, {8,8,1,1}, 1))); topology.add(reorder("stb_to_bfyx", input_info("space_to_batch"), format::bfyx, data_types::f16)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -330,7 +330,7 @@ class 
space_to_batch_fp16_gpu: public ::testing::Test { tensor(format::bfyx, {0,2,0,0}, 0), tensor(format::bfyx, {4,5,1,2}, 1))); topology.add(reorder("stb_to_bfyx", input_info("space_to_batch"), format::bfyx, data_types::f16)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -406,7 +406,7 @@ class space_to_batch_fp32_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {8,1,1,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -450,7 +450,7 @@ class space_to_batch_fp32_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,2,0}, 0), tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {4,1,3,2}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -496,7 +496,7 @@ class space_to_batch_fp32_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,1,0}, 0), tensor(format::bfyx, {0,1,0,0}, 0), tensor(format::bfyx, {16,1,2,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -542,7 +542,7 @@ class space_to_batch_fp32_gpu: public ::testing::Test { tensor(format::bfzyx, {0,0,0,1,0}, 0), tensor(format::bfzyx, {0,0,0,0,0}, 0), tensor(format::bfzyx, {8,1,1,2,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -588,7 +588,7 @@ class space_to_batch_fp32_gpu: public ::testing::Test { tensor(format::bfwzyx, {0,1,0,1,0,0}, 0), tensor(format::bfwzyx, {0,0,0,0,0,0}, 0), tensor(format::bfwzyx, {16,1,2,2,1,1}, 1))); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -645,7 +645,7 @@ class space_to_batch_fp32_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {8,4,1,2}, 1))); topology.add(reorder("stb_to_bfyx", input_info("space_to_batch"), format::bfyx, data_types::f32)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); @@ -699,7 +699,7 @@ class 
space_to_batch_fp32_gpu: public ::testing::Test { tensor(format::bfyx, {0,0,0,0}, 0), tensor(format::bfyx, {6,2,2,2}, 1))); topology.add(reorder("stb_to_bfyx", input_info("space_to_batch"), format::bfyx, data_types::f32)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/space_to_depth_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/space_to_depth_gpu_test.cpp index c8a8aed7b0e4f7..ad4c415829b324 100644 --- a/src/plugins/intel_gpu/tests/test_cases/space_to_depth_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/space_to_depth_gpu_test.cpp @@ -36,7 +36,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -78,7 +78,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -126,7 +126,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -188,7 +188,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -236,7 +236,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -278,7 +278,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + 
cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -326,7 +326,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -388,7 +388,7 @@ class space_to_depth_fp16_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -438,7 +438,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -477,7 +477,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -525,7 +525,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -579,7 +579,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::blocks_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -626,7 +626,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -665,7 +665,7 @@ class 
space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -713,7 +713,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -767,7 +767,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { space_to_depth("space_to_depth", input_info("Input0"), space_to_depth::depth_first, block_size) ); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -822,7 +822,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { topology.add(space_to_depth("space_to_depth", input_info("reorder"), space_to_depth::depth_first, block_size)); topology.add(reorder("reorder_out", input_info("space_to_depth"), format::bfyx, data_types::f32)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); @@ -877,7 +877,7 @@ class space_to_depth_fp32_gpu: public ::testing::Test { topology.add(space_to_depth("space_to_depth", input_info("reorder"), space_to_depth::depth_first, block_size)); topology.add(reorder("reorder_out", input_info("space_to_depth"), format::bfyx, data_types::f32)); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("Input0", input1); diff --git a/src/plugins/intel_gpu/tests/test_cases/spatial_concatenate_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/spatial_concatenate_gpu_test.cpp index b91d8be7fc27fc..1963622528e88c 100644 --- a/src/plugins/intel_gpu/tests/test_cases/spatial_concatenate_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/spatial_concatenate_gpu_test.cpp @@ -38,7 +38,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 3)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -93,7 +93,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { 
tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 2)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -150,7 +150,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 2, padding({ 0, 0, 1, 1 }, 0.0f))); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -205,7 +205,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 3, padding({ 0, 0, 2, 0 }, { 0, 0, 0, 0 }))); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -265,7 +265,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in3", input3->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2"), input_info("in3") }, 3)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); net->set_input_data("in3", input3); @@ -353,7 +353,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in3", input3->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2"), input_info("in3") }, 0)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); net->set_input_data("in3", input3); @@ -413,7 +413,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 4)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -477,7 +477,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 3)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + 
cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -541,7 +541,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 2)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -616,7 +616,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in2", input2->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2") }, 0)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); @@ -746,7 +746,7 @@ class spatial_concatenate_f32_gpu: public ::testing::Test { tpl.add(input_layout("in3", input3->get_layout())); tpl.add(concatenation("conc", { input_info("in1"), input_info("in2"), input_info("in3") }, 0)); - cldnn::network::ptr net = get_network(engine, tpl, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr net = get_network(engine, tpl, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); net->set_input_data("in1", input1); net->set_input_data("in2", input2); net->set_input_data("in3", input3); diff --git a/src/plugins/intel_gpu/tests/test_cases/split_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/split_gpu_test.cpp index feffb2181af190..186f4e05a6a50a 100644 --- a/src/plugins/intel_gpu/tests/test_cases/split_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/split_gpu_test.cpp @@ -74,7 +74,7 @@ void split_test(int batch_num, int feature_num, int x_size, int y_size, std::vec std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -225,7 +225,7 @@ TEST(split_gpu_f32, basic_split_concat_optimization) { topology.add(concatenation("concat", inputs, 1)); topology.add(reorder("output", input_info("concat"), format::bfyx, data_types::f32)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -265,7 +265,7 @@ TEST(split_gpu_i64, basic_split_concat_optimization) { topology.add(concatenation("concat", inputs, 1)); topology.add(reorder("output", input_info("concat"), format::bfyx, data_types::i64)); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::optimize_data(true)); network network(engine, topology, config); @@ -540,7 +540,7 @@ TEST(split_gpu_f32, basic_in2x3x2x2_split_feature_bfyx) { 
std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -586,7 +586,7 @@ TEST(split_gpu_i64, basic_in2x3x2x2_split_feature_bfyx) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); @@ -649,7 +649,7 @@ TEST(split_gpu_f32, basic_in2x3x2x2_split_scale_feature_bfyx) { std::vector input_vec = generate_random_input(batch_num, feature_num, y_size, x_size, -10, 10); set_values(input, input_vec); - network network(engine, topology); + network network(engine, topology, get_test_default_config(engine)); network.set_input_data("input", input); network.set_input_data("scale_input0", scale_input0); diff --git a/src/plugins/intel_gpu/tests/test_cases/streams_test.cpp b/src/plugins/intel_gpu/tests/test_cases/streams_test.cpp index 77b52aad26478d..ba179242f2aa1d 100644 --- a/src/plugins/intel_gpu/tests/test_cases/streams_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/streams_test.cpp @@ -32,7 +32,7 @@ class gpu_streams: public ::testing::Test { input_layout("input", input->get_layout()), activation("relu", input_info("input"), activation_func::relu_negative_slope, activation_additional_params{ 0.5f, 0.f }, padding{ { 0, 0, 0, 0 }, 0 })); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); ASSERT_EQ(outputs.size(), size_t(1)); @@ -82,7 +82,7 @@ class gpu_streams: public ::testing::Test { membuf mem_buf0; membuf mem_buf1; { - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); { network0 = std::make_shared(prog, 0); std::ostream out_mem0(&mem_buf0); @@ -109,7 +109,7 @@ class gpu_streams: public ::testing::Test { } } } else { - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network0 = std::make_shared(prog, 0); network1 = std::make_shared(prog, 1); } @@ -185,7 +185,7 @@ class gpu_streams: public ::testing::Test { membuf mem_buf0; membuf mem_buf1; { - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); { network0 = std::make_shared(prog, 0); std::ostream out_mem0(&mem_buf0); @@ -212,7 +212,7 @@ class gpu_streams: public ::testing::Test { } } } else { - auto prog = program::build_program(engine, topology, ExecutionConfig{}); + auto prog = program::build_program(engine, topology, get_test_default_config(engine)); network0 = std::make_shared(prog, 0); network1 = std::make_shared(prog, 1); } diff --git a/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp index 974af981e28ae4..62aafb16ae9b7e 100644 --- a/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp +++ 
b/src/plugins/intel_gpu/tests/test_cases/strided_slice_gpu_test.cpp @@ -36,7 +36,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -81,7 +81,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -128,7 +128,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 1, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -176,7 +176,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 3})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -240,7 +240,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {0, 1, 1, 0}, {}, {}, {}, {}, {1, 2, 4, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -292,7 +292,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, { 1 }, {}, {}, {2, 2, 4, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -335,7 +335,7 @@ class strided_slice_gpu: public 
::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, { 1, 0, 1 }, {}, {}, {2, 2, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -376,7 +376,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {1, 0}, {}, {}, {}, {}, {2, 2, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -417,7 +417,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 2, 2, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -458,7 +458,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 1, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -503,7 +503,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -544,7 +544,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {2, 1, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -590,7 +590,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), 
input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {})); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -640,7 +640,7 @@ class strided_slice_gpu: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {})); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); @@ -759,7 +759,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -816,7 +816,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -875,7 +875,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {1, 1, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -935,7 +935,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 1, 1, 1}, {1, 1, 1, 1}, {}, {}, {}, {2, 2, 2, 3})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1011,7 +1011,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(input_layout("input4", strides->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {0, 1, 1, 0}, {}, {}, {}, {}, {1, 2, 4, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + 
cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", begin); @@ -1078,7 +1078,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1 }, {}, {}, {2, 2, 4, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1133,7 +1133,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1, 0, 1 }, {}, {}, {2, 2, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1187,7 +1187,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 0}, {}, {}, {}, {}, {2, 2, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1240,7 +1240,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {1, 2, 2, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1293,7 +1293,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 1, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1350,7 +1350,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), 
get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1401,7 +1401,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin, end, strides, {}, {}, {}, {}, {}, {1, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1452,7 +1452,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin, end, strides, {}, {}, {}, {}, {}, {1, 2, 2, 2})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1505,7 +1505,7 @@ class strided_slice_gpu_constants: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, {}, {}, {}, {2, 1, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1563,7 +1563,7 @@ class strided_slice_gpu_four_inputs: public ::testing::Test { topology.add(input_layout("input4", strides->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1 }, {}, {}, {2, 2, 4, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", begin); @@ -1621,7 +1621,7 @@ class strided_slice_gpu_four_inputs: public ::testing::Test { topology.add(input_layout("input4", strides->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {}, {}, { 1, 0, 1 }, {}, {}, {2, 2, 1, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); network->set_input_data("input2", begin); @@ -1668,7 +1668,7 @@ class strided_slice_gpu_i8: public ::testing::Test { topology.add(input_layout("input", input->get_layout())); topology.add(strided_slice("strided_slice", input_info("input"), begin_data, end_data, strides_data, {}, {}, {}, {}, {}, {1, 2, 2, 1})); - cldnn::network::ptr network = 
get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); @@ -1723,7 +1723,7 @@ class strided_slice_gpu_f32_i32: public ::testing::Test { topology.add(data("input4", strides)); topology.add(strided_slice("strided_slice", input_info("input"), input_info("input2"), input_info("input3"), input_info("input4"), {1, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {0, 1, 1, 0, 1}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {1, 1, 1, 8, 1})); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); diff --git a/src/plugins/intel_gpu/tests/test_cases/test_device_mem_usage_estimation.cpp b/src/plugins/intel_gpu/tests/test_cases/test_device_mem_usage_estimation.cpp index 1f32f6d3c253b9..bca2df35496d04 100644 --- a/src/plugins/intel_gpu/tests/test_cases/test_device_mem_usage_estimation.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/test_device_mem_usage_estimation.cpp @@ -14,8 +14,14 @@ using namespace tests; class test_device_mem_usage_estimation: public ::testing::Test { public: void test_basic(bool is_caching_test) { - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + ExecutionConfig cfg = get_test_default_config(get_test_engine()); + cfg.set_property(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); + std::shared_ptr engine1 = create_test_engine(); + if (engine1->get_device_info().supports_immad) { + // Enable this test for out_of_order queue-type if Onednn supports out_of_order + return; + } auto input1 = engine1->allocate_memory({ data_types::f16, format::bfyx,{ 2, 2, 256, 256} }); auto input2 = engine1->allocate_memory({ data_types::f16, format::bfyx,{ 2, 2, 256, 256} }); diff --git a/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp index 826c7f0a4ee025..e4d70dd8f81f9e 100644 --- a/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/tile_gpu_test.cpp @@ -72,7 +72,7 @@ class tile_gpu: public ::testing::Test { set_values(input, input_vec); tile_ref(input, output_ref, 0, 2); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -104,7 +104,7 @@ class tile_gpu: public ::testing::Test { set_values(input, input_vec); tile_ref(input, output_ref, 1, 2); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -140,7 +140,7 @@ class tile_gpu: public ::testing::Test { set_values(input, input_vec); tile_ref(input, output_ref, 2, 2); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = 
get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -172,7 +172,7 @@ class tile_gpu: public ::testing::Test { set_values(input, input_vec); tile_ref(input, output_ref, 3, 2); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -200,7 +200,7 @@ class tile_gpu: public ::testing::Test { set_values(input, input_vec); tile_ref(input, output_ref, 3, 4); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -237,7 +237,7 @@ class tile_gpu: public ::testing::Test { set_values(input, input_vec); tile_ref(input, output_ref, 2, 2); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data("input", input); auto outputs = network->execute(); @@ -292,7 +292,7 @@ TEST_F(tile_gpu, dynamic) { topology.add(input_layout("input", input_dyn_layout)); topology.add(tile("tile", input_info("input"), std::vector{ 1, 2, 1, 1 })); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); network network(engine, topology, config); network.set_input_data("input", input); @@ -672,7 +672,7 @@ struct tile_test result_id = reorder_result_id; } - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->set_input_data(input_data_id, input); diff --git a/src/plugins/intel_gpu/tests/test_cases/trim_to_outputs_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/trim_to_outputs_gpu_test.cpp index 2efdc4efaf646a..064f86c911e26f 100644 --- a/src/plugins/intel_gpu/tests/test_cases/trim_to_outputs_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/trim_to_outputs_gpu_test.cpp @@ -26,7 +26,7 @@ class trim_to_outputs: public ::testing::Test { */ void test_one_node_to_eliminate_case1(bool is_caching_test) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv1" })); config.set_property(ov::intel_gpu::optimize_data(false)); // to avoid adding reorders @@ -75,7 +75,7 @@ class trim_to_outputs: public ::testing::Test { */ void test_one_node_to_eliminate_case2(bool is_caching_test) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::custom_outputs(std::vector{ "conv1" })); config.set_property(ov::intel_gpu::optimize_data(false)); // to avoid adding reorders @@ -132,7 +132,7 @@ class 
trim_to_outputs: public ::testing::Test { */ void test_two_nodes_to_eliminate_case1(bool is_caching_test) { auto& engine = get_test_engine(); - ExecutionConfig config; + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::custom_outputs(std::vector<std::string>{ "conv4" })); config.set_property(ov::intel_gpu::optimize_data(false)); // to avoid adding reorders diff --git a/src/plugins/intel_gpu/tests/test_cases/variable.cpp b/src/plugins/intel_gpu/tests/test_cases/variable.cpp index 1e27aa9045e729..bcc25a2c5ebd1a 100644 --- a/src/plugins/intel_gpu/tests/test_cases/variable.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/variable.cpp @@ -35,7 +35,7 @@ struct variable_test : public ::testing::TestWithParam> { topology.add(eltwise{"sum", { input_info("input"), input_info("read_value") }, eltwise_mode::sum, {}, variable_layout.data_type}); topology.add(assign{"assign", { input_info("sum") }, "v0", variable_layout}); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->assign_variables_memories({ { "v0", std::make_shared(engine.allocate_memory(variable_layout)) } }); network->set_input_data("input", input_data); @@ -123,7 +123,7 @@ void test_exception_on_wrong_layout(bool is_caching_test) { topology.add(input_layout("wrong_input", wrong_input_data->get_layout())); topology.add(assign{"assign", { input_info("wrong_input") }, "v0", wrong_layout}); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->assign_variables_memories({ { "v0", std::make_shared(engine.allocate_memory(variable_layout)) } }); network->set_input_data("input", input_data); @@ -179,7 +179,7 @@ void test_variables_are_preserved_across_inferences(bool is_caching_test) { topology.add(data("dummy2", dummy2)); topology.add(read_value{"read_result", { input_info("dummy2") }, "v_result", variable_layout}); - cldnn::network::ptr network = get_network(engine, topology, ExecutionConfig(), get_test_stream_ptr(), is_caching_test); + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); network->assign_variables_memories({ { "v1", std::make_shared(engine.allocate_memory(variable_layout)) }, diff --git a/src/plugins/intel_gpu/tests/test_utils/test_utils.cpp b/src/plugins/intel_gpu/tests/test_utils/test_utils.cpp index e71a2b32bc53e3..9751aafbb3f237 100644 --- a/src/plugins/intel_gpu/tests/test_utils/test_utils.cpp +++ b/src/plugins/intel_gpu/tests/test_utils/test_utils.cpp @@ -286,10 +286,30 @@ std::vector> generic_test::generate_generic_test_params return all_generic_params; } +cldnn::ExecutionConfig get_test_default_config(const cldnn::engine& engine) { + return get_test_default_config(engine, {}); +} + +cldnn::ExecutionConfig get_test_default_config(const cldnn::engine& engine, ov::AnyMap::value_type values) { + return get_test_default_config(engine, {values}); +} + +cldnn::ExecutionConfig get_test_default_config(const cldnn::engine& engine, + std::initializer_list<ov::AnyMap::value_type> values) { + ExecutionConfig config(values); + + // The oneDNN engine currently does NOT support out-of-order queues + if (engine.get_device_info().supports_immad) {
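+        // Note: supports_immad is read here as "oneDNN kernels may be selected for this
+        // device"; the in-order fallback below rests on that assumption.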
+ config.set_property(ov::intel_gpu::queue_type(QueueTypes::in_order)); + } + + return config; +} + std::shared_ptr<cldnn::engine> create_test_engine() { auto ret = cldnn::engine::create(engine_types::ocl, runtime_types::ocl); #ifdef ENABLE_ONEDNN_FOR_GPU - if(ret->get_device_info().supports_immad) + if (ret->get_device_info().supports_immad) ret->create_onednn_engine({}); #endif return ret; @@ -304,12 +324,15 @@ cldnn::engine& get_test_engine() { } cldnn::stream_ptr get_test_stream_ptr() { + // Create OOO queue for test purposes. If in-order queue is needed in a test, then it should be created there explicitly + auto cfg = get_test_default_config(get_test_engine()); + + return get_test_stream_ptr(cfg); +} + +cldnn::stream_ptr get_test_stream_ptr(cldnn::ExecutionConfig cfg) { static std::shared_ptr<cldnn::stream> test_stream = nullptr; - if (!test_stream) { - // Create OOO queue for test purposes. If in-order queue is needed in a test, then it should be created there explicitly - ExecutionConfig cfg(ov::intel_gpu::queue_type(QueueTypes::out_of_order)); - test_stream = get_test_engine().create_stream(cfg); - } + test_stream = get_test_engine().create_stream(cfg); return test_stream; } diff --git a/src/plugins/intel_gpu/tests/test_utils/test_utils.h b/src/plugins/intel_gpu/tests/test_utils/test_utils.h index e21f259bc2f309..7fa5c5b0b11114 100644 --- a/src/plugins/intel_gpu/tests/test_utils/test_utils.h +++ b/src/plugins/intel_gpu/tests/test_utils/test_utils.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -55,9 +56,17 @@ namespace tests { std::shared_ptr<cldnn::engine> create_test_engine(); cldnn::engine& get_test_engine(); +cldnn::stream_ptr get_test_stream_ptr(cldnn::ExecutionConfig cfg); cldnn::stream_ptr get_test_stream_ptr(); cldnn::stream& get_test_stream(); +// Set default configuration for test-cases +cldnn::ExecutionConfig get_test_default_config(const cldnn::engine&); +cldnn::ExecutionConfig get_test_default_config(const cldnn::engine&, ov::AnyMap::value_type values); +cldnn::ExecutionConfig get_test_default_config(const cldnn::engine&, + std::initializer_list<ov::AnyMap::value_type> values); + + template bool has_node_with_type(cldnn::program& prog) { for (auto node : prog.get_processing_order()) { From 05e54e9f3dd43573e22e5c175ed7894841f30496 Mon Sep 17 00:00:00 2001 From: hyunback kim Date: Tue, 21 Mar 2023 18:56:41 +0900 Subject: [PATCH 011/296] [GPU] Update the latest onedNN3.1 (#16381) - Fix group conv regression issue Signed-off-by: hyunback --- src/plugins/intel_gpu/thirdparty/CMakeLists.txt | 1 + src/plugins/intel_gpu/thirdparty/onednn_gpu | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/thirdparty/CMakeLists.txt b/src/plugins/intel_gpu/thirdparty/CMakeLists.txt index 60883cbac6b5ad..f54f0a1d80853d 100644 --- a/src/plugins/intel_gpu/thirdparty/CMakeLists.txt +++ b/src/plugins/intel_gpu/thirdparty/CMakeLists.txt @@ -91,6 +91,7 @@ if(ENABLE_ONEDNN_FOR_GPU) "-DDNNL_BUILD_EXAMPLES=OFF" "-DDNNL_BLAS_VENDOR=NONE" "-DDNNL_LIBRARY_TYPE=STATIC" + "-DONEDNN_BUILD_GRAPH=OFF" "-DOpenCL_LIBRARY=${OpenCL_LIBRARY}" "-DOpenCL_INCLUDE_DIR=${OpenCL_INCLUDE_DIR}" ) diff --git a/src/plugins/intel_gpu/thirdparty/onednn_gpu b/src/plugins/intel_gpu/thirdparty/onednn_gpu index ad34c124895690..b52e9cd54df5af 160000 --- a/src/plugins/intel_gpu/thirdparty/onednn_gpu +++ b/src/plugins/intel_gpu/thirdparty/onednn_gpu @@ -1 +1 @@ -Subproject commit ad34c124895690bafd2b110577639824899ecbca +Subproject commit b52e9cd54df5af92d1d586d435cdd514dd7617fe
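For illustration, a typical test body that picks up these defaults might look like the following minimal sketch. It assumes only the helpers declared in test_utils.h above; the primitive names and shapes are placeholders:

    void test_basic(bool is_caching_test) {
        auto& engine = get_test_engine();
        // Start from the shared defaults: on immad-capable (oneDNN) devices this
        // selects an in-order queue, elsewhere it leaves the configuration untouched.
        ExecutionConfig config = get_test_default_config(engine);
        // Per-test options are layered on top of the defaults, not on a bare ExecutionConfig().
        config.set_property(ov::intel_gpu::optimize_data(true));

        auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
        topology topology;
        topology.add(input_layout("input", input->get_layout()));
        topology.add(activation("relu", input_info("input"), activation_func::relu));

        cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
        network->set_input_data("input", input);
        auto outputs = network->execute();
    }

From 8926282ac57491c3e66ef693b938697d2b8d5e52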
Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Tue, 21 Mar 2023 10:57:48 +0100 Subject: [PATCH 012/296] DOCS shift to rst - `Multi device execution` article (#16400) --- docs/OV_Runtime_UG/multi_device.md | 104 +++++++++++++---------------- 1 file changed, 46 insertions(+), 58 deletions(-) diff --git a/docs/OV_Runtime_UG/multi_device.md b/docs/OV_Runtime_UG/multi_device.md index 01d03ef100bd3f..e1b6492d59e5a2 100644 --- a/docs/OV_Runtime_UG/multi_device.md +++ b/docs/OV_Runtime_UG/multi_device.md @@ -7,13 +7,13 @@ To run inference on multiple devices, you can choose either of the following way - Use the :ref:`CUMULATIVE_THROUGHPUT option ` of the Automatic Device Selection mode. This way, you can use all available devices in the system without the need to specify them. - Use the Multi-Device execution mode. This page will explain how it works and how to use it. -@endsphinxdirective - -## How MULTI Works +How MULTI Works +#################### The Multi-Device execution mode, or MULTI for short, acts as a "virtual" or a "proxy" device, which does not bind to a specific type of hardware. Instead, it assigns available computing devices to particular inference requests, which are then executed in parallel. The potential gains from using Multi-Device execution are: + * improved throughput from using multiple devices at once, * increase in performance stability due to multiple devices sharing inference workload. @@ -22,31 +22,29 @@ Importantly, the Multi-Device mode does not change the application logic, so it Note that the performance increase in this mode comes from utilizing multiple devices at once. This means that you need to provide the devices with enough inference requests to keep them busy, otherwise you will not benefit much from using MULTI. -## Using the Multi-Device Mode +Using the Multi-Device Mode +########################### Following the OpenVINO™ naming convention, the Multi-Device mode is assigned the label of “MULTI.” The only configuration option available for it is a prioritized list of devices to use: -@sphinxdirective -+---------------------------+---------------------------------+------------------------------------------------------------+ -| Property | Property values | Description | -+===========================+=================================+============================================================+ -| | | MULTI: | | Specifies the devices available for selection. | -| | | comma-separated, no spaces | | The device sequence will be taken as priority | -+---------------------------+---------------------------------+ | from high to low. | -| ov::device::priorities | | device names | | Priorities can be set directly as a string. | -| | | comma-separated, no spaces | | -+---------------------------+---------------------------------+------------------------------------------------------------+ ++----------------------------+---------------------------------+------------------------------------------------------------+ +| Property | Property values | Description | ++============================+=================================+============================================================+ +| | | MULTI: | | Specifies the devices available for selection. | +| | | comma-separated, no spaces | | The device sequence will be taken as priority | ++----------------------------+---------------------------------+ | from high to low. | +| ``ov::device::priorities`` | | device names | | Priorities can be set directly as a string. 
| +| | | comma-separated, no spaces | | ++----------------------------+---------------------------------+------------------------------------------------------------+ -@endsphinxdirective Specifying the device list explicitly is required by MULTI, as it defines the devices available for inference and sets their priorities. Importantly, the list may also specify the number of requests for MULTI to keep for each device, as described below. -Note that OpenVINO™ Runtime enables you to use “GPU” as an alias for “GPU.0” in function calls. More details on enumerating devices can be found in [Working with devices](supported_plugins/Device_Plugins.md). +Note that OpenVINO™ Runtime enables you to use “GPU” as an alias for “GPU.0” in function calls. More details on enumerating devices can be found in :doc:`Working with devices `. The following commands are accepted by the API: -@sphinxdirective .. tab:: C++ @@ -60,11 +58,9 @@ The following commands are accepted by the API: :language: python :fragment: [MULTI_0] -@endsphinxdirective Notice that MULTI allows you to **change device priorities on the fly**. You can alter the order, exclude a device, and bring an excluded device back. Still, it does not allow adding new devices. -@sphinxdirective .. tab:: C++ @@ -78,19 +74,17 @@ Notice that MULTI allows you to **change device priorities on the fly**. You can :language: python :fragment: [MULTI_1] -@endsphinxdirective - +One more thing you can define is the **number of requests to allocate for each device**. You can do it simply by adding the number to each device in parentheses, like this: ``"MULTI:CPU(2),GPU(2)"``. However, this method is not recommended as it is not performance-portable. The suggested approach is to configure individual devices and query the resulting number of requests to be used at the application level, as described in `Configuring Individual Devices and Creating MULTI On Top <#configuring-individual-devices-and-creating-the-multi-device-on-top>`__. -One more thing you can define is the **number of requests to allocate for each device**. You can do it simply by adding the number to each device in parentheses, like this: `"MULTI:CPU(2),GPU(2)"`. However, this method is not recommended as it is not performance-portable. The suggested approach is to configure individual devices and query the resulting number of requests to be used at the application level, as described in [Configuring Individual Devices and Creating MULTI On Top](#config-multi-on-top). +To check what devices are present in the system, you can use the Device API. For information on how to do it, check :doc:`Query device properties and configuration `. -To check what devices are present in the system, you can use the Device API. For information on how to do it, check [Query device properties and configuration](supported_plugins/config_properties.md). +Configuring Individual Devices and Creating the Multi-Device On Top ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -### Configuring Individual Devices and Creating the Multi-Device On Top As mentioned previously, executing inference with MULTI may be set up by configuring individual devices before creating the "MULTI" device on top. It may be considered for performance reasons. -@sphinxdirective .. 
tab:: C++ @@ -104,17 +98,15 @@ As mentioned previously, executing inference with MULTI may be set up by configu :language: python :fragment: [MULTI_4] -@endsphinxdirective Alternatively, you can combine all the individual device settings into a single config file and load it for MULTI to parse. See the code example in the next section. +Querying the Optimal Number of Inference Requests ++++++++++++++++++++++++++++++++++++++++++++++++++ - -### Querying the Optimal Number of Inference Requests When using MULTI, you don't need to sum over included devices yourself, you can query the optimal number of requests directly, -using the [configure devices](supported_plugins/config_properties.md) property: +using the :doc:`configure devices ` property: -@sphinxdirective .. tab:: C++ @@ -122,56 +114,52 @@ using the [configure devices](supported_plugins/config_properties.md) property: :language: cpp :fragment: [part5] -@endsphinxdirective - - -## Using the Multi-Device with OpenVINO Samples and Benchmarking Performance +Using the Multi-Device with OpenVINO Samples and Benchmarking Performance +######################################################################### To see how the Multi-Device execution is used in practice and test its performance, take a look at OpenVINO's Benchmark Application which presents the optimal performance of the plugin without the need for additional settings, like the number of requests or CPU threads. Here is an example command to evaluate performance of CPU + GPU: -```sh -./benchmark_app –d MULTI:CPU,GPU –m -i -niter 1000 -``` +.. code-block:: sh + + ./benchmark_app –d MULTI:CPU,GPU –m -i -niter 1000 + + +For more information, refer to the :doc:`C++ ` or :doc:`Python ` version instructions. -For more information, refer to the [C++](../../samples/cpp/benchmark_app/README.md) or [Python](../../tools/benchmark_tool/README.md) version instructions. -@sphinxdirective .. note:: You can keep using the FP16 IR without converting it to FP32, even if some of the listed devices do not support it. The conversion will be done automatically for you. - No demos are yet fully optimized for MULTI, by means of supporting the ov::optimal_number_of_infer_requests property, using the GPU streams/throttling, and so on. -@endsphinxdirective + No demos are yet fully optimized for MULTI, by means of supporting the ``ov::optimal_number_of_infer_requests`` property, using the GPU streams/throttling, and so on. + +Performance Considerations for the Multi-Device Execution +######################################################### -## Performance Considerations for the Multi-Device Execution For best performance when using the MULTI execution mode you should consider a few recommendations: -- MULTI usually performs best when the fastest device is specified first in the device candidate list. -This is particularly important when the request-level parallelism is not sufficient -(e.g. the number of requests is not enough to saturate all devices). -- Just like with any throughput-oriented execution mode, it is highly recommended to query the optimal number of inference requests -directly from the instance of the `ov:compiled_model`. Refer to the code of the previously mentioned `benchmark_app` for more details. -- Execution on certain device combinations, for example CPU+GPU, performs better with certain knobs. Refer to the `benchmark_app` code for details. 
One specific example is disabling GPU driver polling, which in turn requires multiple GPU streams to balance out slower -communication of inference completion from the device to the host. -- The MULTI logic always attempts to save on copying data between device-agnostic and user-facing inference requests, -and device-specific 'worker' requests that are being actually scheduled behind the scene. -To facilitate the copy savings, it is recommended to run the requests in the order in which they were created. + +- MULTI usually performs best when the fastest device is specified first in the device candidate list. This is particularly important when the request-level parallelism is not sufficient (e.g. the number of requests is not enough to saturate all devices). +- Just like with any throughput-oriented execution mode, it is highly recommended to query the optimal number of inference requests directly from the instance of the ``ov::compiled_model``. Refer to the code of the previously mentioned ``benchmark_app`` for more details. +- Execution on certain device combinations, for example CPU+GPU, performs better with certain knobs. Refer to the ``benchmark_app`` code for details. One specific example is disabling GPU driver polling, which in turn requires multiple GPU streams to balance out slower communication of inference completion from the device to the host. +- The MULTI logic always attempts to save on copying data between device-agnostic and user-facing inference requests, and device-specific 'worker' requests that are actually scheduled behind the scenes. To facilitate the copy savings, it is recommended to run the requests in the order in which they were created. - While performance of accelerators combines well with MULTI, the CPU+GPU execution may introduce certain performance issues. It is due to the devices sharing some resources, like power or bandwidth. Enabling the GPU throttling hint, which saves a CPU thread for CPU inference, is an example of a recommended solution addressing this issue. +Additional Resources +#################### -## Additional Resources +- :doc:`Supported Devices ` +- :doc:`Automatic Device Selection ` -- [Supported Devices](supported_plugins/Supported_Devices.md) -- [Automatic Device Selection](./auto_device_selection.md) -@sphinxdirective .. raw:: html -@endsphinxdirective -> **NOTE**: This video is currently available only for C++, but many of the same concepts apply to Python. .. note:: This video is currently available only for C++, but many of the same concepts apply to Python. 
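+As a practical illustration of the recommendations above, a minimal C++ sketch could look as follows (the model path and the two-device priority list are placeholders, not a prescription):
+
+.. code-block:: cpp
+
+   #include <cstdint>
+   #include <vector>
+   #include <openvino/openvino.hpp>
+
+   int main() {
+       ov::Core core;
+       auto model = core.read_model("model.xml");
+       // Fastest device first, per the first recommendation above
+       auto compiled_model = core.compile_model(model, "MULTI",
+                                                ov::device::priorities("GPU", "CPU"));
+       // Query the optimal number of requests instead of hard-coding it
+       uint32_t nireq = compiled_model.get_property(ov::optimal_number_of_infer_requests);
+       std::vector<ov::InferRequest> requests;
+       for (uint32_t i = 0; i < nireq; ++i)
+           requests.push_back(compiled_model.create_infer_request());
+       return 0;
+   }
+
+Creating exactly the reported number of requests keeps all listed devices saturated without oversubscribing the host.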
+ +@endsphinxdirective From 24ff43aa5b7b60cbd1a809965ef8f582b2016579 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 21 Mar 2023 14:16:07 +0400 Subject: [PATCH 013/296] Fixed comparison of iterators (#16428) --- src/core/tests/any.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/tests/any.cpp b/src/core/tests/any.cpp index 6b009cd15f3a6a..f66ae9720f0511 100644 --- a/src/core/tests/any.cpp +++ b/src/core/tests/any.cpp @@ -181,8 +181,8 @@ TEST_F(AnyTests, AnyAsMapOfMapOfAnys) { ASSERT_NE(testMap.find("refMap1"), testMap.end()); auto testMap1 = testMap.at("refMap1").as>(); - ASSERT_NE(testMap1.find("testParamInt"), testMap.end()); - ASSERT_NE(testMap1.find("testParamString"), testMap.end()); + ASSERT_NE(testMap1.find("testParamInt"), testMap1.end()); + ASSERT_NE(testMap1.find("testParamString"), testMap1.end()); int testInt1 = testMap1["testParamInt"].as(); std::string testString1 = testMap1["testParamString"].as(); @@ -192,8 +192,8 @@ TEST_F(AnyTests, AnyAsMapOfMapOfAnys) { ASSERT_NE(testMap.find("refMap2"), testMap.end()); auto testMap2 = testMap.at("refMap2").as>(); - ASSERT_NE(testMap2.find("testParamInt"), testMap.end()); - ASSERT_NE(testMap2.find("testParamString"), testMap.end()); + ASSERT_NE(testMap2.find("testParamInt"), testMap2.end()); + ASSERT_NE(testMap2.find("testParamString"), testMap2.end()); int testInt2 = testMap2["testParamInt"].as(); std::string testString2 = testMap2["testParamString"].as(); From d402b6ed3e1bf44f7a99695b47bd827016a5d954 Mon Sep 17 00:00:00 2001 From: Liubov Talamanova Date: Tue, 21 Mar 2023 10:53:01 +0000 Subject: [PATCH 014/296] [POT] Return Mul to ignored ops for transformers (except CPU_SPR) (#16407) --- .../tools/pot/algorithms/quantization/fake_quantize.py | 2 +- .../pot/openvino/tools/pot/algorithms/quantization/utils.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/fake_quantize.py b/tools/pot/openvino/tools/pot/algorithms/quantization/fake_quantize.py index 2211f2531e8ae1..f931c1e906228c 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/fake_quantize.py +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/fake_quantize.py @@ -178,7 +178,7 @@ def insert_fake_quantize_nodes(config, model, qscheme=None): ignored_params.update(deepcopy(config['ignored'])) if config['model_type']: - ignored_params['operations'] += get_ignored_operations(config['model_type']) + ignored_params['operations'] += get_ignored_operations(config['model_type'], config['target_device']) if qscheme: for key in qscheme: diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py b/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py index bdfc908c050cbb..9ad333a72bfcf5 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py @@ -321,9 +321,11 @@ def get_input_shape_for_bias(activations_statistics, input_node_name): return input_shape -def get_ignored_operations(model): +def get_ignored_operations(model_type, target_device): operation = {"transformer": [{"type": "Add"}, {"type": "Power"}, {"type": "Squeeze"}, {"type": "Subtract"}, {"type": "ReduceMean"}, {"type": "SquaredDifference"}, {"type": "MVN"}]} - return operation[model] + if target_device != 'CPU_SPR': + operation['transformer'].append({"type": "Multiply"}) + return operation[model_type] From 0893efe0732f77ae4aab03437b16fc305935d4bd Mon Sep 17 00:00:00 2001 From: Tomasz 
Jankowski Date: Tue, 21 Mar 2023 13:58:34 +0100 Subject: [PATCH 015/296] [Core] Assure TensorVector comparison uniqueness (#16232) * Assure TensorVector comparison uniqueness * Add test * Make the flow clear --- src/core/src/bound_evaluate.cpp | 33 ++++++++++++++++++++----------- src/core/tests/bound_evaluate.cpp | 28 ++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/src/core/src/bound_evaluate.cpp b/src/core/src/bound_evaluate.cpp index 930d71b80cefee..1aea44f9fa7ee2 100644 --- a/src/core/src/bound_evaluate.cpp +++ b/src/core/src/bound_evaluate.cpp @@ -179,14 +179,26 @@ ov::Tensor or_tensor(const ov::Tensor& lhs, const ov::Tensor& rhs) { } struct TensorVectorCmp { + // Comparing Tensor vectors as numbers composed with pointers as digits. + // Indexed loop used to preserve order of comparison. bool operator()(const ov::TensorVector& lhs, const ov::TensorVector& rhs) const { - auto rhs_it = rhs.begin(); - return std::any_of(lhs.begin(), lhs.end(), [&rhs_it](const ov::Tensor& lhs) { - bool is_less = - (lhs && *rhs_it) ? lhs.data() < rhs_it->data() : static_cast(lhs) < static_cast(*rhs_it); - ++rhs_it; - return is_less; - }); + const auto lhs_size = lhs.size(); + const auto rhs_size = rhs.size(); + + if (lhs_size < rhs_size) + return true; + if (lhs_size > rhs_size) + return false; + + for (size_t i = 0; i < lhs_size; ++i) { + if (lhs[i].data() < rhs[i].data()) + return true; + if (lhs[i].data() > rhs[i].data()) + return false; + } + + // if all equals + return false; } }; @@ -281,17 +293,14 @@ bool ov::interval_bound_evaluator(const Node* node, auto low_1 = ov::evaluate_lower_bound(node->get_input_source_output(1)); auto up_0 = ov::evaluate_upper_bound(node->get_input_source_output(0)); auto up_1 = ov::evaluate_upper_bound(node->get_input_source_output(1)); + if (!low_0 || !low_1 || !up_0 || !up_1) + return false; std::set input_variants = {{low_0, low_1}, {low_0, up_1}, {up_0, low_1}, {up_0, up_1}}; - for (const auto& variant_of_input_vector : input_variants) - for (const auto& input_tensor : variant_of_input_vector) - if (!input_tensor) - return false; - if (input_variants.size() == 1) return node->evaluate(upper_output_values, *input_variants.begin()) && node->evaluate(lower_output_values, *input_variants.begin()); diff --git a/src/core/tests/bound_evaluate.cpp b/src/core/tests/bound_evaluate.cpp index cb855ddfa76e16..664f8ebcb810f7 100644 --- a/src/core/tests/bound_evaluate.cpp +++ b/src/core/tests/bound_evaluate.cpp @@ -51,3 +51,31 @@ TEST_F(EvaluateBoundTest, no_exception_when_node_has_output_with_dynamic_element EXPECT_NO_THROW(evaluate_both_bounds(fn_op)); } + +using BoundEvaluatorTest = ::testing::Test; +TEST(BoundEvaluatorTest, no_exception_on_single_bound) { + constexpr auto et = element::i32; + const auto s = Shape{1, 1}; + const auto a = std::make_shared(et, PartialShape{s}); + const auto b = Constant::create(et, s, {1}); + const auto sub = std::make_shared(a, b); + + int32_t a_l[1] = {1}; + a->get_output_tensor(0).set_lower_value(Tensor{et, s, a_l}); + + int32_t o_[1] = {INT32_MIN}; // initial value of output tensor is not needed, it's set to check whether changed + TensorVector output{{et, s, o_}}; + // evaluations won't be performed due to missing upper bound tensor of parameter a + ASSERT_NO_THROW(sub->evaluate_lower(output)); + EXPECT_EQ(o_[0], INT32_MIN); + ASSERT_NO_THROW(sub->evaluate_upper(output)); + EXPECT_EQ(o_[0], INT32_MIN); + + int32_t a_u[1] = {11}; + a->get_output_tensor(0).set_upper_value(Tensor{et, s, a_u}); + // now 
both bounds of sub node can be calculated + ASSERT_NO_THROW(sub->evaluate_lower(output)); + EXPECT_EQ(o_[0], 0); + ASSERT_NO_THROW(sub->evaluate_upper(output)); + EXPECT_EQ(o_[0], 10); +} From 85d9c11b9761bf03486279c9d7aaaf27eb1f6629 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 21 Mar 2023 17:13:20 +0400 Subject: [PATCH 016/296] Fixed build (#16442) --- src/plugins/auto/plugin.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/plugins/auto/plugin.cpp b/src/plugins/auto/plugin.cpp index 165f5c3db6cf3c..65bd8793f71a6a 100644 --- a/src/plugins/auto/plugin.cpp +++ b/src/plugins/auto/plugin.cpp @@ -895,9 +895,9 @@ std::string MultiDeviceInferencePlugin::GetDeviceList(const std::map Date: Tue, 21 Mar 2023 15:05:11 +0100 Subject: [PATCH 017/296] flush by recreating constant (#16430) --- .../test_transformations/test_offline_api.py | 5 +++-- .../flush_fp32_subnormals_to_zero.cpp | 22 +++++++++++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/bindings/python/tests/test_transformations/test_offline_api.py b/src/bindings/python/tests/test_transformations/test_offline_api.py index fdc7eec0048ce9..e153fc9412c19a 100644 --- a/src/bindings/python/tests/test_transformations/test_offline_api.py +++ b/src/bindings/python/tests/test_transformations/test_offline_api.py @@ -354,5 +354,6 @@ def test_flush_fp32_subnormals_to_zero(): apply_moc_transformations(model, cf=False, smart_reshape=True) # apply_flush_fp32_subnormals_to_zero is called inside - assert np.all(weights.data[4:8] != subnorm_val) - assert np.all(weights.data[4:8] == 0.0) + new_weights = add_node.input_value(1).get_node() + assert np.all(new_weights.data[4:8] != subnorm_val) + assert np.all(new_weights.data[4:8] == 0.0) diff --git a/src/common/transformations/src/transformations/flush_fp32_subnormals_to_zero.cpp b/src/common/transformations/src/transformations/flush_fp32_subnormals_to_zero.cpp index f01c60b1c0389b..ca03c288092260 100644 --- a/src/common/transformations/src/transformations/flush_fp32_subnormals_to_zero.cpp +++ b/src/common/transformations/src/transformations/flush_fp32_subnormals_to_zero.cpp @@ -36,14 +36,28 @@ ov::pass::FlushFP32SubnormalsToZero::FlushFP32SubnormalsToZero() { bool has_subnormals = false; for (size_t i = 0; i < size; ++i) { if (fpclassify(std::abs(data[i])) == FP_SUBNORMAL) { - data[i] = 0.0f; has_subnormals = true; + break; } } - if (has_subnormals) - return true; + if (!has_subnormals) + return false; + + auto new_constant = std::make_shared(ov::element::f32, node->get_shape()); + auto* dst_data = const_cast(new_constant->get_data_ptr()); + + for (size_t i = 0; i < size; ++i) { + if (fpclassify(std::abs(data[i])) != FP_SUBNORMAL) + dst_data[i] = data[i]; + else + dst_data[i] = 0.0f; + } + + new_constant->set_friendly_name(node->get_friendly_name()); + ov::copy_runtime_info(node, new_constant); + ov::replace_node(node, new_constant); - return false; + return true; }; auto m = make_shared(node_pattern, matcher_name); From 234f36e9b74d6b6d4f4a96b87c2660e95278c56d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Do=C5=82bniak?= Date: Tue, 21 Mar 2023 18:23:29 +0100 Subject: [PATCH 018/296] TopK v11 usage in ONNX FE (#16449) --- .../onnx/frontend/src/op/hardmax.cpp | 22 +++++++------ src/frontends/onnx/frontend/src/op/topk.cpp | 33 +++++++++---------- .../src/utils/arg_min_max_factory.cpp | 20 ++++++----- 3 files changed, 39 insertions(+), 36 deletions(-) diff --git a/src/frontends/onnx/frontend/src/op/hardmax.cpp 
b/src/frontends/onnx/frontend/src/op/hardmax.cpp index eb9b421cd71ed6..5e726e2458b55d 100644 --- a/src/frontends/onnx/frontend/src/op/hardmax.cpp +++ b/src/frontends/onnx/frontend/src/op/hardmax.cpp @@ -4,6 +4,8 @@ #include "op/hardmax.hpp" +#include + #include "exceptions.hpp" #include "ngraph/builder/reshape.hpp" #include "ngraph/op/one_hot.hpp" @@ -37,11 +39,11 @@ OutputVector hardmax(const Node& node) { const auto indices_axis = 1; const auto topk = - std::make_shared(coerced_tensor, - default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}), - indices_axis, - default_opset::TopK::Mode::MAX, - default_opset::TopK::SortType::NONE); + std::make_shared(coerced_tensor, + default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}), + indices_axis, + ov::opset11::TopK::Mode::MAX, + ov::opset11::TopK::SortType::NONE); const auto on_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}); const auto off_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {0}); @@ -71,11 +73,11 @@ OutputVector hardmax(const Node& node) { row_size = ngraph::onnx_import::reshape::interpret_as_scalar(row_size); const auto topk = - std::make_shared(input, - default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}), - axis, - default_opset::TopK::Mode::MAX, - default_opset::TopK::SortType::NONE); + std::make_shared(input, + default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}), + axis, + ov::opset11::TopK::Mode::MAX, + ov::opset11::TopK::SortType::NONE); const auto on_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}); const auto off_value = default_opset::Constant::create(ngraph::element::i64, Shape{}, {0}); diff --git a/src/frontends/onnx/frontend/src/op/topk.cpp b/src/frontends/onnx/frontend/src/op/topk.cpp index df884eaa54c0a9..dc33b103613df2 100644 --- a/src/frontends/onnx/frontend/src/op/topk.cpp +++ b/src/frontends/onnx/frontend/src/op/topk.cpp @@ -6,6 +6,7 @@ #include #include +#include #include "default_opset.hpp" #include "ngraph/node.hpp" @@ -37,13 +38,12 @@ OutputVector topk(const Node& node) { const auto k_node = node.get_attribute_as_constant("k"); const std::int64_t axis{node.get_attribute_value("axis", -1)}; - std::shared_ptr top_k = - std::make_shared(data, - k_node, - axis, - default_opset::TopK::Mode::MAX, - default_opset::TopK::SortType::SORT_VALUES, - element::i64); + std::shared_ptr top_k = std::make_shared(data, + k_node, + axis, + ov::opset11::TopK::Mode::MAX, + ov::opset11::TopK::SortType::SORT_VALUES, + element::i64); return {top_k->output(0), top_k->output(1)}; } @@ -55,13 +55,12 @@ OutputVector topk(const Node& node) { auto k = get_k(node); const std::int64_t axis{node.get_attribute_value("axis", -1)}; - std::shared_ptr top_k = - std::make_shared(data, - k, - axis, - default_opset::TopK::Mode::MAX, - default_opset::TopK::SortType::SORT_VALUES, - element::i64); + std::shared_ptr top_k = std::make_shared(data, + k, + axis, + ov::opset11::TopK::Mode::MAX, + ov::opset11::TopK::SortType::SORT_VALUES, + element::i64); return {top_k->output(0), top_k->output(1)}; } @@ -79,13 +78,13 @@ OutputVector topk(const Node& node) { const auto sorted = node.get_attribute_value("sorted", 1); // Map attribute values to nGraph enums - const auto sort_type = sorted ? default_opset::TopK::SortType::SORT_VALUES : default_opset::TopK::SortType::NONE; + const auto sort_type = sorted ? 
ov::opset11::TopK::SortType::SORT_VALUES : ov::opset11::TopK::SortType::NONE; const auto compute_max = static_cast(largest); - const auto mode = compute_max ? default_opset::TopK::Mode::MAX : default_opset::TopK::Mode::MIN; + const auto mode = compute_max ? ov::opset11::TopK::Mode::MAX : ov::opset11::TopK::Mode::MIN; std::shared_ptr top_k = - std::make_shared(data, k, axis, mode, sort_type, element::i64); + std::make_shared(data, k, axis, mode, sort_type, element::i64); return {top_k->output(0), top_k->output(1)}; } diff --git a/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.cpp b/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.cpp index d5a3fdb827061f..9807367273e46f 100644 --- a/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.cpp +++ b/src/frontends/onnx/frontend/src/utils/arg_min_max_factory.cpp @@ -4,6 +4,8 @@ #include "utils/arg_min_max_factory.hpp" +#include + #include "default_opset.hpp" #include "ngraph/opsets/opset1.hpp" #include "ngraph/validation_util.hpp" @@ -18,14 +20,14 @@ ArgMinMaxFactory::ArgMinMaxFactory(const Node& node) m_select_last_index{node.get_attribute_value("select_last_index", 0)} {} std::shared_ptr ArgMinMaxFactory::make_arg_max() const { - return make_topk_subgraph(default_opset::TopK::Mode::MAX); + return make_topk_subgraph(ov::opset11::TopK::Mode::MAX); } std::shared_ptr ArgMinMaxFactory::make_arg_min() const { - return make_topk_subgraph(default_opset::TopK::Mode::MIN); + return make_topk_subgraph(ov::opset11::TopK::Mode::MIN); } -std::shared_ptr ArgMinMaxFactory::make_topk_subgraph(default_opset::TopK::Mode mode) const { +std::shared_ptr ArgMinMaxFactory::make_topk_subgraph(ov::opset11::TopK::Mode mode) const { const auto k_node = default_opset::Constant::create(ngraph::element::i64, Shape{}, {1}); if (m_select_last_index == 1) { @@ -59,11 +61,11 @@ std::shared_ptr ArgMinMaxFactory::make_topk_subgraph(default_opset const auto axis_node = default_opset::Constant::create(ngraph::element::i64, Shape{1}, {normalized_axis}); const auto reverse = std::make_shared(m_input_node, axis_node, opset1::Reverse::Mode::INDEX); - const auto topk = std::make_shared(reverse, - k_node, - normalized_axis, - mode, - default_opset::TopK::SortType::NONE); + const auto topk = std::make_shared(reverse, + k_node, + normalized_axis, + mode, + ov::opset11::TopK::SortType::NONE); const auto data_shape = std::make_shared(m_input_node); const auto dims_on_axis = std::make_shared( @@ -88,7 +90,7 @@ std::shared_ptr ArgMinMaxFactory::make_topk_subgraph(default_opset } const auto topk = - std::make_shared(m_input_node, k_node, m_axis, mode, default_opset::TopK::SortType::NONE); + std::make_shared(m_input_node, k_node, m_axis, mode, ov::opset11::TopK::SortType::NONE); const auto result = std::make_shared(topk->output(1), element::i64); From b70e56d11039c9d3fbb774f2bee4c92304f11fe9 Mon Sep 17 00:00:00 2001 From: Tingqian Li Date: Wed, 22 Mar 2023 02:39:25 +0800 Subject: [PATCH 019/296] [CPU] Support using BF16 in INT8 models (#15663) --- src/plugins/intel_cpu/src/config.cpp | 4 -- src/plugins/intel_cpu/src/config.h | 2 - .../intel_cpu/src/dnnl_postops_composer.cpp | 2 +- src/plugins/intel_cpu/src/graph.cpp | 5 --- src/plugins/intel_cpu/src/graph_dumper.cpp | 4 +- src/plugins/intel_cpu/src/graph_optimizer.cpp | 27 ------------ src/plugins/intel_cpu/src/nodes/conv.cpp | 5 --- .../intel_cpu/src/nodes/fullyconnected.cpp | 34 +++++++-------- src/plugins/intel_cpu/src/nodes/matmul.cpp | 42 +++++-------------- .../src/utils/debug_capabilities.cpp | 5 --- 
src/plugins/intel_cpu/thirdparty/onednn | 2 +- 11 files changed, 32 insertions(+), 100 deletions(-) diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp index 39a3429c67be53..04c8c6467684f2 100644 --- a/src/plugins/intel_cpu/src/config.cpp +++ b/src/plugins/intel_cpu/src/config.cpp @@ -139,13 +139,11 @@ void Config::readProperties(const std::map &prop) { if (val == PluginConfigParams::YES) { if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) { enforceBF16 = true; - manualEnforceBF16 = true; } else { IE_THROW() << "Platform doesn't support BF16 format"; } } else if (val == PluginConfigParams::NO) { enforceBF16 = false; - manualEnforceBF16 = false; } else { IE_THROW() << "Wrong value for property key " << PluginConfigParams::KEY_ENFORCE_BF16 << ". Expected only YES/NO"; @@ -159,13 +157,11 @@ void Config::readProperties(const std::map &prop) { if (val == "bf16") { if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core)) { enforceBF16 = true; - manualEnforceBF16 = true; } else { IE_THROW() << "Platform doesn't support BF16 format"; } } else if (val == "f32") { enforceBF16 = false; - manualEnforceBF16 = false; } else { IE_THROW() << "Wrong value for property key " << ov::inference_precision.name() << ". Supported values: bf16, f32"; diff --git a/src/plugins/intel_cpu/src/config.h b/src/plugins/intel_cpu/src/config.h index 9f2680fbe88e97..8c399d5189a30f 100644 --- a/src/plugins/intel_cpu/src/config.h +++ b/src/plugins/intel_cpu/src/config.h @@ -52,12 +52,10 @@ struct Config { #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) LPTransformsMode lpTransformsMode = LPTransformsMode::On; bool enforceBF16 = true; - bool manualEnforceBF16 = false; #else // Currently INT8 mode is not optimized on ARM / RISCV or other non-x86 platforms, fallback to FP32 mode. 
LPTransformsMode lpTransformsMode = LPTransformsMode::Off; bool enforceBF16 = false; - bool manualEnforceBF16 = false; #endif DenormalsOptMode denormalsOptMode = DenormalsOptMode::DO_Keep; diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp index 6f1bd97b4085a2..6321ea1cac06aa 100644 --- a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp +++ b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp @@ -136,7 +136,7 @@ bool DnnlPostOpsComposer::appendScale(const std::vector& scale, bool isLa if (oscale_values.size() == 1) oscale_mask = 0; else - oscale_mask = 1 << 1; // it works for both Conv/Matmul + oscale_mask = 1 << idxOC; updateOutputScales(); return true; } diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 9764e881de04f5..62f33e02575db7 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -1506,11 +1506,6 @@ bool Graph::InsertNode(NodePtr parent, NodePtr child, NodePtr node, int parentPo // Set all non const data paths precision to BF16 void Graph::EnforceBF16() { - // Floating point parts of FP32 + INT8 or FP32 + BIN mixed precision models will be executed in BF16 precision - // only if enforceBF16 flag was set manually because current performance is not good enough to enable it by default - if (!implication(context->isGraphQuantized(), getConfig().manualEnforceBF16)) - return; - std::function& skipNodes)> searchForNodesToSkip; searchForNodesToSkip = [&](const NodePtr& node, std::unordered_set& skipNodes) -> void { for (size_t i = 0; i < node->getParentEdges().size(); i++) { diff --git a/src/plugins/intel_cpu/src/graph_dumper.cpp b/src/plugins/intel_cpu/src/graph_dumper.cpp index 1bfe65af51e52c..03b4b138f7a8e5 100644 --- a/src/plugins/intel_cpu/src/graph_dumper.cpp +++ b/src/plugins/intel_cpu/src/graph_dumper.cpp @@ -261,7 +261,7 @@ void summary_perf(const Graph &graph) { } const std::string& summaryPerf = graph.getConfig().debugCaps.summaryPerf; - if (summaryPerf.empty()) + if (summaryPerf.empty() || !std::stoi(summaryPerf)) return; std::map perf_by_type; @@ -308,7 +308,7 @@ void summary_perf(const Graph &graph) { std::stringstream ss; int percentage = static_cast(it.second*100/total_avg); if (percentage == 0) break; - ss << std::setw(10) << std::right << percentage << " % :" << it.first << std::endl; + ss << std::setw(10) << std::right << percentage << " % : " << std::setw(8) << std::right << it.second << "(us) " << it.first << std::endl; std::cout << ss.str(); } } diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index 3e6fd7d7e00cf9..949acf7cd6ab1c 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -734,21 +734,6 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) { } } -/** - * @todo FQ fusing was disabled for BF16 output since oneDNN primitives lack support - * for bf16 depthwise postops. - * This is not the case anymore, because after migration to oneDNN 2.3 FQ will be fused as - * multiple binary post ops. - * This check can already be removed for FC fusing, but should be kept for Convolution, - * which still uses legacy depthwise postops for performance reasons. 
- */ -static bool BF16QuantizeNodeFusing(const NodePtr& parentNode, const NodePtr& childNode) { - return childNode->getType() == Type::FakeQuantize && - one_of(Precision::BF16, - parentNode->getOriginalOutputPrecisionAtPort(0), - childNode->getOriginalOutputPrecisionAtPort(0)); -} - void GraphOptimizer::FuseFullyConnectedAndSimpleOperation(Graph &graph) { auto& graphNodes = graph.GetNodes(); @@ -772,12 +757,6 @@ void GraphOptimizer::FuseFullyConnectedAndSimpleOperation(Graph &graph) { continue; } - // BF16 Quantize Layer Fusing Disabling - if (BF16QuantizeNodeFusing(parentNode, childNode)) { - parent++; - continue; - } - childNode->fuseInto(parentNode); if (childNode->getType() == Type::FakeQuantize || childNode->getType() == Type::Eltwise) { @@ -1066,12 +1045,6 @@ void GraphOptimizer::FuseConvolutionAndSimpleOperation(Graph &graph) { continue; } - // BF16 Quantize Layer Fusing Disabling - if (BF16QuantizeNodeFusing(parentNode, childNode)) { - parent++; - continue; - } - childNode->fuseInto(parentNode); if (childNode->getType() == Type::FakeQuantize || childNode->getType() == Type::Eltwise) { diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 3ce92de2169f6c..ab07b6521e71f7 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -503,11 +503,6 @@ void Convolution::getSupportedDescriptors() { if (canBeExecutedInInt8()) { DEBUG_LOG(getName(), "Creating I8 descriptor"); - // We have to extend convolution_x8s8s32x from oneDNN to support BF16 output data type - if (outputDataType == memory::data_type::bf16) - outputDataType = memory::data_type::f32; - if (eltwisePrecision == Precision::BF16) - eltwisePrecision = Precision::FP32; // initTryBrgconvFlag depends on outputDataType, should be after outputDataType computed if (!enforceBrgconv) initTryBrgconvFlag(); diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 23b6c5be7cee9f..6b4c8e43521426 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -232,29 +232,29 @@ void FullyConnected::getSupportedDescriptors() { auto inputDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(DATA_ID)); outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(DATA_ID)); - if (inputDataType == memory::data_type::f32) { - outputDataType = memory::data_type::f32; - } - if (!fusedWith.empty()) { outputDataType = DnnlExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0)); } auto weightsDataType = DnnlExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(WEIGHTS_ID)); - // We have to extend gemm_x8s8s32x_inner_product_fwd_t from oneDNN to support BF16 output data type - if ((!one_of(inputDataType , memory::data_type::u8, memory::data_type::s8) || weightsDataType != memory::data_type::s8) - && inputDataType != memory::data_type::bf16) { - inputDataType = outputDataType = memory::data_type::f32; - } - - if (one_of(inputDataType , memory::data_type::u8, memory::data_type::s8) - && outputDataType == memory::data_type::bf16) { + // revert back outputDataType on special cases + if (inputDataType == memory::data_type::f32) { + // oneDNN only support f32 output when input is f32, even if FQ is fused outputDataType = memory::data_type::f32; - } - - if (inputDataType == memory::data_type::bf16 - && 
one_of(outputDataType , memory::data_type::u8, memory::data_type::s8)) { - outputDataType = memory::data_type::bf16; + } else if (inputDataType == memory::data_type::bf16) { + // bf16 input only supports bf16/f32 output, even if FQ is fused as post-ops + if (one_of(outputDataType , memory::data_type::u8, memory::data_type::s8)) { + outputDataType = memory::data_type::bf16; + } + } else if (one_of(inputDataType, memory::data_type::u8, memory::data_type::s8)) { + if (weightsDataType != memory::data_type::s8) { + // weight has to be s8 for INT8 mode, otherwise fallback to + // f32 mode + inputDataType = outputDataType = memory::data_type::f32; + } + } else { + // s32/u32/... unsupported input data types, fallback to f32 + inputDataType = outputDataType = memory::data_type::f32; } inDims = isDynamicNode() ? makeDummyInputDims() : getInputShapeAtPort(DATA_ID).getStaticDims(); diff --git a/src/plugins/intel_cpu/src/nodes/matmul.cpp b/src/plugins/intel_cpu/src/nodes/matmul.cpp index 83e4bd1a179294..c1c1381e6631f2 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.cpp +++ b/src/plugins/intel_cpu/src/nodes/matmul.cpp @@ -204,34 +204,6 @@ MatMul::MatMul(const std::shared_ptr& op, const GraphContext::CPtr } bool MatMul::canFuse(const NodePtr& node) const { - // per channel binary post op for rank > 2D is supported only by oneDNN reference implementation because of unusual MatMul channel axis (issue 6669) - if (getOutputShapeAtPort(0).getRank() > 2) { - if (const auto* eltwiseNode = dynamic_cast(node.get())) { - if (one_of(eltwiseNode->getAlgorithm(), Algorithm::EltwiseAdd, - Algorithm::EltwiseMultiply, - Algorithm::EltwiseSubtract, - Algorithm::EltwiseDivide, - Algorithm::EltwisePrelu, - Algorithm::EltwiseMulAdd, - Algorithm::EltwisePowerStatic) && - eltwiseNode->getBroadcastingPolicy() != Eltwise::PerTensor) { - return false; - } - } else if (const auto* fakeQuantizeNode = dynamic_cast(node.get())) { - if (fakeQuantizeNode->getBroadcastingPolicy() != FakeQuantize::PerTensor) { - return false; - } - } - } - - // Todo: - // Consider the case when Matmul doesn't support execution in int8, but is getting fused with FQ with int8 output. - // Then the Matmul will change its output precision to fp32, but the FQ child will still has the int8 input precision. - // This information should be propagated! Note that we may need to propagate updated precision to child fused nodes. 
- if (node->getType() == Type::FakeQuantize && - one_of(node->getOriginalOutputPrecisionAtPort(0), Precision::I8, Precision::U8) && - !canBeExecutedInInt8(getOriginalInputPrecisionAtPort(0), getOriginalInputPrecisionAtPort(1))) - return false; return canFuseSimpleOperation(node); } @@ -344,12 +316,20 @@ void MatMul::getSupportedDescriptors() { outPortPrec = firstInPortPrec = secondInPortPrec = Precision::FP32; } + Precision postOpsPrec = outPortPrec; if (!fusedWith.empty()) { - outPortPrec = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0); + postOpsPrec = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0); } - if (!canBeExecutedInInt8(firstInPortPrec, secondInPortPrec) && one_of(outPortPrec, Precision::U8, Precision::I8)) - outPortPrec = Precision::FP32; // INT output is not supported for non-INT inputs + if (canBeExecutedInInt8(firstInPortPrec, secondInPortPrec)) { + // INT8 mode support wide range of output precisions + outPortPrec = postOpsPrec; + } else if (postOpsPrec == Precision::FP32) { + // all non-INT8 modes support fp32 output precision + outPortPrec = postOpsPrec; + } else { + // otherwise we ignore postOpsPrec and stay with getOriginalOutputPrecisionAtPort(0) + } const auto& inputShape0 = getInputShapeAtPort(0); const auto& inputShape1 = getInputShapeAtPort(1); diff --git a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp index 31b02bd7cad08e..fb13000708cd74 100644 --- a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp +++ b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp @@ -479,11 +479,6 @@ std::ostream & operator<<(std::ostream & os, const PrintableModel& model) { os << std::endl; // recursively output subgraphs - if (auto subgraph = std::dynamic_pointer_cast(op)) { - os << "\t\t snippets Subgraph: " << subgraph->get_friendly_name() << " is_quantized:" << subgraph->is_quantized() << std::endl; - os << PrintableModel(subgraph->body(), tag, prefix + "\t\t"); - } - if (auto msubgraph = std::dynamic_pointer_cast(op)) { auto cnt = msubgraph->get_internal_subgraphs_size(); for (int i = 0; i < cnt; i++) { diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index bd3498162fab74..02857209960e9d 160000 --- a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit bd3498162fab7401b571c6ce77d837f1adcff265 +Subproject commit 02857209960e9d91c1b3df90ab4c7ac359bf0973 From d86d94edad0ff07f14574a73ede288c1bfc01857 Mon Sep 17 00:00:00 2001 From: River Li Date: Wed, 22 Mar 2023 05:55:51 +0800 Subject: [PATCH 020/296] [DOC][CAPI] document for remote tensor (#16408) * [DOC][CAPI] document for remote tensor * Update * Update minor * Update GPU_RemoteTensor_API.md --------- Co-authored-by: Sebastian Golebiewski --- .../supported_plugins/GPU_RemoteTensor_API.md | 254 ++++++++++++---- docs/snippets/gpu/context_sharing_va_c.cpp | 156 ++++++++++ .../gpu/preprocessing_nv12_two_planes_c.cpp | 126 ++++++++ .../gpu/remote_objects_creation_c.cpp | 283 ++++++++++++++++++ 4 files changed, 765 insertions(+), 54 deletions(-) create mode 100644 docs/snippets/gpu/context_sharing_va_c.cpp create mode 100644 docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp create mode 100644 docs/snippets/gpu/remote_objects_creation_c.cpp diff --git a/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteTensor_API.md b/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteTensor_API.md index dc73deb70965d0..0eac844e4c7701 
100644 --- a/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteTensor_API.md +++ b/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteTensor_API.md @@ -45,46 +45,85 @@ To create the ``ov::RemoteContext`` object for user context, explicitly provide of ``ov::RemoteContext`` derived classes. -.. tab:: Linux +.. tab:: Linux/C++ .. tab:: Create from cl_context .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_cl_context + :fragment: [context_from_cl_context] .. tab:: Create from cl_queue .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_cl_queue + :fragment: [context_from_cl_queue] .. tab:: Create from VADisplay .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_va_display + :fragment: [context_from_va_display] -.. tab:: Windows +.. tab:: Windows/C++ .. tab:: Create from cl_context .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_cl_context + :fragment: [context_from_cl_context] .. tab:: Create from cl_queue .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_cl_queue + :fragment: [context_from_cl_queue] .. tab:: Create from ID3D11Device .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp :language: cpp - :fragment: context_from_d3d_device + :fragment: [context_from_d3d_device] +.. tab:: Linux/C + + .. tab:: Create from cl_context + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_cl_context] + + .. tab:: Create from cl_queue + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_cl_queue] + + .. tab:: Create from VADisplay + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_va_display] + +.. tab:: Windows/C + + .. tab:: Create from cl_context + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_cl_context] + + .. tab:: Create from cl_queue + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_cl_queue] + + .. tab:: Create from ID3D11Device + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [context_from_d3d_device] Getting RemoteContext from the Plugin +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -96,19 +135,33 @@ Once the plugin options have been changed, the internal context is replaced by t To request the current default context of the plugin, use one of the following methods: +.. tab:: C++ -.. tab:: Get context from Core + .. tab:: Get context from Core - .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: cpp - :fragment: default_context_from_core + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp + :language: cpp + :fragment: [default_context_from_core] -.. tab:: Get context from compiled model + .. tab:: Get context from compiled model - .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: cpp - :fragment: default_context_from_model + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp + :language: cpp + :fragment: [default_context_from_model] + +.. tab:: C + + .. tab:: Get context from Core + + .. 
doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [default_context_from_core] + .. tab:: Get context from compiled model + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [default_context_from_model] Memory Sharing Between Application and GPU Plugin ########################################################### @@ -116,70 +169,140 @@ Memory Sharing Between Application and GPU Plugin The classes that implement the ``ov::RemoteTensor`` interface are the wrappers for native API memory handles (which can be obtained from them at any time). -To create a shared tensor from a native memory handle, use dedicated ``create_tensor``or ``create_tensor_nv12`` methods +To create a shared tensor from a native memory handle, use dedicated ``create_tensor`` or ``create_tensor_nv12`` methods of the ``ov::RemoteContext`` sub-classes. ``ov::intel_gpu::ocl::ClContext`` has multiple overloads of ``create_tensor`` methods which allow to wrap pre-allocated native handles with the ``ov::RemoteTensor`` -object or request plugin to allocate specific device memory. For more details, see the code snippets below: +object or request plugin to allocate specific device memory. There also provides C APIs to do the same things with C++ APIs. +For more details, see the code snippets below: .. tab-set:: - .. tab-item:: Wrap native handles + .. tab-item:: Wrap native handles/C++ + :sync: wrap-native-handles .. tab-set:: .. tab-item:: USM pointer + :sync: usm-pointer .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: wrap_usm_pointer + :language: cpp + :fragment: [wrap_usm_pointer] .. tab-item:: cl_mem + :sync: cl_mem .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: wrap_cl_mem + :language: cpp + :fragment: [wrap_cl_mem] .. tab-item:: cl::Buffer + :sync: buffer .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: wrap_cl_buffer + :language: cpp + :fragment: [wrap_cl_buffer] .. tab-item:: cl::Image2D + :sync: image2D .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: wrap_cl_image + :language: cpp + :fragment: [wrap_cl_image] .. tab-item:: biplanar NV12 surface + :sync: biplanar .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: wrap_nv12_surface + :language: cpp + :fragment: [wrap_nv12_surface] - .. tab-item:: Allocate device memory + .. tab-item:: Allocate device memory/C++ + :sync: allocate-device-memory .. tab-set:: - + .. tab-item:: USM host memory + :sync: usm-host-memory .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: allocate_usm_host + :language: cpp + :fragment: [allocate_usm_host] .. tab-item:: USM device memory + :sync: usm-device-memory .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: allocate_usm_device + :language: cpp + :fragment: [allocate_usm_device] .. tab-item:: cl::Buffer .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation.cpp - :language: sh - :fragment: allocate_cl_buffer + :language: cpp + :fragment: [allocate_cl_buffer] + +.. tab-set:: + + .. tab-item:: Wrap native handles/C + :sync: wrap-native-handles + + .. tab-set:: + + .. tab-item:: USM pointer + :sync: usm-pointer + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [wrap_usm_pointer] + + .. 
tab-item:: cl_mem + :sync: cl_mem + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [wrap_cl_mem] + + .. tab-item:: cl::Buffer + :sync: buffer + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [wrap_cl_buffer] + + .. tab-item:: cl::Image2D + :sync: image2D + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [wrap_cl_image] + + .. tab-item:: biplanar NV12 surface + :sync: biplanar + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [create_nv12_surface] + + .. tab-item:: Allocate device memory/C + :sync: allocate-device-memory + + .. tab-set:: + + .. tab-item:: USM host memory + :sync: usm-host-memory + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [allocate_usm_host] + + .. tab-item:: USM device memory + :sync: usm-device-memory + + .. doxygensnippet:: docs/snippets/gpu/remote_objects_creation_c.cpp + :language: c + :fragment: [allocate_usm_device] The ``ov::intel_gpu::ocl::D3DContext`` and ``ov::intel_gpu::ocl::VAContext`` classes are derived from ``ov::intel_gpu::ocl::ClContext``. Therefore, they provide the functionality described above and extend it @@ -202,9 +325,17 @@ should be added before model compilation: .. tab:: two-plane - .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes.cpp - :language: cpp - :fragment: [init_preproc] + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes.cpp + :language: cpp + :fragment: [init_preproc] + + .. tab:: C + + .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp + :language: c + :fragment: [init_preproc] .. tab:: single-plane @@ -228,21 +359,29 @@ inputs need to be set via the ``ov::InferRequest::set_tensors`` method with vect .. tab:: two-plane - .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes.cpp - :language: cpp - :fragment: single_batch + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes.cpp + :language: cpp + :fragment: [single_batch] + + .. tab:: C + + .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp + :language: c + :fragment: [single_batch] .. tab:: single-plane .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_single_plane.cpp :language: cpp - :fragment: single_batch + :fragment: [single_batch] .. tab:: NV12 to Grey .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_to_gray.cpp :language: cpp - :fragment: single_batch + :fragment: [single_batch] .. tab:: Multiple Batches @@ -250,19 +389,19 @@ inputs need to be set via the ``ov::InferRequest::set_tensors`` method with vect .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_two_planes.cpp :language: cpp - :fragment: batched_case + :fragment: [batched_case] .. tab:: single-plane .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_single_plane.cpp :language: cpp - :fragment: batched_case + :fragment: [batched_case] .. tab:: NV12 to Grey .. doxygensnippet:: docs/snippets/gpu/preprocessing_nv12_to_gray.cpp :language: cpp - :fragment: batched_case + :fragment: [batched_case] I420 color format can be processed in a similar way @@ -283,7 +422,7 @@ on waiting for the completion of inference. The pseudo-code may look as follows: .. 
doxygensnippet:: docs/snippets/gpu/queue_sharing.cpp :language: cpp - :fragment: queue_sharing + :fragment: [queue_sharing] Limitations @@ -326,20 +465,27 @@ To see pseudo-code of usage examples, refer to the sections below. .. doxygensnippet:: docs/snippets/gpu/context_sharing.cpp :language: cpp - :fragment: context_sharing_get_from_ov + :fragment: [context_sharing_get_from_ov] .. dropdown:: Running GPU Plugin Inference within User-Supplied Shared Context .. doxygensnippet:: docs/snippets/gpu/context_sharing.cpp :language: cpp - :fragment: context_sharing_user_handle + :fragment: [context_sharing_user_handle] .. dropdown:: Direct Consuming of the NV12 VAAPI Video Decoder Surface on Linux - .. doxygensnippet:: docs/snippets/gpu/context_sharing_va.cpp - :language: cpp - :fragment: context_sharing_va + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/gpu/context_sharing_va.cpp + :language: cpp + :fragment: [context_sharing_va] + + .. tab:: C + .. doxygensnippet:: docs/snippets/gpu/context_sharing_va_c.cpp + :language: c + :fragment: [context_sharing_va] See Also ####################################### diff --git a/docs/snippets/gpu/context_sharing_va_c.cpp b/docs/snippets/gpu/context_sharing_va_c.cpp new file mode 100644 index 00000000000000..dd22e330bb5dcf --- /dev/null +++ b/docs/snippets/gpu/context_sharing_va_c.cpp @@ -0,0 +1,156 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#ifdef ENABLE_LIBVA +#include +#include +#include + +VADisplay get_va_display(); +VASurfaceID decode_va_surface(); + +int main() { + ov_core_t* core = NULL; + ov_model_t* model = NULL; + ov_compiled_model_t* compiled_model = NULL; + ov_infer_request_t* infer_request = NULL; + ov_remote_context_t* shared_va_context = NULL; + ov_tensor_t* remote_tensor = NULL; + ov_preprocess_prepostprocessor_t* preprocess = NULL; + ov_preprocess_input_info_t* preprocess_input_info = NULL; + ov_preprocess_input_tensor_info_t* preprocess_input_tensor_info = NULL; + ov_preprocess_preprocess_steps_t* preprocess_input_steps = NULL; + ov_preprocess_input_model_info_t* preprocess_input_model_info = NULL; + ov_layout_t* layout = NULL; + ov_model_t* new_model = NULL; + + ov_output_const_port_t* input_port = NULL; + char* in_tensor_name = NULL; + char* out_tensor_name = NULL; + ov_shape_t* input_shape = NULL; + ov_element_type_e input_type; + + const int height = 480; + const int width = 640; + + // initialize the objects + ov_core_create(&core); + ov_core_read_model(core, "model.xml", "model.bin", &model); + + // ... + + //! [context_sharing_va] + + // ... 
+ + ov_preprocess_prepostprocessor_create(model, &preprocess); + ov_preprocess_prepostprocessor_get_input_info(preprocess, &preprocess_input_info); + ov_preprocess_input_info_get_tensor_info(preprocess_input_info, &preprocess_input_tensor_info); + ov_preprocess_input_tensor_info_set_element_type(preprocess_input_tensor_info, U8); + ov_preprocess_input_tensor_info_set_color_format_with_subname(preprocess_input_tensor_info, + NV12_TWO_PLANES, + 2, + "y", + "uv"); + ov_preprocess_input_tensor_info_set_memory_type(preprocess_input_tensor_info, "GPU_SURFACE"); + ov_preprocess_input_tensor_info_set_spatial_static_shape(preprocess_input_tensor_info, height, width); + ov_preprocess_input_info_get_preprocess_steps(preprocess_input_info, &preprocess_input_steps); + ov_preprocess_preprocess_steps_convert_color(preprocess_input_steps, BGR); + ov_preprocess_preprocess_steps_resize(preprocess_input_steps, RESIZE_LINEAR); + ov_preprocess_input_info_get_model_info(preprocess_input_info, &preprocess_input_model_info); + ov_layout_create("NCHW", &layout); + ov_preprocess_input_model_info_set_layout(preprocess_input_model_info, layout); + ov_preprocess_prepostprocessor_build(preprocess, &new_model); + + VADisplay display = get_va_display(); + // create the shared context object + ov_core_create_context(core, + "GPU", + 4, + &shared_va_context, + ov_property_key_intel_gpu_context_type, + "VA_SHARED", + ov_property_key_intel_gpu_va_device, + display); + + // compile model within a shared context + ov_core_compile_model_with_context(core, new_model, shared_va_context, 0, &compiled_model); + + ov_output_const_port_t* port_0 = NULL; + char* input_name_0 = NULL; + ov_model_const_input_by_index(new_model, 0, &port_0); + ov_port_get_any_name(port_0, &input_name_0); + + ov_output_const_port_t* port_1 = NULL; + char* input_name_1 = NULL; + ov_model_const_input_by_index(new_model, 1, &port_1); + ov_port_get_any_name(port_1, &input_name_1); + + ov_shape_t shape_y = {0, NULL}; + ov_shape_t shape_uv = {0, NULL}; + ov_const_port_get_shape(port_0, &shape_y); + ov_const_port_get_shape(port_1, &shape_uv); + + // execute decoding and obtain decoded surface handle + VASurfaceID va_surface = decode_va_surface(); + // ... + //wrap decoder output into RemoteBlobs and set it as inference input + + ov_tensor_t* remote_tensor_y = NULL; + ov_tensor_t* remote_tensor_uv = NULL; + ov_remote_context_create_tensor(shared_va_context, + U8, + shape_y, + 6, + &remote_tensor_y, + ov_property_key_intel_gpu_shared_mem_type, + "VA_SURFACE", + ov_property_key_intel_gpu_dev_object_handle, + va_surface, + ov_property_key_intel_gpu_va_plane, + 0); + ov_remote_context_create_tensor(shared_va_context, + U8, + shape_uv, + 6, + &remote_tensor_uv, + ov_property_key_intel_gpu_shared_mem_type, + "VA_SURFACE", + ov_property_key_intel_gpu_dev_object_handle, + va_surface, + ov_property_key_intel_gpu_va_plane, + 1); + + ov_compiled_model_create_infer_request(compiled_model, &infer_request); + ov_infer_request_set_tensor(infer_request, input_name_0, remote_tensor_y); + ov_infer_request_set_tensor(infer_request, input_name_1, remote_tensor_uv); + ov_infer_request_infer(infer_request); + //! 
[context_sharing_va] + + // deinitialization + ov_free(input_name_0); + ov_free(input_name_1); + ov_output_const_port_free(port_0); + ov_output_const_port_free(port_1); + ov_layout_free(layout); + ov_preprocess_input_model_info_free(preprocess_input_model_info); + ov_preprocess_preprocess_steps_free(preprocess_input_steps); + ov_preprocess_input_tensor_info_free(preprocess_input_tensor_info); + ov_preprocess_input_info_free(preprocess_input_info); + ov_model_free(new_model); + ov_preprocess_prepostprocessor_free(preprocess); + ov_tensor_free(remote_tensor_y); + ov_tensor_free(remote_tensor_uv); + ov_shape_free(&shape_y); + ov_shape_free(&shape_uv); + ov_infer_request_free(infer_request); + ov_compiled_model_free(compiled_model); + ov_model_free(model); + ov_model_free(new_model); + ov_remote_context_free(shared_va_context); + ov_core_free(core); + + return 0; +} +#endif // ENABLE_LIBVA diff --git a/docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp b/docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp new file mode 100644 index 00000000000000..826af8ddffe32b --- /dev/null +++ b/docs/snippets/gpu/preprocessing_nv12_two_planes_c.cpp @@ -0,0 +1,126 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +cl::Image2D get_y_image(); +cl::Image2D get_uv_image(); + +int main() { + ov_core_t* core = NULL; + ov_model_t* model = NULL; + ov_compiled_model_t* compiled_model = NULL; + ov_infer_request_t* infer_request = NULL; + ov_preprocess_prepostprocessor_t* preprocess = NULL; + ov_preprocess_input_info_t* preprocess_input_info = NULL; + ov_preprocess_input_tensor_info_t* preprocess_input_tensor_info = NULL; + ov_preprocess_preprocess_steps_t* preprocess_input_steps = NULL; + ov_preprocess_input_model_info_t* preprocess_input_model_info = NULL; + ov_layout_t* layout = NULL; + ov_model_t* model_with_preproc = NULL; + ov_remote_context_t* gpu_context = NULL; + char* input_name0 = NULL; + char* input_name1 = NULL; + ov_output_const_port_t* input_port0 = NULL; + ov_output_const_port_t* input_port1 = NULL; + size_t height = 480; + size_t width = 640; + + ov_core_create(&core); + ov_core_read_model(core, "model.xml", "model.bin", &model); + + //! [init_preproc] + ov_preprocess_prepostprocessor_create(model, &preprocess); + ov_preprocess_prepostprocessor_get_input_info(preprocess, &preprocess_input_info); + ov_preprocess_input_info_get_tensor_info(preprocess_input_info, &preprocess_input_tensor_info); + ov_preprocess_input_tensor_info_set_element_type(preprocess_input_tensor_info, ov_element_type_e::U8); + ov_preprocess_input_tensor_info_set_color_format_with_subname(preprocess_input_tensor_info, + ov_color_format_e::NV12_TWO_PLANES, + 2, + "y", + "uv"); + ov_preprocess_input_tensor_info_set_memory_type(preprocess_input_tensor_info, "GPU_SURFACE"); + ov_preprocess_input_tensor_info_set_spatial_static_shape(preprocess_input_tensor_info, height, width); + ov_preprocess_input_info_get_preprocess_steps(preprocess_input_info, &preprocess_input_steps); + ov_preprocess_preprocess_steps_convert_color(preprocess_input_steps, ov_color_format_e::BGR); + ov_preprocess_preprocess_steps_resize(preprocess_input_steps, RESIZE_LINEAR); + ov_preprocess_input_info_get_model_info(preprocess_input_info, &preprocess_input_model_info); + ov_layout_create("NCHW", &layout); + ov_preprocess_input_model_info_set_layout(preprocess_input_model_info, layout); + ov_preprocess_prepostprocessor_build(preprocess, &model_with_preproc); + //! 
+
+    ov_core_compile_model(core, model_with_preproc, "GPU", 0, &compiled_model);
+    ov_compiled_model_get_context(compiled_model, &gpu_context);
+    ov_compiled_model_create_infer_request(compiled_model, &infer_request);
+
+    {
+        //! [single_batch]
+        ov_model_const_input_by_index(model, 0, &input_port0);
+        ov_model_const_input_by_index(model, 1, &input_port1);
+        ov_port_get_any_name(input_port0, &input_name0);
+        ov_port_get_any_name(input_port1, &input_name1);
+
+        ov_shape_t shape_y, shape_uv;
+        ov_tensor_t* remote_tensor_y = NULL;
+        ov_tensor_t* remote_tensor_uv = NULL;
+        ov_const_port_get_shape(input_port0, &shape_y);
+        ov_const_port_get_shape(input_port1, &shape_uv);
+
+        cl::Image2D image_y = get_y_image();
+        cl::Image2D image_uv = get_uv_image();
+        ov_remote_context_create_tensor(gpu_context,
+                                        ov_element_type_e::U8,
+                                        shape_y,
+                                        4,
+                                        &remote_tensor_y,
+                                        ov_property_key_intel_gpu_shared_mem_type,
+                                        "OCL_IMAGE2D",
+                                        ov_property_key_intel_gpu_mem_handle,
+                                        image_y.get());
+
+        ov_remote_context_create_tensor(gpu_context,
+                                        ov_element_type_e::U8,
+                                        shape_uv,
+                                        4,
+                                        &remote_tensor_uv,
+                                        ov_property_key_intel_gpu_shared_mem_type,
+                                        "OCL_IMAGE2D",
+                                        ov_property_key_intel_gpu_mem_handle,
+                                        image_uv.get());
+
+        ov_infer_request_set_tensor(infer_request, input_name0, remote_tensor_y);
+        ov_infer_request_set_tensor(infer_request, input_name1, remote_tensor_uv);
+        ov_infer_request_infer(infer_request);
+        //! [single_batch]
+
+        ov_free(input_name0);
+        ov_free(input_name1);
+        ov_output_const_port_free(input_port0);
+        ov_output_const_port_free(input_port1);
+
+        ov_layout_free(layout);
+        ov_preprocess_input_model_info_free(preprocess_input_model_info);
+        ov_preprocess_preprocess_steps_free(preprocess_input_steps);
+        ov_preprocess_input_tensor_info_free(preprocess_input_tensor_info);
+        ov_preprocess_input_info_free(preprocess_input_info);
+        ov_preprocess_prepostprocessor_free(preprocess);
+
+        ov_tensor_free(remote_tensor_y);
+        ov_tensor_free(remote_tensor_uv);
+        ov_shape_free(&shape_y);
+        ov_shape_free(&shape_uv);
+
+        ov_infer_request_free(infer_request);
+        ov_compiled_model_free(compiled_model);
+        ov_model_free(model);
+        ov_model_free(model_with_preproc);
+        ov_remote_context_free(gpu_context);
+        ov_core_free(core);
+    }
+
+    return 0;
+}
diff --git a/docs/snippets/gpu/remote_objects_creation_c.cpp b/docs/snippets/gpu/remote_objects_creation_c.cpp
new file mode 100644
index 00000000000000..c870da6bb06a26
--- /dev/null
+++ b/docs/snippets/gpu/remote_objects_creation_c.cpp
@@ -0,0 +1,283 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+#include
+#include
+
+#ifdef WIN32
+typedef void* ID3D11Device;
+#elif defined(ENABLE_LIBVA)
+#include
+#endif
+
+void* allocate_usm_buffer(size_t size);
+cl_mem allocate_cl_mem(size_t size);
+cl_context get_cl_context();
+cl_command_queue get_cl_queue();
+cl::Buffer allocate_buffer(size_t size);
+cl::Image2D allocate_image(size_t size);
+
+#ifdef WIN32
+ID3D11Device* get_d3d_device();
+#elif defined(ENABLE_LIBVA)
+VADisplay get_va_display();
+#endif
+
+int main() {
+    ov_core_t* core = NULL;
+    ov_model_t* model = NULL;
+    ov_compiled_model_t* compiled_model = NULL;
+    ov_remote_context_t* gpu_context = NULL;
+    ov_tensor_t* remote_tensor = NULL;
+
+    ov_output_const_port* input_port = NULL;
+    char* in_tensor_name = NULL;
+    char* out_tensor_name = NULL;
+    ov_shape_t input_shape;
+    ov_element_type_e input_type;
+
+    ov_core_create(&core);
+    ov_core_read_model(core, "model.xml", "model.bin", &model);
+
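+    // Query the model's single input: its tensor name, shape and element type are
+    // reused below, and input_size (the product of the static shape's dimensions)
+    // is what the shared USM/OpenCL allocations in the following examples are
+    // sized from.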
ov_model_const_input(model, &input_port); + ov_port_get_any_name(input_port, &in_tensor_name); + ov_const_port_get_shape(input_port, &input_shape); + ov_port_get_element_type(input_port, &input_type); + size_t input_size = 1; + for (auto i = 0; i < input_shape.rank; i++) + input_size *= input_shape.dims[i]; + + ov_core_compile_model(core, model, "GPU", 0, &compiled_model); + ov_compiled_model_get_context(compiled_model, &gpu_context); + +{ + //! [wrap_usm_pointer] + void* shared_buffer = allocate_usm_buffer(input_size); + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 4, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "USM_USER_BUFFER", + ov_property_key_intel_gpu_mem_handle, + shared_buffer); + //! [wrap_usm_pointer] +} + +{ + //! [wrap_cl_mem] + cl_mem shared_buffer = allocate_cl_mem(input_size); + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 4, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "OCL_BUFFER", + ov_property_key_intel_gpu_mem_handle, + shared_buffer); + //! [wrap_cl_mem] +} + +{ + //! [wrap_cl_buffer] + cl::Buffer shared_buffer = allocate_buffer(input_size); + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 4, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "OCL_BUFFER", + ov_property_key_intel_gpu_mem_handle, + shared_buffer.get()); + //! [wrap_cl_buffer] +} + +{ + //! [wrap_cl_image] + cl::Image2D shared_buffer = allocate_image(input_size); + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 4, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "OCL_IMAGE2D", + ov_property_key_intel_gpu_mem_handle, + shared_buffer.get()); + //! [wrap_cl_image] +} + +{ + //! [allocate_usm_device] + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 2, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "USM_USER_BUFFER"); + // Extract raw usm pointer from remote tensor + void* usm_ptr = NULL; + ov_tensor_data(remote_tensor, &usm_ptr); + //! [allocate_usm_device] +} + +{ + //! [allocate_usm_host] + ov_remote_context_create_tensor(gpu_context, + input_type, + input_shape, + 2, + &remote_tensor, + ov_property_key_intel_gpu_shared_mem_type, + "USM_HOST_BUFFER"); + // Extract raw usm pointer from remote tensor + void* usm_ptr = NULL; + ov_tensor_data(remote_tensor, &usm_ptr); + //! [allocate_usm_host] +} + +{ + int64_t width = 1024; + int64_t height = 768; + + int64_t y_plane_size = width * height; + int64_t uv_plane_size = width * height / 2; + + ov_shape_t shape_y = {0, NULL}; + int64_t dims_y[4] = {1, 1, height, width}; + ov_shape_t shape_uv = {0, NULL}; + int64_t dims_uv[4] = {1, 2, height / 2, width / 2}; + ov_tensor_t* remote_tensor_y = NULL; + ov_tensor_t* remote_tensor_uv = NULL; + + ov_shape_create(4, dims_y, &shape_y); + ov_shape_create(4, dims_uv, &shape_uv); + + //! 
[create_nv12_surface]
+    cl::Image2D y_plane_surface = allocate_image(y_plane_size);
+    cl::Image2D uv_plane_surface = allocate_image(uv_plane_size);
+
+    ov_remote_context_create_tensor(gpu_context,
+                                    input_type,
+                                    shape_y,
+                                    4,
+                                    &remote_tensor_y,
+                                    ov_property_key_intel_gpu_shared_mem_type,
+                                    "OCL_IMAGE2D",
+                                    ov_property_key_intel_gpu_mem_handle,
+                                    y_plane_surface.get());
+
+    ov_remote_context_create_tensor(gpu_context,
+                                    input_type,
+                                    shape_uv,
+                                    4,
+                                    &remote_tensor_uv,
+                                    ov_property_key_intel_gpu_shared_mem_type,
+                                    "OCL_IMAGE2D",
+                                    ov_property_key_intel_gpu_mem_handle,
+                                    uv_plane_surface.get());
+
+    ov_tensor_free(remote_tensor_y);
+    ov_tensor_free(remote_tensor_uv);
+    ov_shape_free(&shape_y);
+    ov_shape_free(&shape_uv);
+    //! [create_nv12_surface]
+}
+
+{
+    //! [context_from_cl_context]
+    cl_context cl_context = get_cl_context();
+    ov_core_create_context(core,
+                           "GPU",
+                           4,
+                           &gpu_context,
+                           ov_property_key_intel_gpu_context_type,
+                           "OCL",
+                           ov_property_key_intel_gpu_ocl_context,
+                           cl_context);
+    //! [context_from_cl_context]
+}
+
+{
+    //! [context_from_cl_queue]
+    cl_command_queue cl_queue = get_cl_queue();
+    cl_context cl_context = get_cl_context();
+    ov_core_create_context(core,
+                           "GPU",
+                           6,
+                           &gpu_context,
+                           ov_property_key_intel_gpu_context_type,
+                           "OCL",
+                           ov_property_key_intel_gpu_ocl_context,
+                           cl_context,
+                           ov_property_key_intel_gpu_ocl_queue,
+                           cl_queue);
+    //! [context_from_cl_queue]
+}
+
+#ifdef WIN32
+{
+    //! [context_from_d3d_device]
+    ID3D11Device* device = get_d3d_device();
+    ov_core_create_context(core,
+                           "GPU",
+                           4,
+                           &gpu_context,
+                           ov_property_key_intel_gpu_context_type,
+                           "VA_SHARED",
+                           ov_property_key_intel_gpu_va_device,
+                           device);
+    //! [context_from_d3d_device]
+}
+#elif defined(ENABLE_LIBVA)
+{
+    //! [context_from_va_display]
+    VADisplay display = get_va_display();
+    ov_core_create_context(core,
+                           "GPU",
+                           4,
+                           &gpu_context,
+                           ov_property_key_intel_gpu_context_type,
+                           "VA_SHARED",
+                           ov_property_key_intel_gpu_va_device,
+                           display);
+    //! [context_from_va_display]
+}
+#endif
+{
+    //! [default_context_from_core]
+    ov_core_get_default_context(core, "GPU", &gpu_context);
+    // Extract ocl context handle from RemoteContext
+    size_t size = 0;
+    char* params = nullptr;
+    // params is formatted like: "CONTEXT_TYPE OCL OCL_CONTEXT 0x5583b2ec7b40 OCL_QUEUE 0x5583b2e98ff0"
+    // You need to parse it.
+    ov_remote_context_get_params(gpu_context, &size, &params);
+    //! [default_context_from_core]
+}
+
+{
+    //! [default_context_from_model]
+    ov_compiled_model_get_context(compiled_model, &gpu_context);
+    // Extract ocl context handle from RemoteContext
+    size_t size = 0;
+    char* params = nullptr;
+    // params is formatted like: "CONTEXT_TYPE OCL OCL_CONTEXT 0x5583b2ec7b40 OCL_QUEUE 0x5583b2e98ff0"
+    // You need to parse it.
+    ov_remote_context_get_params(gpu_context, &size, &params);
+    //! [default_context_from_model]
+}
+
+ov_compiled_model_free(compiled_model);
+ov_model_free(model);
+ov_remote_context_free(gpu_context);
+ov_core_free(core);
+
+return 0;
+}

From 5e98696464489a47e02d05607757c11896f354fa Mon Sep 17 00:00:00 2001
From: Haiqi Pan
Date: Tue, 21 Mar 2023 20:29:35 -0700
Subject: [PATCH 021/296] Fix Windows build warnings in template and core tests (#15967)

* fix C4305
* 1.0f
* Element
* fix c4244
* fix truncation from double to float in grn.cpp
* Revert "fix truncation from double to float in grn.cpp"

This reverts commit 5263b37cb2d4114971db4192305c82ff063edea0.
* fix grn.cpp * add 4305 * fix low * add TearDown * revert softmax.cpp * pragram * fix conflicts * fix conflicts * size_t -> ov::label_t * WIN32 --------- Co-authored-by: Ilya Lavrenov --- src/core/tests/CMakeLists.txt | 5 ----- src/core/tests/type_prop/matmul.cpp | 2 +- src/plugins/intel_gpu/src/graph/program_node.cpp | 2 +- .../template/tests/functional/CMakeLists.txt | 4 ---- .../tests/functional/op_reference/grn.cpp | 16 ++++++++-------- .../tests/functional/op_reference/softmax.cpp | 4 ++++ .../include/behavior/plugin/caching_tests.hpp | 3 +++ 7 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/core/tests/CMakeLists.txt b/src/core/tests/CMakeLists.txt index e1dd3e60b7c1d1..6cd2bc51af1259 100644 --- a/src/core/tests/CMakeLists.txt +++ b/src/core/tests/CMakeLists.txt @@ -2,11 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 # -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - # 'argument': conversion from 'size_t' to 'int', possible loss of data - ie_add_compiler_flags(/wd4267) - ie_add_compiler_flags(/wd4244) -endif() set(TARGET_NAME ov_core_unit_tests) diff --git a/src/core/tests/type_prop/matmul.cpp b/src/core/tests/type_prop/matmul.cpp index f7b6116a259adf..327e8574c53301 100644 --- a/src/core/tests/type_prop/matmul.cpp +++ b/src/core/tests/type_prop/matmul.cpp @@ -528,7 +528,7 @@ TEST(type_prop, matmul_propagate_labels_on_interval_dims) { } TEST(type_prop, matmul_propagate_label_on_b_input_after_reshape) { - constexpr size_t my_label = 2; + constexpr ov::label_t my_label = 2; auto marked_dim = Dimension(2, 3); ov::DimensionTracker::set_label(marked_dim, my_label); diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 6055cd23407f01..5b66ad11a25149 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -1215,4 +1215,4 @@ void program_node::init_onednn_primitive_attributes() { } -#endif // ENABLE_ONEDNN_FOR_GPU +#endif // ENABLE_ONEDNN_FOR_GPU \ No newline at end of file diff --git a/src/plugins/template/tests/functional/CMakeLists.txt b/src/plugins/template/tests/functional/CMakeLists.txt index 6239c0e3fb599f..b45364d93c606f 100644 --- a/src/plugins/template/tests/functional/CMakeLists.txt +++ b/src/plugins/template/tests/functional/CMakeLists.txt @@ -6,11 +6,7 @@ set(TARGET_NAME ov_template_func_tests) if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - ie_add_compiler_flags(/wd4244) - ie_add_compiler_flags(/wd4250) ie_add_compiler_flags(/wd4305) - ie_add_compiler_flags(/wd4756) - ie_add_compiler_flags(/wd4018) endif() ov_add_test_target( diff --git a/src/plugins/template/tests/functional/op_reference/grn.cpp b/src/plugins/template/tests/functional/op_reference/grn.cpp index 4c2b50e5d20b0d..4322da7cd3c921 100644 --- a/src/plugins/template/tests/functional/op_reference/grn.cpp +++ b/src/plugins/template/tests/functional/op_reference/grn.cpp @@ -59,17 +59,17 @@ std::vector generateGrnParams(const element::Type& type) { using T = typename element_type_traits::value_type; std::vector grnParams { // bias 1e-6 // 2D // 3D // 4D - GrnParams(1e-6, PartialShape {3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + GrnParams(1e-6f, PartialShape {3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, std::vector {0.182574, 0.365148, 0.547723, 0.730297, 0.379049, 0.454859, 0.530669, 0.606478, 0.426162, 0.473514, 0.520865, 0.568217}), - GrnParams(1e-6, PartialShape {2, 3, 4}, type, + GrnParams(1e-6f, PartialShape {2, 3, 4}, type, std::vector 
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, std::vector {0.0966737, 0.169031, 0.224231, 0.267261, 0.483368, 0.507093, 0.523205, 0.534522, 0.870063, 0.845154, 0.822179, 0.801784, 0.433574, 0.441836, 0.449215, 0.455842, 0.566982, 0.568075, 0.569005, 0.569803, 0.700389, 0.694314, 0.688796, 0.683763}), - GrnParams(1e-6, PartialShape {1, 2, 3, 4}, type, + GrnParams(1e-6f, PartialShape {1, 2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, std::vector {0.0766965, 0.141421, 0.196116, 0.242536, 0.282166, 0.316228, 0.345705, 0.371391, 0.393919, 0.413803, 0.431455, 0.447214, 0.997055, 0.989949, 0.980581, 0.970143, 0.959365, 0.948683, 0.938343, 0.928477, 0.919145, 0.910366, 0.902134, 0.894427}), - GrnParams(1e-6, PartialShape {2, 2, 3, 4}, type, + GrnParams(1e-6f, PartialShape {2, 2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}, std::vector {0.0766965, 0.141421, 0.196116, 0.242536, 0.282166, 0.316228, 0.345705, 0.371391, 0.393919, 0.413803, 0.431455, 0.447214, @@ -77,17 +77,17 @@ std::vector generateGrnParams(const element::Type& type) { 0.559857, 0.564684, 0.56921, 0.573462, 0.577465, 0.581238, 0.584802, 0.588172, 0.591364, 0.594391, 0.597266, 0.6, 0.828589, 0.825307, 0.822192, 0.819232, 0.816416, 0.813733, 0.811176, 0.808736, 0.806405, 0.804176, 0.802043, 0.8}), // bias 100.25 // 2D // 3D // 4D - GrnParams(100.25, PartialShape {3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + GrnParams(100.25f, PartialShape {3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, std::vector {0.0876216, 0.175243, 0.262865, 0.350486, 0.301923, 0.362308, 0.422693, 0.483077, 0.385076, 0.427863, 0.470649, 0.513435}), - GrnParams(100.25, PartialShape {2, 3, 4}, type, + GrnParams(100.25f, PartialShape {2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, std::vector {0.0694629, 0.129032, 0.179525, 0.222137, 0.347314, 0.387097, 0.418891, 0.444273, 0.625166, 0.645161, 0.658258, 0.66641, 0.41125, 0.421303, 0.430287, 0.438356, 0.537789, 0.541675, 0.54503, 0.547945, 0.664327, 0.662047, 0.659774, 0.657534}), - GrnParams(100.25, PartialShape {1, 2, 3, 4}, type, + GrnParams(100.25f, PartialShape {1, 2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, std::vector {0.0608299, 0.115422, 0.164091, 0.207321, 0.245662, 0.279675, 0.309889, 0.336786, 0.360795, 0.38229, 0.401596, 0.418994, 0.790789, 0.807954, 0.820457, 0.829283, 0.835252, 0.839026, 0.841128, 0.841965, 0.841854, 0.841037, 0.839701, 0.837989f}), - GrnParams(100.25, PartialShape {2, 2, 3, 4}, type, + GrnParams(100.25f, PartialShape {2, 2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}, std::vector {0.0608299, 0.115422, 0.164091, 0.207321, 0.245662, 0.279675, 0.309889, 0.336786, 0.360795, 0.38229, 0.401596, 0.418994, diff --git a/src/plugins/template/tests/functional/op_reference/softmax.cpp b/src/plugins/template/tests/functional/op_reference/softmax.cpp index e0e1a3b1e2c0e3..293df1a3cd5887 100644 --- a/src/plugins/template/tests/functional/op_reference/softmax.cpp +++ 
b/src/plugins/template/tests/functional/op_reference/softmax.cpp @@ -10,6 +10,10 @@ using namespace reference_tests; using namespace ov; +#ifdef _WIN32 +# pragma warning(disable : 4756) +#endif + namespace { struct SoftmaxParams { template diff --git a/src/tests/functional/plugin/shared/include/behavior/plugin/caching_tests.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/caching_tests.hpp index 0b4a22f03f2ab2..89b86ae7fe3dec 100644 --- a/src/tests/functional/plugin/shared/include/behavior/plugin/caching_tests.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/plugin/caching_tests.hpp @@ -94,6 +94,9 @@ class LoadNetworkCompiledKernelsCacheTest : virtual public LayerTestsUtils::Laye std::replace(test_name.begin(), test_name.end(), '\\', '_'); cache_path = "LoadNetwork" + test_name + "_cache"; } + void TearDown() override { + APIBaseTest::TearDown(); + } }; DISABLE_WARNING_MSVC_END(4250) From 95636f7715097f2f0c5718491a8ac28c8248f0b0 Mon Sep 17 00:00:00 2001 From: Xuejun Zhai Date: Wed, 22 Mar 2023 11:35:24 +0800 Subject: [PATCH 022/296] [Unicode API] Add wide char for compiler model APIs (#16180) * [Unicode API] Add wide char for compiler model APIs Signed-off-by: Zhai, Xuejun * Avoid duplicated func description Signed-off-by: Zhai, Xuejun * Fix format issue Signed-off-by: Zhai, Xuejun * Add unite test for wstring of complie model Signed-off-by: Zhai, Xuejun * Clear code Signed-off-by: Zhai, Xuejun * Add unite test for other compile model unicode APIs Signed-off-by: Zhai, Xuejun * Clear log output Signed-off-by: Zhai, Xuejun * Add parameter of device for compiled model unicode test Signed-off-by: Zhai, Xuejun --------- Signed-off-by: Zhai, Xuejun --- .../include/openvino/runtime/core.hpp | 39 +++- src/inference/src/core.cpp | 14 ++ .../behavior/ov_plugin/core_integration.hpp | 180 ++++++++++++++++++ 3 files changed, 231 insertions(+), 2 deletions(-) diff --git a/src/inference/include/openvino/runtime/core.hpp b/src/inference/include/openvino/runtime/core.hpp index d761d8a2c52c33..3de1aad5ac0689 100644 --- a/src/inference/include/openvino/runtime/core.hpp +++ b/src/inference/include/openvino/runtime/core.hpp @@ -196,22 +196,29 @@ class OPENVINO_RUNTIME_API Core { * operation. * * @return A compiled model. + * @{ */ CompiledModel compile_model(const std::string& model_path, const AnyMap& properties = {}); +#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT + CompiledModel compile_model(const std::wstring& model_path, const AnyMap& properties = {}); +#endif + /// @} + /** - * @brief Reads and loads a compiled model from IR / ONNX / PDPD file to the default OpenVINI device selected by + * @brief Reads and loads a compiled model from IR / ONNX / PDPD file to the default OpenVINO device selected by * AUTO plugin. 
* * This can be more efficient than using read_model + compile_model(Model) flow * especially for cases when caching is enabled and cached model is available * * @tparam Properties Should be the pack of `std::pair` types - * @param model_path path to model + * @param model_path path to model with string or wstring * @param properties Optional pack of pairs: (property name, property value) relevant only for this * load operation * * @return A compiled model + * @{ */ template util::EnableIfAllStringAny compile_model(const std::string& model_path, @@ -219,6 +226,15 @@ class OPENVINO_RUNTIME_API Core { return compile_model(model_path, AnyMap{std::forward(properties)...}); } +#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT + template + util::EnableIfAllStringAny compile_model(const std::wstring& model_path, + Properties&&... properties) { + return compile_model(model_path, AnyMap{std::forward(properties)...}); + } +#endif + /// @} + /** * @brief Reads a model and creates a compiled model from the IR/ONNX/PDPD file. * @@ -231,11 +247,19 @@ class OPENVINO_RUNTIME_API Core { * operation. * * @return A compiled model. + * @{ */ CompiledModel compile_model(const std::string& model_path, const std::string& device_name, const AnyMap& properties = {}); +#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT + CompiledModel compile_model(const std::wstring& model_path, + const std::string& device_name, + const AnyMap& properties = {}); +#endif + /// @} + /** * @brief Reads a model and creates a compiled model from the IR/ONNX/PDPD file. * @@ -249,6 +273,7 @@ class OPENVINO_RUNTIME_API Core { * load operation. * * @return A compiled model. + * @{ */ template util::EnableIfAllStringAny compile_model(const std::string& model_path, @@ -257,6 +282,16 @@ class OPENVINO_RUNTIME_API Core { return compile_model(model_path, device_name, AnyMap{std::forward(properties)...}); } +#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT + template + util::EnableIfAllStringAny compile_model(const std::wstring& model_path, + const std::string& device_name, + Properties&&... properties) { + return compile_model(model_path, device_name, AnyMap{std::forward(properties)...}); + } +#endif + /// @} + /** * @brief Reads a model and creates a compiled model from the IR/ONNX/PDPD memory. * @param model String with a model in IR/ONNX/PDPD format. 
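Taken together, these declarations give every path-based compile_model overload a std::wstring twin. A minimal usage sketch of the new API surface (the model path, the "CPU" device name and the variable names below are illustrative, and the wide-string variants exist only when OPENVINO_ENABLE_UNICODE_PATH_SUPPORT is defined):

    #include <string>
    #include <openvino/runtime/core.hpp>

    int main() {
        ov::Core core;
    #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
        // Illustrative path; the point is that it may contain non-ASCII characters.
        const std::wstring model_path = L"model.xml";
        // Default device selection, no properties.
        ov::CompiledModel m0 = core.compile_model(model_path);
        // Explicit device plus a property pack (dispatches to the variadic overload).
        ov::CompiledModel m1 =
            core.compile_model(model_path, "CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY));
    #endif
        return 0;
    }

Keeping the overload set symmetric means callers never convert paths by hand; the conversion happens once inside Core, as the core.cpp hunk below shows.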
diff --git a/src/inference/src/core.cpp b/src/inference/src/core.cpp
index 9da46ee74fae3e..0a2fba9072b6ff 100644
--- a/src/inference/src/core.cpp
+++ b/src/inference/src/core.cpp
@@ -131,6 +131,12 @@ CompiledModel Core::compile_model(const std::string& model_path, const AnyMap& c
     return compile_model(model_path, ov::DEFAULT_DEVICE_NAME, config);
 }
 
+#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
+CompiledModel Core::compile_model(const std::wstring& model_path, const AnyMap& config) {
+    return compile_model(ov::util::wstring_to_string(model_path), config);
+}
+#endif
+
 CompiledModel Core::compile_model(const std::string& model_path, const std::string& device_name, const AnyMap& config) {
     OV_CORE_CALL_STATEMENT({
         auto exec = _impl->compile_model(model_path, device_name, config);
@@ -138,6 +144,14 @@ CompiledModel Core::compile_model(const std::string& model_path, const std::stri
     });
 }
 
+#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
+CompiledModel Core::compile_model(const std::wstring& model_path,
+                                  const std::string& device_name,
+                                  const AnyMap& config) {
+    return compile_model(ov::util::wstring_to_string(model_path), device_name, config);
+}
+#endif
+
 CompiledModel Core::compile_model(const std::string& model,
                                   const ov::Tensor& weights,
                                   const std::string& device_name,
diff --git a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_integration.hpp b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_integration.hpp
index 96b09fe68f8889..e5421ea7b8c977 100644
--- a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_integration.hpp
+++ b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_integration.hpp
@@ -17,6 +17,7 @@
 # define GTEST_COUT std::cerr << "[ ] [ INFO ] "
 # include
 # include
+# include "openvino/pass/manager.hpp"
 #endif
 
 namespace ov {
@@ -203,6 +204,185 @@ TEST(OVClassBasicTest, smoke_createMockEngineConfigThrows) {
 }
 
 #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
+inline void generateModelFile() {
+    ov::pass::Manager manager;
+    manager.register_pass<ov::pass::Serialize>("test_model.xml", "test_model.bin");
+    auto function = ngraph::builder::subgraph::makeConvPoolReluNoReshapes({1, 3, 227, 227});
+    manager.run_passes(function);
+}
+
+TEST(OVClassBasicTest, compile_model_no_property_unicode) {
+    std::string model_xml_name = "test_model.xml";
+    std::string model_bin_name = "test_model.bin";
+    generateModelFile();
+    for (std::size_t testIndex = 0; testIndex < CommonTestUtils::test_unicode_postfix_vector.size(); testIndex++) {
+        std::wstring postfix = L"_" + CommonTestUtils::test_unicode_postfix_vector[testIndex];
+        std::wstring modelXmlPathW = CommonTestUtils::addUnicodePostfixToPath(model_xml_name, postfix);
+        std::wstring modelBinPathW = CommonTestUtils::addUnicodePostfixToPath(model_bin_name, postfix);
+        GTEST_COUT << testIndex << ": " << ::ov::util::wstring_to_string(modelXmlPathW) << std::endl;
+
+        try {
+            bool is_copy_successfully;
+            is_copy_successfully = CommonTestUtils::copyFile(model_xml_name, modelXmlPathW);
+            if (!is_copy_successfully) {
+                FAIL() << "Unable to copy from '" << model_xml_name << "' to '"
+                       << ::ov::util::wstring_to_string(modelXmlPathW) << "'";
+            }
+
+            is_copy_successfully = CommonTestUtils::copyFile(model_bin_name, modelBinPathW);
+            if (!is_copy_successfully) {
+                FAIL() << "Unable to copy from '" << model_bin_name << "' to '"
+                       << ::ov::util::wstring_to_string(modelBinPathW) << "'";
+            }
+
+            ov::Core core = createCoreWithTemplate();
+
+            OV_ASSERT_NO_THROW(core.compile_model(modelXmlPathW));
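+            // The std::wstring path feeds the new wide-string overload directly;
+            // the wstring_to_string conversion now happens once inside
+            // Core::compile_model (see the core.cpp hunk above) instead of at
+            // every call site.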
CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + GTEST_COUT << "OK" << std::endl; + } catch (const ov::Exception& e_next) { + CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); + FAIL() << e_next.what(); + } + } + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); +} + +TEST(OVClassBasicTest, compile_model_with_property_unicode) { + std::string model_xml_name = "test_model.xml"; + std::string model_bin_name = "test_model.bin"; + generateModelFile(); + for (std::size_t testIndex = 0; testIndex < CommonTestUtils::test_unicode_postfix_vector.size(); testIndex++) { + std::wstring postfix = L"_" + CommonTestUtils::test_unicode_postfix_vector[testIndex]; + std::wstring modelXmlPathW = CommonTestUtils::addUnicodePostfixToPath(model_xml_name, postfix); + std::wstring modelBinPathW = CommonTestUtils::addUnicodePostfixToPath(model_bin_name, postfix); + GTEST_COUT << testIndex << ": " << ::ov::util::wstring_to_string(modelXmlPathW) << std::endl; + + try { + bool is_copy_successfully; + is_copy_successfully = CommonTestUtils::copyFile(model_xml_name, modelXmlPathW); + if (!is_copy_successfully) { + FAIL() << "Unable to copy from '" << model_xml_name << "' to '" + << ::ov::util::wstring_to_string(modelXmlPathW) << "'"; + } + + is_copy_successfully = CommonTestUtils::copyFile(model_bin_name, modelBinPathW); + if (!is_copy_successfully) { + FAIL() << "Unable to copy from '" << model_bin_name << "' to '" + << ::ov::util::wstring_to_string(modelBinPathW) << "'"; + } + + ov::Core core = createCoreWithTemplate(); + + OV_ASSERT_NO_THROW( + core.compile_model(modelXmlPathW, ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY))); + CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + GTEST_COUT << "OK" << std::endl; + } catch (const ov::Exception& e_next) { + CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); + FAIL() << e_next.what(); + } + } + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); +} + +TEST_P(OVClassBasicTestP, compile_model_with_device_no_property_unicode) { + std::string model_xml_name = "test_model.xml"; + std::string model_bin_name = "test_model.bin"; + generateModelFile(); + for (std::size_t testIndex = 0; testIndex < CommonTestUtils::test_unicode_postfix_vector.size(); testIndex++) { + std::wstring postfix = L"_" + CommonTestUtils::test_unicode_postfix_vector[testIndex]; + std::wstring modelXmlPathW = CommonTestUtils::addUnicodePostfixToPath(model_xml_name, postfix); + std::wstring modelBinPathW = CommonTestUtils::addUnicodePostfixToPath(model_bin_name, postfix); + GTEST_COUT << testIndex << ": " << ::ov::util::wstring_to_string(modelXmlPathW) << std::endl; + try { + bool is_copy_successfully; + is_copy_successfully = CommonTestUtils::copyFile(model_xml_name, modelXmlPathW); + if (!is_copy_successfully) { + FAIL() << "Unable to copy from '" << model_xml_name << "' to '" + << ::ov::util::wstring_to_string(modelXmlPathW) << "'"; + } + + is_copy_successfully = CommonTestUtils::copyFile(model_bin_name, modelBinPathW); + if (!is_copy_successfully) { + FAIL() << "Unable to copy from '" << model_bin_name << "' to '" + << ::ov::util::wstring_to_string(modelBinPathW) 
<< "'"; + } + + ov::Core core = createCoreWithTemplate(); + + OV_ASSERT_NO_THROW(core.compile_model(modelXmlPathW, target_device)); + CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + GTEST_COUT << "OK" << std::endl; + } catch (const ov::Exception& e_next) { + CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); + FAIL() << e_next.what(); + } + } + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); +} + +TEST_P(OVClassBasicTestP, compile_model_with_device_with_property_unicode) { + std::string model_xml_name = "test_model.xml"; + std::string model_bin_name = "test_model.bin"; + generateModelFile(); + for (std::size_t testIndex = 0; testIndex < CommonTestUtils::test_unicode_postfix_vector.size(); testIndex++) { + std::wstring postfix = L"_" + CommonTestUtils::test_unicode_postfix_vector[testIndex]; + std::wstring modelXmlPathW = CommonTestUtils::addUnicodePostfixToPath(model_xml_name, postfix); + std::wstring modelBinPathW = CommonTestUtils::addUnicodePostfixToPath(model_bin_name, postfix); + GTEST_COUT << testIndex << ": " << ::ov::util::wstring_to_string(modelXmlPathW) << std::endl; + + try { + bool is_copy_successfully; + is_copy_successfully = CommonTestUtils::copyFile(model_xml_name, modelXmlPathW); + if (!is_copy_successfully) { + FAIL() << "Unable to copy from '" << model_xml_name << "' to '" + << ::ov::util::wstring_to_string(modelXmlPathW) << "'"; + } + + is_copy_successfully = CommonTestUtils::copyFile(model_bin_name, modelBinPathW); + if (!is_copy_successfully) { + FAIL() << "Unable to copy from '" << model_bin_name << "' to '" + << ::ov::util::wstring_to_string(modelBinPathW) << "'"; + } + + ov::Core core = createCoreWithTemplate(); + + OV_ASSERT_NO_THROW(core.compile_model(modelXmlPathW, + target_device, + ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY))); + CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + GTEST_COUT << "OK" << std::endl; + } catch (const ov::Exception& e_next) { + CommonTestUtils::removeFile(modelXmlPathW); + CommonTestUtils::removeFile(modelBinPathW); + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); + FAIL() << e_next.what(); + } + } + CommonTestUtils::removeFile(model_xml_name); + CommonTestUtils::removeFile(model_bin_name); +} +#endif + +#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPOR TEST_P(OVClassBasicTestP, smoke_registerPluginsXMLUnicodePath) { const std::string pluginXML = getPluginFile(); From a204b04faed3e482cafb7a929f93b876e487c207 Mon Sep 17 00:00:00 2001 From: River Li Date: Wed, 22 Mar 2023 13:45:03 +0800 Subject: [PATCH 023/296] fix mem leak (#16456) --- samples/c/hello_classification/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/c/hello_classification/main.c b/samples/c/hello_classification/main.c index b3748356ad2fd0..4fbf0c5f7a99b6 100644 --- a/samples/c/hello_classification/main.c +++ b/samples/c/hello_classification/main.c @@ -72,6 +72,7 @@ struct infer_result* tensor_to_infer_result(ov_tensor_t* tensor, size_t* result_ results[i].probability = float_data[i]; } + ov_shape_free(&output_shape); return results; } From a71c83d366b98953c702a50725e3fdcd95dc95e0 Mon Sep 17 00:00:00 2001 From: "Min, Byungil" Date: Wed, 22 Mar 2023 15:15:02 +0900 Subject: [PATCH 024/296] [GPU] Resolve eltwise kernel build failure (#16458) 
Signed-off-by: Min, Byungil --- .../eltwise/eltwise_kernel_blocked_opt.cpp | 44 +++++++++---------- .../tests/test_cases/eltwise_gpu_test.cpp | 7 ++- 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/eltwise/eltwise_kernel_blocked_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/eltwise/eltwise_kernel_blocked_opt.cpp index ed520fb4596e44..0ef8692bb348c2 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/eltwise/eltwise_kernel_blocked_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/eltwise/eltwise_kernel_blocked_opt.cpp @@ -12,9 +12,9 @@ namespace kernel_selector { static inline bool InputHasFeatureBroadcast(const eltwise_params& params, const size_t op_num, const size_t input_idx); static inline bool IsBroadcastingPossibleInput(const DataTensor& input, const DataTensor& output); -static inline int SelectVecSizeFromFormat(const eltwise_params& params, size_t index); -static inline int GetInnerFeatureBlockSize(const eltwise_params& arg, size_t index); -static inline int GetInnerBatchBlockSize(const eltwise_params& arg, size_t index); +static inline int SelectVecSizeFromFormat(const DataTensor&); +static inline int GetInnerFeatureBlockSize(const DataTensor&); +static inline int GetInnerBatchBlockSize(const DataTensor&); static inline size_t CalculateTotalWorkItemCount(const eltwise_params& params); @@ -105,13 +105,13 @@ bool EltwiseKernel_blocked_opt::Validate(const Params& params, const optional_pa return false; for (size_t i = 0; i < ewParams.inputs.size(); i++) { - if ((SelectVecSizeFromFormat(ewParams, i) == 1) && + if ((SelectVecSizeFromFormat(ewParams.inputs[i]) == 1) && !IsBroadcastingPossibleInput(ewParams.inputs[i], ewParams.outputs[0])) { return false; } } - const auto vec_size = SelectVecSizeFromFormat(ewParams, 0); + const auto vec_size = SelectVecSizeFromFormat(ewParams.outputs[0]); const auto input0 = ewParams.inputs[0]; const auto& output = ewParams.outputs[0]; // Check that padding before features doesn't mis-align the blocks @@ -148,7 +148,7 @@ bool EltwiseKernel_blocked_opt::Validate(const Params& params, const optional_pa } JitConstants EltwiseKernel_blocked_opt::MakeLoadJitConstants(const eltwise_params& params, bool /*use_vload*/) const { - const auto vec_size = SelectVecSizeFromFormat(params, 0); + const auto vec_size = SelectVecSizeFromFormat(params.outputs[0]); JitConstants jit = {}; std::string vload_decls; @@ -179,7 +179,7 @@ JitConstants EltwiseKernel_blocked_opt::MakeLoadJitConstants(const eltwise_param bool feature_broadcasting = (params.inputs[input_idx].Feature().v == 1 && params.outputs[0].Feature().v != 1); bool spatial_broadcasting = (params.inputs[input_idx].LogicalSize() == params.outputs[0].Feature().v && params.inputs[input_idx].LogicalSize() == params.inputs[input_idx].Feature().v && - GetInnerBatchBlockSize(params, input_idx) == 1 && !Padded(params.inputs[input_idx])); + GetInnerBatchBlockSize(params.inputs[input_idx]) == 1 && !Padded(params.inputs[input_idx])); bool full_tensor = (params.inputs[input_idx].LogicalSize() == params.outputs[0].LogicalSize() && !Padded(params.inputs[input_idx])); // Based on dimension, get a string of indexing for formmatted GET_INDEX @@ -278,9 +278,9 @@ JitConstants EltwiseKernel_blocked_opt::MakeLoadJitConstants(const eltwise_param JitConstants EltwiseKernel_blocked_opt::GetJitConstants(const eltwise_params& params) const { JitConstants jit = MakeBaseParamsJitConstants(params); - const auto vec_size = 
SelectVecSizeFromFormat(params, 0); - const auto inner_feature_blk_size = GetInnerFeatureBlockSize(params, 0); - const auto inner_batch_blk_size = GetInnerBatchBlockSize(params, 0); + const auto vec_size = SelectVecSizeFromFormat(params.outputs[0]); + const auto inner_feature_blk_size = GetInnerFeatureBlockSize(params.outputs[0]); + const auto inner_batch_blk_size = GetInnerBatchBlockSize(params.outputs[0]); jit.Merge(MakeTypeJitConstants(GetAccumulatorType(params), "ACCUMULATOR")); jit.AddConstant(MakeJitConstant("BLOCK_SIZE", vec_size)); @@ -376,7 +376,7 @@ EltwiseKernelBase::DispatchData EltwiseKernel_blocked_opt::SetDefault(const eltw // so that each global id can be an index of each work group. // It also makes an index for fomatted GET_INDEX macro if needed(e.g. feature broadcasting, fusing). KernelData kd = KernelData::Default(params); - dispatchData.gws = {std::max(CalculateTotalWorkItemCount(params) / SelectVecSizeFromFormat(params, 0), (size_t)1), 1, 1}; + dispatchData.gws = {std::max(CalculateTotalWorkItemCount(params) / SelectVecSizeFromFormat(params.outputs[0]), (size_t)1), 1, 1}; dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo); return dispatchData; @@ -384,8 +384,8 @@ EltwiseKernelBase::DispatchData EltwiseKernel_blocked_opt::SetDefault(const eltw // Local static inline size_t CalculateTotalWorkItemCount(const eltwise_params& params) { - auto feature = Align(params.outputs[0].Feature().v, GetInnerFeatureBlockSize(params, 0)); - auto batch = Align(params.outputs[0].Batch().v, GetInnerBatchBlockSize(params, 0)); + auto feature = Align(params.outputs[0].Feature().v, GetInnerFeatureBlockSize(params.outputs[0])); + auto batch = Align(params.outputs[0].Batch().v, GetInnerBatchBlockSize(params.outputs[0])); size_t spatial = 0; if (DataTensor::ChannelsCount(params.outputs[0].GetLayout()) == 5) spatial = params.outputs[0].X().v * params.outputs[0].Y().v * params.outputs[0].Z().v; @@ -395,10 +395,10 @@ static inline size_t CalculateTotalWorkItemCount(const eltwise_params& params) { return (feature * batch * spatial); } -static inline int SelectVecSizeFromFormat(const eltwise_params& arg, size_t index) { +static inline int SelectVecSizeFromFormat(const DataTensor& tensor) { // No feature inner block : not acceptable for calculation of ordered index - auto in_layout = arg.inputs[index].GetLayout(); - switch (in_layout) { + auto layout = tensor.GetLayout(); + switch (layout) { case DataLayout::b_fs_yx_fsv4: return 4; case DataLayout::b_fs_yx_fsv16: @@ -419,9 +419,9 @@ static inline int SelectVecSizeFromFormat(const eltwise_params& arg, size_t inde } } -static inline int GetInnerBatchBlockSize(const eltwise_params& arg, size_t index) { - auto in_layout = arg.inputs[index].GetLayout(); - switch (in_layout) { +static inline int GetInnerBatchBlockSize(const DataTensor& tensor) { + auto layout = tensor.GetLayout(); + switch (layout) { case DataLayout::b_fs_yx_fsv4: case DataLayout::b_fs_yx_fsv16: case DataLayout::b_fs_zyx_fsv16: @@ -445,9 +445,9 @@ static inline int GetInnerBatchBlockSize(const eltwise_params& arg, size_t index return 1; } -static inline int GetInnerFeatureBlockSize(const eltwise_params& arg, size_t index) { - auto in_layout = arg.inputs[index].GetLayout(); - switch (in_layout) { +static inline int GetInnerFeatureBlockSize(const DataTensor& tensor) { + auto layout = tensor.GetLayout(); + switch (layout) { case DataLayout::b_fs_yx_fsv4: return 4; case DataLayout::b_fs_yx_fsv16: diff --git 
a/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp index e3e9d06c3ce1a7..b1721f2b7f1ce3 100644 --- a/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/eltwise_gpu_test.cpp @@ -4331,10 +4331,11 @@ struct eltwise_random_test_param_generator : std::vector Date: Wed, 22 Mar 2023 07:22:44 +0100 Subject: [PATCH 025/296] [PyOV] Align Python API's attributes and methods between its modules (#15889) * Complete alignment * Minor change * Apply discussion results * Apply discussion comments * Clang * Apply CR * Code style --- .../python/src/pyopenvino/core/core.cpp | 15 +++++ .../src/pyopenvino/core/infer_request.cpp | 37 ++++++----- .../python/src/pyopenvino/graph/dimension.cpp | 17 +++++ .../pyopenvino/graph/discrete_type_info.cpp | 6 +- .../python/src/pyopenvino/graph/model.cpp | 32 ++++++++++ .../pyopenvino/graph/types/element_type.cpp | 8 +++ .../python/tests/test_runtime/test_core.py | 12 ++-- .../tests/test_runtime/test_dimension.py | 63 +++++++++++++++++++ .../tests/test_runtime/test_infer_request.py | 16 +++-- .../python/tests/test_runtime/test_model.py | 57 ++++++++++++----- .../python/tests/test_runtime/test_type.py | 16 +++++ .../tests/test_utils/test_data_dispatch.py | 4 +- 12 files changed, 241 insertions(+), 42 deletions(-) create mode 100644 src/bindings/python/tests/test_runtime/test_dimension.py diff --git a/src/bindings/python/src/pyopenvino/core/core.cpp b/src/bindings/python/src/pyopenvino/core/core.cpp index ef94b298c9480f..c110dcd5bd7776 100644 --- a/src/bindings/python/src/pyopenvino/core/core.cpp +++ b/src/bindings/python/src/pyopenvino/core/core.cpp @@ -583,6 +583,21 @@ void regclass_Core(py::module m) { :type extensions: list[openvino.runtime.Extension] )"); + cls.def("get_available_devices", + &ov::Core::get_available_devices, + py::call_guard(), + R"( + Returns devices available for inference Core objects goes over all registered plugins. + + GIL is released while running this function. + + :returns: A list of devices. The devices are returned as: CPU, GPU.0, GPU.1, GNA... + If there more than one device of specific type, they are enumerated with .# suffix. + Such enumerated device can later be used as a device name in all Core methods like: + compile_model, query_model, set_property and so on. + :rtype: list + )"); + cls.def_property_readonly("available_devices", &ov::Core::get_available_devices, py::call_guard(), diff --git a/src/bindings/python/src/pyopenvino/core/infer_request.cpp b/src/bindings/python/src/pyopenvino/core/infer_request.cpp index eb71fd7f953bcf..585441569f9e77 100644 --- a/src/bindings/python/src/pyopenvino/core/infer_request.cpp +++ b/src/bindings/python/src/pyopenvino/core/infer_request.cpp @@ -12,6 +12,7 @@ #include "pyopenvino/core/common.hpp" #include "pyopenvino/core/containers.hpp" +#include "pyopenvino/utils/utils.hpp" PYBIND11_MAKE_OPAQUE(Containers::TensorIndexMap); PYBIND11_MAKE_OPAQUE(Containers::TensorNameMap); @@ -647,21 +648,29 @@ void regclass_InferRequest(py::module m) { :rtype: List[openvino.runtime.ConstOutput] )"); - cls.def_property_readonly("inputs", - &InferRequestWrapper::get_input_tensors, - R"( - Gets all input tensors of this InferRequest. 
- - :rtype: List[openvino.runtime.Tensor] - )"); + cls.def_property_readonly( + "inputs", + [](InferRequestWrapper& self) { + Common::utils::deprecation_warning("inputs", "2024.0", "Please use 'input_tensors' property instead."); + return self.get_input_tensors(); + }, + R"( + Gets all input tensors of this InferRequest. + + :rtype: List[openvino.runtime.Tensor] + )"); - cls.def_property_readonly("outputs", - &InferRequestWrapper::get_output_tensors, - R"( - Gets all output tensors of this InferRequest. - - :rtype: List[openvino.runtime.Tensor] - )"); + cls.def_property_readonly( + "outputs", + [](InferRequestWrapper& self) { + Common::utils::deprecation_warning("outputs", "2024.0", "Please use 'output_tensors' property instead."); + return self.get_output_tensors(); + }, + R"( + Gets all output tensors of this InferRequest. + + :rtype: List[openvino.runtime.Tensor] + )"); cls.def_property_readonly("input_tensors", &InferRequestWrapper::get_input_tensors, diff --git a/src/bindings/python/src/pyopenvino/graph/dimension.cpp b/src/bindings/python/src/pyopenvino/graph/dimension.cpp index fc98155c95ae36..b8e6241bad4a07 100644 --- a/src/bindings/python/src/pyopenvino/graph/dimension.cpp +++ b/src/bindings/python/src/pyopenvino/graph/dimension.cpp @@ -91,6 +91,15 @@ void regclass_graph_Dimension(py::module m) { Return this dimension's min_dimension as integer. This dimension must be dynamic and non-negative. + :return: Value of the dimension. + :rtype: int + )"); + dim.def_property_readonly("min_length", + &ov::Dimension::get_min_length, + R"( + Return this dimension's min_dimension as integer. + This dimension must be dynamic and non-negative. + :return: Value of the dimension. :rtype: int )"); @@ -103,7 +112,15 @@ void regclass_graph_Dimension(py::module m) { :return: Value of the dimension. :rtype: int )"); + dim.def_property_readonly("max_length", + &ov::Dimension::get_max_length, + R"( + Return this dimension's max_dimension as integer. + This dimension must be dynamic and non-negative. + :return: Value of the dimension. 
+ :rtype: int + )"); dim.def("same_scheme", &ov::Dimension::same_scheme, py::arg("dim"), diff --git a/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp b/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp index 74b81eb62a45cf..dca43473035be7 100644 --- a/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp +++ b/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp @@ -9,6 +9,7 @@ #include #include "openvino/core/type.hpp" +#include "pyopenvino/utils/utils.hpp" namespace py = pybind11; @@ -29,7 +30,10 @@ void regclass_graph_DiscreteTypeInfo(py::module m) { discrete_type_info.def_readonly("version_id", &ov::DiscreteTypeInfo::version_id); discrete_type_info.def_readonly("parent", &ov::DiscreteTypeInfo::parent); - discrete_type_info.def("get_version", &ov::DiscreteTypeInfo::get_version); + discrete_type_info.def("get_version", []() { + Common::utils::deprecation_warning("get_version()", "2024.0", "Please use version attribute instead."); + return &ov::DiscreteTypeInfo::get_version; + }); discrete_type_info.def("hash", [](const ov::DiscreteTypeInfo& self) { return self.hash(); }); diff --git a/src/bindings/python/src/pyopenvino/graph/model.cpp b/src/bindings/python/src/pyopenvino/graph/model.cpp index 6c898713f0647e..f95801bdb11258 100644 --- a/src/bindings/python/src/pyopenvino/graph/model.cpp +++ b/src/bindings/python/src/pyopenvino/graph/model.cpp @@ -480,6 +480,14 @@ void regclass_graph_Model(py::module m) { :return: ParameterVector containing model parameters. :rtype: ParameterVector )"); + model.def_property_readonly("parameters", + &ov::Model::get_parameters, + R"( + Return the model parameters. + + :return: ParameterVector containing model parameters. + :rtype: ParameterVector + )"); model.def("get_results", &ov::Model::get_results, R"( @@ -488,6 +496,14 @@ void regclass_graph_Model(py::module m) { :return: ResultVector containing model parameters. :rtype: ResultVector )"); + model.def_property_readonly("results", + &ov::Model::get_results, + R"( + Return a list of model outputs. + + :return: ResultVector containing model parameters. + :rtype: ResultVector + )"); model.def("get_result", &ov::Model::get_result, R"( @@ -496,6 +512,14 @@ void regclass_graph_Model(py::module m) { :return: Node object representing result. :rtype: openvino.runtime.Node )"); + model.def_property_readonly("result", + &ov::Model::get_result, + R"( + Return single result. + + :return: Node object representing result. + :rtype: openvino.runtime.Node + )"); model.def("get_result_index", (int64_t(ov::Model::*)(const ov::Output&) const) & ov::Model::get_result_index, py::arg("value"), @@ -561,6 +585,14 @@ void regclass_graph_Model(py::module m) { :rtype: bool )"); + model.def_property_readonly("dynamic", + &ov::Model::is_dynamic, + R"( + Returns true if any of the op's defined in the model + contains partial shape. 
+ + :rtype: bool + )"); model.def("input", (ov::Output(ov::Model::*)()) & ov::Model::input); model.def("input", (ov::Output(ov::Model::*)(size_t)) & ov::Model::input, py::arg("index")); diff --git a/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp b/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp index 0123dd780e2c1c..41524dfd1b6bad 100644 --- a/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp +++ b/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp @@ -68,11 +68,17 @@ void regclass_graph_Type(py::module m) { type.def("is_static", &ov::element::Type::is_static); type.def("is_dynamic", &ov::element::Type::is_dynamic); type.def("is_real", &ov::element::Type::is_real); + type.def_property_readonly("real", &ov::element::Type::is_real); type.def("is_integral", &ov::element::Type::is_integral); + type.def_property_readonly("integral", &ov::element::Type::is_integral); type.def("is_integral_number", &ov::element::Type::is_integral_number); + type.def_property_readonly("integral_number", &ov::element::Type::is_integral_number); type.def("is_signed", &ov::element::Type::is_signed); + type.def_property_readonly("signed", &ov::element::Type::is_signed); type.def("is_quantized", &ov::element::Type::is_quantized); + type.def_property_readonly("quantized", &ov::element::Type::is_quantized); type.def("get_type_name", &ov::element::Type::get_type_name); + type.def_property_readonly("type_name", &ov::element::Type::get_type_name); type.def("compatible", &ov::element::Type::compatible, py::arg("other"), @@ -121,5 +127,7 @@ void regclass_graph_Type(py::module m) { )"); type.def_property_readonly("size", &ov::element::Type::size); + type.def("get_size", &ov::element::Type::size); type.def_property_readonly("bitwidth", &ov::element::Type::bitwidth); + type.def("get_bitwidth", &ov::element::Type::bitwidth); } diff --git a/src/bindings/python/tests/test_runtime/test_core.py b/src/bindings/python/tests/test_runtime/test_core.py index 15c6a2ed553eb1..87709aa443316c 100644 --- a/src/bindings/python/tests/test_runtime/test_core.py +++ b/src/bindings/python/tests/test_runtime/test_core.py @@ -176,11 +176,13 @@ def test_get_version(device): def test_available_devices(device): core = Core() - devices = core.available_devices - assert device in devices, ( - f"Current device '{device}' is not listed in " - f"available devices '{', '.join(devices)}'" - ) + devices_attr = core.available_devices + devices_method = core.get_available_devices() + for devices in (devices_attr, devices_method): + assert device in devices, ( + f"Current device '{device}' is not listed in " + f"available devices '{', '.join(devices)}'" + ) def test_get_property(device): diff --git a/src/bindings/python/tests/test_runtime/test_dimension.py b/src/bindings/python/tests/test_runtime/test_dimension.py new file mode 100644 index 00000000000000..697e11555590b0 --- /dev/null +++ b/src/bindings/python/tests/test_runtime/test_dimension.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.runtime import Dimension + + +def test_dynamic_dimension(): + dim = Dimension() + assert dim.is_dynamic + assert str(dim) == "?" + assert dim.to_string() == "?" 
+    assert str(dim.__repr__) == "<bound method PyCapsule.__repr__ of <Dimension: ?>>"
+
+
+def test_dynamic_dimension_with_bounds():
+    dim = Dimension(2, 5)
+    assert str(dim) == "2..5"
+    assert dim.to_string() == "2..5"
+    assert not dim.is_static
+    assert dim.is_dynamic
+    assert dim.get_min_length() == 2
+    assert dim.min_length == 2
+    assert dim.get_max_length() == 5
+    assert dim.max_length == 5
+
+
+def test_static_dimension():
+    dim = Dimension(2)
+    assert str(dim) == "2"
+    assert dim.to_string() == "2"
+    assert dim.is_static
+    assert not dim.is_dynamic
+    assert len(dim) == 2
+    assert dim.get_length() == 2
+
+
+def test_dim_same_scheme():
+    assert Dimension().same_scheme(Dimension()) is True
+    assert Dimension(3).same_scheme(Dimension(3)) is True
+    assert Dimension(3).same_scheme(Dimension(4)) is False
+    assert Dimension().same_scheme(Dimension(4)) is False
+
+
+def test_dim_compatible():
+    assert Dimension().compatible(Dimension()) is True
+    assert Dimension(3).compatible(Dimension(3)) is True
+    assert Dimension(3).compatible(Dimension(4)) is False
+    assert Dimension().compatible(Dimension(4)) is True
+
+
+def test_dim_relax():
+    assert Dimension().relaxes(Dimension()) is True
+    assert Dimension(3).relaxes(Dimension(3)) is True
+    assert Dimension(3).relaxes(Dimension(4)) is False
+    assert Dimension().relaxes(Dimension(4)) is True
+
+
+def test_dim_refine():
+    assert Dimension().refines(Dimension()) is True
+    assert Dimension(3).refines(Dimension(3)) is True
+    assert Dimension(3).refines(Dimension(4)) is False
+    assert Dimension().refines(Dimension(4)) is False
diff --git a/src/bindings/python/tests/test_runtime/test_infer_request.py b/src/bindings/python/tests/test_runtime/test_infer_request.py
index 4540e9d7ebe8d9..4755fde6a77ca5 100644
--- a/src/bindings/python/tests/test_runtime/test_infer_request.py
+++ b/src/bindings/python/tests/test_runtime/test_infer_request.py
@@ -267,7 +267,7 @@ def test_batched_tensors(device):
         assert np.array_equal(actual[idx], _tmp)
 
 
-def test_inputs_outputs_property(device):
+def test_inputs_outputs_property_and_method(device):
     num_inputs = 10
     input_shape = [1]
     params = [ops.parameter(input_shape, np.uint8) for _ in range(num_inputs)]
@@ -277,10 +277,14 @@ def test_inputs_outputs_property(device):
     request = compiled_model.create_infer_request()
     data = [np.atleast_1d(i) for i in range(num_inputs)]
     results = request.infer(data).values()
-    for result, output_tensor in zip(results, request.outputs):
+    for result, output_tensor in zip(results, request.output_tensors):
         assert np.array_equal(result, output_tensor.data)
-    for input_data, input_tensor in zip(data, request.inputs):
+    for input_data, input_tensor in zip(data, request.input_tensors):
         assert np.array_equal(input_data, input_tensor.data)
+    for input_tensor in request.input_tensors:
+        assert list(input_tensor.get_shape()) == input_shape
+    for output_tensor in request.output_tensors:
+        assert list(output_tensor.get_shape()) == input_shape
 
 
 @pytest.mark.skip(reason="Sporadically failed. Need further investigation.
Ticket - 95967") @@ -395,7 +399,7 @@ def test_infer_mixed_values(device, ov_type, numpy_dtype, shared_flag): request.infer([tensor1, array1], shared_memory=shared_flag) - assert np.array_equal(request.outputs[0].data, np.concatenate((tensor1.data, array1))) + assert np.array_equal(request.output_tensors[0].data, np.concatenate((tensor1.data, array1))) @pytest.mark.parametrize(("ov_type", "numpy_dtype"), [ @@ -419,8 +423,7 @@ def test_async_mixed_values(device, ov_type, numpy_dtype, shared_flag): request.start_async([tensor1, array1], shared_memory=shared_flag) request.wait() - - assert np.array_equal(request.outputs[0].data, np.concatenate((tensor1.data, array1))) + assert np.array_equal(request.output_tensors[0].data, np.concatenate((tensor1.data, array1))) @pytest.mark.parametrize(("ov_type", "numpy_dtype"), [ @@ -512,6 +515,7 @@ def test_infer_queue_iteration(device): it = iter(infer_queue) infer_request = next(it) assert isinstance(infer_request, InferRequest) + assert infer_request.userdata is None with pytest.raises(StopIteration): next(it) diff --git a/src/bindings/python/tests/test_runtime/test_model.py b/src/bindings/python/tests/test_runtime/test_model.py index b13e5015275403..20f58201ee84e3 100644 --- a/src/bindings/python/tests/test_runtime/test_model.py +++ b/src/bindings/python/tests/test_runtime/test_model.py @@ -48,8 +48,10 @@ def test_function_add_outputs_tensor_name(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 new_outs = function.add_outputs("relu_t1") assert len(function.get_results()) == 2 + assert len(function.results) == 2 assert "relu_t1" in function.outputs[1].get_tensor().names assert len(new_outs) == 1 assert new_outs[0].get_node() == function.outputs[1].get_node() @@ -64,8 +66,10 @@ def test_function_add_outputs_op_name(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 new_outs = function.add_outputs(("relu1", 0)) assert len(function.get_results()) == 2 + assert len(function.results) == 2 assert len(new_outs) == 1 assert new_outs[0].get_node() == function.outputs[1].get_node() assert new_outs[0].get_index() == function.outputs[1].get_index() @@ -78,9 +82,9 @@ def test_function_add_output_port(): relu1.get_output_tensor(0).set_names({"relu_t1"}) relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") - assert len(function.get_results()) == 1 + assert len(function.results) == 1 new_outs = function.add_outputs(relu1.output(0)) - assert len(function.get_results()) == 2 + assert len(function.results) == 2 assert len(new_outs) == 1 assert new_outs[0].get_node() == function.outputs[1].get_node() assert new_outs[0].get_index() == function.outputs[1].get_index() @@ -94,6 +98,7 @@ def test_function_add_output_incorrect_tensor_name(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 with pytest.raises(RuntimeError) as e: function.add_outputs("relu_t") # Verify that absent output name is present in error message @@ -108,6 +113,7 @@ def test_function_add_output_incorrect_idx(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 with pytest.raises(RuntimeError) as e: 
function.add_outputs(("relu1", 1234)) # Verify that op name and port number are present in error message @@ -123,6 +129,7 @@ def test_function_add_output_incorrect_name(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 with pytest.raises(RuntimeError) as e: function.add_outputs(("relu_1", 0)) # Verify that absent op name is present in error message @@ -139,8 +146,10 @@ def test_add_outputs_several_tensors(): relu3 = ops.relu(relu2, name="relu3") function = Model(relu3, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 new_outs = function.add_outputs(["relu_t1", "relu_t2"]) assert len(function.get_results()) == 3 + assert len(function.results) == 3 assert len(new_outs) == 2 assert new_outs[0].get_node() == function.outputs[1].get_node() assert new_outs[0].get_index() == function.outputs[1].get_index() @@ -158,8 +167,10 @@ def test_add_outputs_several_ports(): relu3 = ops.relu(relu2, name="relu3") function = Model(relu3, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 new_outs = function.add_outputs([("relu1", 0), ("relu2", 0)]) assert len(function.get_results()) == 3 + assert len(function.results) == 3 assert len(new_outs) == 2 assert new_outs[0].get_node() == function.outputs[1].get_node() assert new_outs[0].get_index() == function.outputs[1].get_index() @@ -175,6 +186,7 @@ def test_add_outputs_incorrect_value(): relu2 = ops.relu(relu1, name="relu2") function = Model(relu2, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 with pytest.raises(TypeError) as e: function.add_outputs(0) assert "Incorrect type of a value to add as output." 
in str(e.value) @@ -187,6 +199,7 @@ def test_add_outputs_incorrect_outputs_list(): relu1.get_output_tensor(0).set_names({"relu_t1"}) function = Model(relu1, [param], "TestFunction") assert len(function.get_results()) == 1 + assert len(function.results) == 1 with pytest.raises(TypeError) as e: function.add_outputs([0, 0]) assert "Incorrect type of a value to add as output at index 0" in str(e.value) @@ -283,6 +296,9 @@ def test_get_batch(): param = model.get_parameters()[0] param.set_layout(Layout("NC")) assert get_batch(model) == 2 + param = model.parameters[0] + param.set_layout(Layout("NC")) + assert get_batch(model) == 2 def test_get_batch_chwn(): @@ -292,41 +308,53 @@ def test_get_batch_chwn(): add = ops.add(param1, param2) add2 = ops.add(add, param3) model = Model(add2, [param1, param2, param3], "TestFunction") - param = model.get_parameters()[0] - param.set_layout(Layout("CHWN")) + param_method = model.get_parameters()[0] + param_attr = model.parameters[0] + param_method.set_layout(Layout("CHWN")) + param_attr.set_layout(Layout("CHWN")) assert get_batch(model) == 4 def test_set_batch_dimension(): model = generate_add_model() - model_param1 = model.get_parameters()[0] - model_param2 = model.get_parameters()[1] + model_param1_method = model.get_parameters()[0] + model_param2_method = model.get_parameters()[1] + model_param1_attr = model.parameters[0] + model_param2_attr = model.parameters[1] # check batch == 2 - model_param1.set_layout(Layout("NC")) + model_param1_method.set_layout(Layout("NC")) + model_param1_attr.set_layout(Layout("NC")) assert get_batch(model) == 2 # set batch to 1 set_batch(model, Dimension(1)) assert get_batch(model) == 1 # check if shape of param 1 has changed - assert model_param1.get_output_shape(0) == PartialShape([1, 1]) + assert model_param1_method.get_output_shape(0) == PartialShape([1, 1]) + assert model_param1_attr.get_output_shape(0) == PartialShape([1, 1]) # check if shape of param 2 has not changed - assert model_param2.get_output_shape(0) == PartialShape([2, 1]) + assert model_param2_method.get_output_shape(0) == PartialShape([2, 1]) + assert model_param2_attr.get_output_shape(0) == PartialShape([2, 1]) def test_set_batch_int(): model = generate_add_model() - model_param1 = model.get_parameters()[0] - model_param2 = model.get_parameters()[1] + model_param1_method = model.get_parameters()[0] + model_param2_method = model.get_parameters()[1] + model_param1_attr = model.parameters[0] + model_param2_attr = model.parameters[1] # check batch == 2 - model_param1.set_layout(Layout("NC")) + model_param1_method.set_layout(Layout("NC")) + model_param1_attr.set_layout(Layout("NC")) assert get_batch(model) == 2 # set batch to 1 set_batch(model, 1) assert get_batch(model) == 1 # check if shape of param 1 has changed - assert model_param1.get_output_shape(0) == PartialShape([1, 1]) + assert model_param1_method.get_output_shape(0) == PartialShape([1, 1]) + assert model_param1_attr.get_output_shape(0) == PartialShape([1, 1]) # check if shape of param 2 has not changed - assert model_param2.get_output_shape(0) == PartialShape([2, 1]) + assert model_param2_method.get_output_shape(0) == PartialShape([2, 1]) + assert model_param2_attr.get_output_shape(0) == PartialShape([2, 1]) def test_set_batch_default_batch_size(): @@ -335,6 +363,7 @@ def test_set_batch_default_batch_size(): model_param1.set_layout(Layout("NC")) set_batch(model) assert model.is_dynamic() + assert model.dynamic def test_reshape_with_ports(): diff --git 
a/src/bindings/python/tests/test_runtime/test_type.py b/src/bindings/python/tests/test_runtime/test_type.py index b31f36edf9f7e2..28ed0708d7f3e0 100644 --- a/src/bindings/python/tests/test_runtime/test_type.py +++ b/src/bindings/python/tests/test_runtime/test_type.py @@ -64,11 +64,18 @@ def test_basic_ovtypes(ovtype, assert ovtype.is_static() is static_flag assert ovtype.is_dynamic() is dynamic_flag assert ovtype.is_real() is real_flag + assert ovtype.real is real_flag assert ovtype.is_integral() is integral_flag + assert ovtype.integral is integral_flag assert ovtype.is_signed() is signed_flag + assert ovtype.signed is signed_flag assert ovtype.is_quantized() is quantized_flag + assert ovtype.quantized is quantized_flag assert ovtype.get_type_name() == type_name + assert ovtype.type_name == type_name + assert ovtype.get_size() == type_size assert ovtype.size == type_size + assert ovtype.get_bitwidth() == type_bitwidth assert ovtype.bitwidth == type_bitwidth @@ -77,15 +84,22 @@ def test_undefined_ovtype(): assert ov_type.is_static() is True assert ov_type.is_dynamic() is False assert ov_type.is_real() is False + assert ov_type.real is False assert ov_type.is_integral() is True + assert ov_type.integral is True assert ov_type.is_signed() is False + assert ov_type.signed is False assert ov_type.is_quantized() is False + assert ov_type.quantized is False assert ov_type.get_type_name() == "undefined" + assert ov_type.type_name == "undefined" + assert ov_type.get_size() == 0 assert ov_type.size == 0 # Note: might depend on the system import sys assert ov_type.bitwidth == sys.maxsize * 2 + 1 + assert ov_type.get_bitwidth() == sys.maxsize * 2 + 1 def test_dynamic_ov_type(): @@ -98,7 +112,9 @@ def test_dynamic_ov_type(): assert ov_type.is_quantized() is False assert ov_type.get_type_name() == "dynamic" assert ov_type.size == 0 + assert ov_type.get_size() == 0 assert ov_type.bitwidth == 0 + assert ov_type.get_bitwidth() == 0 @pytest.mark.parametrize(("ovtype_one", "ovtype_two", "expected"), [ diff --git a/src/bindings/python/tests/test_utils/test_data_dispatch.py b/src/bindings/python/tests/test_utils/test_data_dispatch.py index 254cf890458bb8..fad863f61a52e8 100644 --- a/src/bindings/python/tests/test_utils/test_data_dispatch.py +++ b/src/bindings/python/tests/test_utils/test_data_dispatch.py @@ -157,8 +157,8 @@ def test_ndarray_copied_dispatcher(device, input_shape): result, infer_request = _run_dispatcher(device, test_data, False, input_shape) assert result == {} - assert np.array_equal(infer_request.inputs[0].data, test_data) + assert np.array_equal(infer_request.input_tensors[0].data, test_data) test_data[0] = 2.0 - assert not np.array_equal(infer_request.inputs[0].data, test_data) + assert not np.array_equal(infer_request.input_tensors[0].data, test_data) From 04a2c4ce61917a381cc963e8b2c783ac0b62dcba Mon Sep 17 00:00:00 2001 From: Andrew Kwangwoong Park Date: Wed, 22 Mar 2023 16:38:28 +0900 Subject: [PATCH 026/296] [GPU] Add shape agnostic optimized FullyConnectedIMAD kernel (#16417) * [GPU] Added shape agnostic kernel for fully_connected_gpu_imad Signed-off-by: Andrew Park * Add fully_connected_gpu_imad shape agnostic TCs for ov_gpu_unit_tests Signed-off-by: Andrew Park * Apply comments Signed-off-by: Andrew Park --------- Signed-off-by: Andrew Park --- .../cl_kernels/fully_connected_gpu_imad.cl | 64 ++++++++++++++--- .../fully_connected_kernel_imad.cpp | 71 +++++++++++-------- .../test_cases/fully_connected_gpu_test.cpp | 25 +++++++ 3 files changed, 121 insertions(+), 39 deletions(-) diff 
--git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_imad.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_imad.cl index 4fb15bdcc06d8a..499c153da69c0b 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_imad.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_imad.cl @@ -17,6 +17,7 @@ REQD_SUB_GROUP_SIZE(SIMD_SIZE) KERNEL(fully_connected_gpu_imad)( + OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output, const __global FILTER_TYPE* weights @@ -36,19 +37,34 @@ KERNEL(fully_connected_gpu_imad)( #if HAS_OFM_LEFTOVERS || HAS_IFM_LEFTOVERS const uint sglid = get_sub_group_local_id(); #endif +#if IS_DYNAMIC + // In dynamic kernel, TILE_BATCH is set to the initial tile batch size for stack arrays such as dotProd + // and tile_batch is calculated as an adjusted value from tile_batch_max_size by given global work size +#if OUTPUT_3D + const uint tile_batch = OUTPUT_FEATURE_NUM / (uint)get_global_size(2); +#else + const uint tile_batch = OUTPUT_BATCH_NUM / (uint)get_global_size(1); +#endif +#else + const uint tile_batch = TILE_BATCH; +#endif #if OUTPUT_3D const uint batch = (uint)get_global_id(1); - const uint skip_f = (uint)get_global_id(2) * TILE_BATCH; + const uint skip_f = (uint)get_global_id(2) * tile_batch; #else - const uint batch = (uint)get_global_id(1) * TILE_BATCH; + const uint batch = (uint)get_global_id(1) * tile_batch; const uint skip_f = (uint)get_global_id(2); #endif // Accumulators initialization MAKE_VECTOR_TYPE(int, TILE_OFM) dotProd[TILE_BATCH]; MAKE_VECTOR_TYPE(uint, TILE_OFM) idx_w; - unroll_for (uint ob_idx = 0; ob_idx < TILE_BATCH; ob_idx++) { +#if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#else + unroll_for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#endif unroll_for(uint of_idx = 0; of_idx < TILE_OFM; of_idx++) { dotProd[ob_idx][of_idx] = 0; #if !HAS_IFM_LEFTOVERS @@ -103,7 +119,11 @@ KERNEL(fully_connected_gpu_imad)( #endif // HAS_OFM_LEFTOVERS } - unroll_for(uint ob_idx = 0; ob_idx < TILE_BATCH; ob_idx++) { + #if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { + #else + unroll_for(uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { + #endif // Loading inputs #if OUTPUT_3D __global INPUT0_TYPE* current_input = &input[INPUT0_GET_INDEX(batch, skip_f + ob_idx, 0, 0)]; @@ -172,7 +192,11 @@ KERNEL(fully_connected_gpu_imad)( } } - unroll_for (uint ob_idx = 0; ob_idx < TILE_BATCH; ob_idx++) { +#if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#else + unroll_for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#endif // Loading inputs #if OUTPUT_3D __global INPUT0_TYPE* current_input = &input[INPUT0_GET_INDEX(batch, skip_f + ob_idx, 0, 0)]; @@ -221,7 +245,11 @@ KERNEL(fully_connected_gpu_imad)( #if BIAS_TERM #if BIAS_PER_OUTPUT MAKE_VECTOR_TYPE(uint, TILE_OFM) bias_index[TILE_BATCH]; - unroll_for(uint ob_idx = 0; ob_idx < TILE_BATCH; ob_idx++) { + #if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { + #else + unroll_for(uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { + #endif unroll_for (uint of_idx = 0; of_idx < TILE_OFM; of_idx++) { #if OUTPUT_3D bias_index[ob_idx][of_idx] = GET_DATA_INDEX(BIAS, batch, skip_f + ob_idx, feature + of_idx * SIMD_SIZE, 0); @@ -238,7 +266,11 @@ KERNEL(fully_connected_gpu_imad)( #endif MAKE_VECTOR_TYPE(float, TILE_OFM) dequantized[TILE_BATCH]; - unroll_for (uint ob_idx = 0; ob_idx < TILE_BATCH; 
ob_idx++) { +#if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#else + unroll_for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#endif unroll_for(uint of_idx = 0; of_idx < TILE_OFM; of_idx++) { #if HAS_OFM_LEFTOVERS if (feature + of_idx * SIMD_SIZE < OF_NUMBER) @@ -252,7 +284,11 @@ KERNEL(fully_connected_gpu_imad)( } #else MAKE_VECTOR_TYPE(float, TILE_OFM) dequantized[TILE_BATCH]; - unroll_for (uint ob_idx = 0; ob_idx < TILE_BATCH; ob_idx++) { +#if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#else + unroll_for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#endif unroll_for(uint of_idx = 0; of_idx < TILE_OFM; of_idx++) { #if HAS_OFM_LEFTOVERS if (feature + of_idx * SIMD_SIZE < OF_NUMBER) @@ -263,7 +299,11 @@ KERNEL(fully_connected_gpu_imad)( #endif #if HAS_FUSED_OPS - unroll_for (uint ob_idx = 0; ob_idx < TILE_BATCH; ob_idx++) { +#if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#else + unroll_for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#endif unroll_for(uint of_idx = 0; of_idx < TILE_OFM; of_idx++) { #if HAS_OFM_LEFTOVERS if (feature + of_idx * SIMD_SIZE < OF_NUMBER) { @@ -282,7 +322,11 @@ KERNEL(fully_connected_gpu_imad)( } } #else - unroll_for (uint ob_idx = 0; ob_idx < TILE_BATCH; ob_idx++) { +#if IS_DYNAMIC + for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#else + unroll_for (uint ob_idx = 0; ob_idx < tile_batch; ob_idx++) { +#endif unroll_for(uint of_idx = 0; of_idx < TILE_OFM; of_idx++) { #if HAS_OFM_LEFTOVERS if (feature + of_idx * SIMD_SIZE < OF_NUMBER) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_imad.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_imad.cpp index 2a6ebbbd45ec32..a38a7af0c77cb0 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_imad.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_imad.cpp @@ -42,6 +42,7 @@ ParamsKey FullyConnectedKernelIMAD::GetSupportedKey() const { k.EnableTensorPitches(); k.EnableBatching(); k.EnableQuantization(QuantizationType::SYMMETRIC); + k.EnableDynamicShapesSupport(); return k; } @@ -57,23 +58,25 @@ DeviceFeaturesKey FullyConnectedKernelIMAD::get_required_device_features_key(con FullyConnectedKernelIMAD::Parent::DispatchData FullyConnectedKernelIMAD::SetDefault(const fully_connected_params& params, int) const { auto dispatchData = Parent::SetDefault(params); - auto tuning_data = GetTuningParams(params); - if (params.outputs[0].GetLayout() == DataLayout::bfyx) { - dispatchData.gws[0] = RoundUp(params.outputs[0].Y().v, tuning_data.sub_group_size * tuning_data.tile_ofm) / - tuning_data.tile_ofm * tuning_data.slm_div_factor; - dispatchData.gws[1] = params.outputs[0].Batch().v; - dispatchData.gws[2] = params.outputs[0].Feature().v / tuning_data.tile_batch; - } else { - dispatchData.gws[0] = RoundUp(params.outputs[0].Feature().v, tuning_data.sub_group_size * tuning_data.tile_ofm) / - tuning_data.tile_ofm * tuning_data.slm_div_factor; - dispatchData.gws[1] = params.outputs[0].Batch().v / tuning_data.tile_batch; - dispatchData.gws[2] = 1; - } + if (!params.has_dynamic_tensors()) { + auto tuning_data = GetTuningParams(params); + if (params.outputs[0].GetLayout() == DataLayout::bfyx) { + dispatchData.gws[0] = RoundUp(params.outputs[0].Y().v, tuning_data.sub_group_size * tuning_data.tile_ofm) / + tuning_data.tile_ofm * 
tuning_data.slm_div_factor; + dispatchData.gws[1] = params.outputs[0].Batch().v; + dispatchData.gws[2] = params.outputs[0].Feature().v / tuning_data.tile_batch; + } else { + dispatchData.gws[0] = RoundUp(params.outputs[0].Feature().v, tuning_data.sub_group_size * tuning_data.tile_ofm) / + tuning_data.tile_ofm * tuning_data.slm_div_factor; + dispatchData.gws[1] = params.outputs[0].Batch().v / tuning_data.tile_batch; + dispatchData.gws[2] = 1; + } - dispatchData.lws[0] = tuning_data.work_group_size; - dispatchData.lws[1] = 1; - dispatchData.lws[2] = 1; + dispatchData.lws[0] = tuning_data.work_group_size; + dispatchData.lws[1] = 1; + dispatchData.lws[2] = 1; + } return dispatchData; } @@ -86,6 +89,14 @@ bool FullyConnectedKernelIMAD::Validate(const Params& params, const optional_par const auto& fc_params = static_cast(params); const auto& in = fc_params.inputs[0]; const auto& wei = fc_params.weights; + auto out_l = fc_params.outputs[0].GetLayout(); + + // Dynamic kernel doesn't support dynamic weights + if (fc_params.is_shape_agnostic && in.is_dynamic()) { + if ((out_l == DataLayout::bfyx && in.Y().v == 0) || + (out_l == DataLayout::bf && in.Feature().v == 0)) + return false; + } if ((in.X().pad.before != 0) || (in.X().pad.after != 0) || (in.Y().pad.before != 0) || (in.Y().pad.after != 0)) { @@ -93,7 +104,6 @@ bool FullyConnectedKernelIMAD::Validate(const Params& params, const optional_par return false; } - auto out_l = fc_params.outputs[0].GetLayout(); if (out_l == DataLayout::bfyx) { // We don't support 4d output if (in.X().v > 1) @@ -147,25 +157,28 @@ FullyConnectedKernelIMAD::FullyConnectedTuningData FullyConnectedKernelIMAD::Get // In most cases SIMD8 works faster than SIMD16 tuning_data.sub_group_size = 8; - auto mk_size = if_num * ib_num; - auto mn_size = of_num * ob_num; + if (!params.is_shape_agnostic) { + auto mk_size = if_num * ib_num; + auto mn_size = of_num * ob_num; - // Known cases where simd16 works better than simd8 - bool simd16_is_faster = mk_size >= 1000 * 1024 && mn_size >= 1000 * 1024; - simd16_is_faster |= mk_size == 128 * 768 && mn_size == 128 * 3072; + // Known cases where simd16 works better than simd8 + bool simd16_is_faster = mk_size >= 1000 * 1024 && mn_size >= 1000 * 1024; + simd16_is_faster |= mk_size == 128 * 768 && mn_size == 128 * 3072; - // Some specific HW doesn't support SIMD8, force SIMD16 to respect this HW - // For other SIMD16 exceptions check that if_num is divided by 64 (SIMD16 * ISV4) because - // if there are leftovers then SIMD8 is more preferrable - if (!IsSIMDSizeSupported(params.engineInfo, 8) || (simd16_is_faster && if_num % 64 == 0)) { - tuning_data.sub_group_size = 16; + // Some specific HW doesn't support SIMD8, force SIMD16 to respect this HW + // For other SIMD16 exceptions check that if_num is divided by 64 (SIMD16 * ISV4) because + // if there are leftovers then SIMD8 is more preferrable + if (!IsSIMDSizeSupported(params.engineInfo, 8) || (simd16_is_faster && if_num % 64 == 0)) { + tuning_data.sub_group_size = 16; + } } - tuning_data.tile_ofm = 2; tuning_data.tile_batch = tuning_data.sub_group_size == 8 ? 
16 : 8; - while (tile_batch_max_size % tuning_data.tile_batch != 0) - tuning_data.tile_batch--; + if (!params.has_dynamic_tensors()) { + while (tile_batch_max_size % tuning_data.tile_batch != 0) + tuning_data.tile_batch--; + } size_t sub_group_pack_size = tuning_data.sub_group_size * tuning_data.pack_size; tuning_data.in_f_blocks_number = CeilDiv(if_num, sub_group_pack_size); diff --git a/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp index e3722341a997c3..c63218e4ff21cb 100644 --- a/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/fully_connected_gpu_test.cpp @@ -2180,6 +2180,7 @@ struct dynamic_fully_connected_gpu : ::testing::TestWithParam; using dynamic_fully_connected_gpu_f16_3d = dynamic_fully_connected_gpu; +using dynamic_fully_connected_gpu_i8_3d = dynamic_fully_connected_gpu; static const std::vector dyn_batches_full = {1, 2, 4, 7, 8, 9, 15, 16, 31, 32, 33, 47, 48, 49, 58, 63, 64}; @@ -2194,6 +2195,10 @@ TEST_P(dynamic_fully_connected_gpu_f16_3d, basic) { run_test(); } +TEST_P(dynamic_fully_connected_gpu_i8_3d, basic) { + run_test(); +} + INSTANTIATE_TEST_SUITE_P( smoke, dynamic_fully_connected_gpu_f32_3d, @@ -2214,6 +2219,16 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(false, true)) ); +INSTANTIATE_TEST_SUITE_P( + smoke, + dynamic_fully_connected_gpu_i8_3d, + ::testing::Combine( + ::testing::Values(dyn_batches_smoke), + ::testing::Values(10, 32, 42, 53, 64, 128), + ::testing::Values(2, 9, 128), + ::testing::Values(false, true)) +); + INSTANTIATE_TEST_SUITE_P( full, dynamic_fully_connected_gpu_f32_3d, @@ -2233,3 +2248,13 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(2, 9, 16, 32, 64, 128), ::testing::Values(false, true)) ); + +INSTANTIATE_TEST_SUITE_P( + full, + dynamic_fully_connected_gpu_i8_3d, + ::testing::Combine( + ::testing::Values(dyn_batches_full), + ::testing::Values(10, 32, 42, 53, 64, 128), + ::testing::Values(2, 9, 16, 32, 64, 128), + ::testing::Values(false, true)) +); From f1c3356cfc00e45cf69153171fc381d34a25d131 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Wed, 22 Mar 2023 12:01:16 +0400 Subject: [PATCH 027/296] Small Plugin DG changes (#16432) --- docs/IE_PLUGIN_DG/Building.md | 3 ++- src/plugins/template/src/compiled_model.hpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/IE_PLUGIN_DG/Building.md b/docs/IE_PLUGIN_DG/Building.md index 9330d658a248f0..f40f2296e85122 100644 --- a/docs/IE_PLUGIN_DG/Building.md +++ b/docs/IE_PLUGIN_DG/Building.md @@ -30,6 +30,7 @@ Once the commands above are executed, the OpenVINO Developer Package is generate * `openvino::unitTestUtils` - static library with unit tests utilities * `openvino::ngraphFunctions` - static library with the set of `ov::Model` builders * `openvino::funcSharedTests` - static library with common functional tests + * `openvino::ngraph_reference` - static library with operation reference implementations. > **NOTE**: it's enough just to run `cmake --build . --target ov_dev_targets` command to build only targets from the > OpenVINO Developer package. @@ -61,7 +62,7 @@ $ cmake -DENABLE_FUNCTIONAL_TESTS=OFF -DOpenVINODeveloperPackage_DIR=../openvino - `src/CMakeLists.txt` to build a plugin shared library from sources: @snippet template/src/CMakeLists.txt cmake:plugin - > **NOTE**: `openvino::runtime` target is imported from the OpenVINO Developer Package. 
+ > **NOTE**: `openvino::...` targets are imported from the OpenVINO Developer Package. - `tests/functional/CMakeLists.txt` to build a set of functional plugin tests: @snippet template/tests/functional/CMakeLists.txt cmake:functional_tests diff --git a/src/plugins/template/src/compiled_model.hpp b/src/plugins/template/src/compiled_model.hpp index e8e908a3278979..911edf72abb515 100644 --- a/src/plugins/template/src/compiled_model.hpp +++ b/src/plugins/template/src/compiled_model.hpp @@ -37,7 +37,7 @@ class CompiledModel : public ov::ICompiledModel { void set_property(const ov::AnyMap& properties) override; - virtual ov::Any get_property(const std::string& name) const override; + ov::Any get_property(const std::string& name) const override; std::shared_ptr create_infer_request() const override; From 0070e8d9392490c48ec885c6fdf083ad50abdaba Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Wed, 22 Mar 2023 12:02:59 +0400 Subject: [PATCH 028/296] [TF FE] Fix problems with invalidation of decoders (#16464) * [TF FE] Fix problems with invalidation of decoders Signed-off-by: Kazantsev, Roman * Fix comment --------- Signed-off-by: Kazantsev, Roman --- .../tensorflow/src/decoder_argdef.hpp | 19 ++++++++++++++++- .../tensorflow/src/decoder_proto.hpp | 21 ++++++++++++++++++- .../tensorflow/src/graph_iterator_proto.hpp | 10 +++++---- 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/frontends/tensorflow/src/decoder_argdef.hpp b/src/frontends/tensorflow/src/decoder_argdef.hpp index 5b01025ee4df7a..a188a8a5cb8890 100644 --- a/src/frontends/tensorflow/src/decoder_argdef.hpp +++ b/src/frontends/tensorflow/src/decoder_argdef.hpp @@ -10,6 +10,8 @@ #include "openvino/frontend/tensorflow/decoder.hpp" namespace tensorflow { +class GraphDef; +class FunctionDef; class OpDef_ArgDef; } // namespace tensorflow @@ -19,14 +21,23 @@ namespace tensorflow { class DecoderArgDef : public ov::frontend::tensorflow::DecoderBase { public: - explicit DecoderArgDef(const ::tensorflow::OpDef_ArgDef* arg_def, const std::string& op_type) + explicit DecoderArgDef(const ::tensorflow::OpDef_ArgDef* arg_def, + const std::shared_ptr<::tensorflow::GraphDef>& graph_def, + const std::shared_ptr<::tensorflow::FunctionDef>& func_def, + const std::string& op_type) : m_arg_def(arg_def), + m_graph_def(graph_def), + m_func_def(func_def), m_op_type(op_type) {} explicit DecoderArgDef(const ::tensorflow::OpDef_ArgDef* arg_def, + const std::shared_ptr<::tensorflow::GraphDef>& graph_def, + const std::shared_ptr<::tensorflow::FunctionDef>& func_def, const std::string& op_type, const std::string& producer_name) : m_arg_def(arg_def), + m_graph_def(graph_def), + m_func_def(func_def), m_op_type(op_type), m_producer_name(producer_name) {} @@ -49,6 +60,12 @@ class DecoderArgDef : public ov::frontend::tensorflow::DecoderBase { private: const ::tensorflow::OpDef_ArgDef* m_arg_def; + // For existence of OpDef_ArgDef object corresponding to the main graph node, + // GraphDef object must live in the memory + const std::shared_ptr<::tensorflow::GraphDef> m_graph_def; + // For existence of OpDef_ArgDef object corresponding to the body graph node, + // both GraphDef and FunctionDef objects must be alive in the memory + const std::shared_ptr<::tensorflow::FunctionDef> m_func_def; const std::string m_op_type; const std::string m_producer_name; }; diff --git a/src/frontends/tensorflow/src/decoder_proto.hpp b/src/frontends/tensorflow/src/decoder_proto.hpp index db1f113882451d..570fd7e7eebda5 100644 --- 
a/src/frontends/tensorflow/src/decoder_proto.hpp +++ b/src/frontends/tensorflow/src/decoder_proto.hpp @@ -12,6 +12,8 @@ #include "types.pb.h" namespace tensorflow { +class GraphDef; +class FunctionDef; class NodeDef; class AttrValue; } // namespace tensorflow @@ -29,7 +31,18 @@ void parse_producer_name(const std::string& producer_port_name, class DecoderProto : public ov::frontend::tensorflow::DecoderBase { public: - explicit DecoderProto(const ::tensorflow::NodeDef* node_def) : m_node_def(node_def) {} + explicit DecoderProto(const ::tensorflow::NodeDef* node_def, + const std::shared_ptr<::tensorflow::GraphDef>& graph_def) + : m_node_def(node_def), + m_graph_def(graph_def), + m_func_def(nullptr) {} + + explicit DecoderProto(const ::tensorflow::NodeDef* node_def, + const std::shared_ptr<::tensorflow::GraphDef>& graph_def, + const std::shared_ptr<::tensorflow::FunctionDef>& func_def) + : m_node_def(node_def), + m_graph_def(graph_def), + m_func_def(func_def) {} ov::Any get_attribute(const std::string& name) const override; @@ -51,6 +64,12 @@ class DecoderProto : public ov::frontend::tensorflow::DecoderBase { private: std::vector<::tensorflow::AttrValue> decode_attribute_helper(const std::string& name) const; const ::tensorflow::NodeDef* m_node_def; + // For existence of NodeDef object corresponding to the main graph node, + // GraphDef object must live in the memory + const std::shared_ptr<::tensorflow::GraphDef> m_graph_def; + // For existence of NodeDef object corresponding to the body graph node, + // both GraphDef and FunctionDef objects must be alive in the memory + const std::shared_ptr<::tensorflow::FunctionDef> m_func_def; }; } // namespace tensorflow } // namespace frontend diff --git a/src/frontends/tensorflow/src/graph_iterator_proto.hpp b/src/frontends/tensorflow/src/graph_iterator_proto.hpp index 1fa836e3b036e1..a5e2fc1ae6c0c0 100644 --- a/src/frontends/tensorflow/src/graph_iterator_proto.hpp +++ b/src/frontends/tensorflow/src/graph_iterator_proto.hpp @@ -45,12 +45,13 @@ class GraphIteratorProto : public GraphIterator { for (int input_ind = 0; input_ind < input_size; ++input_ind) { auto input_arg = &m_func_def->signature().input_arg(input_ind); m_input_names.push_back(input_arg->name()); - m_decoders.push_back(std::make_shared(input_arg, "input_arg")); + m_decoders.push_back(std::make_shared(input_arg, m_graph_def, m_func_def, "input_arg")); } // fill all node defs from library functions for (int node_ind = 0; node_ind < nodes_size; ++node_ind) { - m_decoders.push_back(std::make_shared(&(m_func_def->node_def(node_ind)))); + m_decoders.push_back( + std::make_shared(&(m_func_def->node_def(node_ind)), m_graph_def, m_func_def)); } // fill all outputs from library functions @@ -60,7 +61,8 @@ class GraphIteratorProto : public GraphIterator { auto output_arg = &m_func_def->signature().output_arg(output_ind); m_output_names.push_back(output_arg->name()); auto producer_name = ret_map.at(output_arg->name()); - m_decoders.push_back(std::make_shared(output_arg, "output_arg", producer_name)); + m_decoders.push_back( + std::make_shared(output_arg, m_graph_def, m_func_def, "output_arg", producer_name)); } } @@ -76,7 +78,7 @@ class GraphIteratorProto : public GraphIterator { auto nodes_size = m_graph_def->node_size(); m_decoders.resize(static_cast(nodes_size)); for (int node_ind = 0; node_ind < nodes_size; ++node_ind) { - m_decoders[node_ind] = std::make_shared(&m_graph_def->node(node_ind)); + m_decoders[node_ind] = std::make_shared(&m_graph_def->node(node_ind), m_graph_def); } // initialize a 
library map From c14e6ef48e4870a19b95519ff9ba65c6c86fcc4c Mon Sep 17 00:00:00 2001 From: hyunback kim Date: Wed, 22 Mar 2023 17:08:10 +0900 Subject: [PATCH 029/296] [GPU] Use 4dim directly for onednn in gemm (#16182) * [GPU] Use 4-dim directly for onednn in gemm We were collapsing n-dim into 3d for onednn gemm, But it is not necessary, up to 4d. Signed-off-by: hyunback --- .../src/graph/impls/onednn/gemm_onednn.cpp | 10 ---------- .../intel_gpu/src/graph/impls/onednn/utils.cpp | 17 ++++++++++------- .../tests/fusions/gemm_fusion_test.cpp | 18 +++++++++++++++--- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp index 84bcdd83d2edf3..309a4e24285437 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/gemm_onednn.cpp @@ -301,16 +301,6 @@ struct gemm_onednn : typed_primitive_onednn_impl { } static std::unique_ptr create(const gemm_node& arg, const kernel_impl_params& impl_params) { - bool full_tensor_or_per_tensor = true; - for (auto prim : arg.get_fused_primitives()) { - if (prim.input_layout.is_static() && prim.output_layout.is_static()) { - full_tensor_or_per_tensor &= - prim.input_layout.count() == prim.output_layout.count() || prim.input_layout.count() == 1; - } - } - if (!full_tensor_or_per_tensor) { - IE_THROW() << "Unimplemented: per channel binary post-operation is not supported for onednn gemm. Refer PR(#15353) message."; - } auto& engine = impl_params.prog->get_engine(); auto& config = impl_params.prog->get_config(); auto attr = arg.get_onednn_primitive_attributes(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp index 6b217b196c922d..09e977b5edcda1 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp @@ -68,22 +68,25 @@ dnnl::memory::dims convert_tensor(cldnn::tensor t, size_t dims, bool is_grouped) dnnl::memory::dims convert_gemm_tensor(cldnn::tensor t, size_t dims, bool batched_dims_can_be_removed) { auto sizes = t.sizes(default_fmt_for_dims(dims, false)); dnnl::memory::dims res(sizes.begin(), sizes.end()); - if (dims > 3) { - for (size_t i = 0; i < dims - 3; i++) { + if (dims > 4) { + for (size_t i = 0; i < dims - 4; i++) { res[i + 1] *= res[i]; } - res.erase(res.begin(), res.begin() + dims - 3); + res.erase(res.begin(), res.begin() + dims - 4); } - if (res.size() == 3 && batched_dims_can_be_removed) { + if (res.size() == 4 && batched_dims_can_be_removed) { res.erase(res.begin()); } return res; } dnnl::memory::format_tag convert_gemm_data_format(dnnl::memory::dims dims) { - if (dims.size() > 3) - throw std::runtime_error("[clDNN] Unsupported dims size for onednn gemm: should be <= 3"); - return dims.size() == 3 ? 
dnnl::memory::format_tag::abc : dnnl::memory::format_tag::ab; + switch (dims.size()) { + case 2: return dnnl::memory::format_tag::ab; + case 3: return dnnl::memory::format_tag::abc; + case 4: return dnnl::memory::format_tag::abcd; + default: throw std::invalid_argument("[clDNN] Unsupported conversion from "+ std::to_string(dims.size()) + " to onednn format_tag"); + } } diff --git a/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp index 34b35f26c054ef..847c9192dd83b4 100644 --- a/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/gemm_fusion_test.cpp @@ -113,6 +113,9 @@ class GemmFusingTest : public ::BaseFusingTest { #define CASE_GEMM_2IN_FP16_3 { { 1, 1, 64, 64 }, { 1, 1, 64, 64 } }, { 1, 1, 64, 64 }, tensor{ 1 }, tensor{ 0 }, data_types::f16, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx #define CASE_GEMM_2IN_FP16_4 { { 1, 2, 64, 128 }, { 1, 2, 256, 64 } }, { 1, 2, 256, 128 }, tensor{ 1 }, tensor{ 0 }, data_types::f16, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx #define CASE_GEMM_2IN_FP16_5 { { 2, 3, 2, 2 }, { 2, 3, 2, 2 } }, { 2, 3, 2, 2 }, tensor{ 1 }, tensor{ 0 }, data_types::f16, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx +#define CASE_GEMM_2IN_FP16_5D_1 { { 2, 3, 4, 6, 5 }, { 2, 3, 6, 4, 5 } }, { 2, 3, 6, 6, 5 }, tensor{ 1 }, tensor{ 0 }, data_types::f16, data_types::f16, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx +#define CASE_GEMM_2IN_FP16_6D_1 { { 2, 3, 7, 5, 3, 2 }, { 2, 3, 5, 7, 3, 2 } }, { 2, 3, 5, 5, 3, 2 }, tensor{ 1 }, tensor{ 0 }, data_types::f16, data_types::f16, data_types::f16, format::bfwzyx, data_types::f16, format::bfwzyx + #define CASE_GEMM_2IN_U8U8_1 { { 1, 1, 2, 2 }, { 1, 1, 2, 2 } }, { 1, 1, 2, 2 }, tensor{ 1 }, tensor{ 0 }, data_types::u8, data_types::u8, data_types::u8, format::bfyx, data_types::f32, format::bfyx #define CASE_GEMM_2IN_U8U8_2 { { 1, 2, 64, 128 }, { 1, 2, 256, 64 } }, { 1, 2, 256, 128 }, tensor{ 1 }, tensor{ 0 }, data_types::u8, data_types::u8, data_types::u8, format::bfyx, data_types::f32, format::bfyx #define CASE_GEMM_2IN_U8U8_3 { { 1, 1, 16, 32 }, { 1, 1, 32, 16 } }, { 1, 1, 32, 32 }, tensor{ 1 }, tensor{ 0 }, data_types::u8, data_types::u8, data_types::u8, format::bfyx, data_types::f32, format::bfyx @@ -298,11 +301,14 @@ TEST_P(gemm_2in_add, eltwise_postop) { add_data_size.feature[0] = 1; add_data_layout.set_tensor(add_data_size); + auto in_layout0 = get_input_layout(p, 0); + auto in_layout1 = get_input_layout(p, 1); + create_topologies( - input_layout("input0", get_input_layout(p, 0)), - input_layout("input1", get_input_layout(p, 1)), + input_layout("input0", in_layout0), + input_layout("input1", in_layout1), data("add_data", get_mem(add_data_layout, 1.0f/p.kernel.count())), - gemm("gemm_prim", { input_info("input0"), input_info("input1") }, data_types::f32), + gemm("gemm_prim", { input_info("input0"), input_info("input1") }, data_types::f32, false, false, 1.f, 0.f, in_layout0.get_rank(), in_layout1.get_rank()), eltwise("add_prim", { input_info("gemm_prim"), input_info("add_data") }, p.eltwise_m, p.default_type), reorder("reorder_bfyx", input_info("add_prim"), p.default_format, data_types::f32) ); @@ -318,6 +324,12 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, gemm_2in_add, ::testing::ValuesIn(std::vec gemm_test_params{ CASE_GEMM_2IN_FP16_5, 3, 4, "", dim_vec_kind::feature, eltwise_mode::sum }, gemm_test_params{ 
CASE_GEMM_2IN_FP16_5, 3, 4, "", dim_vec_kind::feature, eltwise_mode::prod }, gemm_test_params{ CASE_GEMM_2IN_FP16_5, 3, 4, "", dim_vec_kind::feature, eltwise_mode::sub }, + gemm_test_params{ CASE_GEMM_2IN_FP16_5D_1, 3, 4, "", dim_vec_kind::batch, eltwise_mode::sum }, + gemm_test_params{ CASE_GEMM_2IN_FP16_5D_1, 3, 4, "", dim_vec_kind::batch, eltwise_mode::prod }, + gemm_test_params{ CASE_GEMM_2IN_FP16_5D_1, 3, 4, "", dim_vec_kind::batch, eltwise_mode::sub }, + gemm_test_params{ CASE_GEMM_2IN_FP16_6D_1, 3, 4, "", dim_vec_kind::feature, eltwise_mode::sum }, + gemm_test_params{ CASE_GEMM_2IN_FP16_6D_1, 3, 4, "", dim_vec_kind::feature, eltwise_mode::prod }, + gemm_test_params{ CASE_GEMM_2IN_FP16_6D_1, 3, 4, "", dim_vec_kind::feature, eltwise_mode::sub }, })); class gemm_2in_act_scale_quantize_i8 : public GemmFusingTest {}; From cbb25e94839c8fbb482e19e3754f149eda34c214 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Wed, 22 Mar 2023 09:08:31 +0100 Subject: [PATCH 030/296] [DOCS] Proofreading developer documentation moved from wiki. (#15886) Minor stylistic and grammar corrections. Fixing links * Apply suggestions from code review Co-authored-by: Tatiana Savina --- src/common/snippets/README.md | 3 +- .../snippets/docs/snippets_cpu_target.md | 14 +- .../snippets/docs/snippets_design_guide.md | 108 +++++++------- src/plugins/hetero/README.md | 62 ++++---- src/plugins/intel_cpu/README.md | 13 +- src/plugins/intel_cpu/docs/cpu_emulation.md | 19 ++- .../intel_cpu/docs/debug_capabilities.md | 9 +- .../docs/internal_cpu_plugin_optimization.md | 3 +- .../docs/performance_analysis_ITT_counters.md | 18 ++- .../docs/runtime_parameters_cache.md | 26 +++- src/plugins/intel_gpu/README.md | 17 ++- .../intel_gpu/docs/basic_data_structures.md | 94 ++++++------ .../intel_gpu/docs/execution_of_inference.md | 20 +-- src/plugins/intel_gpu/docs/gpu_debug_utils.md | 140 +++++++++--------- src/plugins/intel_gpu/docs/gpu_kernels.md | 31 ++-- .../intel_gpu/docs/gpu_memory_formats.md | 23 +-- .../docs/gpu_plugin_driver_troubleshooting.md | 23 ++- .../intel_gpu/docs/gpu_plugin_ops_enabling.md | 82 +++++----- .../intel_gpu/docs/gpu_plugin_unit_test.md | 120 +++++++-------- .../docs/graph_optimization_passes.md | 29 ++-- .../docs/memory_allocation_gpu_plugin.md | 47 +++--- .../intel_gpu/docs/simplified_workflow.md | 5 +- .../intel_gpu/docs/source_code_structure.md | 21 +-- src/tests/README.md | 55 +++---- .../plugin/conformance/test_runner/README.md | 127 ++++++++-------- 25 files changed, 587 insertions(+), 522 deletions(-) diff --git a/src/common/snippets/README.md b/src/common/snippets/README.md index eca770a584cda2..8f9d55bc11714b 100644 --- a/src/common/snippets/README.md +++ b/src/common/snippets/README.md @@ -2,12 +2,13 @@ ## Key Contacts -Please contact a member of [openvino-ie-cpu-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-ie-cpu-maintainers) group, for assistance regarding snippets. +For assistance regarding snippets, contact a member of [openvino-ie-cpu-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-ie-cpu-maintainers) group. 
* [SnippetS design guide](./docs/snippets_design_guide.md) * [CPU target for SnippetS code generator](./docs/snippets_cpu_target.md) ## See also + * [OpenVINO™ README](../../../README.md) * [OpenVINO Core Components](../../README.md) * [Developer documentation](../../../docs/dev/index.md) \ No newline at end of file diff --git a/src/common/snippets/docs/snippets_cpu_target.md b/src/common/snippets/docs/snippets_cpu_target.md index 04b70f7df8708e..68f03202c4761d 100644 --- a/src/common/snippets/docs/snippets_cpu_target.md +++ b/src/common/snippets/docs/snippets_cpu_target.md @@ -1,12 +1,12 @@ -# CPU target for SnippetS code generator +# CPU Target for SnippetS Code Generator -Snippets in its first generation can be seen as a generalization over generic eltwise node. First generation of snippets has lack of integration with oneDNN and so patterns it supports should be kept orthogonal to what is fused with post-ops. +Snippets in its first generation can be seen as a generalization over a generic eltwise node. First generation of snippets does not have integration with oneDNN, and the patterns it supports should be kept orthogonal to what is fused with post-ops. -POC CPU implementation could be found [here](https://github.com/openvinotoolkit/openvino/pull/2824) +See the example of POC CPU implementation [here](https://github.com/openvinotoolkit/openvino/pull/2824). First 8 kernel parameters are passed by structure which is unpacked inside a kernel into the registers. The rest are passed through the stack. -Loop trip count should be placed to some GP register, as well as work amount. Moreover, we need to load all the parameters into GP registers. If we assume that we have enough registers than it can be done before the loop body. +The loop trip count should be placed to a GP register, as well as the work amount. Moreover, you need to load all the parameters into GP registers. If you assume that you have enough registers, then it can be done before the loop body. ``` auto param0 = abi_params[0]; @@ -18,9 +18,9 @@ auto work_amount = abi_params[3]; ## Memory operations -Load could be Vector, Scalar and Broadcast. Only native vector size for an architecture is supported (e.g. 16 on AVX-512) +A load could be Vector, Scalar, and Broadcast. Only the native vector size for an architecture is supported (for example, 16 on AVX-512). -Memory operation also generates post increments for the pointer it uses. +Memory operation also generates post increments for the pointer it uses. - `MemoryEmitter` - `StoreEmitter` @@ -50,8 +50,8 @@ Tensor data can be passed with strides. `Data` corresponds to a constant table and wraps this entity for the CPU. ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO SnippetS](../README.md) * [OpenVINO Core Components](../../../README.md) * [Developer documentation](../../../../docs/dev/index.md) - \ No newline at end of file diff --git a/src/common/snippets/docs/snippets_design_guide.md b/src/common/snippets/docs/snippets_design_guide.md index 01b005b20e4ec9..d495b35a3fc437 100644 --- a/src/common/snippets/docs/snippets_design_guide.md +++ b/src/common/snippets/docs/snippets_design_guide.md @@ -1,26 +1,26 @@ -# SnippetS design guide -This document describes the design and rationale for snippets code generator. Implementation of code functionality is located [here](https://github.com/openvinotoolkit/openvino/tree/master/src/common/snippets). Proposal for CPU backend integration is [here](https://github.com/openvinotoolkit/openvino/pull/2824). 
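As a minimal sketch of the parameter-passing scheme described in the CPU target document above: pointers are packed into a structure whose fields the generated kernel unpacks into GP registers, with the work amount serving as the loop trip count. All names here (`CallArgs`, `jit_kernel_t`, `run_snippet`) and the exact field layout are illustrative assumptions, not the plugin's actual ABI:

```cpp
#include <cstddef>

// Hypothetical call frame mirroring the abi_params snippet quoted above:
// abi_params[0..2] are assumed to be input/output pointers, and
// abi_params[3] is the work amount (loop trip count).
struct CallArgs {
    const float* param0;  // abi_params[0]
    const float* param1;  // abi_params[1]
    float* result;        // abi_params[2] (assumed output pointer)
    size_t work_amount;   // abi_params[3]
};

using jit_kernel_t = void (*)(const CallArgs*);

// The generated kernel reads the fields into registers once, before the
// loop body; the host side only forwards a pointer to the frame.
inline void run_snippet(jit_kernel_t kernel, const CallArgs& args) {
    kernel(&args);
}
```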
+# SnippetS Design Guide +This document describes the design and rationale for a snippets code generator. Implementation of code functionality is located [here](https://github.com/openvinotoolkit/openvino/tree/master/src/common/snippets). A proposal for CPU backend integration is [here](https://github.com/openvinotoolkit/openvino/pull/2824). ## Rationale -We believe that core **CNN operators (convolution, gemm, fully connected) are limited by compute, the rest is memory bound**. Math approximations (like transcendental functions) are rare in emerging workloads and could be treated with the same machinery. **Snippets are designed to optimize topology for memory**, while leaving compute intensive kernels for backend developers. +Core **CNN operators (convolution, gemm, fully connected) are limited by compute, the rest is memory bound**. Math approximations (like transcendental functions) are rare in emerging workloads and could be treated with the same machinery. **Snippets are designed to optimize topology for memory**, while leaving compute intensive kernels for backend developers. -We believe **potential speedup is proportional to shrink in memory-walked bytes**. So we can transform the problem to a task to optimize for memory walks, whatever pattern snippet has and operations it contains. Number of memory walks should be less or equal to handcrafted optimizations. This guarantees performance improvements over the previous approach (excluding corner cases caused by cache effects). *Shrinkage factor might be encoded to some cost function in future evolution of code generator*. Snippets generator provides diagnostics to estimate this shrinkage factor with `ngraph::snippets::op::Subgraph::print_statistics(bool verbose)` member. +The **potential speedup is proportional to shrink in memory-walked bytes**. Therefore, you can transform the problem to a task to optimize for memory walks, whatever pattern snippet has and operations it contains. The number of memory walks should be less or equal to handcrafted optimizations. This guarantees performance improvements over the previous approach (excluding corner cases caused by cache effects). *Shrinkage factor might be encoded to some cost function in future evolution of code generator*. Snippets generator provides diagnostics to estimate this shrinkage factor with `ngraph::snippets::op::Subgraph::print_statistics(bool verbose)` member. -We design SnippetS generator for back-end developers. The main purpose of inventing snippets code generator is an **operator fusion**, **register allocation** and **target kernel generation** decomposition. This allows modifications (like new fusion support) and feature extensions (like new operation support) to be done in a single point of modification and avoid combinatorial explosion for fusions/types/architectures etc. +The SnippetS generator is designed for back-end developers. The main purpose of inventing the snippets code generator is an **operator fusion**, **register allocation** and **target kernel generation** decomposition. This allows modifications (like new fusion support) and feature extensions (like new operation support) to be done in a single point of modification and avoid combinatorial explosion for fusions/types/architectures etc. -We believe that creating a full-fledged compiler or usage of existing compiler infrastructure (like LLVM & MLIR) is superfluous at this point of evelition. 
We aim to provide a **flexible and performant framework for operation fusions**, leaving micro optimizations (e.g. instruction scheduling) to the backend H/W. +Creating a full-fledged compiler or usage of existing compiler infrastructure (like LLVM & MLIR) is superfluous at this point of evolution. The aim is to provide a **flexible and performant framework for operation fusions**, leaving micro optimizations (for example, instruction scheduling) to the backend H/W. -We do not aim to invent a DSL for SnippetS and would like to keep it this way. DSL gives users more flexibility to express uncommon operations. However, the shift towards an approach to encode topologies with elementary operations followed by smart enough fusions is already expressive and performant enough. +There are no plans to invent a DSL for SnippetS. DSL gives users more flexibility to express uncommon operations. However, the shift towards an approach to encode topologies with elementary operations followed by smart enough fusions is already expressive and performant enough. -**Snippet** is a compiled compute **kernel** generated from a subgraph using SnippetS code generator for specific architecture with a **scheduling domain**. Using this scheduling domain and calling convention backend can execute generated compute kernels. For the first generation, snippets are **statically scheduled towards the output domain**. Multi-output snippets are supported if all outputs are broadcast-compatible in a sense that domains for all outputs can be broadcasted from one root domain which defines snippet schedule. It’s a subject of extension for future generations. +**Snippet** is a compiled compute **kernel** generated from a subgraph using the SnippetS code generator for a specific architecture with a **scheduling domain**. Using this scheduling domain and calling convention backend can execute generated compute kernels. For the first generation, snippets are **statically scheduled towards the output domain**. Multi-output snippets are supported if all outputs are broadcast-compatible in a sense that domains for all outputs can be broadcasted from one root domain that defines snippet schedule. It is a subject of extension for future generations. -We use nGraph as the highest level IR for subgraph representation and lowering transformations. **Opset1** is a base operation set for code generation. We aim to **keep the minimal possible and sufficient operation set** (or ISA) and keep it **RISC-like** (memory and compute decomposed). +nGraph is used as the highest level IR for subgraph representation and lowering transformations. **Opset1** is a base operation set for code generation. The aim is to **keep the minimal possible and sufficient operation set** (or ISA) and keep it **RISC-like** (memory and compute decomposed). -**One subgraph corresponds to one snippet**. Operations which cannot be scheduled by a single schedule should not be placed in the same subgraph. Snippet somewhat conceptually close to OpenCL kernel without a restriction to express only embarrassingly parallel tasks. +**One subgraph corresponds to one snippet**. Operations which cannot be scheduled by a single schedule should not be placed in the same subgraph. A snippet is somewhat conceptually close to OpenCL kernel without a restriction to express only embarrassingly parallel tasks. **Subgraph** once extracted from full topology IR is **treated as an operation and data flow descriptor in scalar notation** (similar to OpenCL/CUDA). 
Tensor sizes are used for defining scheduling domain and detecting broadcasts/reductions. -We split operations into 3 groups: **layout-oblivious (LOO), layout-aware(-tolerant) and layout-dependent**. **Layout-oblivious** operation semantics and implementation are completely agnostic to a specific layout in which tensors are placed in memory. For example, elements-wise math and ReLU does in this category. Implementation **layout-aware** operation depends on the layout of input/output tensors. For example, convolutions and other block-wise kernels or layout repaks. For **layout-specific** operation semantics and implementation depends on the layout. For example, the Yolo region. Patterns to fuse constructed in terms of taxonomy above. +Operations are split into 3 groups: **layout-oblivious (LOO), layout-aware(-tolerant) and layout-dependent(-specific)**. **Layout-oblivious** operation semantics and implementation are completely agnostic to a specific layout in which tensors are placed in memory. For example, like elements-wise math and ReLU in this category. Implementation of **layout-aware** operation depends on the layout of input/output tensors. For example, convolutions and other block-wise kernels or layout repacks. **Layout-specific** operation semantics and implementation depend on the layout. For example, the Yolo region. Patterns to fuse are constructed in terms of taxonomy above. ## Design @@ -28,19 +28,19 @@ Code generation is split into 2 phases, **tokenization** and **lowering**. ### Tokenization -Tokenization runs on full topology nGraph function inside a specific plugin in a stage of common transformations. Input of tokenization is a topology graph. Output is a modified topology graph with `ngraph::snippets::op::Subgraph` operations installed. Each subgraph contains nGraph function (called **body**) which holds a part of original topology legal for snippet generation (can be scheduled with a single schedule) +Tokenization runs on full topology nGraph function inside a specific plugin in a stage of common transformations. Input of tokenization is a topology graph. Output is a modified topology graph with `ngraph::snippets::op::Subgraph` operations installed. Each subgraph contains nGraph function (called **body**) which holds a part of original topology legal for snippet generation (can be scheduled with a single schedule). -Procedure of finding subgraphs suitable for code generation is called **tokenization**, meaning that we split the topology tree into subgraphs in the same greedy approach which is used for parsing input stream of characters into the tokens. It also could be seen as and modified into a basic block construction problem, since we also find a leader and potentially terminators. Implementation can be found [here](https://github.com/openvinotoolkit/openvino/blob/master/src/common/snippets/src/pass/collapse_subgraph.cpp). +A procedure of finding subgraphs suitable for code generation is called **tokenization**. During tokenization the topology tree is split into subgraphs in the same greedy approach which is used for parsing input stream of characters into the tokens. It may also be seen as and modified into a basic block construction problem, since there is a leader and potentially terminators. See the example of implementation [here](https://github.com/openvinotoolkit/openvino/blob/master/src/common/snippets/src/pass/collapse_subgraph.cpp). 
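For intuition only, a deliberately simplified sketch of such a greedy pass follows; it is not the actual `collapse_subgraph.cpp` logic, and `can_merge` stands in for the real legality checks described later in this document (acyclicity, input/output limits, broadcast-compatible outputs):

```cpp
#include <vector>

// Toy operation record; the real pass works on nGraph nodes.
struct Op {
    bool fusible = false;  // e.g. a layout-oblivious elementwise operation
};

// Placeholder for the real legality checks from this document.
static bool can_merge(const std::vector<const Op*>& subgraph, const Op* op) {
    return op->fusible;
}

// Greedy tokenization: scan operations in topological order, grow the
// current subgraph while merging stays legal, otherwise close the token
// and continue looking for a new leader.
std::vector<std::vector<const Op*>> tokenize(const std::vector<Op>& topo_order) {
    std::vector<std::vector<const Op*>> subgraphs;
    std::vector<const Op*> current;
    for (const Op& op : topo_order) {
        if (can_merge(current, &op)) {
            current.push_back(&op);
        } else if (!current.empty()) {
            subgraphs.push_back(current);
            current.clear();
        }
    }
    if (!current.empty())
        subgraphs.push_back(current);
    return subgraphs;
}
```

The greedy structure is the point: any operation type may join the current token as long as the merge stays legal, which is what distinguishes tokenization from fixed-shape pattern matching.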
-Tokenization has an advantage over the pattern matching approach (used in traditional and MLIR-based compilers) since it can handle arbitrary patterns of operations. Pattern matching deduces specific configuration of operations to translate to another one, more suitable for target machine or further lowering. This means that relations between operations are fixed. Tokenization on the other hand has the only limitation on specific operation types which are **suitable and profitable** to fuse with respect to original topology correctness (keeping it as a direct acyclic graph). +Tokenization has an advantage over the pattern matching approach (used in traditional and MLIR-based compilers) since it can handle arbitrary patterns of operations. Pattern matching deduces specific configuration of operations to translate to another one, more suitable for target machine or further lowering. This means that relations between operations are fixed. Tokenization, on the other hand, has the only limitation on specific operation types which are **suitable and profitable** to fuse, respecting original topology correctness (keeping it as a direct acyclic graph). -The extracted body comes to a plug-in wrapped as a composite `Subgraph` operation which is seen as a block box from a plugin standpoint and can participate in any plugin specific subroutines (e.g. layout assignment, memory allocation, etc.). +The extracted body comes to a plug-in wrapped as a composite `Subgraph` operation which is seen as a block box from a plugin standpoint and can participate in any plugin specific subroutines (for example, layout assignment, memory allocation, etc.). ### Supported subgraph patterns -Subgraph accepts arbitrary numbers of inputs and outputs. There is 1:1 mapping for external (subgraph node’s) and internal (body) parameters indexes. +Subgraph accepts arbitrary numbers of inputs and outputs. There is 1:1 mapping for external (subgraph node’s) and internal (body) parameters indexes. -Pattern here is an exact subgraph configuration (nodes and edges between them). **The first generation of snippets supports only layout-oblivious operations which may have broadcast on inputs and broadcast-compatible outputs**. For example Shapes `<1, 42, 17, 31>`, `<1, 42, 17, 1>` and `<1, 42, 1, 31>` are considered as broadcast-compatible. Layout-oblivious operation with multiple outputs as a snippet leader and forms a new subgraph. The most beneficial patterns are subgraphs with complex control flow but minimal number of inputs/and outputs. For example, GeLU has a 5x shrinkage factor from original unfused subgraph in number of bytes walked. Subgraph below could be considered as an example of such a subgraph. Leader detection procedure aims to find such subgraphs. +Pattern here is an exact subgraph configuration (nodes and edges between them). **The first generation of snippets supports only layout-oblivious operations which may have broadcast on inputs and broadcast-compatible outputs**. For example Shapes `<1, 42, 17, 31>`, `<1, 42, 17, 1>` and `<1, 42, 1, 31>` are considered as broadcast-compatible. Layout-oblivious operation with multiple outputs serves as a snippet leader and forms a new subgraph. The most beneficial patterns are subgraphs with complex control flow but minimal number of inputs/and outputs. For example, GeLU has a 5x shrinkage factor from original unfused subgraph in number of bytes walked. Subgraph below could be considered as an example of such a subgraph. Leader detection procedure aims to find such subgraphs. 
```mermaid flowchart LR @@ -60,12 +60,12 @@ class nodeA3 steel1 ``` Operations are greedily added to the subgraph until -1. New operation doesn’t introduce a loop in a topology function. +1. New operation does not introduce a loop in a topology function. 1. Number of inputs and outputs satisfies target criteria. 1. Operation is not a predecessor of topology output. -1. Resulting subgraph can be scheduled (all outputs are broadcast-compatible). +1. Resulting subgraph can be scheduled (all outputs are broadcast-compatible). -If a potential subgraph doesn’t meet any of criteria above, the procedure continues to find a new leader. +If a potential subgraph does not meet any of the criteria above, the procedure continues to find a new leader. ### Lowering @@ -82,27 +82,27 @@ Lowering is a sequence of subgraph (snippet body) traversal passes to generate a #### Common optimizations -Constants are treated as inputs for a subgraph with an exception for scalar cases (since we don’t need to schedule them). `snippets::op::Scalar` is used to represent this kind of constants. +Constants are treated as inputs for a subgraph with an exception for scalar cases (since they do not need to be scheduled). `snippets::op::Scalar` is used to represent this kind of constants. -If such Scalar comes as a second input of Power operation, it’s replaced with `snippets::op::PowerStatic`. +If such Scalar comes as a second input of Power operation, it is replaced with `snippets::op::PowerStatic`. #### Canonicalization -The goal of this step is to apply target independent and schedule related optimizations and to make snippet **schedulable**. +The goal of this step is to apply target-independent and schedule-related optimizations and to make a snippet **schedulable**. ##### Domain normalization All input and output shapes are normalized to 6D for future schedule generation. If shape propagation fails or leads to inconsistent output shapes an exception is raised. -Layout assigned by user code and passed to a `generate` function is propagated through subgraph on this step as well. Layout is passed to a generate function as a `BlockedShapeVector` which is a `std::vector` , while `BlockedShape` is `std::tuple`. For example, if backend supports `NCHW16c` layout and tensor has size of `<1, 42, 17, 31>` and hold single precision floating point this structure should be `std::make_tuple(ngraph::Shape {1, 3, 17, 31, 16}, ngraph::AxisVector {0, 1, 2, 3, 1}, ngraph::element::f32);`. This allows generic layout representation. +The layout assigned by a user code and passed to a `generate` function is propagated through a subgraph on this step as well. The layout is passed to a `generate` function as a `BlockedShapeVector` which is a `std::vector` , while `BlockedShape` is `std::tuple`. For example, if backend supports `NCHW16c` layout and a tensor has a size of `<1, 42, 17, 31>` and holds single precision floating point, this structure should be `std::make_tuple(ngraph::Shape {1, 3, 17, 31, 16}, ngraph::AxisVector {0, 1, 2, 3, 1}, ngraph::element::f32);`. This allows generic layout representation. ##### Dialect conversion -The goal for this step is to transform a subgraph (body function) into a form possible to code generation. Input for this step is subgraph in a canonical form output is a subgraph in snippets dialect. +The goal for this step is to transform a subgraph (body function) into a form possible for code generation. Input for this step is a subgraph in a canonical form. Output is a subgraph in snippets dialect. 
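Before the dialect-conversion walkthrough, a self-contained sketch of the `NCHW16c` layout descriptor from the domain normalization step above. The stand-in types and the element types of the `BlockedShape` tuple are assumptions here, inferred from the quoted `std::make_tuple` call rather than taken from the real headers:

```cpp
#include <cstddef>
#include <tuple>
#include <vector>

// Stand-ins for the real nGraph types; only the structure matters here.
struct Shape      { std::vector<size_t> dims; };
struct AxisVector { std::vector<size_t> axes; };
enum class ElementType { f32 };

// Assumed shape of the descriptor: (blocked shape, axis order, element type).
using BlockedShape       = std::tuple<Shape, AxisVector, ElementType>;
using BlockedShapeVector = std::vector<BlockedShape>;

// <1, 42, 17, 31> in NCHW16c: the 42 channels are split into 3 blocks of 16
// (with padding), and axis 1 appears twice to mark the blocked dimension.
const BlockedShape nchw16c{Shape{{1, 3, 17, 31, 16}},
                           AxisVector{{0, 1, 2, 3, 1}},
                           ElementType::f32};
```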
-Snippet or kernel is formed around the subgraph body in a sequence of traversal steps. Let’s walk through these steps with the smallest possible subgraph which contains out of single `[Add]` operation.
+A snippet or a kernel is formed around the subgraph body in a sequence of traversal steps. Let us walk through these steps with the smallest possible subgraph which contains a single `[Add]` operation.
 
-While we extract subgraphs with the tokenization part we explicitly insert Parameters and Results to its body to form a complete nGraph Function.
+When subgraphs are extracted with the tokenization part, Parameters and Results are explicitly inserted into its body to form a complete nGraph Function.
 
```mermaid
flowchart LR
@@ -118,11 +118,11 @@ class nodeA8 steel1
class nodeA1,nodeA3 steel1
```
 
-This function represents operation dependencies in scalar (similar to OpenCL) notation while shapes of tensors are used to generate schedules. At this point kernel-schedule decomposition is made (similar to Halide/OpenCL/TVM)
+This function represents operation dependencies in scalar (similar to OpenCL) notation, while shapes of tensors are used to generate schedules. At this point, kernel-schedule decomposition is made (similar to Halide/OpenCL/TVM).
 
 ###### Explicit memory operations
 
-As a next step explicit memory operations are placed for each input and output. `InsertLoad` and `InsertStore` passes derived from `MatcherPass`.
+As a next step, explicit memory operations are placed for each input and output. The `InsertLoad` and `InsertStore` passes derive from `MatcherPass`.
 
```mermaid
flowchart LR
@@ -142,16 +142,16 @@ class nodeA8 carbon1
class nodeA1,nodeA3,nodeA6,nodeA7 steel1
```
 
-By default, memory operations assumes vector memory access, if scalar access is needed special passes `ReplaceLoadsWithScalarLoads` and `ReplaceStoresWithScalarStores` should be executed.
+By default, memory operations assume vector memory access. If scalar access is needed, the special `ReplaceLoadsWithScalarLoads` and `ReplaceStoresWithScalarStores` passes should be executed.
 
 ###### Explicit broadcast
 
-For each operation in body function inputs are checked against broadcasting. In case of parameters to be broadcasted explicit broadcast operation is generated. For example, if for the subgraph above we have `<1, 42, 17, 31>` and `<1, 42, 17, 1>` resulting subgraph is going to be
+For each operation in the body function, inputs are checked for broadcasting. When Parameters are to be broadcasted, an explicit broadcast operation is generated. For example, with `<1, 42, 17, 31>` and `<1, 42, 17, 1>` for the subgraph above, the resulting subgraph will be:
 
```mermaid
flowchart LR
-    nodeA1("Parameter\n<1, 42, 17, 1>") --> node6("Load\n<1, 42, 17, 1>")
-    node6("Load\n<1, 42, 17, 1>") --> nodeA9("BroadcastMove\n<1, 42, 17, 31>")
+    nodeA1("Parameter\n<1, 42, 17, 1>") --> nodeA6("Load\n<1, 42, 17, 1>")
+    nodeA6("Load\n<1, 42, 17, 1>") --> nodeA9("BroadcastMove\n<1, 42, 17, 31>")
    nodeA9("BroadcastMove\n<1, 42, 17, 31>") --> nodeA2(Add)
    nodeA3("Parameter\n<1, 42, 17, 31>") --> nodeA7("Load\n<1, 42, 17, 31>")
    nodeA7("Load\n<1, 42, 17, 31>") ---> nodeA2(Add)
@@ -164,10 +164,10 @@ classDef daisy1 fill:#FFE17A, stroke: #FEC91B, color: #262626
class nodeA2 daisy1
class nodeA5 moss1
class nodeA8,nodeA9 carbon1
-class nodeA1,nodeA3,node6,nodeA7 steel1
+class nodeA1,nodeA3,nodeA6,nodeA7 steel1
```
 
-If load followed by broadcast is detected then this pair is replaced by a single Broadcast load instruction. 
Like the following
+If Load followed by Broadcast is detected, then this pair is replaced by a single BroadcastLoad instruction:
 
```mermaid
flowchart LR
@@ -187,7 +187,7 @@ class nodeA8 carbon1
 class nodeA1,nodeA3,nodeA6,nodeA7 steel1
```
 
-Broadcast and regular streaming vector load is possible from the same pointer. Broadcast load should always go before streaming load. Broadcast load for non the most varying dimension is not generated, however it affects the generated schedule.
+Broadcast and regular streaming vector loads are possible from the same pointer. BroadcastLoad should always go before a streaming load. BroadcastLoad for a dimension other than the most varying one is not generated; however, it affects the generated schedule.
 
 #### Target-specific optimizations
 
@@ -197,13 +197,13 @@ Target developers can plug in to the code generation pipeline some specific opti
 
 #### Register allocation
 
-Canonicalized subgraph in a snippets dialect forms a basic block or region inside a snippet (kernel). Registers are allocated globally for the whole subgraph. Since all operations for a subgraph are assumed to be vector, only vector registers are allocated for the first generation of SnippetS. Linear scan register allocation algorithm is used. Register allocator is implemented as a function pass `ngraph::snippets::pass::AssignRegisters` and store allocated registers for each node into `rt_info`. `rt_info` for a node holds a register for Node's output. *However, this part should be refactored batter, either to become target independent or use target specific abstraction to acquire a new register*
+A canonicalized subgraph in a snippets dialect forms a basic block or region inside a snippet (kernel). Registers are allocated globally for the whole subgraph. Since all operations for a subgraph are assumed to be vector, only vector registers are allocated for the first generation of SnippetS. The linear scan register allocation algorithm is used. The register allocator is implemented as the `ngraph::snippets::pass::AssignRegisters` function pass and stores allocated registers for each node into `rt_info`. `rt_info` for a node holds a register for the Node's output. *However, this part should be refactored better, either to become target independent or to use target-specific abstraction to acquire a new register.*
 
-#### Schedule generation 
+#### Schedule generation
 
-The goal of this step is to transform subgraphs in a scalar notation into kernel functions callable from user code. `Kernel` and `Tile` operations are introduced for this purpose. Each of this operation has a constructor from code region described as a collection of operation and operands pairs `Kernel(const std::vector, ngraph::snippets::RegInfo>>& region);`.
+The goal of this step is to transform subgraphs in a scalar notation into kernel functions callable from user code. The `Kernel` and `Tile` operations are introduced for this purpose. Each of these operations has a constructor from a code region described as a collection of operation and operand pairs: `Kernel(const std::vector<std::pair<std::shared_ptr<ngraph::Node>, ngraph::snippets::RegInfo>>& region);`.
 
-If we return to example above this comes to a following hierarchical IR. If we limit scope to layout oblivious operations with broadcasting support, tile could be generated as a single loop over the most warning dimension. The second `Tile` is generated to handle tails and can be omitted if not needed. Special pass replaces memory operations on vector to scalar versions for tail subgraph.
+Returning to the example above, this comes down to the following hierarchical IR. 
If the scope is limited to layout-oblivious operations with broadcasting support, `Tile` could be generated as a single loop over the most varying dimension. The second `Tile` is generated to handle tails and can be omitted if not needed. A special pass replaces vector memory operations with scalar versions for the tail subgraph.
 
```mermaid
graph LR
@@ -244,13 +244,13 @@ class nodeD1 no-stroke
```
 
 Where
-* `Kernel` constants a collection of the tiles, corresponds to a Subgraph node and responsible for function signature generation, calls generators for all tiles and data sections
-* `Tile` contains single subgraph body, vector or scalar
-* `Data` corresponds to data section aggregated for all nodes in all Tile’s subgraphs
+* `Kernel` is a collection of the tiles, corresponds to a Subgraph node, and is responsible for function signature generation. It calls generators for all tiles and data sections.
+* `Tile` contains a single subgraph body, either vector or scalar.
+* `Data` corresponds to the data section aggregated for all nodes in all Tile’s subgraphs.
 
 #### Target code emission
 
-Target code emission is table based. Target is responsible for filling `jitters` table field in `Generator` class.
+Target code emission is table-based. A target is responsible for filling the `jitters` table field in the `Generator` class.
 
```
std::map<const ngraph::DiscreteTypeInfo, std::function<std::shared_ptr<Emitter>(std::shared_ptr<ngraph::Node>)>> jitters;
@@ -260,9 +260,9 @@ std::map(
 
 An OpenVINO plugin is treated as a target for snippets.
 
-Each nGraph node is mapped to a convertor function which creates `Emitter` form this node. Each specific emitter should extend from `Emitter`. It is used to map this node to target code and has `emit_code` and `emit_data` methods. `emit_data` is used during data section generation. All operations from snippets dialect which are legal for code generation should be expressed as operations derived from nGraph Op as well as Emitter derived snippets::Emitter class which knows how to translate this Op to Target specific ISA. (ex. xbyak is a jit backend for CPU plugin).
+Each nGraph node is mapped to a converter function which creates an `Emitter` from the node. Each specific emitter should extend from `Emitter`. It is used to map the node to the target code and has `emit_code` and `emit_data` methods. The `emit_data` is used during data section generation. All operations from the snippets dialect which are legal for code generation should be expressed as operations derived from nGraph Op, as well as an `Emitter`-derived `snippets::Emitter` class which knows how to translate this Op to target-specific ISA (for example, xbyak is a JIT backend for the CPU plugin).
 
-For minimal code generator support target should provide emitters for the following operations
+For minimal code generator support, a target should provide emitters for the following operations:
 
 * `Kernel`
 * `Tile`
@@ -273,29 +273,29 @@ For minimal code generator support target should provide emitters for the follow
 * `Store`
 * `ScalarStore`
 
-Once a schedule is generated, target code is emitted from a kernel in Generator::generate method by executing Kernel::emit_code function. Since Kernel and Tile represents hierarchical
+Once a schedule is generated, target code is emitted from a kernel in the `Generator::generate` method by executing the `Kernel::emit_code` function, since `Kernel` and `Tile` represent hierarchical
IR.
 
 ##### Dialect extensibility
 
-Target can potentially extend snippets dialect with target specific operation for code emission. 
It should implement:
+A target can potentially extend the snippets dialect with a target-specific operation for code emission. It should implement:
 
-* nGraph operation (ex. `class FMA : public ngraph::op::Op`)
-* Emitter for this operation (ex. `class FmaEmitter : public Emitter` )
-* register this pair in `jitters` map
+* an nGraph operation (for example, `class FMA : public ngraph::op::Op`)
+* an Emitter for the operation (for example, `class FmaEmitter : public Emitter`)
+* registration of the pair in the `jitters` map
 
 ### Calling convention
 
-Parameters for a generated snippet are split into schedule-invariant and schedule-dependent. Schedule-invariant parameters include pointers to input/output tensors and strides for each of them with the same rank as scheduling domain.
+Parameters for a generated snippet are split into schedule-invariant and schedule-dependent ones. Schedule-invariant parameters include pointers to input/output tensors and strides for each of them with the same rank as the scheduling domain.
 
 ### Diagnostics
 
 #### Reference mode
 
-Subgraph can be executed with nGraph references if no generator is present.
+A subgraph can be executed with nGraph references if no generator is present.
 
 ## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO SnippetS](../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [Developer documentation](../../../../docs/dev/index.md)
-
diff --git a/src/plugins/hetero/README.md b/src/plugins/hetero/README.md
index 952013815bc154..0935adc0f363a1 100644
--- a/src/plugins/hetero/README.md
+++ b/src/plugins/hetero/README.md
@@ -1,4 +1,4 @@
-# OpenVINO Hetero plugin design overview
+# OpenVINO Hetero Plugin Design Overview
 
 ## Subgraphs selection
 
@@ -6,17 +6,17 @@ Algorithm:
 
 For each plugin
 1. Select *root* node
-    * Node not in subgraph previously constructed
-    * Affinity is equal to plugin name
-2. Select adjacent node to any node in already subgraph which is not in rejected list
-    * if there are no such nodes **end**
-3. Check selected node has same affinity
-4. Add node to subgraph if check was successful or add to rejected list otherwise
-5. Check global condition
-    * Nodes in rejected list can never be added to subgraph
-    * Nodes not in subgraph and not in rejected list can possibly be added later
-    * Check subgraph topology (the only check now is there are no indirect subgraph self-references)
-6. If global condition was failed remove last node from subgraph, add it to rejected list and go to step 5
+    * A node not in a previously constructed subgraph
+    * Affinity is equal to the plugin name
+2. Select a node adjacent to any node already in the subgraph and not on the *rejected* list
+    * If there are no such nodes, **end**
+3. Verify that the selected node has the same affinity
+4. Add the node to the subgraph if the check has been successful; otherwise, add it to the *rejected* list
+5. Check the global condition
+    * Nodes on the *rejected* list can never be added to a subgraph
+    * Nodes not in a subgraph and not on the *rejected* list can possibly be added later
+    * Check the subgraph topology (the only check now is whether there are no indirect subgraph self-references)
+6. If the global condition has failed, remove the last node from the subgraph. Add it to the *rejected* list and go to step 5.
    * we can roll back multiple times here because the *rejected* list changes every time
 7. 
Go to step 2
 
@@ -32,7 +32,7 @@ graph TD;
 6-->7;
 ```
 
-Nodes [1,2,3,5,6,7] are supported in plugin, [4] is not
+Nodes [1,2,3,5,6,7] are supported in the plugin, [4] is not.
 
 Possible roots: [1,2,3,5,6,7]
 1. Select root [1]
@@ -50,27 +50,27 @@ Possible roots: [1,2,3,5,6,7]
 4. Merge [5]
    * Subgraph: [1,2,3,5]
    * Rejected: []
-    * Global condition: There is possible self-references through node [4] but we do not know yet, ok
+    * Global condition: There are possible self-references through node [4], but they are not known yet, ok
 5. Merge [6]
    * Subgraph: [1,2,3,5,6]
    * Rejected: []
-    * Global condition: There is possible self-references through node [4] but we do not know yet, ok
+    * Global condition: There are possible self-references through node [4], but they are not known yet, ok
 6. Merge [7]
    * Subgraph: [1,2,3,5,6,7]
   * Rejected: []
-    * Global condition: There is possible self-references through node [4] but we do not know yet, ok
+    * Global condition: There are possible self-references through node [4], but they are not known yet, ok
 7. Failed to merge [4]
    * Subgraph: [1,2,3,5,6,7]
    * Rejected: [4]
-    * Global condition: There is self-references through node [4], reject
+    * Global condition: There are self-references through node [4], reject
 8. Rollback [7]
    * Subgraph: [1,2,3,5,6]
    * Rejected: [4,7]
-    * Global condition: There is self-references through node [4], reject
+    * Global condition: There are self-references through node [4], reject
 9. Rollback [6]
    * Subgraph: [1,2,3,5]
    * Rejected: [4,6,7]
-    * Global condition: There is self-references through node [4], reject
+    * Global condition: There are self-references through node [4], reject
 10. Rollback [5]
    * Subgraph: [1,2,3]
    * Rejected: [4,5,6,7]
@@ -97,11 +97,11 @@ Possible roots: [5,6,7]
 5. Merge [2]
    * Subgraph: [2,3,5,6,7]
    * Rejected: []
-    * Global condition: There is possible self-references through node [4] but we do not know yet, ok
+    * Global condition: There are possible self-references through node [4], but they are not known yet, ok
 6. Failed to merge [4]
    * Subgraph: [2,3,5,6,7]
    * Rejected: [4]
-    * Global condition: There is self-references through node [4], reject
+    * Global condition: There are self-references through node [4], reject
 7. Rollback [2]
    * Subgraph: [3,5,6,7]
    * Rejected: [2,4]
@@ -113,7 +113,7 @@ Possible roots: [] no roots, **END**
 Subgraphs: [1,2,3], [3,5,6,7]
 
 Select best subgraph:
-* When we have multiple subgraphs larger ([3,5,6,7]) is always selected, always
+* When there are multiple subgraphs, the larger one ([3,5,6,7]) is **always** selected.
 
 Repeat previous steps with remaining nodes [1,2]
 
@@ -124,18 +124,18 @@ The final result is:
 
 ## Subgraphs self reference detection
 
-1. For each node in network build a list of reachable node (transitive closure)
-2. For each pair of nodes in subgraph find `path` nodes (nodes through one node in pair reachable to other)
-   * assume `src` - one node in pair, `dst` - other node in pair
-   * get all nodes reachable from `src`
-   * in those nodes find nodes through you can reach `dst` those will be our `path` node
-3. Results for pairs is cached.
-4. Check if there intersection between `path` nodes set and rejected nodes set for each nodes pair in subgraph
-5. In case of intersection we have a self-reference and subgraph is invalid
+1. For each node in a network, build a list of reachable nodes (transitive closure).
+2. For each pair of nodes in a subgraph, find `path` nodes (nodes through which one node of the pair is reachable from the other). 
+   * assume `src` - one node in the pair, `dst` - the other node in the pair
+   * get all reachable nodes from `src`
+   * among those nodes, find the nodes through which you can reach `dst`. These will be the `path` nodes.
+3. Results for pairs are cached.
+4. Check whether there is an intersection between the `path` node set and the rejected node set for each pair of nodes in a subgraph.
+5. If there is an intersection, a self-reference occurs and the subgraph is invalid.
 
 ## See also
+
 * [OpenVINO™ README](../../../README.md)
 * [OpenVINO Core Components](../../README.md)
 * [OpenVINO Plugins](../README.md)
 * [Developer documentation](../../../docs/dev/index.md)
-
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/README.md b/src/plugins/intel_cpu/README.md
index f7afe70ab1520f..87530644ebe748 100644
--- a/src/plugins/intel_cpu/README.md
+++ b/src/plugins/intel_cpu/README.md
@@ -2,17 +2,17 @@
 
 ## Key Contacts
 
-Please contact a member of [openvino-ie-cpu-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-ie-cpu-maintainers) group, for assistance regarding CPU.
+For assistance regarding CPU, contact a member of [openvino-ie-cpu-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-ie-cpu-maintainers) group.
 
 ## Components
 
 CPU Plugin contains the following components:
 
-* [docs](./docs/) - contains developer documentation pages for the component.
-* [src](./src/) - folder contains sources of the core component.
-* [tests](./tests/) - contains tests for OpenVINO Plugin components.
-* [thirdparty](./thirdparty/) - contains third-party modules.
-* [tools](./tools/) - contains tools and helpers for OpenVINO Plugin components.
+* [docs](./docs/) - developer documentation pages for the component.
+* [src](./src/) - sources of the core component.
+* [tests](./tests/) - tests for OpenVINO Plugin components.
+* [thirdparty](./thirdparty/) - third-party modules.
+* [tools](./tools/) - tools and helpers for OpenVINO Plugin components.
 
 ## Tutorials
 
@@ -23,6 +23,7 @@ CPU Plugin contains the following components:
 * [Internal CPU Plugin Optimizations](./docs/internal_cpu_plugin_optimization.md)
 
 ## See also
+
 * [OpenVINO™ README](../../../README.md)
 * [OpenVINO Core Components](../../README.md)
 * [OpenVINO Plugins](../README.md)
diff --git a/src/plugins/intel_cpu/docs/cpu_emulation.md b/src/plugins/intel_cpu/docs/cpu_emulation.md
index d431eda5d0d47d..6b11116e8c1296 100644
--- a/src/plugins/intel_cpu/docs/cpu_emulation.md
+++ b/src/plugins/intel_cpu/docs/cpu_emulation.md
@@ -2,9 +2,9 @@
 
 Intel SDE can be used for emulating CPU architecture, checking for AVX/SSE transitions, bad pointers and data misalignment, etc.
 
-Also supports debugging within emulation.
+It also supports debugging within emulation.
 
-In general the tool can be used for all kind of troubleshooting activities except performance analysis.
+In general, the tool can be used for all kinds of troubleshooting activities except performance analysis. 
See [Documentation](https://www.intel.com/content/www/us/en/developer/articles/tool/software-development-emulator.html) for more information.
 
@@ -19,17 +19,24 @@ OV_CPU_BLOB_DUMP_FORMAT=TEXT OV_CPU_BLOB_DUMP_NODE_TYPE=Convolution \
 
 - Running _cpuFuncTests_ on some old architecture, for example Sandy Bridge:
 
-`/path/to/sde -snd -- ./cpuFuncTests`
+```sh
+/path/to/sde -snb -- ./cpuFuncTests
+```
 
 - Count AVX/SSE transitions for the current host:
 
-`/path/to/sde -ast -- ./benchmark_app -m path/to/model.xml`
+```sh
+/path/to/sde -ast -- ./benchmark_app -m path/to/model.xml
+```
 
-> **NOTE**: Best way to check for AVX/SSE transitions is to run within Alder Lake emulation:
+> **NOTE**: The best way to check for AVX/SSE transitions is to run within Alder Lake emulation:
 
-`/path/to/sde -adl -- ./benchmark_app -m path/to/model.xml`
+```sh
+/path/to/sde -adl -- ./benchmark_app -m path/to/model.xml
+```
 
 ## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
diff --git a/src/plugins/intel_cpu/docs/debug_capabilities.md b/src/plugins/intel_cpu/docs/debug_capabilities.md
index 6ae506fb4f6968..bffa9aff5fd9b6 100644
--- a/src/plugins/intel_cpu/docs/debug_capabilities.md
+++ b/src/plugins/intel_cpu/docs/debug_capabilities.md
@@ -1,19 +1,20 @@
-# CPU Plugin debug capabilities
+# CPU Plugin Debug Capabilities
 
-The page describes list of useful debug features, controlled by environment variables.
+The page describes a list of useful debug features, controlled by environment variables.
 They can be activated at runtime and might be used for analyzing issues, getting more context, comparing execution results, etc.
 
-To have CPU debug capabilities available at runtime the following CMake option should be used when building the plugin:
+To have CPU debug capabilities available at runtime, use the following CMake option when building the plugin:
* `ENABLE_DEBUG_CAPS`. 
Default is `OFF` -The following debug capabilities are available with the latest openvino: +The following debug capabilities are available with the latest OpenVINO: - [Verbose mode](../src/docs/verbose.md) - [Blob dumping](../src/docs/blob_dumping.md) - [Graph serialization](../src/docs/graph_serialization.md) ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_cpu/docs/internal_cpu_plugin_optimization.md b/src/plugins/intel_cpu/docs/internal_cpu_plugin_optimization.md index 377792a6dc9ec1..169e6eab2255e8 100644 --- a/src/plugins/intel_cpu/docs/internal_cpu_plugin_optimization.md +++ b/src/plugins/intel_cpu/docs/internal_cpu_plugin_optimization.md @@ -135,7 +135,7 @@ class nodeB2,nodeB3,nodeB5,nodeB6,nodeB7,nodeB9 steel1 ``` ## Fusing Convolution and Sum Layers -A combination of convolution, simple, and Eltwise layers with the sum operation results in a single layer called *Convolution*: +A combination of convolution, simple, and Eltwise layers with the sum operation results in a single layer called *Convolution*: ```mermaid flowchart TD @@ -216,6 +216,7 @@ CPU plugin removes a Power layer from a topology if it has the following paramet - offset = 0 ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_cpu/docs/performance_analysis_ITT_counters.md b/src/plugins/intel_cpu/docs/performance_analysis_ITT_counters.md index 263b043dd12d23..1cb302b9ab2f9f 100644 --- a/src/plugins/intel_cpu/docs/performance_analysis_ITT_counters.md +++ b/src/plugins/intel_cpu/docs/performance_analysis_ITT_counters.md @@ -1,4 +1,4 @@ -# Performance analysis using ITT counters +# Performance Analysis Using ITT Counters ## Contents @@ -21,8 +21,11 @@ For performance analysis, follow the steps below: ### Intel SEAPI -#### Example of tool run: -`python ~/tools/IntelSEAPI/runtool/sea_runtool.py -o trace -f gt ! ./benchmark_app -niter 1 -nireq 1 -nstreams 1 -api sync -m ./resnet-50-pytorch/resnest-50-pytorch.xml` +#### Example of running the tool: + +```sh +python ~/tools/IntelSEAPI/runtool/sea_runtool.py -o trace -f gt ! ./benchmark_app -niter 1 -nireq 1 -nstreams 1 -api sync -m ./resnet-50-pytorch/resnest-50-pytorch.xml +``` #### Mandatory parameters: * -o trace – output file name @@ -34,8 +37,11 @@ Generated file can be opened with google chrome using "chrome://tracing" URL. ### Intel Vtune Profiler -#### Example of tool run: -`vtune -collect hotspots -k sampling-mode=hw -k enable-stack-collection=true -k stack-size=0 -k sampling-interval=0.5 -- ./benchmark_app -nthreads=1 -api sync -niter 1 -nireq 1 -m ./resnet-50-pytorch/resnet-50-pytorch.xml` +#### Example of running the tool: + +```sh +vtune -collect hotspots -k sampling-mode=hw -k enable-stack-collection=true -k stack-size=0 -k sampling-interval=0.5 -- ./benchmark_app -nthreads=1 -api sync -niter 1 -nireq 1 -m ./resnet-50-pytorch/resnet-50-pytorch.xml +``` #### Mandatory parameters: * -collect hotspots @@ -49,9 +55,9 @@ Generated file can be opened with Vtune client. Use API defined in [openvino/itt](https://docs.openvinotoolkit.org/latest/itt_2include_2openvino_2itt_8hpp.html) module. 
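As a sketch of that pattern (the `OV_ITT_DOMAIN` and `OV_ITT_SCOPED_TASK` macros come from `openvino/itt.hpp`; the domain and task names below are hypothetical):

```cpp
#include <openvino/itt.hpp>

namespace {
// A domain groups related counters in the resulting trace; the name here is
// illustrative only.
OV_ITT_DOMAIN(MyPluginDomain);
}  // namespace

void prepare_weights() {
    // Opens a task on entry and closes it automatically on scope exit, so the
    // region shows up as a counter in VTune / IntelSEAPI traces.
    OV_ITT_SCOPED_TASK(MyPluginDomain, "prepare_weights");
    // ... workload to be measured ...
}
```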
## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
 * [OpenVINO GPU Plugin](../README.md)
 * [Developer documentation](../../../../docs/dev/index.md)
-
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/docs/runtime_parameters_cache.md b/src/plugins/intel_cpu/docs/runtime_parameters_cache.md
index 85ccea276cb2b0..5eee9fcac20cc0 100644
--- a/src/plugins/intel_cpu/docs/runtime_parameters_cache.md
+++ b/src/plugins/intel_cpu/docs/runtime_parameters_cache.md
@@ -1,22 +1,30 @@
-# CPU plugin runtime parameters cache
+# CPU Plugin Runtime Parameters Cache
 
 ## Checklist for the runtime cache implementation
-1. Determine what data will be cached. We usually use the Executor concept that represents a junction of the executable code, usually JIT generated kernel, with some precomputed algorithm parameters.
-2. Provide a key that uniquelly identifies the cached value as a funtion of dynamically changing parameters, i.e. shapes, dynamic input that determines the algorithm parameters, etc. To be used in a hash table, the key must have the following static interface:
+
+1. Determine what data will be cached. It is commonly recommended to use the Executor concept that represents a junction of the executable code, usually a JIT generated kernel, with some precomputed algorithm parameters.
+
+2. Provide a key that uniquely identifies the cached value as a function of dynamically changing parameters, that is, shapes, dynamic input that determines the algorithm parameters, etc. To be used in a hash table, the key must have the following static interface:
 ```cpp
 struct KeyType {
     size_t hash() const;
     bool operator== () const;
 };
 ```
-3. Provide a builder, that is, a callable object of the following signature:
+
+3. Provide a builder, that is, a callable object of the following signature:
 ```cpp
 ValueType build(const KeyType& key);
 ```
-   The `ValueType` is a type to be cached (e.g. shared pointer to Executor object). Remember that in the current cache implementation, a default constructed `ValueType()` object is considered empty, so it is better to use `std::shared_ptr` as the `ValueType`. The builder instantiates a specific type of cached entity from the `key`, thus the `key` completely defines the cached data. The builder is used to creat the `ValueType` object in case of cache miss.
-4. Refactor the specific implementation of the `prepareParams()` method to extract the cached object construction logic (e.g. the algorithm parameters recalculation and JIT kernel generation) into the builder.
+   The `ValueType` is a type to be cached (for example, a shared pointer to an Executor object). Remember that in the current cache implementation, a default constructed `ValueType()` object is considered empty. Therefore, it is better to use `std::shared_ptr` as the `ValueType`. The builder instantiates a specific type of cached entity from the `key`, so the `key` completely defines the cached data. The builder is used to create the `ValueType` object in case of a cache miss.
+
+4. Refactor the specific implementation of the `prepareParams()` method to extract the cached object construction logic (for example, the algorithm parameters recalculation and JIT kernel generation) into the builder.
+
 5. Add the key generation code into the `prepareParams()` method to query the cache.
-6. Implement cache usage as the following:
+
+6. Implement cache usage as follows:
 ```cpp
 void prepareParams() override {
     ... //code that prepares parameters for the key
@@ -31,6 +39,7 @@
     execPtr = result.first;
 }
 ```
+
 7. To provide smoke testing of these changes, add repeated shapes to the "target shapes" part of the corresponding single layer test definition:
 ```cpp
 { //dynamic case description each pair per each input has {{dynamic shape}, {{static shape case1}, {static shape case2}, ...}
@@ -38,7 +47,7 @@
      {{-1, -1, 5}, {{10, 10, 5}, {5, 5, 5}, {10, 10, 5}}} // input 1
 },
 ```
-   It worth to mention that placing two identical target shapes one after another does not trigger the cache, since another optimization based on the fact that the shapes have not been changed takes place. For example, the following test definition does not properly test the cache:
+   **Note that placing two identical target shapes one after another does not trigger the cache,** since another optimization based on the fact that the shapes have not been changed takes place. For example, the following test definition does not properly test the cache:
 ```cpp
 { // the shape infer and params preparation stages will be skipped for the second target shapes combination since the shapes are not changed
     {{-1, -1, -1}, {{5, 5, 5}, {5, 5, 5}}}, // input 0
@@ -47,6 +56,7 @@
 ```
 
 ## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
diff --git a/src/plugins/intel_gpu/README.md b/src/plugins/intel_gpu/README.md
index 40d33a173c49e0..d8b81154f9368e 100644
--- a/src/plugins/intel_gpu/README.md
+++ b/src/plugins/intel_gpu/README.md
@@ -4,7 +4,7 @@ GPU plugin in [OpenVINO toolkit](https://github.com/openvinotoolkit/openvino) su
 
 ## Key Contacts
 
-Please contact a member of [openvino-ie-gpu-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-ie-gpu-maintainers) group, for assistance regarding GPU.
+For assistance regarding GPU, contact a member of [openvino-ie-gpu-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-ie-gpu-maintainers) group.
 
 ## Components
 
@@ -32,29 +32,32 @@ GPU Plugin contains the following components:
 * [GPU plugin unit test](./docs/gpu_plugin_unit_test.md)
 
 ## Attached licenses
+
 GPU plugin uses 3rd-party components licensed under following licenses:
 - *googletest* under [Google License](https://github.com/google/googletest/blob/master/googletest/LICENSE)
 - *OpenCL™ ICD and C++ Wrapper under [Khronos™ License](https://github.com/KhronosGroup/OpenCL-CLHPP/blob/master/LICENSE.txt)
 - *RapidJSON* under [Tencent License](https://github.com/Tencent/rapidjson/blob/master/license.txt)
 
 ## Support
-Please report issues and suggestions
-[GitHub issues](https://github.com/openvinotoolkit/openvino/issues).
+
+To report issues and make suggestions, see [GitHub issues](https://github.com/openvinotoolkit/openvino/issues).
 
 ## How to Contribute
-We welcome community contributions to GPU plugin. If you have an idea how to improve the library:
+
+Community contributions to GPU plugin are highly welcome. 
If you have a suggestion on how to improve the library: - Share your proposal via [GitHub issues](https://github.com/openvinotoolkit/openvino/issues) - Ensure you can build the product and run all the tests with your patch -- In the case of a larger feature, create a test +- In case of a larger feature, create a test - Submit a [pull request](https://github.com/openvinotoolkit/openvino/pulls) We will review your contribution and, if any additional fixes or modifications -are necessary, may provide feedback to guide you. When accepted, your pull -request will be merged into our GitHub repository. +are necessary, we may provide feedback to guide you. Once your pull request +has been approved, it will be merged into our GitHub repository. ## System Requirements + GPU plugin supports Intel® HD Graphics, Intel® Iris® Graphics and Intel® Arc™ Graphics and is optimized for Gen9-Gen12LP, Gen12HP architectures GPU plugin currently uses OpenCL™ with multiple Intel OpenCL™ extensions and requires Intel® Graphics Driver to run. diff --git a/src/plugins/intel_gpu/docs/basic_data_structures.md b/src/plugins/intel_gpu/docs/basic_data_structures.md index 087ea86b4b0d0d..a11f8ab666ab6a 100644 --- a/src/plugins/intel_gpu/docs/basic_data_structures.md +++ b/src/plugins/intel_gpu/docs/basic_data_structures.md @@ -1,4 +1,4 @@ -# Basic data structures of GPU graph and overall flow +# Basic Data Structures of GPU Graph and Overall Flow ## Overall graph data structure @@ -60,23 +60,23 @@ d1 ..> d2 : Dependency ``` There are three levels of abstraction in the graph structures being used in the gpu plugin : *topology*, *program*, *network*.
-The above figure presents the overall data structures. +The above figure presents the overall data structures. -First, the original model should be presented as a corresponding *topology*, which is consisting of primitives and their connections. It can be regarded as a simple graph structure representing the original model. +First, the original model should be presented as a corresponding *topology*, which consists of primitives and their connections. It can be regarded as a simple graph structure representing the original model. -Then the topology is to be converted to a *program*, which is consisting of *program_nodes* corresponding to the original primitives and their connections. +Then the topology is to be converted to a *program*, which consists of *program_nodes* corresponding to the original primitives and their connections. Here, the majority of the transformation and optimizations are performed on the *program*. -Also, the *primitive_impl* is created for each *program_node* at this stage, which holds the selected kernels for each *program_node* and the required information to run the kernels such as work group sizes and kernel arguments, etc. The final source code of the kernels are decided and compiled at this stage, too. -Note that a *program* is common for the streams, i.e., there is only one *program* created for all the streams. +Also, the *primitive_impl* is created for each *program_node* at this stage, which holds the selected kernels for each *program_node* and the required information to run the kernels, such as work group sizes and kernel arguments, etc. The final source code of the kernels is decided and compiled at this stage, too. +Note that a *program* is common for the streams, that is, there is only one *program* created for all the streams. -Once the *program* is finalized, then the *network* is built from the *program* for each stream. -A *network* is consisting of primitive instances (a.k.a *primitive_inst*) that contains the required memory allocations for the kernels. -Then finally we can run the *network* by running the network::execute(). +Once the *program* is finalized, the *network* is built from the *program* for each stream. +A *network* consists of primitive instances (*primitive_inst*) that contain the required memory allocations for the kernels. +Finally, you can run the *network* using the `network::execute()` method. -The more detailed description of each component is to be described in the below sections. +A more detailed description of each component is described in the sections below. -## primitive +## primitive ```cpp struct primitive { ... @@ -87,16 +87,16 @@ struct primitive { ... }; ``` -A *primitive* is the primary representation of an operation in gpu plugin, which comprises a graph structure, i.e., the *topology*. A *primitive* is to be created for a layer operation in the original model and holds the basic information about the operation, such as required input, output, attributes, as well as its own id, a.k.a *primitive_id*. Here, the *primitive_id* is a unique string id assigned to each *primitive* throughout the processing.
+A *primitive* is the primary representation of an operation in GPU plugin, which comprises a graph structure, that is, the *topology*. A *primitive* is to be created for a layer operation in the original model and holds the basic information about the operation, such as required input, output, attributes, as well as its own id (*primitive_id*). Here, the *primitive_id* is a unique string id assigned to each *primitive* throughout the processing.
-The APIs of the available primitives can be found [here](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/include/intel_gpu/primitives).
+See the APIs of the available [primitives](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/include/intel_gpu/primitives).
-An example creation of a arg_max_min primitive: +An example creation of a `arg_max_min` primitive: ```cpp cldnn::arg_max_min top_k_prim = cldnn::arg_max_min("top_k", { "input" }, arg_max_min::max, top_k, arg_max_min::y, arg_max_min::sort_by_values, false, "", padding(), data_types::f32); ``` -In GPU plugin, the *primitives* are converted from ngraph operations, which can be found [here](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/src/plugin/ops). +In GPU plugin, the *primitives* are converted from ngraph [operations](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/src/plugin/ops). ## topology ```cpp @@ -107,9 +107,9 @@ struct topology{ }; ``` -A *topology* is a graph structure consisting of *primitives* and their connections. Here a connection is defined by input primitives assigned to a primitive. +A *topology* is a graph structure consisting of *primitives* and their connections. Here, a connection is defined by input primitives assigned to a primitive. -A simple example of creation of a topology, which is consisting of two poolings, one concatenation of the poolings, and a reorder primitive, is shown as follows: +A simple example of creating a topology, which consists of two poolings, one concatenation of the poolings, and a reorder primitive, is as follows: ```cpp auto input0 = engine.allocate_memory({data_types::i8, format::bfyx, {1, 1, 8, 3}}); auto input1 = engine.allocate_memory({data_types::i8, format::bfyx, {1, 1, 8, 3}}); @@ -127,9 +127,9 @@ topology topology(input_layout("input0", input0->get_layout()), reorder("reorder", "concat", reorder_layout)); ``` -In the above example, "pool0" is the *primitive_id* of the first pooling, and "input0" is the *primitive_id* of the input primitive of it. The latter parameters such as pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1} are other properties for pooling primitive, pooling_mode, tensor size, stride, respectively. +In the example above, "pool0" is the *primitive_id* of the first pooling, and "input0" is the *primitive_id* of the input primitive of it. The `pooling_mode::max, {1, 1, 2, 2}, {1, 1, 1, 1}` parameters stand for other properties for pooling: primitive, pooling_mode, tensor size, stride, respectively. -Note that topology is created from ngraph representation in the gpu plugin. Manual definition of a topology shown in the above snippet is usually for unittest purpose. +Note that topology is created from ngraph representation in the GPU plugin. Manual definition of a topology shown in the snippet above is usually for the purpose of a unit test. ## program_node (impl) @@ -147,14 +147,15 @@ struct program_node { ... }; ``` -A program is consisting of program_nodes which are created from primitives. ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L353)) A program_node is created by a factory for each primitive type, i.e., primitive_type, which is associated to each primitive as type ([link](https://github.com/openvinotoolkit/openvino/blob/173f328c53d39dd42ecdb9de9e04f9d2c266683f/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp#L79)). (Note that this primitive_type is used to create primitive_inst or call choose_impl too.) +A program consists of *program_nodes* which are created from primitives. 
([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L353)) A *program_node* is created by a factory for each *primitive type*, that is, *primitive_type*, which is associated to each primitive as a type ([link](https://github.com/openvinotoolkit/openvino/blob/173f328c53d39dd42ecdb9de9e04f9d2c266683f/src/plugins/intel_gpu/include/intel_gpu/primitives/primitive.hpp#L79)). Note that this *primitive_type* is used to create *primitive_inst* or call *choose_impl* too.
 
-Basically a program_node holds the following information which is to be decided throughout the transformation / optimization processes in a program:
-* layout : output layout of a program_node. ([impl](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp))
-* dependencies : a list of program_nodes whose outputs are used by the current program_node as the inputs
-* memory dependencies : a list of program_nodes, the live ranges of the outputs of them overlaps with that of the current program_node
-* fused operations : fused operations to the current program_node
-* selected impl : The primitive_impl object which holds the information for the selected kernel required to run it, such as the selected kernels, work group size, etc. Also this object has the methods to set kernel arguments for a primitive_inst and execute the kernel by enqueueing it to the command queue.
+A *program_node* holds the following information which is to be decided throughout the transformation / optimization processes in a program:
+
+* layout: output layout of a *program_node*. ([impl](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp))
+* dependencies: a list of *program_nodes*, the outputs of which are used by the current *program_node* as the inputs
+* memory dependencies: a list of *program_nodes*, the live ranges of whose outputs overlap with that of the current *program_node*
+* fused operations: fused operations to the current *program_node*
+* selected impl: The *primitive_impl* object which holds the information for the selected kernel required to run it, such as the selected kernels, work group size, etc. Also, this object has the methods to set kernel arguments for a *primitive_inst* and execute the kernel by enqueueing it to the command queue.
 
 ## program (impl)
 
```cpp
struct program {
...
};
```
 The major tasks that are done while building a program are as follows: ([ref](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L433))
-* Init graph : Create an initial program consisting of program_nodes built from a given topology
-* Optimization (Major optimizations will be dealt with from another section TBD)
-    * pre-optimization ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L474)): Optimizations done before graph_compilation. Notable passes are as follows:
-        * prepare_primitive_fusing : decision of fusing
-        * reorder_inputs : decision of preferred layout / impl (ocl vs onednn) and adding reorders w.r.t the decision
-    * post-optimization ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L437)) Optimizations done after graph_compilation
- * post_optimize_weights : Add reorder for the weights toward preferred formats (as generic nodes)
- * propagate_constants : Transfer and reorder original weight data to the generic_nodes created at post_optimize_weights. Here, note that the constant propagation is doing weight reorder by running actual network (w/ is_internal = true). To this end, a temporal program is created/built/run within this pass.
-
-* Kernel selection and graph compilations ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L436)) : Select best kernel for the program_node and create the impl (i.e., primitive_impl), and collect the kernel source code strings to the kernels_cache.
+* Init graph: Create an initial program consisting of *program_nodes* built from a given topology.
+* Optimization (Major optimizations will be dealt with in another section, TBD)
+    * pre-optimization ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L474)): Optimizations done before *graph_compilation*. Notable passes are as follows:
+        * *prepare_primitive_fusing*: decision of fusing
+        * *reorder_inputs*: decision of preferred layout / impl (ocl vs onednn) and adding reorders w.r.t. the decision
+    * post-optimization ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L437)): Optimizations done after *graph_compilation*
+ * *post_optimize_weights*: Add reorder for the weights toward preferred formats (as generic nodes)
+ * *propagate_constants*: Transfer and reorder original weight data to the *generic_nodes* created at *post_optimize_weights*. Note that the constant propagation is doing a weight reorder by running the actual network (w/ is_internal = true). To this end, a temporal program is created/built/run within this pass.
+ +* Kernel selection and graph compilations ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L436)): Select best kernel for the *program_node* and create the impl (that is, *primitive_impl*), and collect the kernel source code strings to the kernels_cache. * Kernel compilation ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/program.cpp#L451)): JIT compilation of the collected kernels. Currently 9 kernels are combined as a batch and compiled at a time. Also the batches are compiled in parallel. See [here](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp#L400). ## primitive_inst (impl) @@ -203,12 +204,12 @@ class primitive_inst { ... }; ``` -Once all processing at a program level is finished, a network is to be built from the program. -primitive_inst is the basic component comprising a network. -While each primitive_inst object is still associated to the corresponding program_node, it holds the required memory objects such as output memory objects and intermediate memory objects that are to be used by that node. A brief description for the two kinds of memory allocated for a primitive_inst is as follows: +Once all processing at a program level has been finished, a network is to be built from the program. +The *primitive_inst* is the basic component comprising a network. +While each *primitive_inst* object is still associated with the corresponding *program_node*, it holds the required memory objects, such as output memory objects and intermediate memory objects that are to be used by that node. A brief description of the two kinds of memory allocated for a *primitive_inst* is as follows: -* output memory : An output memory of a primitive_inst is allocated at the creation of each primitive_inst ([impl](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/primitive_inst.cpp#L210)), unless its output is reusing the input memory or the node is a mutable data to be used as a 2nd output. The general output tensors are allocated by the memory pool, so that the memory could be reused by other nodes when it is not needed. (Note that constants data are not reusable and should retain the own memory, so that they could be shared by multiple streams. More descriptions about memory pool will be given by dedicated section (TBD)). -* intermediate memory ([impl](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/primitive_inst.cpp#L215)): Some kernels requires intermediate memories in addition to the input/output memories such as [detection_output](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp#L155). The allocation happens after all primitive_insts are finished ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/network.cpp#L592)), since it needs to be processed in a processing_order to use the predecessors' allocation information while the creation of primitive_inst is done in a order sorted by memory_size. 
+* output memory: An output memory of a *primitive_inst* is allocated at the creation of each *primitive_inst* ([impl](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/primitive_inst.cpp#L210)), unless its output is reusing the input memory or the node is a mutable data to be used as a second output. The general output tensors are allocated by the memory pool, so that the memory could be reused by other nodes when it is not needed. Note that constants data is not reusable and should retain its own memory so that it could be shared by multiple streams. A more detailed description of the memory pool will be given in the dedicated section (TBD). +* intermediate memory ([impl](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/primitive_inst.cpp#L215)): Some kernels require intermediate memories in addition to the input/output memories such as [detection_output](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp#L155). The allocation happens after all *primitive_insts* are finished ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/network.cpp#L592)), since it needs to be processed in a *processing_order* to use the predecessors' allocation information while the creation of *primitive_inst* is done in an order sorted by *memory_size*. ## network (impl) ```cpp @@ -230,14 +231,15 @@ struct network { void allocate_primitives(); }; ``` -When a network is built, the comprising primitives are allocated and dependencies among them are set ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/network.cpp#L259)). +When a network is built, the comprising primitives are allocated and dependencies among them are set ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/network.cpp#L259)). + +The major processes, done while a network is executed, are as follows ([impl]( https://github.com/openvinotoolkit/openvino/blob/3de428c7139fef69e37b406c3490c26b67b48026/src/plugins/intel_gpu/src/graph/network.cpp#L663)): +* set arguments of the primitives (that is, set the [kernel_args](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp) required for running the kernels such as input/output memory address) -The major processes done while a network is executed are as follows ([impl]( https://github.com/openvinotoolkit/openvino/blob/3de428c7139fef69e37b406c3490c26b67b48026/src/plugins/intel_gpu/src/graph/network.cpp#L663)) : -* set arguments of the primitives (i.e., set the [kernel_args](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/include/intel_gpu/runtime/kernel_args.hpp) required for running the kernels such as input/output memory address) +* [execute primitives](https://github.com/openvinotoolkit/openvino/blob/3de428c7139fef69e37b406c3490c26b67b48026/src/plugins/intel_gpu/src/graph/network.cpp#L849): Execute each primitive, that is, enqueue the kernels to the context queue. 
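As a condensed, hedged sketch of this flow in the unit-test style used earlier in this document (header paths, the engine argument, and the primitive names are assumptions for illustration, not the exact plugin API):

```cpp
#include <intel_gpu/graph/network.hpp>
#include <intel_gpu/graph/topology.hpp>
#include <intel_gpu/primitives/activation.hpp>
#include <intel_gpu/primitives/input_layout.hpp>

using namespace cldnn;

// Builds a one-primitive topology, compiles it into a program/network pair,
// and runs a single execution; outputs are keyed by primitive_id.
std::map<primitive_id, network_output> run_once(engine& eng, memory::ptr input_mem) {
    topology topo(input_layout("input", input_mem->get_layout()),
                  activation("relu", "input", activation_func::relu));
    network net(eng, topo);                  // the program is built here
    net.set_input_data("input", input_mem);  // binds user memory to the input
    return net.execute();                    // set_arguments + enqueue kernels
}
```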
 
-## See also
+
+## See Also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
diff --git a/src/plugins/intel_gpu/docs/execution_of_inference.md b/src/plugins/intel_gpu/docs/execution_of_inference.md
index 66a2a2ed2de913..6433ca633efe96 100644
--- a/src/plugins/intel_gpu/docs/execution_of_inference.md
+++ b/src/plugins/intel_gpu/docs/execution_of_inference.md
@@ -1,31 +1,33 @@
 # Execution of Inference
 
-Network execution happens when user calls `inferRequest->infer()` or `inferRequest->start_async()`. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/samples/cpp/benchmark_app/main.cpp#L929)
+Network execution is triggered when the `inferRequest->infer()` or `inferRequest->start_async()` methods are called. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/samples/cpp/benchmark_app/main.cpp#L929)
 
-In high level, all we need to do is enqueuing OCL kernels with buffers. For that purpose, we need to find the `cldnn::network` instance as it contains the required buffers for execution. [(link)](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/basic_data_structures.md#network-impl) `CPUStreamExecutor` is holding streams and the stream corresponds to the `cldnn::network` structure. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/inference/src/threading/ie_cpu_streams_executor.cpp#L263)
+At a high level, all that is required is to enqueue OCL kernels with buffers. For that purpose, you need to find the `cldnn::network` instance, as it contains the required buffers for execution. [(link)](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/basic_data_structures.md#network-impl) `CPUStreamExecutor` holds streams, and a stream corresponds to the `cldnn::network` structure. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/inference/src/threading/ie_cpu_streams_executor.cpp#L263)
 
 The main body of network execution is `cldnn::network::execute_impl`. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/graph/network.cpp#L663) In this function, `set_arguments()` is called to set OpenCL arguments and `execute_primitive` is called to enqueue kernels to OCL queue.
-In case of synchronous API call(i.e. `inferRequest->infer()`), waiting for completion of kernels is also required. It is called from `cldnn::network_output::get_memory()` function. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp#L31)
+In case of a synchronous API call (that is, `inferRequest->infer()`), waiting for the completion of kernels is also required. The wait is triggered from the `cldnn::network_output::get_memory()` function. 
[(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp#L31) ## Optimized-out node + During graph compilation [(link)](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/graph_optimization_passes.md), some nodes may be optimized out. -For example, concat operation may be executed _implicitly_, or in other words, concat may be _optimized out_. Implicit concat is possible when the input of concat can put the output tensor directly into the result tensor of concat. +For example, concat operation may be executed _implicitly_, or in other words, concat may be _optimized out_. Implicit concat is possible when the input of concat can put the output tensor directly into the resulting tensor of concat. -In such case, we don't remove the node in the graph for integrity of node connection. Concat layer is just marked as **optimized-out** and not executed during runtime. [(src)](https://github.com/openvinotoolkit/openvino/blob/dc6e5c51ee4bfb8a26a02ebd7a899aa6a8eeb239/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp#L155) +In such a case, the node is not removed from the graph, to keep the node connections intact. The concat layer is just marked as **optimized-out** and not executed during runtime. [(src)](https://github.com/openvinotoolkit/openvino/blob/dc6e5c51ee4bfb8a26a02ebd7a899aa6a8eeb239/src/plugins/intel_gpu/src/graph/impls/ocl/primitive_base.hpp#L155) ## Dumping layer in/out buffer during execution -`cldnn::network::execute_impl` also contains some logic to dump layer in/out buffers for debugging purpose. As it is related to memory usage, it deserves some description, too. +The `cldnn::network::execute_impl` function also contains some logic to dump layer in/out buffers for debugging purposes. As it is related to memory usage, it deserves some description, too. -In order to dump buffers, we need to wait for the moment that the kernel is about to be called(for source buffer) or just called(for destination buffer). In other moments, we don't have the layer's buffer as the buffers are reused from memory pool. [(link)](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md#memory-dependency-and-memory-pool) +To dump buffers, you need to wait for the moment when the kernel is about to be called (for the source buffer) or has just been called (for the destination buffer). At other moments, the layer's buffer is not available, as the buffers are reused from the memory pool. [(link)](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md#memory-dependency-and-memory-pool) -`get_stream().finish()` is called firstly as we need to be synchronous with kernel execution. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/graph/network.cpp#L712) Then we can access the buffer. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/graph/network.cpp#L114) This access varies depending on the kind of buffer. If it is `usm_host` or `usm_shared`, it is just accessed directly. If it is `usm_device`, it is accessed after copying the data into host memory because host cannot access `usm_device` directly.
[(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L312) If it is ocl memory, we map this into host memory. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L46) +The `get_stream().finish()` function is called first, as the dump needs to be synchronized with kernel execution. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/graph/network.cpp#L712). Then, you can access the buffer. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/graph/network.cpp#L114). This access varies depending on the kind of buffer. If it is `usm_host` or `usm_shared`, it is just accessed directly. If it is `usm_device`, it is accessed after copying the data into host memory because the host cannot access `usm_device` directly. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L312) If it is OCL memory, it is mapped into host memory. [(src)](https://github.com/openvinotoolkit/openvino/blob/f48b23362965fba7e86b0077319ea0d7193ec429/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L46) Typical network execution happens with `usm_host` for network input and output and `usm_device` for the buffers inside the network. -For usage of this dumping feature, please see [link](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/gpu_debug_utils.md#layer-inout-buffer-dumps). +For usage of this dumping feature, see this [link](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/gpu_debug_utils.md#layer-inout-buffer-dumps). ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_gpu/docs/gpu_debug_utils.md b/src/plugins/intel_gpu/docs/gpu_debug_utils.md index 1acc57867782c5..33c07a06cc4280 100644 --- a/src/plugins/intel_gpu/docs/gpu_debug_utils.md +++ b/src/plugins/intel_gpu/docs/gpu_debug_utils.md @@ -1,21 +1,23 @@ -# GPU plugin debug utils +# GPU Plugin Debug Utils This document is a list of useful debug features / tricks that might be used to find root cause of performance / functional issues. Some of them are available by default, but some others might require plugin recompilation. ## Debug Config -`Debug_config` is an infra structure that contains number of easy-to-use debugging features. It has various control parameters. You can check list of parameters from the source code `cldnn::debug_configuration`. + +`Debug_config` is an infrastructure that contains several easy-to-use debugging features. It has various control parameters, which you can check from the source code `cldnn::debug_configuration`. ### How to use it -First, this feature should be enabled from cmake configuration `ENABLE_DEBUG_CAPS`. When openvino is released, it is turned off by default. -The parameters should be set from environment variable when calling inference engine API. + +First, this feature should be enabled from the cmake configuration `ENABLE_DEBUG_CAPS`. It is turned off by default in OpenVINO releases. +The parameters should be set via environment variables when calling the inference engine API.
``` $ OV_GPU_Verbose=1 ./benchmark_app ... # Run benchmark_app with OV_GPU_Verbose option $ OV_GPU_DumpLayersPath="cldnn/" ./benchmark_app ... # Run benchmark_app and store intermediate buffers into cldnn/ directory. ``` -For Windows OS, please use below syntax. +For Windows OS, use the following syntax: ``` Windows Power Shell: @@ -28,38 +30,42 @@ Windows cmd.exe: ``` ### Options syntax + Plugin is able to parse different naming styles for debug options: 1. `OV_GPU_SOME_OPTION` 2. `OV_GPU_SomeOption` Behavior when both versions are specified is not defined. -Some options also allow multiple prefixes: `OV` and `OV_GPU`. `OV` prefix is intended to be used for options common for all OpenVINO components. In case if an option is set twice with different prefixes, then `OV_GPU` has higher priority. - -### List of parameters (There are actually more than this, please see OV_GPU_Help result) - -* `OV_GPU_Help`: Show help message of debug config. -* `OV_GPU_Verbose`: Verbose execution. Currently, Verbose=1 and 2 are supported. -* `OV_GPU_PrintMultiKernelPerf`: Print kernel latency for multi-kernel primitives. This is turned on by setting 1. Execution time is printed. -* `OV_GPU_DisableUsm`: Disable the usage of usm (unified shared memory). This is turned on by setting 1. -* `OV_GPU_DisableOnednn`: Disable onednn for discrete GPU (no effect for integrated GPU) -* `OV_GPU_DumpGraphs`: Dump optimized graph into the path that this variable points. This is turned on by setting the destination path into this variable. -* `OV_GPU_DumpSources`: Dump opencl sources -* `OV_GPU_DumpLayersPath`: Enable intermediate buffer dump and store the tensors. This is turned on by setting the destination path into this variable. You can check the exact layer name from `OV_GPU_Verbose=1`. -* `OV_GPU_DumpLayers`: Dump intermediate buffers only for the layers that this variable specifies. Multiple layers can be specified with space delimiter. Dump feature should be enabled through `OV_GPU_DumpLayersPath` -* `OV_GPU_DumpLayersResult`: Dump output buffers of result layers only -* `OV_GPU_DumpLayersDstOnly`: When dumping intermediate buffer, dump destination buffer only. This is turned on by setting 1. -* `OV_GPU_DumpLayersLimitBatch`: Limit the size of batch to dump -* `OV_GPU_DryRunPath`: Dry run and serialize execution graph into the specified path -* `OV_GPU_BaseBatchForMemEstimation`: Base batch size to be used in memory estimation -* `OV_GPU_AfterProc`: Run inference after the specified process PIDs are finished, separated by space. Supported on only on linux. -* `OV_GPU_SerialCompile`: Serialize creating primitives and compiling kernels -* `OV_GPU_ForceImplType`: Force implementation type of a target primitive or layer. [primitive or layout_name]:[impl_type] For primitives, fc:onednn, fc:ocl, do:cpu, do:ocl, reduce:ocl and reduce:onednn are supported -* `OV_GPU_MaxKernelsPerBatch`: Maximum number of kernels in a batch during compiling kernels +Some options also allow multiple prefixes: `OV` and `OV_GPU`. `OV` prefix is intended to be used for options common for all OpenVINO components. When an option is set twice with different prefixes, then `OV_GPU` has higher priority. + +### List of parameters + +This is a part of the full list. To get all parameters, see OV_GPU_Help result. + +* `OV_GPU_Help`: Shows help message of debug config. +* `OV_GPU_Verbose`: Verbose execution. Currently, `Verbose=1` and `2` are supported. +* `OV_GPU_PrintMultiKernelPerf`: Prints kernel latency for multi-kernel primitives. 
This is turned on by setting `1`. Execution time is printed. +* `OV_GPU_DisableUsm`: Disables the usage of usm (unified shared memory). This is turned on by setting `1`. +* `OV_GPU_DisableOnednn`: Disables oneDNN for discrete GPU (no effect for integrated GPU). +* `OV_GPU_DumpGraphs`: Dumps an optimized graph into the path that this variable points to. This is turned on by setting the destination path into this variable. +* `OV_GPU_DumpSources`: Dumps OpenCL sources. +* `OV_GPU_DumpLayersPath`: Enables intermediate buffer dump and stores the tensors. This is turned on by setting the destination path into this variable. You can check the exact layer name from `OV_GPU_Verbose=1`. +* `OV_GPU_DumpLayers`: Dumps intermediate buffers only for the layers that this variable specifies. Multiple layers can be specified with a space delimiter. Dump feature should be enabled through `OV_GPU_DumpLayersPath`. +* `OV_GPU_DumpLayersResult`: Dumps output buffers of result layers only. +* `OV_GPU_DumpLayersDstOnly`: When dumping intermediate buffer, dumps destination buffer only. This is turned on by setting `1`. +* `OV_GPU_DumpLayersLimitBatch`: Limits the size of a batch to dump. +* `OV_GPU_DryRunPath`: Dry runs and serializes the execution graph into the specified path. +* `OV_GPU_BaseBatchForMemEstimation`: Base batch size to be used in memory estimation. +* `OV_GPU_AfterProc`: Runs inference after the specified process PIDs are finished, separated by space. Supported only on Linux. +* `OV_GPU_SerialCompile`: Serializes creating primitives and compiling kernels. +* `OV_GPU_ForceImplType`: Forces the implementation type of a target primitive or layer. The format is `[primitive or layout_name]:[impl_type]`. For primitives, `fc:onednn`, `fc:ocl`, `do:cpu`, `do:ocl`, `reduce:ocl` and `reduce:onednn` are supported. +* `OV_GPU_MaxKernelsPerBatch`: Maximum number of kernels in a batch during compiling kernels. ## Dump execution graph -The execution graph (also known as runtime graph) is a device specific graph after all transformations applied by the plugin. It's a very useful -feature for performance analysis and it allows to find a source of performance regressions quickly. Execution graph can be retrieved from the plugin + +The execution graph (also known as a runtime graph) is a device-specific graph after all transformations applied by the plugin. It is a very useful +feature for performance analysis and it allows finding a source of performance regressions quickly. The execution graph can be retrieved from the plugin using `GetExecGraphInfo()` method of `InferenceEngine::ExecutableNetwork` and then serialized as usual IR: ```cpp ExecutableNetwork exeNetwork; @@ -68,8 +74,8 @@ using `GetExecGraphInfo()` method of `InferenceEngine::ExecutableNetwork` and th execGraphInfo.serialize("/path/to/serialized/exec/graph.xml"); ``` -The capability to retrieve execution graph and store it on the disk is integrated into `benchmark_app`. The execution graph can be simply dumped -by setting additional parameter `-exec_graph_path exec_graph.xml` for `benchmark_app`. +The capability to retrieve the execution graph and store it on the disk is integrated into `benchmark_app`. The execution graph can be simply dumped +by setting an additional parameter `-exec_graph_path exec_graph.xml` for `benchmark_app`.
Output `xml` file has a format similar to usual IR, but contains execution nodes with some runtime info such as: - Execution time of each node - Mapping between nodes in final device specific graph and original input graph operations @@ -78,7 +84,7 @@ execution nodes with some runtime info such as: - Primitive type - Inference precision -Typical node in GPU execution graph looks as follows: +A typical node in the GPU execution graph looks as follows: ``` @@ -101,24 +107,24 @@ Typical node in GPU execution graph looks as follows: ``` -Most of the data here is very handy for the performance analysis. For example, for each node you can check that: -- Nodes fusion works as expected on given models (i.e. some node is missing in execution graph and it's name is a part of `originalLayersNames` list for some other node) +Most of the data here is very handy for performance analysis. For example, for each node you can check whether: +- Nodes fusion works as expected on given models (that is, some node is missing in the execution graph and its name is a part of `originalLayersNames` list for some other node) - Input and output layouts of a node are optimal in each case - Input and output precisions are valid in each case -- The node used expected kernel for execution -- And the most important: actual execution time of each operation +- The node used the expected kernel for execution +- And most important: the actual execution time of each operation This graph can be visualized using Netron tool and all these properties can be analyzed there. -Note: execution time collection for each primitive requires `CONFIG_KEY(PERF_COUNT)` to be enabled (`benchmark_app` does it automatically), thus the overall model execution time is usually much worse in such use cases. +> **NOTE**: Execution time collection for each primitive requires `CONFIG_KEY(PERF_COUNT)` to be enabled (`benchmark_app` does it automatically). Therefore, the overall model execution time is usually much worse in such use cases. ## Performance counters -This feature is a simplified version of execution graph as it provides much less information, but it might be more suitable for quick analysis and some kind of +This feature is a simplified version of the execution graph as it provides much less information, but it might be more suitable for quick analysis and some kind of processing with scripts. Performance counters can be retrieved from each `InferenceEngine::InferRequest` object using `getPerformanceCounts()` method. This feature is also integrated -into `benchmark_app` and the counters can be printed to cout using `-pc` parameter. +into `benchmark_app` and the counters can be printed to `cout` using the `-pc` parameter. The format looks as follows: @@ -135,17 +141,16 @@ relu OPTIMIZED_OUT layerType: ReLU realTime: 0 Total time: 53877 microseconds ``` -So it allows to quickly check execution time of some operation on the device and make sure that correct primitive is used. Also, the output can be easily -converted into .csv format and then used to collect any kind of statistics (e.g. execution time distribution by layer types). +So it allows you to quickly check the execution time of some operation on the device and make sure that the correct primitive is used. Also, the output can be easily converted into the *.csv* format and then used to collect any kind of statistics (for example, execution time distribution by layer types). ## Graph dumps -intel_gpu plugin allows to dump some info about intermediate stages in graph optimizer.
+The `intel_gpu` plugin allows you to dump some information about intermediate stages in the graph optimizer. -* You can dump graphs with `OV_GPU_DumpGraphs` of debug config. For the usage of debug config, please see [link](#debug-config). +* You can dump graphs with the `OV_GPU_DumpGraphs` option of debug config. For the usage of debug config, see the [link](#debug-config). -* Alternative, you can also enable the dumps from the application source code: clDNN plugin has the special internal config option `graph_dumps_dir` which can be set from the user app via plugin config: +* Alternatively, you can also enable the dumps from the application source code: clDNN plugin has a special internal config option, `graph_dumps_dir`, which can be set from the user app via plugin config: ```cpp Core ie; std::map device_config; @@ -153,7 +158,7 @@ device_config[CLDNN_CONFIG_KEY(GRAPH_DUMPS_DIR)] = "/some/existing/path/"; ie.SetConfig(device_config, "GPU"); ``` -For each stage it dumps: +For each stage, it dumps: ``` - cldnn_program_${program_id}_${stage_id}_${stage_name}.graph - graph saved in dot format which can be visualized via graphviz tool - cldnn_program_${program_id}_${stage_id}_${stage_name}.info - graph in text format @@ -162,16 +167,16 @@ For each stage it dumps: - ${program_id}_${stage_id}_${stage_name}.xml - graph in a format of execution graph ``` -Main graph usually has `program_id = 0`, graphs with other `program_id` values are usually created internally for constant propagation or some other purposes. +The main graph usually has `program_id = 0`. Graphs with other `program_id` values are usually created internally for constant propagation or some other purposes. ## Sources dumps -Since intel_gpu source tree contains only *templates* of the OpenCL™ kernels, it's quite important to get full kernels source code. +Since the `intel_gpu` source tree contains only *templates* of the OpenCL™ kernels, it is quite important to be able to get the full kernel source code. -* You can use `OV_GPU_DumpSources` of debug config. For the usage of debug config, please see [link](#debug-config). +* You can use the `OV_GPU_DumpSources` option of debug config. For the usage of debug config, see [link](#debug-config). * You can also dump OpenCL source code by changing OpenVINO source code: -clDNN plugin has the special internal config option `sources_dumps_dir` which can be set from the user app via plugin config: +clDNN plugin has a special internal config option, `sources_dumps_dir`, which can be set from the user app via plugin config: ```cpp Core ie; std::map device_config; @@ -184,12 +189,12 @@ When this key is enabled, the plugin dumps multiple files with the following nam clDNN_program_${program_id}_part_${bucket_id}.cl ``` -Note: `program_id` here might differ from `program_id` for the graph dumps as it's just a static counter for enumerating incoming programs. +> **Note**: `program_id` here might differ from `program_id` for the graph dumps, as it is just a static counter for enumerating incoming programs. -Each file contains a bucket of kernels that are compiled together. In case of any compilation errors, intel_gpu plugin will append compiler output -in the end of corresponding source file. +Each file contains a bucket of kernels that are compiled together. In case of any compilation errors, the `intel_gpu` plugin will append the compiler output +to the end of the corresponding source file.
-If you want to find some specific layer, then you'll need to use Debug/RelWithDebInfo build or modify base jitter method to append `LayerID` in release build: +To find a specific layer, use a Debug/RelWithDebInfo build or modify the base jitter method to append `LayerID` in the release build: ```cpp // inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.cpp JitConstants KernelBase::MakeBaseParamsJitConstants(const base_params& params) const { @@ -200,19 +205,19 @@ JitConstants KernelBase::MakeBaseParamsJitConstants(const base_params& params) c } ``` -When source is dumped, it actually contains huge amount of macros(`#define`). For readability, you can run c preprocessor to apply the macros. +When the source is dumped, it contains a huge number of macros (`#define`). For readability, you can run the C preprocessor to expand the macros. `$ cpp dumped_source.cl > clean_source.cl` ## Layer in/out buffer dumps -In some cases you might want to get actual values in each layer execution to compare it with some reference blob. In order to do that we have -`OV_GPU_DumpLayersPath` option in debug config. For the usage of debug config, please see [link](#debug-config). +In some cases, you might want to get actual values in each layer execution to compare it with some reference blob. To do that, use the +`OV_GPU_DumpLayersPath` option in debug config. For the usage of debug config, see [link](#debug-config). -As a prerequisite, enable ENABLE_DEBUG_CAPS from cmake configuration. +As a prerequisite, enable `ENABLE_DEBUG_CAPS` from the cmake configuration. -Then, check runtime layer name by executing benchmark_app with OV_GPU_Verbose=1. It is better to be checked with this than through IR because this may be slightly different. OV_GPU_Verbose=1 will show log of execution of each layer. +Then, check the runtime layer name by executing *benchmark_app* with `OV_GPU_Verbose=1`. It is better to check the name with `OV_GPU_Verbose=1` than through the IR, because the runtime name may be slightly different. `OV_GPU_Verbose=1` will show the log of execution of each layer. ``` # As a prerequisite, enable ENABLE_DEBUG_CAPS from cmake configuration. @@ -221,30 +226,31 @@ export OV_GPU_DumpLayers="layer_name_to_dump1 layer_name_to_dump2" export OV_GPU_DumpLayersDstOnly=1 # Set as 1 when you want to dump dest buff only ``` -Dump files have the following naming: +Dump files are named in the following convention: ``` ${layer_name_with_underscores}_${src/dst}_${port_id}.txt ``` -Each file contains single buffer in common planar format (`bfyx`, `bfzyx` or `bfwzyx`) where each value is stored on a separate line. The first line in the file constains buffer description, e.g: +Each file contains a single buffer in a common planar format (`bfyx`, `bfzyx`, or `bfwzyx`), where each value is stored on a separate line. The first line in the file contains a buffer description, for example: ``` shape: [b:1, f:1280, x:1, y:1, z:1, w:1, g:1] (count: 1280, original format: b_fs_yx_fsv16) ``` -For accuracy troubleshoot, you may want to compare the GPU plugin result against CPU plugin result. For CPU dump, see [Blob dumping](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_cpu/src/docs/blob_dumping.md) +For troubleshooting the accuracy, you may want to compare the results of the GPU and CPU plugins.
For CPU dump, see [Blob dumping](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_cpu/src/docs/blob_dumping.md) -## Run int8 model on gen9 HW +## Run int8 model on Gen9 HW -As gen9 hw doesn't have hardware acceleration, low precision transformations are disabled by default, thus quantized networks are executed in full precision (fp16 or fp32) with explicit execution of quantize operations. -If you don't have gen12 HW, but want to debug network's accuracy or performance of simple operations (which doesn't require dp4a support), then you can enable low precision pipeline on gen9 using one of the following ways: -1. Add `{PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE, PluginConfigParams::YES}` option to the plugin config +As Gen9 HW does not have hardware acceleration, low-precision transformations are disabled by default. Therefore, quantized networks are executed in full precision (FP16 or FP32), with explicit execution of quantize operations. +If you do not have Gen12 HW, but want to debug the network's accuracy or performance of simple operations (which do not require dp4a support), then you can enable the low-precision pipeline on Gen9 with one of the following approaches: +1. Add `{PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE, PluginConfigParams::YES}` option to the plugin config. 2. Enforce `supports_imad = true` [here](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/thirdparty/clDNN/src/gpu/device_info.cpp#L226) 3. Enforce `conf.enableInt8 = true` [here](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/src/cldnn_engine/cldnn_engine.cpp#L366) -After that the plugin will run exactly the same scope of transformations as on gen12HW and generate similar kernels (small difference is possible due to different EUs count) +After that, the plugin will run exactly the same scope of transformations as on Gen12 HW and generate similar kernels (a small difference is possible due to a different EU count). ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_gpu/docs/gpu_kernels.md b/src/plugins/intel_gpu/docs/gpu_kernels.md index 176300fa04c833..4c675717103177 100644 --- a/src/plugins/intel_gpu/docs/gpu_kernels.md +++ b/src/plugins/intel_gpu/docs/gpu_kernels.md @@ -1,18 +1,18 @@ -# GPU kernels implementation overview +# GPU Kernels Implementation Overview As mentioned in [GPU plugin structure](./source_code_structure.md), kernels for GPU plugin are located in `src/plugins/intel_gpu/src/kernel_selector` folder. -For each operation we usually have multiple kernels that can support different parameters and/or optimized for different scenarios. +For each operation, there are usually multiple kernels that can support different parameters and/or are optimized for different scenarios. Each operation has 3 major entities in kernel selector: - Operation specific `kernel_selector` instance - Operation parameters descriptor - Kernels itself with a set of heuristics inside for optimal selection - ## Kernel selector instance -For each operation we create kernel_selector class derived from `kernel_selector_base`. Basically, this class is needed to specify available kernels -for given operation. Each kernel selector is used as singleton. For example: +## Kernel selector instance +For each operation, you create a `kernel_selector` class derived from `kernel_selector_base`.
Basically, this class is needed to specify available kernels +for a given operation. Each kernel selector is used as a singleton. For example: ```cpp class mvn_kernel_selector : public kernel_selector_base { @@ -57,7 +57,7 @@ auto best_kernels = kernel_selector.GetBestKernels(mvn_params, mvn_optional_para ## Operation parameters -The parameters of operation for kernel_selector are defined in corresponding `${op_name}_params` class which is derived from `base_params`. For example: +The parameters of an operation for `kernel_selector` are defined in the corresponding `${op_name}_params` class, which is derived from `base_params`. For example: ```cpp struct mvn_params : public base_params { mvn_params() : base_params(KernelType::MVN) {} @@ -79,9 +79,9 @@ struct mvn_params : public base_params { }; ``` -The derived class should parameterize base class with specific `KernelType` and add operation-specific parameters. The only method that must be implemented -is `GetParamsKey()` which is used as a quick check for kernels applicability for current parameters, i.e. we take `ParamsKey` object calculated for input -operation parameters and `ParamsKey` object for each kernel, so we can compare them and discard the kernels that don't support current parameters. +The derived class should parameterize the base class with a specific `KernelType` and add operation-specific parameters. The only method that must be implemented +is `GetParamsKey()`, which is used as a quick check of kernel applicability for the current parameters. In other words, you take a `ParamsKey` object calculated for the input +operation parameters and a `ParamsKey` object for each kernel. Then, you can compare them and discard the kernels that do not support the current parameters. `ParamsKey` is implemented as a set of bit masks, so the applicability check is quite simple: ```cpp const ParamsKey implKey = some_implementation->GetSupportedKey(); @@ -97,15 +97,15 @@ if (!((implKey.mask & paramsKey.mask) == paramsKey.mask)) Each kernel must specify the following things: - Input parameters checks - - `GetSupportedKey()` method implementation which returns `ParamsKey` object for current implementation - - `Validate()` method that do more complex checks (optional) -- Dispatch data (global/local workgroup sizes, scheduling algorithm, etc) + - `GetSupportedKey()` method implementation, which returns a `ParamsKey` object for the current implementation. + - `Validate()` method, which does more complex checks (optional). +- Dispatch data (global/local workgroup sizes, scheduling algorithm, etc.) - Kernel name - must be passed to the base class c-tor - Kernel arguments specification - description of each argument in corresponding OpenCL™ kernel -- Additional JIT constants required for kernel - set of macro definitions that must be added to thi kernel template to make full specialization for given params -- Supported fused operations (if any) - a list of supported operations that can be fused into current kernel +- Additional JIT constants required for kernel - set of macro definitions that must be added to the kernel template to make full specialization for given params +- Supported fused operations (if any) - a list of supported operations that can be fused into the current kernel.
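+For instance, a `GetSupportedKey()` implementation typically looks like the following abbreviated sketch (based on existing reference kernels; the exact set of `Enable*` calls differs per kernel):
+
+```cpp
+ParamsKey MVNKernelRef::GetSupportedKey() const {
+    ParamsKey k;
+    // Data types this implementation can consume and produce.
+    k.EnableInputDataType(Datatype::F16);
+    k.EnableInputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::F16);
+    k.EnableOutputDataType(Datatype::F32);
+    // Layouts this implementation understands.
+    k.EnableInputLayout(DataLayout::bfyx);
+    k.EnableOutputLayout(DataLayout::bfyx);
+    k.EnableBatching();
+    return k;
+}
+```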
-Let's have a look at the key methods of each kernel implementation: +Key methods of each kernel implementation are as follows: ```cpp class MVNKernelRef : public MVNKernelBase { @@ -132,6 +132,7 @@ protected: ``` ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_gpu/docs/gpu_memory_formats.md b/src/plugins/intel_gpu/docs/gpu_memory_formats.md index 891814ad59f3d4..bab66dec02c276 100644 --- a/src/plugins/intel_gpu/docs/gpu_memory_formats.md +++ b/src/plugins/intel_gpu/docs/gpu_memory_formats.md @@ -1,4 +1,4 @@ -# GPU memory formats +# GPU Memory Formats The memory format descriptor in GPU plugin usually uses the following letters: - `b` - batch @@ -8,9 +8,9 @@ The memory format descriptor in GPU plugin usually uses the following letters: - `o` - output channels (for weights layout only) - `g` - groups (for weights layout only) -The combination of the characters above defines tensor format, i.e. the actual layout of tensor values in memory buffer. For example: +The combination of the characters above defines the tensor format, that is, the actual layout of tensor values in a memory buffer. For example: `bfyx` format means that the tensor has 4 dimensions in planar layout and `x` coordinate changes faster than `y`, `y` - faster than `f`, and so on. -It means that for tensor with size `[b: 2; f: 2; y: 2; x: 2]` we have a linear memory buffer with `size=16` where: +It means that for a tensor with size `[b: 2; f: 2; y: 2; x: 2]`, there is a linear memory buffer with `size=16`, where: ``` i = 0 => [b=0; f=0; y=0; x=0]; i = 1 => [b=0; f=0; y=0; x=1]; @@ -37,19 +37,19 @@ i = 14 => [b=1; f=1; y=1; x=0]; i = 15 => [b=1; f=1; y=1; x=1]; ``` -Usually, planar memory formats are not very efficient for DNN operations, so GPU plugin has plenty *blocked* format. Blocking means that we take some tensor dimension -and put blocks of adjacent elements closer in memory (in the format with single blocking they are stored linearly in the memory). Consider the most widely used -blocked format in GPU plugin: `b_fs_yx_fsv16`. First of all, let's understand what these additional letters mean. We have `b`, `f`, `y`, `x` dimensions here, so -this is 4D tensor. +Usually, planar memory formats are not very efficient for DNN operations, so GPU plugin has plenty of *blocked* formats. Blocking means that you take some tensor dimension +and put blocks of adjacent elements closer in memory (in the format with a single blocking, they are stored linearly in the memory). Consider the most widely used +blocked format in GPU plugin: `b_fs_yx_fsv16`. First of all, let's understand what these additional letters mean. There are `b`, `f`, `y`, `x` dimensions here, so +this is a 4D tensor. `fs=CeilDiv(f, block_size)`; `fs` means `feature slice` - the blocked dimension. -The block size is specified in the format name: `fsv16` - `block_size = 16`, blocked dimension is `f`; `fsv` means `feature slice vector` +The block size is specified in the format name: `fsv16` - `block_size = 16`, a blocked dimension is `f`; `fsv` means `feature slice vector` Just like with any other layout, the coordinate of the rightmost dimension (`fsv`) is changed first, then coordinate to the left (`x`), and so on. -Note: if the original `f` dimension is not divisible by block size (16 in this case), then it's aligned up to the first divisible value. 
These pad values +> **Note**: If the original `f` dimension is not divisible by block size (`16` in this case), then it is aligned up to the first divisible value. These pad values are filled with zeroes. -Let's look at the changes with the tensor above if we reorder it into `b_fs_yx_fsv16` format: -1. Actual buffer size becomes `[b: 2; f: 16; y: 2; x: 2]`, and total size = 128 +When you reorder the tensor above into `b_fs_yx_fsv16` format, changes are as follows: +1. Actual buffer size becomes `[b: 2; f: 16; y: 2; x: 2]`, and total size equals 128. 2. The order of elements in memory changes: ``` // first batch @@ -106,6 +107,7 @@ i = 127 => [b=1; f=15; y=1; x=1] == [b=1; fs=0; y=1; x=1; fsv=15]; ``` All formats used by GPU plugin are specified in `src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp` file. Most of the formats there follow the notation above. ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_gpu/docs/gpu_plugin_driver_troubleshooting.md b/src/plugins/intel_gpu/docs/gpu_plugin_driver_troubleshooting.md index 9f9485abbb8445..85e0cf033e550c 100644 --- a/src/plugins/intel_gpu/docs/gpu_plugin_driver_troubleshooting.md +++ b/src/plugins/intel_gpu/docs/gpu_plugin_driver_troubleshooting.md @@ -1,8 +1,8 @@ -# Driver issues troubleshooting +# Driver Issues Troubleshooting -If you see errors like "[CLDNN ERROR]. clGetPlatformIDs error -1001" when running OpenVINO samples / demos, then most likely you have some issues with OpenCL runtime on your machine. This document contains several hints on what to check and how to troubleshoot such kind of issues. +If you see errors like `[CLDNN ERROR]. clGetPlatformIDs error -1001` when running OpenVINO samples / demos, then most likely you have some issues with OpenCL runtime on your machine. This document contains several hints on what to check and how to troubleshoot such issues. -In order to make sure that OpenCL runtime is functional on your machine, you can use [clinfo](https://github.com/Oblomov/clinfo) tool. On many linux distributives it can be installed via package manager. If it's not available for your system, it can be easily built from sources. +To make sure that OpenCL runtime is functional on your machine, you can use the [clinfo](https://github.com/Oblomov/clinfo) tool. On many Linux distributions, it can be installed via the package manager. If it is not available for your system, it can be easily built from sources. Example of clinfo output: ``` @@ -23,26 +23,30 @@ Number of devices 1 Device Type GPU ``` ## 1. Make sure that you have GPU on your system + Some Intel® CPUs might not have integrated GPU, so if you want to run OpenVINO on iGPU, go to [ark.intel website](https://ark.intel.com/) and make sure that your CPU has it. ## 2. Make sure that OpenCL® Runtime is installed -On Windows OpenCL runtime is a part of the GPU driver, but on linux it should be installed separately. For the installation tips please refer to [OpenVINO docs](https://docs.openvino.ai/latest/openvino_docs_install_guides_installing_openvino_linux_header.html) and [OpenCL Compute Runtime docs](https://github.com/intel/compute-runtime/tree/master/opencl/doc). -To get support of Intel® Iris® Xe MAX Graphics with Linux please follow [driver installation guide](https://dgpu-docs.intel.com/devices/iris-xe-max-graphics/index.html) +OpenCL runtime is a part of the GPU driver on Windows, but on Linux it should be installed separately.
For the installation tips, refer to [OpenVINO docs](https://docs.openvino.ai/latest/openvino_docs_install_guides_installing_openvino_linux_header.html) and [OpenCL Compute Runtime docs](https://github.com/intel/compute-runtime/tree/master/opencl/doc). +To get the support of Intel® Iris® Xe MAX Graphics with Linux, follow the [driver installation guide](https://dgpu-docs.intel.com/devices/iris-xe-max-graphics/index.html) ## 3. Make sure that user has all required permissions to work with GPU device + Add the current Linux user to the `video` group: ``` sudo usermod -a -G video "$(whoami)" ``` ## 4. Make sure that iGPU is enabled + ``` $ cat /sys/devices/pci0000\:00/0000\:00\:02.0/enable 1 ``` -## 5. Make sure that "/etc/OpenCL/vendors/intel.icd" contain proper paths to the OpenCL driver +## 5. Make sure that "/etc/OpenCL/vendors/intel.icd" contains proper paths to the OpenCL driver + ``` $ cat /etc/OpenCL/vendors/intel.icd /usr/lib/x86_64-linux-gnu/intel-opencl/libigdrcl.so @@ -50,12 +54,15 @@ $ cat /etc/OpenCL/vendors/intel.icd Note: path to the runtime lib may vary in different driver versions ## 6. Use LD_DEBUG=libs to trace loaded libraries + For more details, see the [OpenCL on Linux](https://github.com/bashbaug/OpenCLPapers/blob/markdown/OpenCLOnLinux.md) ## 7. If you are using dGPU with XMX, ensure that HW_MATMUL feature is recognized -Openvino contains hello_query_device sample application: [link](https://docs.openvino.ai/latest/openvino_inference_engine_ie_bridges_python_sample_hello_query_device_README.html) + +OpenVINO contains *hello_query_device* sample application: [link](https://docs.openvino.ai/latest/openvino_inference_engine_ie_bridges_python_sample_hello_query_device_README.html) With this option, you can check whether Intel XMX(Xe Matrix Extension) feature is properly recognized or not. This is a hardware feature to accelerate matrix operations and available on some discrete GPUs. + ``` $ ./hello_query_device.py ... @@ -68,9 +75,9 @@ install them from [OpenCL Git](https://github.com/KhronosGroup/OpenCL-Headers) ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) * [OpenVINO GPU Plugin](../README.md) * [Developer documentation](../../../../docs/dev/index.md) - \ No newline at end of file diff --git a/src/plugins/intel_gpu/docs/gpu_plugin_ops_enabling.md b/src/plugins/intel_gpu/docs/gpu_plugin_ops_enabling.md index ae67f22b4ced94..01c202cd7a57bd 100644 --- a/src/plugins/intel_gpu/docs/gpu_plugin_ops_enabling.md +++ b/src/plugins/intel_gpu/docs/gpu_plugin_ops_enabling.md @@ -1,29 +1,29 @@ -# GPU plugin operations enabling flow +# GPU Plugin Operations Enabling Flow ## Terminology + * **NGraph operation**: Building block of neural networks, such as convolution or pooling. * **(clDNN) Primitive**: Basic NN operation that was defined in clDNN. One primitive is usually mapped to one ngraph operation, but graph compilation may cause the mapping not to be 1-to-1. -* **Kernel**: Actual body of execution in GPU. It also refers to specific implementations of **Primitive** for GPU, such as `convolution_gpu_winograd_2x3_s1.cl`. Usually, single kernel fulfills the operation of single primitive, but several kernels may be used to support one primitive. -* **Unittest**: Single-layer test within cldnn. +* **Kernel**: Actual body of execution in GPU. It also refers to specific implementations of **Primitive** for GPU, such as `convolution_gpu_winograd_2x3_s1.cl`. 
Usually, a single kernel fulfills the operation of a single primitive, but several kernels may be used to support one primitive. +* **Unittest**: Single-layer test within clDNN. * **Functional test**: Single-layer test in IE. -
- ## Adding new primitive + 1. Understand the new operation. * Review the [ngraph operation spec](https://github.com/openvinotoolkit/openvino/tree/master/docs/ops) * IE operations(a.k.a primitive or NN-layer) are defined by ngraph. * You can check ngraph reference implementation of the primitive as well - * e.g. [Scatter Elements Update in nGraph](https://github.com/openvinotoolkit/openvino/blob/master/src/core/reference/include/ngraph/runtime/reference/scatter_elements_update.hpp) + * For example, [Scatter Elements Update in nGraph](https://github.com/openvinotoolkit/openvino/blob/master/src/core/reference/include/ngraph/runtime/reference/scatter_elements_update.hpp) 1. Try to find existing primitive that fully or partially covers this operation. * It is also possible to transform the network so that the missing primitive is covered from existing primitive. - * e.g. [Replace reduce with pooling](https://github.com/openvinotoolkit/openvino/blob/23808f46f7b5d464fd649ad278f253eec12721b3/inference-engine/src/cldnn_engine/cldnn_engine.cpp#L205) + * For example, [replace reduce with pooling](https://github.com/openvinotoolkit/openvino/blob/23808f46f7b5d464fd649ad278f253eec12721b3/inference-engine/src/cldnn_engine/cldnn_engine.cpp#L205). + +1. Add new / extend existing clDNN primitive according to the operation spec. + 1. This phase is to enable primitive within clDNN library, without exposing it to IE. + 1. Implement **reference parallel kernel** that supports all parameters of the operation and all input/output data types and layouts. -1. Add new / extend existing cldnn primitive according to the operation spec. - 1. This phase is to enable primitive within cldnn library, without exposing it to IE. - 1. Implement **reference parallel kernel** that supports all parameters of the operation and all input/output data types and layouts - | File | Description | |------|-------------| | [scatter_elements_update_ref.cl](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/scatter_elements_update_ref.cl) | OpenCL Kernel body. 
For more detail, please see [How to write OCL kernel](#writing-ocl-kernel) section | @@ -31,18 +31,18 @@ | [scatter_elements_update_kernel_selector.(cpp,h)](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/src/kernel_selector/kernels/scatter_update/scatter_elements_update_kernel_selector.cpp) | Kernel selector for a primitive | | [register_gpu.(cpp,hpp)](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/thirdparty/clDNN/src/gpu/register_gpu.cpp) | Primitive registration | | [scatter_elements_update_gpu.cpp](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/thirdparty/clDNN/src/gpu/scatter_elements_update_gpu.cpp) | Primitive registration, input spec | - | [scatter_elements_update_inst.h](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/src/graph/include/scatter_elements_update_inst.h) | Node type declaration for cldnn program | + | [scatter_elements_update_inst.h](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/src/graph/include/scatter_elements_update_inst.h) | Node type declaration for clDNN program | | [clDNN/src/scatter_elements_update.cpp](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/src/graph/scatter_elements_update.cpp) | Code for scatter_elements_update_inst.h | | [clDNN/api/cldnn/primitives/scatter_elements_update.hpp](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/include/intel_gpu/primitives/scatter_elements_update.hpp) | clDNN primitive definition | | [common_types.h](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/src/kernel_selector/common_types.h) | Enum declaration for KernelType and arguments | - 1. Add unit tests for the new operation + 1. Add unit tests for the new operation. | File | Description | |------|-------------| | [scatter_elements_update_gpu_test.cpp](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/test_cases/scatter_elements_update_gpu_test.cpp) | Unittest for layer | - * Need to add reference code or expected result for checking the result. + * You need to add reference code or expected result for checking the result. * You can also specify the kernel with `force_implementations` in case the primitive contains multiple kernels. ``` @@ -54,31 +54,31 @@ ... ``` - * This unit test is built into `clDNN_unit_tests`. It is a gtest application. + * This unit test is built into `clDNN_unit_tests`. It is a `gtest` application. ``` # Show list of test cases openvino/bin/intel64/Debug$ ./clDNN_unit_tests64 --gtest_list_tests # Run test openvino/bin/intel64/Debug$ ./clDNN_unit_tests64 --gtest_filter=scatter_elements_update_gpu_fp16.* ``` - - * Test scope needs to be comprehensive, but not wasteful. These tests run for every PRs in CI. Let's save the planet. - + + * Test scope needs to be comprehensive, but not wasteful. These tests run for every PR in CI. Let's save the planet. + 1. Support layer fusion, if applicable - * It is usually easy to fuse some layers, such as scale, activation, quantize and eltwise, into previous layer. This fusing rule can be added to `prepare_primitive_fusing::fuse_simple_primitives`. + * It is usually easy to fuse some layers, such as *scale*, *activation*, *quantize*, and *eltwise*, into the previous layer. This fusing rule can be added to `prepare_primitive_fusing::fuse_simple_primitives`. 
* `fuse_simple_primitives` is called during [graph compilation phase](https://github.com/openvinotoolkit/openvino/blob/71c50c224964bf8c24378d16f015d74e2c1e1ce8/inference-engine/thirdparty/clDNN/src/program.cpp#L430) - * You can see general description of layer fusion [here](https://docs.openvinotoolkit.org/latest/openvino_docs_IE_DG_supported_plugins_CL_DNN.html#optimizations) + * See general description of layer fusion [here](https://docs.openvinotoolkit.org/latest/openvino_docs_IE_DG_supported_plugins_CL_DNN.html#optimizations) * Unit tests for layer fusion are placed in a single file: [fusings_gpu_test.cpp](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp). It is also compiled into `clDNN_unit_tests`. * Code for fused layers are generated with `jitter`. It is created as `FUSED_OPS..` macro in OCL code. This generation logic is in `KernelBase::MakeFusedOpsJitConstants`. -1. Add / update factory for this operation in the GPU plugin to use new primitive in inference-engine +1. Add / update factory for this operation in the GPU plugin to use new primitive in inference-engine. | File | Description | |------|-------------| - | [cldnn_engine/ops/scatter_elements_update.cpp](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/src/cldnn_engine/ops/scatter_elements_update.cpp) | Instantiation from cldnn plugin for IE | + | [cldnn_engine/ops/scatter_elements_update.cpp](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/src/cldnn_engine/ops/scatter_elements_update.cpp) | Instantiation from clDNN plugin for IE | | [cldnn_primitives_list.hpp](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp) | Registration for primitives | -1. Add functional single layer tests for the operation and try to cover most of the difference use cases of this operation +1. Add functional single-layer tests for the operation and try to cover most of the different use cases of this operation. | File | Description | |------|-------------| @@ -86,32 +86,31 @@ * It is possible to use ngraph reference code for result validation. * This is compiled into `gpuFuncTests`. It is also `gtest` application. - * Please also review the [general guideline of test infrastructure](https://github.com/openvinotoolkit/openvino/wiki/InferenceEngineTestsInfrastructure) + * Also, review the [general guideline of test infrastructure](https://github.com/openvinotoolkit/openvino/blob/master/docs/IE_PLUGIN_DG/PluginTesting.md). -1. [Optional] If there are existing IRs with this operation, try to run the full model(s) to be sure that it's correctly processed within the context +1. [Optional] If there are existing IRs with this operation, try to run the full model(s) to be sure that it is correctly processed within the context. -1. [Optional] If there are existing IRs with this operation, try to run the full model(s) and estimate performance impact from this operation on total model execution time +1. [Optional] If there are existing IRs with this operation, try to run the full model(s) and estimate performance impact from this operation on total model execution time. -1. Create PR with your changes +1. Create a PR with your changes. * If you are `OpenVINO` group member in github, CI will be triggered. - * Please review the [OpenVINO contribution guide](https://github.com/openvinotoolkit/openvino/blob/master/CONTRIBUTING.md). - -
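+For reference, pinning a specific kernel in such a unit test (see step 3 above and the next section) typically looks like the following hedged sketch (the primitive id `"mvn1"` and the kernel name are illustrative, and the exact option types may differ between versions):
+
+```cpp
+// Hypothetical example: force the "mvn1" primitive to use a particular kernel.
+build_options bo;
+bo.set_option(build_option::optimize_data(true));
+implementation_desc mvn_impl = { format::bfyx, "mvn_gpu_ref" };
+bo.set_option(build_option::force_implementations({ {"mvn1", mvn_impl} }));
+network net(engine, topology, bo);
+```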
+ * Review the [OpenVINO contribution guide](https://github.com/openvinotoolkit/openvino/blob/master/CONTRIBUTING.md). ## Adding new kernel for an existing primitive -* The process is quite similar to previous one. You can skip already existing steps. -* Main work is adding new kernel and registering it from kernel selector. -* You may need to add unit test for that new kernel. Specific kernel can be chosen with `build_option::force_implementations`. -* It is not possible to specify kernel from functional test(IE). -
+* The process is quite similar to the previous one. You can skip already existing steps. +* Main work is adding a new kernel and registering it from the kernel selector. +* You may need to add a unit test for that new kernel. A specific kernel can be chosen with `build_option::force_implementations`. +* It is not possible to specify a kernel from a functional test(IE). ## Writing OCL kernel ### Jitter -In GPU OCL kernels, many conditional statements are processed with `#ifdef` so that it can be handled during compile-time. The definitions are created with `jitter.cpp`. It is set during graph compilation. You can see generated macros following the steps in [source dumps](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/gpu_debug_utils.md#sources-dumps). +In GPU OCL kernels, many conditional statements are processed with `#ifdef` so that they can be handled at compile time. The definitions are created with `jitter.cpp`. It is set during graph compilation. You can see the generated macros by following the steps in [source dumps](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/gpu_debug_utils.md#sources-dumps). + Jitter also contains run-time parameters such as input and output size. -Additional macros can be defined from host-code of kernel itself. For example, see below code snippet. It passes `SUB_GROUP_SIZE` through macro definition through jitter. +Additional macros can be defined from the host-code of a kernel itself. For example, see the code snippet below. It passes `SUB_GROUP_SIZE` as a macro definition through jitter. ``` // GetJitConstants method of the kernel const size_t sub_group_size = 16; @@ -120,17 +119,22 @@ Additional macros can be defined from host-code of kernel itself. For example, s ``` ### Accessing input and output tensor -Jitter generates macros for index calculations. With these macros, you can program ocl kernel in a layout-agnostic way. If you use the macro `${TENSOR_NAME}_GET_INDEX`, you can get 1d-index from tensor coordinate whether the format is planar(such as `bfyx` or `byxf`) or blocked.(such as `b_fs_yx_fsv16`). You can check [source code for GET_INDEX macro](https://github.com/openvinotoolkit/openvino/blob/7f8d3aa63899a3e3362c95eb7d1b04a5899660bd/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp#L313). + +Jitter generates macros for index calculations. With these macros, you can program an OCL kernel in a layout-agnostic way. If you use the macro `${TENSOR_NAME}_GET_INDEX`, you can get the 1D index from a tensor coordinate whether the format is planar (such as `bfyx` or `byxf`) or blocked (such as `b_fs_yx_fsv16`). You can check [source code for GET_INDEX macro](https://github.com/openvinotoolkit/openvino/blob/7f8d3aa63899a3e3362c95eb7d1b04a5899660bd/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp#L313). ### Layout support + If a kernel is not performance-critical, you can support `bfyx`, `bfzyx` and `bfwzyx` only for layout. Those are default layouts. As an optimized format, `b_fs_yx_fsv16`, `b_fs_yx_fsv4` or `byxf` can be used as well.
-[General description of layout can be found here](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/gpu_memory_formats.md) and [header file is here](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/thirdparty/clDNN/api/tensor.hpp) + +[General description of layout can be found here](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/docs/gpu_memory_formats.md) and [header file is here](https://github.com/openvinotoolkit/openvino/blob/master/inference-engine/thirdparty/clDNN/api/tensor.hpp). ### Layer fusion + When layers are fused, `jitter` will create macros to generate code for fused layers. It is realized into `FUSED_OPS..` in OCL kernel. You can understand the usage from other kernels. There is a [comment that describes layer fusion](https://github.com/openvinotoolkit/openvino/blob/7f8d3aa63899a3e3362c95eb7d1b04a5899660bd/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.h#L521). ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_gpu/docs/gpu_plugin_unit_test.md b/src/plugins/intel_gpu/docs/gpu_plugin_unit_test.md index 87632f28d8e868..134f96bc258391 100644 --- a/src/plugins/intel_gpu/docs/gpu_plugin_unit_test.md +++ b/src/plugins/intel_gpu/docs/gpu_plugin_unit_test.md @@ -1,14 +1,14 @@ -# GPU plugin unit test +# GPU Plugin Unit Test -GPU plugin has two type tests: first one is functional tests and second one is unit tests. +GPU plugin has two types of tests: functional and unit tests. This article is about the latter. -- The functional test is testing single layer, behavior, sub graph and low precision transformation on inference engine level for various layout and data types such as fp16 and fp32. -- The unit test is testing cldnn primitive and core type modules on GPU plugin level. Unlike functional test, it is possible to test by explicitly specifying the format of the input such as `bfyx` or `b_fs_yx_fsv16`. This documentation is about this type of test. +- The functional test is testing a single layer, behavior, subgraph and low-precision transformation on inference engine level for various layout and data types, such as FP16 and FP32. +- The unit test is testing clDNN primitive and core-type modules on GPU plugin level. Unlike the functional test, it is possible to test by explicitly specifying the format of the input, such as `bfyx` or `b_fs_yx_fsv16`. -# Structure of unit test +# Structure of a unit test -Intel GPU unit test (aka clDNN unit test) is a set of unit tests each of which is for testing all primitives, fusions and fundamental core types of GPU plugin. -There are 4 sub categories of unit tests as below. +Intel GPU unit test (aka clDNN unit test) is a set of unit tests, each of which is for testing all primitives, fusions, and fundamental core types of GPU plugin. +There are four subcategories of unit tests as below. ```bash openvino/src/plugins/intel_gpu/tests - root of Intel GPU unit test @@ -19,42 +19,45 @@ openvino/src/plugins/intel_gpu/tests - root of Intel GPU unit test ``` - ### fusions - - Fusion is an algorithm that fuse several operations into one optimized operation. For example, two nodes of `conv -> relu` may be fused into single node of `conv`. + + - Fusion is an algorithm that fuses several operations into one optimized operation. For example, two nodes of `conv -> relu` may be fused into a single node of `conv`. 
- Fusion unit tests check whether the fusion is done as expected.
 - fusion_test_common.cpp
-   - The base class for fusing test, i.e., [BaseFusingTest](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp#L19), is implemented here. It tests whether the fusing is successful or not by comparing the execution results of the two networks, one is the fused network, the other is non fused network for same topology.
-   - [BaseFusingTest](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp#L19) has an important method called *`compare()`*.
-   - *`compare()`* method has the following three tasks
+   - The base class for a fusing test, that is, [BaseFusingTest](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp#L19), is implemented here. It tests whether the fusing is successful or not by comparing the execution results of two networks: one is the fused network, the other is the non-fused network for the same topology.
+   - [BaseFusingTest](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp#L19) has an important method called `compare()`.
+   - The `compare()` method performs the following three tasks:
     - Execute two networks (the fused network and the non-fused network)
-     - Compare the actual number of executed primitives with the expected number of executed primitives in test params
+     - Compare the actual number of executed primitives with the expected number of executed primitives in test params
     - Compare the results between the fused and non-fused networks
 - eltwise_fusing_test.cpp
-   - Check whether or not eltwise is fused to other primitives as expected
+   - Checks whether or not *eltwise* is fused into other primitives as expected
 - [primitive_name]_fusion_test.cpp
-   - Check that nodes such as eltwise or activation are fusing to the [primitive_name] as expected
+   - Checks that nodes such as *eltwise* or *activation* are fused into the [primitive_name] as expected
 - The details of how to add each instance are described [below](#fusions-1).
- ### test_cases
-  - It is mainly checking that cldnn primitives and topology creation are working as designed
-  - It also checks configurations for OpenCL functionalities such as cl_cache, cl_mem allocation and cl_command_queue modes
-- ### module_tests
-  - Unit tests for fundamental core modules such as ocl_user_events, format, layout, and usm memory
-  - Check ocl_user_event is working as expected
-  - Check all format is converted to the string and trait
-  - Check various layouts are created as expected
-  - Check usm_host and usm device memory buffer creation and read/write functionality
+  - It mainly checks whether clDNN primitives and topology creation work as designed.
+  - It also checks configurations for OpenCL functionalities such as *cl_cache*, *cl_mem allocation* and *cl_command_queue* modes
+
+- ### module_tests
+
+  - Unit tests for fundamental core modules such as `ocl_user_events`, format, layout, and USM memory:
+    - check whether `ocl_user_event` works as expected,
+    - check whether all formats are converted to the string and trait representations,
+    - check whether various layouts are created as expected,
+    - check `usm_host` and USM device memory buffer creation and read/write functionality.
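+For orientation, a minimal test_cases-style test might look like the sketch below. This is illustrative only: the primitive and helper signatures are simplified, so check the existing tests under `test_cases/` for the exact, current API.
+```c++
+// A sketch of a typical clDNN unit test: build a topology, run the network,
+// and compare the output against manually computed expected values.
+TEST(activation_gpu_f32, basic_relu_sketch) {
+    auto& engine = get_test_engine();  // helper provided by test_utils
+
+    auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
+    set_values(input, { -1.f, 2.f, -3.f, 4.f });
+
+    topology topology;
+    topology.add(input_layout("input", input->get_layout()));
+    topology.add(activation("relu", "input", activation_func::relu));
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    // Expected ReLU results { 0, 2, 0, 4 } are computed by hand from the input above;
+    // read the output memory back with the usual test_utils accessors and compare.
+}
+```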
- ### test_utils
-  - Defined base functions of unit test such as *`get_test_engine()`* which returns `cldnn::engine`
-  - Utility functions such as Float16, random_gen and uniform_quantized_real_distribution
+  - Defines base functions of a unit test, such as `get_test_engine()`, which returns `cldnn::engine`
+  - Utility functions, such as `Float16`, `random_gen` and `uniform_quantized_real_distribution`
 
 # How to run unit tests
 
 ## Build unit test
-1. Turn on `ENABLE_TESTS` and `ENABLE_CLDNN_TESTS` in cmake option
+1. Turn on `ENABLE_TESTS` and `ENABLE_CLDNN_TESTS` in the cmake options:
 
```bash
cmake -DCMAKE_BUILD_TYPE=Release \
@@ -69,21 +72,19 @@ openvino/src/plugins/intel_gpu/tests - root of Intel GPU unit test
make clDNN_unit_tests
```
 
-3. You can find _`clDNN_unit_tests64`_ in bin directory after build
-
-
+3. You can find `clDNN_unit_tests64` in the *bin* directory after the build
 
 ## Run unit test
 
-You can run _`clDNN_unit_tests64`_ in bin directory which is the output of openvino build
+You can run `clDNN_unit_tests64` in the *bin* directory, which is the output of the OpenVINO build.
 
-If you want to run specific unit test, you can use gtest_filter option as follows:
+If you want to run a specific unit test, you can use the `gtest_filter` option as follows:
 
```
./clDNN_unit_tests64 --gtest_filter='*filter_name*'
```
 
-Then, you can get the result like this
+Then, you will get a result similar to:
 
```bash
openvino/bin/intel64/Release$ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD
@@ -101,34 +102,33 @@ Note: Google Test filter = *fusings_gpu/conv_fp32_reorder_fsv16_to_bfyx.basic/0*
[  PASSED  ] 1 test.
```
 
-
 # How to create new test case
 
 ## TEST and TEST_P (GoogleTest macros)
 
-GPU unit tests are using 2 types of test macros(**TEST** and **TEST_P**) in [GoogleTest (aka gtest)](https://google.github.io/googletest/)
+GPU unit tests use two types of test macros (**TEST** and **TEST_P**) in [GoogleTest (aka gtest)](https://google.github.io/googletest/)
 
- ### **TEST**
-  - **TEST** is the simple test case macro.
-  - To make test-case using **TEST**, define an individual test named *`TestName`* in the test suite *`TestSuiteName`*
+  - **TEST** is a simple test case macro.
+  - To make a test case using **TEST**, define an individual test named `TestName` in the test suite `TestSuiteName`:
 
```
TEST(TestSuiteName, TestName) {
  ... test body ...
}
```
-  - The test body can be any code under test. To determine the outcomes within the test body, use assertion such as *`EXPECT_EQ`* and *`ASSERT_NE`*.
-
+  - The test body can be any code under test. To determine the outcome within the test body, use assertions, such as `EXPECT_EQ` and `ASSERT_NE`.
+
- ### **TEST_P**
-  - **TEST_P** is used to set test case using test parameter sets
-  - To make test-case using **TEST_P**, define an individual value-parameterized test named *`TestName`* that uses the test fixture class *`TestFixtureName`* which is the test suite name
+  - **TEST_P** is used to define a test case that uses test parameter sets
+  - To make a test case using **TEST_P**, define an individual value-parameterized test named `TestName` that uses the test fixture class `TestFixtureName`, which is the test suite name:
```
TEST_P(TestFixtureName, TestName) {
  ... statements ...
}
```
-  - Then, instantiates the value-parameterized test suite *`TestSuiteName`* which is defined defined with **TEST_P**
+  - Then, instantiate the value-parameterized test suite `TestSuiteName`, which is defined with **TEST_P**:
```c++
INSTANTIATE_TEST_SUITE_P(InstantiationName,TestSuiteName,param_generator)
```
@@ -136,29 +136,28 @@ GPU unit tests are using 2 types of test macros(**TEST** and **TEST_P**) in [G
 
 ## module_test and test_cases
 
-- module_test and test_cases are testing GPU plugin using both **TEST_P** and **TEST**.
-- Please refer to [the fusion test](#fusions-1) for the test case based on **TEST_P**
+- *module_test* and *test_cases* test the GPU plugin using both **TEST_P** and **TEST**.
+- Refer to [the fusion test](#fusions-1) for the test case based on **TEST_P**
- **TEST** checks the test result by comparing the execution results with expected values after running a network created from the target topology.
- It is important to generate the test input and the expected output in **TEST**
-  - You can create input data and expected output data using the 3 following ways:
-    - Generate simple input data and calculate the expected output data from input data manually like [basic_deformable_convolution_def_group1_2](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp#L254)
-    - Generate random input and get the expected output using reference function which is made in the test codes like [mvn_test_across_channels_outside_sqrt_bfyx](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp#L108)
-    - Generate random input and get the expected output from another reference kernel which is existed in cldnn kernels like [mvn_random_test_bsv32](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp#L793)
+  - You can create input data and expected output data using these three approaches:
+    - Generate simple input data and calculate the expected output data from the input data manually, like [basic_deformable_convolution_def_group1_2](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/test_cases/convolution_gpu_test.cpp#L254)
+    - Generate random input and get the expected output, using a reference function implemented in the test code, like [mvn_test_across_channels_outside_sqrt_bfyx](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp#L108)
+    - Generate random input and get the expected output from another reference kernel that exists in clDNN kernels, like [mvn_random_test_bsv32](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/test_cases/mvn_gpu_test.cpp#L793)
 
-- When you allocate input data, please keep in mind that the layout order in *`engine.allocation_memory`* is not *`bfyx`* but *`bfxy`*. i.e., example, if input is {1,1,4,5}, the layout should be below
+- When you allocate input data, keep in mind that the layout order in `engine.allocate_memory` is not `bfyx` but `bfxy`. For example, if the input is `{1,1,4,5}`, the layout should be as below:
 
```c++
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 5, 4 } });
```
 
- ## fusions
 
-- It is implemented based on **TEST_P** because there are many cases where multiple layouts are tested in the same topology
-- If the fusing test class is already existed, you can use it.
 otherwise, you should make new fusing test class which is inherited [BaseFusingTest](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp#L19)
-  - The new fusing test class should create `execute()` method which creates fused / non fused networks and calls *`compare`* method after setting input
-- Create test case using **TEST_P**
-  - You can make the desired networks using create_topologies.
+- It is implemented based on **TEST_P** because there are many cases where multiple layouts are tested in the same topology.
+- If the fusing test class already exists, you can use it. Otherwise, you should make a new fusing test class, which inherits from [BaseFusingTest](https://github.com/openvinotoolkit/openvino/blob/master/src/plugins/intel_gpu/tests/fusions/fusion_test_common.hpp#L19).
+  - The new fusing test class should implement the `execute()` method, which creates the fused / non-fused networks and calls the `compare` method after setting the input.
+- Create a test case, using **TEST_P**:
+  - You can make the desired networks using `create_topologies`.
```mermaid
flowchart LR
    nodeA1(bias) --> nodeA2(conv_prim)
@@ -186,7 +185,7 @@ class nodeA3 moss1
class nodeA8 steel1
class nodeA4,nodeA1,nodeA6,nodeA9,nodeA11 carbon1
```
-  - For example, if you design the networks like the one above, you can make the test code as follow
+  - For example, if you design the networks like the one above, you can make the test code as follows:
 
```c++
class conv_fp32_multi_eltwise_4_clamp : public ConvFusingTest {};
@@ -218,12 +217,12 @@
```
 
-  - If you want to change some node's layout format to specific format, you can change it using *`build_option::force_implementations`*.
-    - In the sample codes, *`conv_prim`* is set to *`format::b_fs_yx_fsv16`* by *`build_option::force_implementations`*
-- *`tolerance`* is used as to threshold to check whether or not output result are same between fused network and non fused network in *`compare`* function.
-- After the test case is implemented, use `INSTANTIATE_TEST_SUITE_P` to set the test suite for each parameter case as follows.
-  - Check all variables in *`convolution_test_params`* to make `CASE_CONV_FP32_2`.
-  - In *`convolution_test_params`*, all tensor, format, and data_types are used in common in all convolution fusing tests. So you can define `CASE_CONV_FP32_2` with all variables except *`expected_fused_primitives`* and *`expected_not_fused_primitives`*
+  - If you want to change some node's layout format to a specific format, you can change it using `build_option::force_implementations`.
+    - In the sample code, `conv_prim` is set to `format::b_fs_yx_fsv16` by `build_option::force_implementations`.
+- `tolerance` is used as a threshold to check whether or not the output results are the same between a fused network and a non-fused network in the `compare` function.
+- After the test case is implemented, use `INSTANTIATE_TEST_SUITE_P` to set the test suite for each parameter case as follows.
+  - Check all variables in `convolution_test_params` to make `CASE_CONV_FP32_2`.
+  - In `convolution_test_params`, all tensor, format, and `data_types` are used in common in all convolution fusing tests. Therefore, you can define `CASE_CONV_FP32_2` with all variables except `expected_fused_primitives` and `expected_not_fused_primitives`.
```c++ struct convolution_test_params { @@ -256,6 +255,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_scale, ::testing::ValuesIn(std:: ``` ## See also + * [OpenVINO™ README](../../../../README.md) * [OpenVINO Core Components](../../../README.md) * [OpenVINO Plugins](../../README.md) diff --git a/src/plugins/intel_gpu/docs/graph_optimization_passes.md b/src/plugins/intel_gpu/docs/graph_optimization_passes.md index 5a96e74a80c3c5..d30260ca7dc389 100644 --- a/src/plugins/intel_gpu/docs/graph_optimization_passes.md +++ b/src/plugins/intel_gpu/docs/graph_optimization_passes.md @@ -1,25 +1,26 @@ # Graph Optimization Passes -Graph optimization is a collection of optimization passes that happens to convert a general network description into a network-description-for-GPU-execution. It happens in the constructor of `cldnn::program`. In other words, the input of graph optimization is `topology`[(link)](./basic_data_structures.md#topology) and output is `program`[(link)](./basic_data_structures.md#program-impl--). +Graph optimization is a collection of optimization passes that convert a general network description into a network-description-for-GPU-execution. It happens in the constructor of `cldnn::program`. In other words, the input of graph optimization is `topology` [(link)](./basic_data_structures.md#topology) and the output is `program` [(link)](./basic_data_structures.md#program-impl--). -The transformation from original graph into the final graph is quite complicated. The steps are divided into smaller pieces(`pass`). The purpose of this documentation is not to explain every step in detail, but to explain key steps. +The transformation from the original graph into the final graph is quite complicated. The steps are divided into smaller pieces (`pass`). The purpose of this documentation is not to explain every step in detail, but to explain key steps. -For debugging purpose, you can dump the optimized graph after each step. Please see this [link](./gpu_debug_utils.md#graph-dumps) for detail. +For debugging purposes, you can dump the optimized graph after each step. See this [article](./gpu_debug_utils.md#graph-dumps) for details. -Note: The optimization passes runs in sequence and the prefixed number indicates the sequence. However, this sequence number might change in the future. +> **Note**: The optimization passes run in sequence and the prefixed number indicates the sequence. However, the sequence number might change in the future. -* **00_init**: First step of the optimization. If you want to see first cldnn graph, you can check this. It collects network output node information and set node processing order. -* **08_prepare_primitive_fusing**: Fuse post-operations into other primitives. For example, relu is fused into convolution. Element-wise add operation can usually be fused into predecessor, too. The layout for the primitive is not chosen at this point yet, and we don't know which kernel will be chosen for the primitive. However, support for post-operation is dependent on the chosen kernel. That is why this pass contains some logic to guess the layout. -* **09_reorder_inputs**: Select layout format for each primitives. This is done by calling `layout_optimizer::get_preferred_format` function which returns preferred format for a node(or “any” which means that format must be propagated from adjacent nodes if possible). Then it propagate formats for nodes with “any” preferred format to minimize local reorders. 
After propagating formats, it inserts actual reorders nodes into the graph. As a result of this pass, we get quite complicated graph with many _redundant_ reorders. It will be removed from `remove_redundant_reorders`. -* **17_remove_redundant_reorders**: This pass is about removing reorder, but it has two conceptual purpose. First one is removing _redundant_ reorders. For example, when the network contains a pattern like `reorder - reorder - reorder`, it can be shrunk into single `reorder`. Second one is about supporting cross-layout operation of primitive. For example, when a `convolution` needs to receive `bfyx` input and to generate `b_fs_yx_fsv16` output, the initial graph from `reorder_inputs` looks like this: `data(bfyx) --> reorder(b_fs_yx_fsv16) --> convolution(b_fs_yx_fsv16)`. This pass looks for such pattern and removes the reorder to generate cross-layout graph for the target convolution: `data(bfyx) --> convolution(b_fs_yx_fsv16)` -* **19_prepare_buffer_fusing**: This pass is for implicit concat or implicit crop. Implicit concat is about removing `concatenation` primitive when two predecessors can put result into the target buffer of concat directly. For example, if two convolution results are concatenated along f-axis and the layout is bfyx format and b=1, we can just remove concat primitive and manipulate the output address of the convolutions to point proper locations. -* **20_add_required_reorders**: This pass tries to keep graph consistency and add reorder if current format is not supported by a node. It checks if current input format is present in `implementation_map` defined in `_gpu.cpp` file. If it is not defined, this pass tries to change layout to one of the most common format [bfyx, yxfb, byxf] and picks the first supported format. -* **21_add_onednn_optimization_attributes**: This pass generates onednn attributes for post operation[(link)](https://oneapi-src.github.io/oneDNN/dev_guide_convolution.html#post-ops-and-attributes). OpenVINO gpu plugin(a.k.a. cldnn) has a set of defined post operations and it requires some transformation to map those into onednn post-operations. -* **22_compile_graph**: This pass creates `primitive_impl` through kernel selector. In this pass, the kernel for each node is chosen. For onednn primitives, OpenCL code is compiled in this stage. For cldnn primitives, OpenCL code will be compiled after all passes. -* **26_propagate_constants**: This pass reorders weights for convolution, deconvolution and FC to a required format. As kernel is chosen in `compile_graph` stage, it is now known that some reordering is required for weights. It is because the weights are stored in a simple planar format in IR, but other format is usually required for optimized convolution(or deconv, FC). In order to reorder weights, this pass creates a simple graph that receives weights and generates reordered weights. We get the reordered weights by executing the network and the reordered weights are inserted back into the original graph. -* **31_oooq_memory_dependencies**: In GPU, device memory is a limited resource and it is not necessary to keep all the intermediate results when inferencing a network. Therefore, the buffer is reused when the content is not needed anymore. However, it is necessary to take it into consideration that intel_gpu plugin is using out-of-order queue. As we are not sure the exact sequence of execution, there is additional limitation of reusing buffer. 
 For example, in case of multi-branch structure like inception, there is no direct dependencies between the branches except for the common ancestor. However, in OOOQ execution mode, as we are not sure the sequence of execution in inception module, it is necessary not to reuse the buffer from one branch by another branch. Such _implicit dependency_ information is processed in this pass.
+* **00_init**: First step of the optimization. If you want to see the first clDNN graph, you can check this. It collects network output node information and sets node processing order.
+* **08_prepare_primitive_fusing**: Fuses post-operations into other primitives. For example, *ReLU* is fused into convolution. An element-wise *add* operation can usually be fused into its predecessor, too. The layout for the primitive is not chosen at this point yet, and you do not know which kernel will be chosen for the primitive. However, support for post-operations depends on the chosen kernel. That is why this pass contains some logic to guess the layout.
+* **09_reorder_inputs**: Selects the layout format for each primitive. This is done by calling the `layout_optimizer::get_preferred_format` function, which returns the preferred format for a node (or “any”, which means that the format must be propagated from adjacent nodes if possible). Then it propagates formats for nodes with the “any” preferred format to minimize local reorders. After propagating formats, it inserts actual reorder nodes into the graph. The result of this pass is a quite complicated graph with many _redundant_ reorders, which will be removed by `remove_redundant_reorders`.
+* **17_remove_redundant_reorders**: This pass is about removing reorders, but it has two conceptual purposes. The first one is removing _redundant_ reorders. For example, when the network contains a pattern like `reorder - reorder - reorder`, it can be shrunk into a single `reorder`. The second one is about supporting cross-layout operation of a primitive. For example, when a `convolution` needs to receive `bfyx` input and to generate `b_fs_yx_fsv16` output, the initial graph from `reorder_inputs` looks as follows: `data(bfyx) --> reorder(b_fs_yx_fsv16) --> convolution(b_fs_yx_fsv16)`. This pass looks for such a pattern and removes the reorder to generate a cross-layout graph for the target convolution: `data(bfyx) --> convolution(b_fs_yx_fsv16)`
+* **19_prepare_buffer_fusing**: This pass is for implicit concat or implicit crop. Implicit concat is about removing the `concatenation` primitive when two predecessors can put their results into the target buffer of concat directly. For example, if two convolution results are concatenated along the f-axis and the layout is `bfyx` format and `b=1`, you can just remove the concat primitive and manipulate the output address of the convolutions to point to proper locations.
+* **20_add_required_reorders**: This pass tries to keep graph consistency and adds a reorder if the current format is not supported by a node. It checks if the current input format is present in `implementation_map`, defined in the `_gpu.cpp` file. If it is not defined, this pass tries to change the layout to one of the most common formats `[bfyx, yxfb, byxf]` and picks the first supported one.
+* **21_add_onednn_optimization_attributes**: This pass generates oneDNN attributes for post operations [(link)](https://oneapi-src.github.io/oneDNN/dev_guide_convolution.html#post-ops-and-attributes).
 OpenVINO GPU plugin (clDNN) has a set of defined post operations and it requires some transformation to map those into oneDNN post-operations.
+* **22_compile_graph**: This pass creates `primitive_impl` through the kernel selector. In this pass, the kernel for each node is chosen. For oneDNN primitives, OpenCL code is compiled in this stage. For clDNN primitives, OpenCL code will be compiled after all passes.
+* **26_propagate_constants**: This pass reorders weights for convolution, deconvolution and FC to a required format. As the kernel is chosen in `compile_graph` stage, it is now known that some reordering is required for the weights. It is because the weights are stored in a simple planar format in IR, but another format is usually required for optimized convolution (or deconv, FC). To reorder weights, this pass creates a simple graph that receives weights and generates reordered weights. You get the reordered weights by executing the network and the reordered weights are inserted back into the original graph.
+* **31_oooq_memory_dependencies**: In GPU, device memory is a limited resource and it is not necessary to keep all the intermediate results when inferencing a network. Therefore, the buffer is reused when the content is not needed anymore. However, it is necessary to take into consideration that the `intel_gpu` plugin uses an out-of-order queue. As you are not sure about the exact sequence of execution, there is an additional limitation on reusing the buffer. For example, in case of a multi-branch structure like inception, there are no direct dependencies between the branches except for the common ancestor. However, in OOOQ execution mode, as you are not sure about the sequence of execution in the inception module, it is necessary not to reuse the buffer from one branch by another branch. Such _implicit dependency_ information is processed in this pass.
 
 ## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
diff --git a/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md b/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md
index aa1d54d3733004..eb548eb386abe4 100644
--- a/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md
+++ b/src/plugins/intel_gpu/docs/memory_allocation_gpu_plugin.md
@@ -1,23 +1,26 @@
-# Memory allocation in GPU plugin
+# Memory Allocation in GPU Plugin
 
 ## Allocation types
-GPU plugin supports 4 types of memory allocation as below. Note that the prefix `usm_` indicates the allocation type using Intel Unified Shared Memory (USM) extension for OpenCL. For more detailed information about the USM extension, refer to [this](https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_unified_shared_memory.html) page.
-* `cl_mem` : Standard OpenCL cl_mem allocation
-* `usm_host` : Allocated in host memory and accessible by both of host and device. Not migratable.
+
+GPU plugin supports four types of memory allocation, as described below. Note that the prefix `usm_` indicates the allocation type using the Intel Unified Shared Memory (USM) extension for OpenCL. For more detailed information about the USM extension, refer to [this](https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_unified_shared_memory.html) page.
+* `cl_mem` : Standard OpenCL `cl_mem` allocation.
+* `usm_host` : Allocated in host memory and accessible by both host and device. Non-migratable.
* `usm_shared` : Allocated in both host and device memory, and accessible by both host and device.
 The memories are automatically migrated on demand.
-* `usm_device` : Allocated in device memory and accessible only by the device which owns the memory. Not migratable.
+* `usm_device` : Allocated in device memory and accessible only by the device which owns the memory. Non-migratable.
 
 Note that there are a few restrictions on memory allocation:
-* Allocation of single memory object should not exceed the available device memory size, i.e., the value obtained by `CL_DEVICE_GLOBAL_MEM_SIZE`.
-* The sum of all memory objects required to execute a kernel (i.e., the sum of inputs and outputs of a kernel) should not exceed the target available memory. For example, if you want to allocate a memory object to the device memory, the above restrictions should be satisfied against the device memory. Otherwise, the memory object should be allocated on the host memory.
+* Allocation of a single memory object should not exceed the available device memory size, that is, the value obtained by `CL_DEVICE_GLOBAL_MEM_SIZE`.
+* The sum of all memory objects required to execute a kernel (that is, the sum of inputs and outputs of a kernel) should not exceed the target available memory. For example, if you want to allocate a memory object to the device memory, the above restrictions should be satisfied against the device memory. Otherwise, the memory object should be allocated on the host memory.
 
 ## Memory allocation API
+
 In the GPU plugin, the allocation for each allocation type can be done with [engine::allocate_memory](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/include/intel_gpu/runtime/engine.hpp#L51), which
-calls the corresponding memory object wrapper for each allocation type: [gpu_buffer](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L35), [gpu_usm](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L291).
+calls the corresponding memory object wrapper for each allocation type: [gpu_buffer](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L35) or [gpu_usm](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/runtime/ocl/ocl_memory.cpp#L291).
+
+## Dump memory allocation history
 
-## Dump memory allocation history
-The memory allocation history is being managed by the `engine`, which can be dumped by setting the environment variable `OV_GPU_Verbose=1` if the OpenVino is built with the cmake configuration `ENABLE_DEBUG_CAPS=ON`.
+The memory allocation history is managed by the `engine` and can be dumped by setting the environment variable `OV_GPU_Verbose=1` if OpenVINO is built with the cmake configuration `ENABLE_DEBUG_CAPS=ON`.
```cpp
...
GPU_Debug: Allocate 58982400 bytes of usm_host allocation type (current=117969612; max=117969612)
@@ -26,26 +29,28 @@ GPU_Debug: Allocate 44236800 bytes of usm_host allocation type (current=16220641
GPU_Debug: Allocate 14873856 bytes of usm_device allocation type (current=59500236; max=59500236)
...
```
-Here, `current` denotes the total allocated memory amount at that moment while `max` denotes the peak record of the total memory allocation until that moment.
+Here, `current` denotes the amount of total allocated memory at that moment, while `max` denotes the peak record of the total memory allocation until that moment.
 
 ## Allocated memory objects
-The typical memory allocation performed in the GPU plugin can be categorized as follows:
-* `Constant memory allocation`: In GPU plugin, constant data are held by the `data` primitives and the required memory objects are [allocated](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/plugin/ops/constant.cpp#L181) and assigned at the creation of the data primitive. First, it is allocated on the host memory and the constant data are copied from the corresponding blob in ngraph. Once all the transformation and optimization processes in `cldnn::program` is finished and the user nodes of those data are known as the GPU operations using the device memory, then the memory is reallocated on the device memory and the constants data are copied to there (i.e., [transferred](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/graph/program.cpp#L457)). Note that constant data are shared within batches and streams.
+
+The typical memory allocation performed in the GPU plugin can be categorized as follows:
+* `Constant memory allocation`: In the GPU plugin, constant data are held by the `data` primitives and the required memory objects are [allocated](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/plugin/ops/constant.cpp#L181) and assigned at the creation of the data primitive. First, it is allocated on the host memory and the constant data are copied from the corresponding blob in ngraph. Once all the transformation and optimization processes in `cldnn::program` are finished and the user nodes of the data are known to be GPU operations using the device memory, the memory is reallocated on the device memory and the constant data is copied there (that is, [transferred](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/graph/program.cpp#L457)). Note that constant data is shared within batches and streams.
+* `Output memory allocation`: A memory object to store the output result of each primitive is created at the creation of each `primitive_inst` ([link](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/src/graph/primitive_inst.cpp#L263)), except when the output reuses the input memory. Note that the creation of a `primitive_inst` is done in descending order of the output memory size to achieve better memory reuse efficiency.
+
+* `Intermediate memory allocation`: Some primitives, such as _detection_output_ and _non_max_suppression_, consisting of multiple kernels, require intermediate memories to exchange data between those kernels. The allocation of such intermediate memories happens after all allocation for `primitive_insts` is finished ([link](https://github.com/openvinotoolkit/openvino/blob/4c01d6c50c6d314373dffd2a8ddbc294011b2508/src/plugins/intel_gpu/src/graph/network.cpp#L592)). This is because it needs to be processed in processing order, using the predecessors' allocation information to decide whether to allocate it on device memory or not by checking the memory allocation restrictions described above.
 
 ## Memory dependency and memory pool
-In GPU plugin, multiple memory objects can be allocated at a same address, when there is no dependency between the users of them. For example, a memory region of a program_node _A_'s output memory can be allocated for another program_node _B_'s output, if the output of _A_ is no longer used by any other program_node, when the result of the _B_ is to be stored. This mechanism is realized by the following two parts;
-1. `Memory dependency` : memory_dependencies of a program_node is set by the memory dependency passes. There are two kinds of memory dependency passes as follows:
-   * `basic_memory_dependencies` : Assuming an in-order-queue execution, this pass adds dependencies to a program_node, which are deduced by checking its direct input and output nodes only.
-   * `oooq_memory_dependencies` : Assuming an out-of-order-queue execution, this pass adds dependencies to all pair of program_nodes that can potentially be executed at the same time.
-2. `Memory pool` : The GPU plugin has a [memory pool](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp) which is responsible for the decision of allocation or reuse for an allocation request. This memory_pool utilizes the memory dependencies set by the above two passes in the decision of reuse of not. Note that each `cldnn::network` has its own `memory_pool`.
+
+In the GPU plugin, multiple memory objects can be allocated at the same address, when there is no dependency between their users. For example, a memory region of a `program_node` _A_'s output memory can be allocated for another `program_node` _B_'s output, if the output of _A_ is no longer used by any other `program_node`, when the result of the _B_ is to be stored. This mechanism is realized by the following two parts:
+1. `Memory dependency` : memory dependencies of a `program_node` are set by the memory dependency passes. There are two kinds of memory dependency passes:
+   * `basic_memory_dependencies` : Assuming an in-order-queue execution, this pass adds dependencies to a `program_node`, which are deduced by checking its direct input and output nodes only.
+   * `oooq_memory_dependencies` : Assuming an out-of-order-queue execution, this pass adds dependencies to all pairs of `program_nodes` that can potentially be executed at the same time.
+2. `Memory pool` : The GPU plugin has a [memory pool](https://github.com/openvinotoolkit/openvino/blob/de47a3b4a4ba1f8464b85a665c4d58403e0d16b8/src/plugins/intel_gpu/include/intel_gpu/runtime/memory_pool.hpp), which is responsible for the decision of allocation or reuse for an allocation request. This `memory_pool` utilizes the memory dependencies set by the above two passes when deciding whether to reuse memory or not. Note that each `cldnn::network` has its own `memory_pool`.
 
 ## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
 * [OpenVINO GPU Plugin](../README.md)
 * [Developer documentation](../../../../docs/dev/index.md)
- 
\ No newline at end of file
diff --git a/src/plugins/intel_gpu/docs/simplified_workflow.md b/src/plugins/intel_gpu/docs/simplified_workflow.md
index 7d72cc3b9bb6f9..c00f829aadbce5 100644
--- a/src/plugins/intel_gpu/docs/simplified_workflow.md
+++ b/src/plugins/intel_gpu/docs/simplified_workflow.md
@@ -1,6 +1,6 @@
-# GPU plugin workflow
+# GPU Plugin Workflow
 
-The simplified workflow in the GPU plugin is shown on the picture below (click on image for higher resolution):
+The simplified workflow in the GPU plugin is shown in the diagram below (click it for higher resolution):
 
```mermaid
classDiagram
@@ -147,6 +147,7 @@ class `intel_gpu::device_query` {Detects available devices for given backend}
```
 
 ## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
diff --git a/src/plugins/intel_gpu/docs/source_code_structure.md b/src/plugins/intel_gpu/docs/source_code_structure.md
index 59becd7a212a15..0afc73d737f3bf 100644
--- a/src/plugins/intel_gpu/docs/source_code_structure.md
+++ b/src/plugins/intel_gpu/docs/source_code_structure.md
@@ -1,20 +1,20 @@
-# GPU plugin structure
+# GPU Plugin Structure
 
-Historically GPU plugin was built on top of standalone [clDNN library](https://github.com/intel/clDNN) for DNNs inference on Intel® GPUs,
+Historically, GPU plugin was built on top of standalone [clDNN library](https://github.com/intel/clDNN) for DNNs inference on Intel® GPUs,
 but at some point clDNN became a part of OpenVINO, so now it is a part of the overall GPU plugin code. Intel® Arc™ Graphics Xe-HPG is supported via the embedded [oneDNN library](https://github.com/oneapi-src/oneDNN).
 
 The OpenVINO GPU plugin is responsible for:
 1. [IE Plugin API](https://docs.openvino.ai/latest/openvino_docs_ie_plugin_dg_overview.html) implementation.
- 2. Translation of model from common IE semantic (ov::Function) into plugin specific one (cldnn::topology) which is then compiled into
- gpu graph representation (cldnn::network).
+ 2. Translation of a model from common IE semantic (`ov::Function`) into plugin-specific one (`cldnn::topology`), which is then compiled into
+ GPU graph representation (`cldnn::network`).
 3. Implementation of OpenVINO operation set for Intel® GPU.
- 4. Device specific graph transformations.
+ 4. Device-specific graph transformations.
 5. Memory allocation and management logic.
- 6. Processing of incoming InferRequests using clDNN objects.
+ 6. Processing of incoming InferRequests, using clDNN objects.
 7. Actual execution on GPU device.
-As Intel GPU Plugin source code structure is shown below:
+The Intel GPU Plugin source code structure is shown below:
 src/plugins/intel_gpu                  - root GPU plugin folder
              ├── include               
@@ -49,19 +49,20 @@ src/plugins/intel_gpu                  - root GPU plugin folder
                  └── rapidjson  - thirdparty RapidJSON lib for reading json files (cache.json)
 
-One last thing that is worth mentioning is functional tests which is located in the following location:
+It is worth mentioning the functional tests, which are located in:
```
src/tests/functional/plugin/gpu
```
-Most of the tests are reused across plugins, and each plugin only need to add test instances with some specific parameters.
+Most of the tests are reused across plugins, and each plugin only needs to add the test instances with some specific parameters.
 
-Shared tests are located here:
+Shared tests are located in:
```
src/tests/functional/plugin/shared                        <--- test definitions
src/tests/functional/plugin/gpu/shared_tests_instances   <--- instances for GPU plugin
```
 
 ## See also
+
 * [OpenVINO™ README](../../../../README.md)
 * [OpenVINO Core Components](../../../README.md)
 * [OpenVINO Plugins](../../README.md)
diff --git a/src/tests/README.md b/src/tests/README.md
index f751809b8e3bec..3c83354ab9dace 100644
--- a/src/tests/README.md
+++ b/src/tests/README.md
@@ -1,7 +1,7 @@
 # Inference Engine Test Infrastructure
 
 This is the OpenVINO Inference Engine testing framework. The OpenVINO Inference Engine test system contains:
-* **Unit tests** 
+* **Unit tests**
 This test type is used for detailed testing of each software instance (including internal classes with their methods)
 within the tested modules (Inference Engine and Plugins).
 
 There are the following rules, which are **required** for Unit Test development:
@@ -9,50 +9,51 @@ This is OpenVINO Inference Engine testing framework. OpenVINO Inference Engine t
 * The unit test folder for a particular module should replicate the `SRC` folder layout of the corresponding tested module
 to allow further developers to better understand which parts of the software are already covered by unit tests and
 where to add new tests if needed.
 
-    > **Example**: We have `network_serializer.h` and `network_serializer.cpp` files within the `src` folder of the
-    tested Inference Engine module. Then, new `network_serializer_test.cpp` file should be created within the root of
+    > **Example**: There are `network_serializer.h` and `network_serializer.cpp` files within the `src` folder of the
+    tested Inference Engine module. Then, a new `network_serializer_test.cpp` file should be created within the root of
     the Unit Test folder for this module. This test file should cover all the classes and methods from the original files.
-
-    > **Example**: We have `ie_reshaper.cpp` within the `src/shape_infer` subfolder of the tested module. In this case
-    new `shape_infer` subfolder should be created within the the root of the Unit Test folder for this module. And new
+
+    > **Example**: There is the `ie_reshaper.cpp` file within the `src/shape_infer` subfolder of the tested module. In this case,
+    a new `shape_infer` subfolder should be created within the root of the Unit Test folder for this module. And a new
    `ie_reshaper_test.cpp` file should be created within this newly created subfolder. This test file should cover all
    the classes and methods from the original file.
-
-  * Each Unit Test should cover the only target classes and methods. If needed, all external interface components should
+
+  * Each Unit Test should cover only the target classes and methods. If needed, all external interface components should
   be mocked. There are common mock objects provided within the common Unit Test Utilities to stub the general Inference
  Engine API classes.
 
-    > **Example**: We have `cnn_network_impl.hpp` and `cnn_network_impl.cpp` files within the `src` folder of the tested
-    module.
 In this case, new `cnn_network_impl_test.cpp` file should be created and it should contain tests on
+    > **Example**: There are `cnn_network_impl.hpp` and `cnn_network_impl.cpp` files within the `src` folder of the tested
+    module. In this case, a new `cnn_network_impl_test.cpp` file should be created and it should contain tests on
    the `CNNNetworkImpl` class only.
 
-  * It's not prohibited to have several test files for the same file from the tested module.
-  * It's not prohibited to create a separate test file for a specific classes or functions (not for the whole file).
+  * It is not prohibited to have several test files for the same file from the tested module.
+  * It is not prohibited to create a separate test file for specific classes or functions (not for the whole file).
 
-* **Functional tests** 
+* **Functional tests**
 This test type is used to verify the public Inference Engine API. There are the following types of functional tests:
-  * `inference_engine_tests` are plugin-independent tests. Used to verify Inference Engine API methods which don't
-  involve any plugin runtime. E.g. `network_reader`, `network_serializer`, `precision` tests.
-  * `plugin_tests` are plugin-dependent tests. These tests require plugin runtime to be executed during testing. E.g.
-  any tests using `ExecutableNetwork`, `InferRequest` API can only be implemented within this test group.
+  * `inference_engine_tests` are plugin-independent tests. They are used to verify Inference Engine API methods that do not
+  involve any plugin runtime. The examples are: `network_reader`, `network_serializer`, and `precision` tests.
+  * `plugin_tests` are plugin-dependent tests. These tests require plugin runtime to be executed during testing. For example,
+  any tests using `ExecutableNetwork`, `InferRequest` API can only be implemented within this test group.
 
-    > **Example**: Any new test on creating of a CNNNetwork object and checking of its output info should be included to
-    to the Inference Engine Functional tests suite. But any new test containing reading of a network and loading it to a
+    > **Example**: Any new test on creating a CNNNetwork object and checking its output info should be included to
+    the Inference Engine Functional tests suite. However, any new test containing reading of a network and loading it to a
    specified plugin is always the plugin test.
 
 There are the following rules, which are **required** for Functional Test development:
 * All Functional tests are separated into different executables for the Inference Engine and each plugin.
 * Pre-converted IR files must not be used within the new Functional Tests. Tested models should be generated during
 the test execution. The main method to generate a required model is building the required NGraph function and
-    creating of a CNNNetwork using it. If a required layer is not covered by Ngraph it's allowed to build IR file using
-    `xml_net_builder` utility (please refer to the `ir_net.hpp` file). IR XML files hardcoded as strings within the test
+    creating a CNNNetwork using it. If a required layer is not covered by Ngraph, it is allowed to build an IR file using
+    `xml_net_builder` utility (refer to the `ir_net.hpp` file). IR XML files hardcoded as strings within the test
    code should not be used.
 * All the plugin test cases are parameterized with (at least) the device name and included into the common
 `funcSharedTests` static library. This library is linked to the Plugin Test binaries.
 All the plugin developers just add the required test instantiations, based on the linked test definitions, to their own test binary. It should
-  be done to make all the **shared** test cases always visible and available to instantiate by other plugins.
+  be done to make all the **shared** test cases always visible and available to instantiate by other plugins.
+
   > **NOTE**: Any new plugin test case should be added to the common test definitions library
-  (`funcSharedTests`) within the OpenVINO repository first. And then this test case can be instantiated with the
+  (`funcSharedTests`) within the OpenVINO repository first. Then, this test case can be instantiated with the
  required parameters inside one's own plugin test binary, which links this shared tests library.
 
  > **NOTE**: `funcSharedTests` library is added to the developer package and available for closed source
@@ -60,15 +61,17 @@ This is OpenVINO Inference Engine testing framework. OpenVINO Inference Engine t
 * All the inference engine functional test cases are defined and instantiated within a single test binary. These
 test cases are not implemented as a separate library and are not available for instantiation outside this binary.
 
-* **Inference Engine tests utilities** 
+* **Inference Engine tests utilities**
 The set of utilities used by the Inference Engine Functional and Unit tests. Different helper functions,
-  blob comparators, OS specific constants, etc are implemented within the utilities.
+  blob comparators, OS-specific constants, etc. are implemented within the utilities.
 Internal namespaces (for example, `CommonTestUtils::`, `FuncTestUtils::` or `UnitTestUtils::`) must be used to
 separate utilities by domains.
+
 > **NOTE**: All the utilities libraries are added to the developer package and available for closed source
 development.
 
- ## See also
+## See also
+
 * [OpenVINO™ README](../../README.md)
 * [OpenVINO Core Components](../README.md)
 * [Developer documentation](../../docs/dev/index.md)
diff --git a/src/tests/functional/plugin/conformance/test_runner/README.md b/src/tests/functional/plugin/conformance/test_runner/README.md
index 4844f4e9b14d0e..4c67da79667285 100644
--- a/src/tests/functional/plugin/conformance/test_runner/README.md
+++ b/src/tests/functional/plugin/conformance/test_runner/README.md
@@ -1,9 +1,11 @@
-# Conformance test runner
+# Conformance Test Runner
 
 ## Description
+
 Conformance suites certify plugin functionality using a set of tests with parameters independent of plugin specificity. There are two types of conformance validation.
 
 ### API Conformance
+
 The suite checks the following OpenVINO API entities in a plugin implementation:
 * plugin
 * compiled model (executable network)
@@ -11,24 +13,24 @@ The suite checks the following OpenVINO API entities in a plugin implementation:
 * infer request
 Also, there are test instantiations to validate hardware plugin functionality via software plugins (for example, MULTI, HETERO, etc.) for the entities.
 
 The other part of the API conformance suite is QueryModel validation:
-* `ReadIR_queryModel` tests validate the `query_model` API using a simple single operation graph (Conformance IR) based on model parameters.
+* `ReadIR_queryModel` tests validate the `query_model` API, using a simple single operation graph (Conformance IR) based on model parameters.
 * `OpImplCheck` tests are simple synthetic checks of `query_model`; they set the implementation status for each operation.
 
-A result of the `apiConformanceTests` run is two xml files: `report_api.xml` and `report_opset.xml`.
 The first one shows OpenVINO API entities' test statistics for each OpenVINO API entity, such as passed/failed/crashed/skipped/hanging, tests number, pass rates, and implementation status. The second one demonstrates the `query_model` results for each operation.
-
-
+A result of the `apiConformanceTests` run is two *xml* files: `report_api.xml` and `report_opset.xml`. The first one shows OpenVINO API entities' test statistics for each OpenVINO API entity, such as `passed/failed/crashed/skipped/hanging`, the number of tests, pass rates, and implementation status. The second one demonstrates the `query_model` results for each operation.
 
 ### Opset Conformance
+
 The suite validates an OpenVINO operation plugin implementation, using simple single operation graphs (Conformance IR) taken from models. The plugin inference output is compared with the reference.
 
-  The suite contains:
+The suite contains:
 * `ReadIR_compareWithRefs` set allows reading IRs from folders recursively, inferring them, and comparing plugin results with the reference.
-* `OpImplCheckTest` set checks an operation plugin implementation status, using a simple synthetic single operation graph (`Implemented`/`Not implemented`). The suite checks only `compile_model` without comparison with the reference.
+* `OpImplCheckTest` set checks an operation plugin implementation status, using a simple synthetic single operation graph (`Implemented`/`Not implemented`). The suite checks only `compile_model` without comparison with the reference.
 
 A result of the `conformanceTests` run is the `report_opset.xml` file. It shows test statistics, such as pass rate, passed, crashed, skipped, and failed tests, and plugin implementation per operation for devices.
 
 ## How to build
+
 Run the following commands in the build directory:
 1. Generate CMake project:
```
cmake -DENABLE_TESTS=ON -DENABLE_FUNCTIONAL_TESTS=ON ..
```
@@ -43,129 +45,128 @@ Run the following command in build directory:
```
make --jobs=$(nproc --all) lib_plugin_name
```
-
+
 ## How to run using [simple conformance runner](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/run_conformance.py)
+
 There is a simple python runner to complete the whole conformance pipeline locally. Some steps can be excluded from the pipeline via command-line parameters.
 
 ### The conformance pipeline steps:
+
-1. (Optional) Download models/conformance IR via URL / copy archieve to working directory / verify dirs / check list-files.
+1. (Optional) Download models/conformance IR via URL / copy archive to working directory / verify dirs / check list-files.
 2. (Optional) Run `SubgraphDumper` to generate a simple single op graph based on models, or download the `conformance_ir` folder (if `-s=1`).
 3. Run conformance test executable files.
 4. Generate conformance reports.
 
 ### Command-line arguments
+
 The script has the following arguments:
 * `-h, --help` show this help message and exit
 * `-m MODELS_PATH, --models_path MODELS_PATH`
-                        Path to the directory/ies containing models to dump subgraph (the default way is to download conformance IR). It may be directory, archieve file, .lst file or http link to download something . If `--s=0`, specify the Conformance IRs directoryy
+                        Path to the directory/ies containing models to dump subgraph (the default is to download conformance IR). It may be a directory, an archive file, an `.lst` file, or a URL to download some data. If `--s=0`, specify the Conformance IRs directory.
 * `-d DEVICE, --device DEVICE`
-                        Specify the target device. The default value is CPU
+                        Specify the target device. The default value is `CPU`.
 * `-ov OV_PATH, --ov_path OV_PATH`
-                        OV repo path. The default way is try to find the absolute path of OV repo (by using script path)
+                        OV repo path. By default, the script tries to find the absolute path of the OV repo (by using the script path).
 * `-w WORKING_DIR, --working_dir WORKING_DIR`
-                        Specify a working directory to save all artifacts, such as reports, models, conformance_irs, etc.
+                        Specify a working directory to save all artifacts, such as reports, models, `conformance_irs`, etc.
 * `-t TYPE, --type TYPE`
-                        Specify conformance type: `OP` or `API`. The default value is `OP`
+                        Specify conformance type: `OP` or `API`. The default value is `OP`.
 * `-s DUMP_CONFORMANCE, --dump_conformance DUMP_CONFORMANCE`
-                        Set '1' if you want to create Conformance IRs from custom/downloaded models. In other cases, set `0`. The default value is '1'
+                        Set `1` if you want to create Conformance IRs from custom/downloaded models. In other cases, set `0`. The default value is `1`.
 * `-j WORKERS, --workers WORKERS`
-                        Specify number of workers to run in parallel. The default value is CPU count - 1
+                        Specify the number of workers to run in parallel. The default value is `CPU count - 1`.
 * `--gtest_filter GTEST_FILTER`
-                        Specify gtest filter to apply when running test. E.g. *Add*:*BinaryConv*. The default value is None
+                        Specify a gtest filter to apply when running a test. For example, *Add*:*BinaryConv*. The default value is `None`.
 * `-c OV_CONFIG_PATH, --ov_config_path OV_CONFIG_PATH`
-                        Specify path to file contains plugin config
+                        Specify the path to a file that contains the plugin config.
 * `-sh SHAPE_MODE, --shape_mode SHAPE_MODE`
-                        Specify shape mode for conformance. Default value is ``. Possible values: `static`, `dynamic`, ``
+                        Specify the shape mode for conformance. The default value is ``. Possible values: `static`, `dynamic`, ``
 
-> **NOTE**:
-> All arguments are optional and have default values to reproduce OMZ conformance results in a default way.
+> **NOTE**: All arguments are optional and have default values to reproduce OMZ conformance results in the default way.
 
-> **NOTE**:
-> The approach can be used as custom model scope validator!
+> **NOTE**: The approach can be used as a custom model scope validator.
 
 ## Examples of usage:
+
-1. Use the default way to reproduce opset conformance results for OMZ on GPU:
+1. Use the default settings to reproduce opset conformance results for OMZ on GPU:
```
python3 run_conformance.py -d GPU
```
 2. Use the conformance pipeline to check new models support (as IRs) on the CPU plugin and save results to a custom directory:
```
python3 run_conformance.py -m /path/to/new/model_irs -s=1 -w /path/to/working/dir -d CPU
```
-3. Use custom OV build to check GNA conformance using pre-generated conformance_irs:
+3. Use a custom OV build to check GNA conformance, using pre-generated `conformance_irs`:
```
python3 run_conformance.py -m /path/to/conformance_irs -s=0 -ov /path/to/ov_repo_on_custom_branch -d GNA
```
-
-> **IMPORTANT NOTE:**
-> If you need to debug some conformance tests, use the binary run as the default method. If you want to get conformance results or reproduce CI behavior, use the simple python runner.
+> **IMPORTANT NOTE:** If you need to debug some conformance tests, use the binary run as the default method. If you want to get conformance results or reproduce CI behavior, use the simple python runner.
## How to generate Conformance IRs set + Run the following commands: 1. Clone [`Open Model Zoo repo`](https://github.com/openvinotoolkit/open_model_zoo) or prepare custom model scope 2. Download all models using [Downloader tool](https://github.com/openvinotoolkit/open_model_zoo/blob/master/tools/model_tools/downloader.py) from the repo. -3. Convert downloaded models to IR files using [Converter tool](https://github.com/openvinotoolkit/open_model_zoo/blob/master/tools/model_tools/converter.py) from the repo. +3. Convert downloaded models to IR files, using [Converter tool](https://github.com/openvinotoolkit/open_model_zoo/blob/master/tools/model_tools/converter.py) from the repo. 4. Run [Subgraph dumper](./../subgraphs_dumper/README.md) to collect unique operation set from the models. - - ## How to run operation conformance suite + The target is able to take the following command-line arguments: * `-h` prints target command-line options with description. * `--device` specifies target device. -* `--input_folders` specifies the input folders with IRs or '.lst' file contains IRs path. Delimiter is `,` symbol. -* `--plugin_lib_name` is name of plugin library. The example is `openvino_intel_cpu_plugin`. Use only with unregistered in IE Core devices. -* `--disable_test_config` allows to ignore all skipped tests with the exception of `DISABLED_` prefix using. -* `--skip_config_path` allows to specify paths to files contain regular expressions list to skip tests. [Examples](./op_conformance_runner/skip_configs) -* `--config_path` allows to specify path to file contains plugin config. [Example](./op_conformance_runner/config/config_example.txt) -* `--extend_report` allows not to re-write device results to the report (add results of this run to the existing). Mutually exclusive with --report_unique_name. -* `--report_unique_name` allows to save report with unique name (report_pid_timestamp.xml). Mutually exclusive with --extend_report. -* `--save_report_timeout` allows to try to save report in cycle using timeout (in seconds). -* `--output_folder` Paths to the output folder to save report. -* `--extract_body` allows to count extracted operation bodies to report. -* `--shape_mode` Optional. Allows to run `static`, `dynamic` or both scenarios. Default value is empty string allows to run both scenarios. Possible values +* `--input_folders` specifies the input folders with IRs or an `.lst` file that contains paths separated by the `,` symbol. +* `--plugin_lib_name` is the name of a plugin library, for example, `openvino_intel_cpu_plugin`. Use it only with devices that are not registered in IE Core. +* `--disable_test_config` allows ignoring all skipped tests, except those with the `DISABLED_` prefix. +* `--skip_config_path` allows specifying paths to files that contain lists of regular expressions for tests to skip. [Examples](./op_conformance_runner/skip_configs/skip_config_example.lst) +* `--config_path` allows specifying the path to a file that contains plugin config. [Example](./op_conformance_runner/config/config_example.txt) +* `--extend_report` allows you not to re-write device results to the report (add results of this run to the existing one). Mutually exclusive with `--report_unique_name`. +* `--report_unique_name` allows you to save a report with a unique name (`report_pid_timestamp.xml`). Mutually exclusive with `--extend_report`. +* `--save_report_timeout` allows retrying to save a report in a cycle, using a timeout (in seconds). +* `--output_folder` specifies the path to the output folder to save a report.
+* `--extract_body` allows you to count extracted operation bodies in a report. +* `--shape_mode` is optional. It allows you to run `static`, `dynamic`, or both scenarios. The default value is an empty string, which allows running both scenarios. Possible values are `static`, `dynamic`, `` -* `--test_timeout` Setup timeout for each test in seconds, default timeout 900seconds (15 minutes). +* `--test_timeout` specifies the timeout for each test in seconds. The default timeout is 900 seconds (15 minutes). * All `gtest` command-line parameters > **NOTE**: > -> Using of [`parallel_runner`](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/run_parallel.py) tool to run a conformance suite helps to report crashed tests and collect correct statistic after unexpected crashes. -> The tool is able to work in 2 modes: -> * one test is run in separate thread (first run, as the output the cache will be saved as a custom file) -> * similar load time per one worker based on test execution time. May contain different test count per worker -> +> +> Using the [`parallel_runner`](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/run_parallel.py) tool to run a conformance suite helps to report crashed tests and collect correct statistics after unexpected crashes. +> The tool is able to work in two modes: +> * one test is run in a separate thread (on the first run; the cache will be saved to a custom file as the output). +> * similar load time per worker, based on test execution time. Workers may run different numbers of tests. +> > The example of usage is: > ``` -> python3 run_parallel.py -e=/path/to/openvino/bin/intel64/Debug/conformanceTests -d . -> --gtest_filter=*Add*:*BinaryConv* -- --input_folders=/path/to/ir_1,/path/to/ir_2 --device=CPU +> python3 run_parallel.py -e=/path/to/openvino/bin/intel64/Debug/conformanceTests -d . +> --gtest_filter=*Add*:*BinaryConv* -- --input_folders=/path/to/ir_1,/path/to/ir_2 --device=CPU > --report_unique_name --output_folder=/path/to/temp_output_report_folder > ``` > All arguments after the `--` symbol are forwarded to the `conformanceTests` target. -> +> > If you use the `--report_unique_name` argument, run -> [the merge xml script](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py) -> to aggregate the results to one xml file. Check command-line arguments with `--help` before running the command. +> [the merge xml script](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py) +> to aggregate the results to one *xml* file. Check command-line arguments with `--help` before running the command. > The example of usage is: > ``` > python3 merge_xmls.py --input_folders=/path/to/temp_output_report_folder --output_folder=/path/to/output_report_folder --output_filename=report_aggregated > ``` ## How to create operation conformance report + Run [the summarize script](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py) to generate `html` and `csv` reports. Check command-line arguments with `--help` before running the command. The example of using the script is: ``` python3 summarize.py --xml /opt/repo/infrastructure-master/thirdparty/gtest-parallel/report.xml --out /opt/repo/infrastructure-master/thirdparty/gtest-parallel/ ``` -> **NOTE**: -> -> Please, do not forget to copy [styles folder](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/template) to the output directory.
It -> helps to provide report with the filters and other usable features. +> **NOTE**: Remember to copy [styles folder](./../../../../ie_test_utils/functional_test_utils/layer_tests_summary/template) to the output directory. It helps to provide a report with filters and other useful features. The report contains statistics based on conformance results and filter fields at the top of the page. -## See also +## See Also + * [OpenVINO™ README](../../../../../../README.md) * [OpenVINO Core Components](../../../../../README.md) * [Developer documentation](../../../../../../docs/dev/index.md) \ No newline at end of file From 232c802e074ab7c67ae36b02a878e3714d779042 Mon Sep 17 00:00:00 2001 From: River Li Date: Wed, 22 Mar 2023 16:18:40 +0800 Subject: [PATCH 031/296] [CAPI] Add ov::hint::execution_mode property (#16466) --- .../c/include/openvino/c/ov_property.h | 14 ++++++ src/bindings/c/src/ov_property.cpp | 1 + src/bindings/c/tests/ov_core_test.cpp | 45 +++++++++++++++++++ 3 files changed, 60 insertions(+) diff --git a/src/bindings/c/include/openvino/c/ov_property.h b/src/bindings/c/include/openvino/c/ov_property.h index b00f72aaedafc2..54c887435c5cd6 100644 --- a/src/bindings/c/include/openvino/c/ov_property.h +++ b/src/bindings/c/include/openvino/c/ov_property.h @@ -171,3 +171,17 @@ ov_property_key_enable_profiling; */ OPENVINO_C_VAR(const char*) ov_property_key_device_priorities; + +/** + * @brief Read-write property for the high-level OpenVINO execution hint. + * Unlike low-level properties that are individual (per-device), the hints are something that every device accepts + * and turns into device-specific settings. + * The execution mode hint controls preferred optimization targets (performance or accuracy) for a given model. + * It can be set to one of the values below: + * "UNDEFINED" //!< Undefined value, settings may vary from device to device + * "PERFORMANCE", //!< Optimize for max performance + * "ACCURACY", //!< Optimize for max accuracy + * @ingroup ov_property_c_api + */ OPENVINO_C_VAR(const char*) +ov_property_key_hint_execution_mode; diff --git a/src/bindings/c/src/ov_property.cpp b/src/bindings/c/src/ov_property.cpp index 613d52b376a228..2d6c470ae5df1d 100644 --- a/src/bindings/c/src/ov_property.cpp +++ b/src/bindings/c/src/ov_property.cpp @@ -29,3 +29,4 @@ const char* ov_property_key_hint_model_priority = "MODEL_PRIORITY"; const char* ov_property_key_log_level = "LOG_LEVEL"; const char* ov_property_key_enable_profiling = "PERF_COUNT"; const char* ov_property_key_device_priorities = "MULTI_DEVICE_PRIORITIES"; +const char* ov_property_key_hint_execution_mode = "EXECUTION_MODE_HINT"; diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index 6804504c94053f..0cb2f29f65e878 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ b/src/bindings/c/tests/ov_core_test.cpp @@ -147,6 +147,27 @@ TEST_P(ov_core_test, ov_core_compile_model_with_property) { ov_core_free(core); } +TEST_P(ov_core_test, ov_core_compile_model_with_execution_mode) { + std::string device_name = "AUTO"; + ov_core_t* core = nullptr; + OV_EXPECT_OK(ov_core_create(&core)); + EXPECT_NE(nullptr, core); + + ov_model_t* model = nullptr; + OV_EXPECT_OK(ov_core_read_model(core, xml_file_name.c_str(), nullptr, &model)); + EXPECT_NE(nullptr, model); + + ov_compiled_model_t* compiled_model = nullptr; + const char* key = ov_property_key_hint_execution_mode; + const char* value = "PERFORMANCE"; + OV_EXPECT_OK(ov_core_compile_model(core, model, device_name.c_str(), 2, &compiled_model, key, value)); +
EXPECT_NE(nullptr, compiled_model); + + ov_compiled_model_free(compiled_model); + ov_model_free(model); + ov_core_free(core); +} + TEST_P(ov_core_test, ov_core_compile_model_with_property_invalid) { auto device_name = GetParam(); ov_core_t* core = nullptr; @@ -306,6 +327,30 @@ TEST_P(ov_core_test, ov_core_get_property) { ov_core_free(core); } +TEST_P(ov_core_test, ov_core_set_and_get_property_execution_mode) { + std::string device_name = "AUTO"; + ov_core_t* core = nullptr; + OV_EXPECT_OK(ov_core_create(&core)); + EXPECT_NE(nullptr, core); + + const char* key = ov_property_key_hint_execution_mode; + char* property_value = nullptr; + OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key, &property_value)); + ov_free(property_value); + + const char* value1 = "ACCURACY"; + OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key, value1)); + OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key, &property_value)); + EXPECT_STREQ(value1, property_value); + + const char* value2 = "PERFORMANCE"; + OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key, value2)); + OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key, &property_value)); + EXPECT_STREQ(value2, property_value); + + ov_core_free(core); +} + TEST_P(ov_core_test, ov_core_set_get_property_str) { #ifdef __aarch64__ GTEST_SKIP() << "Skip this test for ARM CPU for now, cause no string property supported"; From 14e70e76fbe32311beca58a349c082d1d94fefb2 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Wed, 22 Mar 2023 09:39:32 +0100 Subject: [PATCH 032/296] DOCS shift to rst - Further Low-Level Implementation Details (#16444) --- docs/{img => _static/images}/batch_device.svg | 0 .../cpu_execution_conventional_approach.svg | 0 .../images}/cpu_execution_streams.svg | 0 .../images}/cpu_execution_streams_2.svg | 0 .../images}/large_batch_approach.svg | 0 .../dldt_deployment_optimization_internals.md | 186 +++++------------- 6 files changed, 44 insertions(+), 142 deletions(-) rename docs/{img => _static/images}/batch_device.svg (100%) rename docs/{img => _static/images}/cpu_execution_conventional_approach.svg (100%) rename docs/{img => _static/images}/cpu_execution_streams.svg (100%) rename docs/{img => _static/images}/cpu_execution_streams_2.svg (100%) rename docs/{img => _static/images}/large_batch_approach.svg (100%) diff --git a/docs/img/batch_device.svg b/docs/_static/images/batch_device.svg similarity index 100% rename from docs/img/batch_device.svg rename to docs/_static/images/batch_device.svg diff --git a/docs/img/cpu_execution_conventional_approach.svg b/docs/_static/images/cpu_execution_conventional_approach.svg similarity index 100% rename from docs/img/cpu_execution_conventional_approach.svg rename to docs/_static/images/cpu_execution_conventional_approach.svg diff --git a/docs/img/cpu_execution_streams.svg b/docs/_static/images/cpu_execution_streams.svg similarity index 100% rename from docs/img/cpu_execution_streams.svg rename to docs/_static/images/cpu_execution_streams.svg diff --git a/docs/img/cpu_execution_streams_2.svg b/docs/_static/images/cpu_execution_streams_2.svg similarity index 100% rename from docs/img/cpu_execution_streams_2.svg rename to docs/_static/images/cpu_execution_streams_2.svg diff --git a/docs/img/large_batch_approach.svg b/docs/_static/images/large_batch_approach.svg similarity index 100% rename from docs/img/large_batch_approach.svg rename to docs/_static/images/large_batch_approach.svg diff --git 
a/docs/optimization_guide/dldt_deployment_optimization_internals.md b/docs/optimization_guide/dldt_deployment_optimization_internals.md index b03742d351f180..ab596e49c98e0b 100644 --- a/docs/optimization_guide/dldt_deployment_optimization_internals.md +++ b/docs/optimization_guide/dldt_deployment_optimization_internals.md @@ -1,168 +1,70 @@ # Further Low-Level Implementation Details {#openvino_docs_deployment_optimization_guide_internals} -## Throughput on the CPU: Internals -As explained in the [throughput-related section](./dldt_deployment_optimization_tput.md), the OpenVINO streams are means of running multiple requests in parallel. -In order to best serve multiple inference requests executed simultaneously, the inference threads are grouped/pinned to the particular CPU cores, constituting the "CPU" streams. -This provides much better performance for the networks than batching, especially for the multiple-core systems: - -@sphinxdirective - -.. container:: row-two-col-content - - .. container:: column-two-col-content - - **Conventional Approach** - - | Every CNN op is internally parallelized over a full number of CPU cores and it is detrimental for non-scalable ops. - | A lot of synchronization between many threads results in overhead. - | An only option to improve efficiency is batching. - - .. container:: column-two-col-content - - **Streams** - - | CPU cores are evenly distributed between execution streams (each 1-4 threads). - | Less threads per stream means less synchronization, better locality, and finer granularity. - -@endsphinxdirective - -@sphinxdirective - -.. raw:: html - -
-
- -@endsphinxdirective - - -![](../img/cpu_execution_conventional_approach.svg) @sphinxdirective -.. raw:: html +Throughput on the CPU: Internals +################################ -
-
- -@endsphinxdirective - - -![](../img/cpu_execution_streams.svg) - -@sphinxdirective - -.. raw:: html - -
-
- -@endsphinxdirective - - -@sphinxdirective - -.. container:: row-two-col-content - - .. container:: column-two-col-content - - .. raw:: html +As explained in the :doc:`throughput-related section `, the OpenVINO streams are means of running multiple requests in parallel. +In order to best serve multiple inference requests executed simultaneously, the inference threads are grouped/pinned to the particular CPU cores, constituting the "CPU" streams. +This provides much better performance for the networks than batching, especially for the multiple-core systems: -
+.. list-table:: + :header-rows: 1 - .. container:: column-two-col-content + * - Conventional Approach + - Streams + * - | Every CNN op is internally parallelized over a full number of CPU cores and it is detrimental for non-scalable ops. + | A lot of synchronization between many threads results in overhead. + | An only option to improve efficiency is batching. + - | CPU cores are evenly distributed between execution streams (each 1-4 threads). + | Less threads per stream means less synchronization, better locality, and finer granularity. + * - |conventional-approach| + - | |execution-streams| + | Requests are executed in parallel with a small number of threads. + | Layer-wise, the streams imply much less synchronization. - | Requests are executed in parallel with a small number of threads. - | **Layer-wise, the streams imply much less synchronization.** - -@endsphinxdirective +.. |conventional-approach| image:: _static/images/cpu_execution_conventional_approach.svg +.. |execution-streams| image:: _static/images/cpu_execution_streams.svg Compared to the batching, the parallelism is somewhat transposed (performed over inputs with much less synchronization within CNN ops): -@sphinxdirective - -.. container:: row-two-col-content - - .. container:: column-two-col-content - - **Large Batch Approach** - - | All threads process all inputs at once. - | Assumes all layers are parallelized well. - | "Fat" requests are executed one by one. - - .. container:: column-two-col-content - - **Streams** - - | CPU cores are evenly distributed between execution streams. - | "Parallelize the outermost loop" rule of thumb. - | Individual requests are executed in parallel. - -@endsphinxdirective - - -@sphinxdirective - -.. raw:: html - -
-
- -@endsphinxdirective - - -![](../img/large_batch_approach.svg) - -@sphinxdirective - -.. raw:: html +.. list-table:: + :header-rows: 1 -
-
+ * - Large Batch Approach + - Streams + * - | All threads process all inputs at once. + | Assumes all layers are parallelized well. + | “Fat” requests are executed one by one. + - | CPU cores are evenly distributed between execution streams. + | “Parallelize the outermost loop” rule of thumb. + | Individual requests are executed in parallel. + * - |large-batch-approach| + - | |execution-streams-2| + | Inputs-wise the streams are the “transposed” batch. -@endsphinxdirective - - -![](../img/cpu_execution_streams_2.svg) - -@sphinxdirective - -.. raw:: html +.. |large-batch-approach| image:: _static/images/large_batch_approach.svg -
-
- -@endsphinxdirective - - -@sphinxdirective +.. |execution-streams-2| image:: _static/images/cpu_execution_streams_2.svg -.. container:: row-two-col-content - .. container:: column-two-col-content +Keep in mind that :doc:`high-level performance hints ` allow the implementation to select the optimal number of streams depending on model's compute demands and CPU capabilities, including :doc:`int8 inference ` hardware acceleration, number of cores, etc. - .. raw:: html +Automatic Batching Internals +############################ -
- - .. container:: column-two-col-content - - **Inputs-wise the streams are the “transposed” batch.** - -@endsphinxdirective - - -Keep in mind that [high-level performance hints](../OV_Runtime_UG/performance_hints.md) allow the implementation to select the optimal number of streams depending on model's compute demands and CPU capabilities, including [int8 inference](@ref openvino_docs_model_optimization_guide) hardware acceleration, number of cores, etc. - -## Automatic Batching Internals -[Automatic batching](../OV_Runtime_UG/automatic_batching.md) performs on-the-fly grouping of inference requests to improve device utilization. +:doc:`Automatic batching ` performs on-the-fly grouping of inference requests to improve device utilization. It relaxes the requirement for an application to saturate devices such as GPU by using a large batch "explicitly". It performs transparent input gathering from individual inference requests followed by the actual batched execution, with no programming effort from the user: -![](../img/batch_device.svg) + +.. image:: _static/images/batch_device.svg Essentially, Automatic Batching shifts asynchronicity from individual requests to groups of requests that constitute the batches. Furthermore, for the execution to be efficient, it is very important that the requests arrive timely, without causing a batching timeout. Normally, the timeout should never be hit. It is rather a graceful way to handle the application exit (when the inputs are not arriving anymore, so the full batch is not possible to collect). If a workload experiences timeouts, which lead to a drop in performance due to increased latency of every request, consider balancing its value against the batch size. For example, a smaller batch size and timeout value may yield better results than a large batch size coupled with a timeout value that cannot guarantee accommodating all the required requests. -Finally, following the `get_tensor` idiom section from the [general optimizations](./dldt_deployment_optimization_common.md) helps Automatic Batching to save on inputs/outputs copies. According to that, you should always prefer the "get" versions of the tensors' data access APIs in your applications. +Finally, following the ``get_tensor`` idiom section from the :doc:`general optimizations ` helps Automatic Batching to save on inputs/outputs copies. According to that, you should always prefer the "get" versions of the tensors' data access APIs in your applications. 
+ +@endsphinxdirective From 2f69305aa3736610c8cb0e7d7724a182b6403a57 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Wed, 22 Mar 2023 09:41:59 +0100 Subject: [PATCH 033/296] DOCS shift to rst (#16445) --- .../{img => _static/images}/nncf_workflow.svg | 0 docs/optimization_guide/nncf/introduction.md | 101 +++++++++++------- 2 files changed, 65 insertions(+), 36 deletions(-) rename docs/{img => _static/images}/nncf_workflow.svg (100%) diff --git a/docs/img/nncf_workflow.svg b/docs/_static/images/nncf_workflow.svg similarity index 100% rename from docs/img/nncf_workflow.svg rename to docs/_static/images/nncf_workflow.svg diff --git a/docs/optimization_guide/nncf/introduction.md b/docs/optimization_guide/nncf/introduction.md index ba2a2662ba3a17..a4fcbbead198b4 100644 --- a/docs/optimization_guide/nncf/introduction.md +++ b/docs/optimization_guide/nncf/introduction.md @@ -9,10 +9,11 @@ qat_introduction filter_pruning -@endsphinxdirective -## Introduction -Training-time model compression improves model performance by applying optimizations (such as quantization) during the training. The training process minimizes the loss associated with the lower-precision optimizations, so it is able to maintain the model’s accuracy while reducing its latency and memory footprint. Generally, training-time model optimization results in better model performance and accuracy than [post-training optimization](@ref pot_introduction), but it can require more effort to set up. +Introduction +#################### + +Training-time model compression improves model performance by applying optimizations (such as quantization) during the training. The training process minimizes the loss associated with the lower-precision optimizations, so it is able to maintain the model’s accuracy while reducing its latency and memory footprint. Generally, training-time model optimization results in better model performance and accuracy than :doc:`post-training optimization `, but it can require more effort to set up. OpenVINO provides the Neural Network Compression Framework (NNCF) tool for implementing compression algorithms on models to improve their performance. NNCF is a Python library that integrates into PyTorch and TensorFlow training pipelines to add training-time compression methods to the pipeline. To apply training-time compression methods with NNCF, you need: @@ -22,65 +23,93 @@ OpenVINO provides the Neural Network Compression Framework (NNCF) tool for imple Adding compression to a training pipeline only requires a few lines of code. The compression techniques are defined through a single configuration file that specifies which algorithms to use during fine-tuning. -### NNCF Quick Start Examples +NNCF Quick Start Examples ++++++++++++++++++++++++++ + See the following Jupyter Notebooks for step-by-step examples showing how to add model compression to a PyTorch or Tensorflow training pipeline with NNCF: -- [Quantization Aware Training with NNCF and PyTorch](https://docs.openvino.ai/latest/notebooks/302-pytorch-quantization-aware-training-with-output.html). -- [Quantization Aware Training with NNCF and TensorFlow](https://docs.openvino.ai/latest/notebooks/305-tensorflow-quantization-aware-training-with-output.html). +- `Quantization Aware Training with NNCF and PyTorch `__. +- `Quantization Aware Training with NNCF and TensorFlow `__. + +Installation +#################### + +NNCF is open-sourced on `GitHub `__ and distributed as a separate package from OpenVINO. It is also available on PyPI. 
Install it to the same Python environment where PyTorch or TensorFlow is installed. -## Installation -NNCF is open-sourced on [GitHub](https://github.com/openvinotoolkit/nncf) and distributed as a separate package from OpenVINO. It is also available on PyPI. Install it to the same Python environment where PyTorch or TensorFlow is installed. +Install from PyPI +++++++++++++++++++++ -### Install from PyPI To install the latest released version via pip manager run the following command: -``` -pip install nncf -``` -> **NOTE**: To install with specific frameworks, use the `pip install nncf[extras]` command, where extras is a list of possible extras, for example, `torch`, `tf`, `onnx`. +.. code-block:: sh + + pip install nncf + + +.. note:: + + To install with specific frameworks, use the `pip install nncf[extras]` command, where extras is a list of possible extras, for example, `torch`, `tf`, `onnx`. + -To install the latest NNCF version from source follow the instruction on [GitHub](https://github.com/openvinotoolkit/nncf#installation). +To install the latest NNCF version from source follow the instruction on `GitHub `__. -> **NOTE**: NNCF does not have OpenVINO as an installation requirement. To deploy optimized models you should install OpenVINO separately. +.. note:: + + NNCF does not have OpenVINO as an installation requirement. To deploy optimized models you should install OpenVINO separately. + +Working with NNCF +#################### -## Working with NNCF The figure below shows a common workflow of applying training-time compressions with NNCF. The NNCF optimizations are added to the TensorFlow or PyTorch training script, and then the model undergoes fine-tuning. The optimized model can then be exported to OpenVINO IR format for accelerated performance with OpenVINO Runtime. -![](../../img/nncf_workflow.svg) +.. image:: _static/images/nncf_workflow.svg + +Training-Time Compression Methods ++++++++++++++++++++++++++++++++++ -### Training-Time Compression Methods -NNCF provides several methods for improving model performance with training-time compression. +NNCF provides several methods for improving model performance with training-time compression. -#### Quantization -Quantization is the process of converting the weights and activation values in a neural network from a high-precision format (such as 32-bit floating point) to a lower-precision format (such as 8-bit integer). It helps to reduce the model’s memory footprint and latency. NNCF uses quantization-aware training to quantize models. +Quantization +-------------------- +Quantization is the process of converting the weights and activation values in a neural network from a high-precision format (such as 32-bit floating point) to a lower-precision format (such as 8-bit integer). It helps to reduce the model’s memory footprint and latency. NNCF uses quantization-aware training to quantize models. Quantization-aware training inserts nodes into the neural network during training that simulate the effect of lower precision. This allows the training algorithm to consider quantization errors as part of the overall training loss that gets minimized during training. The network is then able to achieve enhanced accuracy when quantized. -The officially supported method of quantization in NNCF is uniform 8-bit quantization. This means all the weights and activation functions in the neural network are converted to 8-bit values. See the [Quantization-ware Training guide](@ref qat_introduction) to learn more. 
+The officially supported method of quantization in NNCF is uniform 8-bit quantization. This means all the weights and activation functions in the neural network are converted to 8-bit values. See the :doc:`Quantization-aware Training guide ` to learn more. + +Filter pruning +-------------------- + +Filter pruning algorithms compress models by zeroing out the output filters of convolutional layers based on a certain filter importance criterion. During fine-tuning, an importance criterion is used to search for redundant filters that don’t significantly contribute to the network’s output and zero them out. After fine-tuning, the zeroed-out filters are removed from the network. For more information, see the :doc:`Filter Pruning ` page. -#### Filter pruning -Filter pruning algorithms compress models by zeroing out the output filters of convolutional layers based on a certain filter importance criterion. During fine-tuning, an importance criteria is used to search for redundant filters that don’t significantly contribute to the network’s output and zero them out. After fine-tuning, the zeroed-out filters are removed from the network. For more information, see the [Filter Pruning](@ref filter_pruning) page. +Experimental methods +-------------------- -#### Experimental methods NNCF also provides state-of-the-art compression techniques that are still in experimental stages of development and are only recommended for expert developers. These include: - Mixed-precision quantization - Sparsity - Binarization -To learn more about these methods, visit the [NNCF repository on GitHub](https://github.com/openvinotoolkit/nncf). +To learn more about these methods, visit the `NNCF repository on GitHub `__. + +Recommended Workflow +++++++++++++++++++++ -### Recommended Workflow Using compression-aware training requires a training pipeline, an annotated dataset, and compute resources (such as CPUs or GPUs). If you don't already have these set up and available, it can be easier to start post-training quantization to quickly see quantized results. Then you can use compression-aware training if the model isn't accurate enough. We recommend the following workflow for compressing models with NNCF: -1. [Perform post-training quantization](@ref pot_introduction) on your model and then compare performance to the original model. -2. If the accuracy is too degraded, use [Quantization-aware Training](@ref qat_introduction) to increase accuracy while still achieving faster inference time. -3. If the quantized model is still too slow, use [Filter Pruning](@ref filter_pruning) to further improve the model’s inference speed. +1. :doc:`Perform post-training quantization ` on your model and then compare performance to the original model. +2. If the accuracy is too degraded, use :doc:`Quantization-aware Training ` to increase accuracy while still achieving faster inference time. +3. If the quantized model is still too slow, use :doc:`Filter Pruning ` to further improve the model’s inference speed.
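As a concrete illustration of the workflow above, the sketch below wires NNCF quantization-aware training into a toy PyTorch pipeline. It is a minimal sketch only: the toy model, dataset, and config values are illustrative assumptions, not part of this documentation, and a real pipeline would fine-tune the wrapped model before exporting it.

.. code-block:: python

   import torch
   from torch.utils.data import DataLoader, TensorDataset
   from nncf import NNCFConfig
   from nncf.torch import create_compressed_model, register_default_init_args

   # A toy model and dataset so the sketch is self-contained.
   model = torch.nn.Sequential(torch.nn.Linear(8, 4), torch.nn.ReLU(), torch.nn.Linear(4, 2))
   dataset = TensorDataset(torch.randn(32, 8), torch.randint(0, 2, (32,)))
   train_loader = DataLoader(dataset, batch_size=8)

   # The config declares which compression algorithms to apply during training.
   nncf_config = NNCFConfig.from_dict({
       "input_info": {"sample_size": [1, 8]},
       "compression": {"algorithm": "quantization"},
   })
   nncf_config = register_default_init_args(nncf_config, train_loader)  # data for quantizer initialization

   # Wrapping the model inserts fake-quantize operations; fine-tune compressed_model as usual afterwards.
   compression_ctrl, compressed_model = create_compressed_model(model, nncf_config)

   # After fine-tuning, export to ONNX for conversion to OpenVINO IR.
   compression_ctrl.export_model("compressed_model.onnx")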
+ +Additional Resources +#################### -## Additional Resources -- [Quantizing Models Post-training](@ref pot_introduction) -- [NNCF GitHub repository](https://github.com/openvinotoolkit/nncf) -- [NNCF FAQ](https://github.com/openvinotoolkit/nncf/blob/develop/docs/FAQ.md) -- [Quantization Aware Training with NNCF and PyTorch](https://docs.openvino.ai/latest/notebooks/302-pytorch-quantization-aware-training-with-output.html) -- [Quantization Aware Training with NNCF and TensorFlow](https://docs.openvino.ai/latest/notebooks/305-tensorflow-quantization-aware-training-with-output.html) \ No newline at end of file +- :doc:`Quantizing Models Post-training ` +- `NNCF GitHub repository `__ +- `NNCF FAQ `__ +- `Quantization Aware Training with NNCF and PyTorch `__ +- `Quantization Aware Training with NNCF and TensorFlow `__ + +@endsphinxdirective From 066ef694f5ee5fc04c276f7f99e406ed33545ac6 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Wed, 22 Mar 2023 09:42:47 +0100 Subject: [PATCH 034/296] DOCS shift to rst - Deploying Your Application with Deployment Manager (#16453) --- .../deployment/deployment-manager-tool.md | 224 ++++++++---------- .../images}/configuration_dialog.png | 0 .../images}/selection_dialog.png | 0 3 files changed, 103 insertions(+), 121 deletions(-) rename docs/{OV_Runtime_UG/img => _static/images}/configuration_dialog.png (100%) rename docs/{OV_Runtime_UG/img => _static/images}/selection_dialog.png (100%) diff --git a/docs/OV_Runtime_UG/deployment/deployment-manager-tool.md b/docs/OV_Runtime_UG/deployment/deployment-manager-tool.md index b48525217f267b..5fdd5a2112ac57 100644 --- a/docs/OV_Runtime_UG/deployment/deployment-manager-tool.md +++ b/docs/OV_Runtime_UG/deployment/deployment-manager-tool.md @@ -1,202 +1,184 @@ # Deploying Your Application with Deployment Manager {#openvino_docs_install_guides_deployment_manager_tool} -The OpenVINO™ Deployment Manager is a Python command-line tool that creates a deployment package by assembling the model, OpenVINO IR files, your application, and associated dependencies into a runtime package for your target device. This tool is delivered within the Intel® Distribution of OpenVINO™ toolkit for Linux, Windows and macOS release packages. It is available in the `/tools/deployment_manager` directory after installation. +@sphinxdirective + +The OpenVINO™ Deployment Manager is a Python command-line tool that creates a deployment package by assembling the model, OpenVINO IR files, your application, and associated dependencies into a runtime package for your target device. This tool is delivered within the Intel® Distribution of OpenVINO™ toolkit for Linux, Windows and macOS release packages. It is available in the ``/tools/deployment_manager`` directory after installation. This article provides instructions on how to create a package with Deployment Manager and then deploy the package to your target systems. -## Prerequisites +Prerequisites +#################### To use the Deployment Manager tool, the following requirements need to be met: -* Intel® Distribution of OpenVINO™ toolkit is installed. See the [Installation Guide](../../install_guides/installing-openvino-overview.md) for instructions on different operating systems. + +* Intel® Distribution of OpenVINO™ toolkit is installed. See the :doc:`Installation Guide ` for instructions on different operating systems. 
* To run inference on a target device other than CPU, device drivers must be pre-installed: - * **For GPU**, see [Configurations for Intel® Processor Graphics (GPU)](../../install_guides/configurations-for-intel-gpu.md). - * **For GNA**, see [Intel® Gaussian & Neural Accelerator (GNA)](../../install_guides/configurations-for-intel-gna.md) -> **IMPORTANT**: The operating system on the target system must be the same as the development system on which you are creating the package. For example, if the target system is Ubuntu 18.04, the deployment package must be created from the OpenVINO™ toolkit installed on Ubuntu 18.04. + * **For GPU**, see :doc:`Configurations for Intel® Processor Graphics (GPU) `. + * **For GNA**, see :doc:`Intel® Gaussian & Neural Accelerator (GNA) ` + +.. important:: + + The operating system on the target system must be the same as the development system on which you are creating the package. For example, if the target system is Ubuntu 18.04, the deployment package must be created from the OpenVINO™ toolkit installed on Ubuntu 18.04. + +.. tip:: -> **TIP**: If your application requires additional dependencies, including the Microsoft Visual C++ Redistributable, use the ['--user_data' option](https://docs.openvino.ai/latest/openvino_docs_install_guides_deployment_manager_tool.html#run-standard-cli-mode) to add them to the deployment archive. Install these dependencies on the target host before running inference. + If your application requires additional dependencies, including the Microsoft Visual C++ Redistributable, use the `'--user_data' option <#running-deployment-manager-in-standard-cli-mode>`__ to add them to the deployment archive. Install these dependencies on the target host before running inference. -## Creating Deployment Package Using Deployment Manager +Creating Deployment Package Using Deployment Manager +#################################################### To create a deployment package that includes inference-related components of OpenVINO™ toolkit, you can run the Deployment Manager tool in either interactive or standard CLI mode . -### Running Deployment Manager in Interactive Mode +Running Deployment Manager in Interactive Mode +++++++++++++++++++++++++++++++++++++++++++++++ -@sphinxdirective +.. dropdown:: Click to expand/collapse -.. raw:: html + The interactive mode provides a user-friendly command-line interface that guides through the process with text prompts. -
+ To launch the Deployment Manager in interactive mode, open a new terminal window, go to the Deployment Manager tool directory, and run the tool script without parameters: -@endsphinxdirective + .. tab:: Linux -The interactive mode provides a user-friendly command-line interface that guides through the process with text prompts. + .. code-block:: sh -To launch the Deployment Manager in interactive mode, open a new terminal window, go to the Deployment Manager tool directory, and run the tool script without parameters: - -@sphinxdirective - -.. tab:: Linux - - .. code-block:: sh - - cd /tools/deployment_manager - - ./deployment_manager.py - -.. tab:: Windows - - .. code-block:: bat - - cd \deployment_tools\tools\deployment_manager - .\deployment_manager.py - -.. tab:: macOS - - .. code-block:: sh - - cd /tools/deployment_manager - ./deployment_manager.py - -@endsphinxdirective + cd /tools/deployment_manager -The target device selection dialog is displayed: - -![Deployment Manager selection dialog](../img/selection_dialog.png) + ./deployment_manager.py -Use the options provided on the screen to complete the selection of the target devices, and press **Enter** to proceed to the package generation dialog. To interrupt the generation process and exit the program, type **q** and press **Enter**. + .. tab:: Windows -Once the selection is accepted, the package generation dialog will appear: - -![Deployment Manager configuration dialog](../img/configuration_dialog.png) + .. code-block:: bat -The target devices selected in the previous step appear on the screen. To go back and change the selection, type **b** and press **Enter**. Use the default settings, or use the following options to configure the generation process: - -* `o. Change output directory` (optional): the path to the output directory. By default, it is set to your home directory. + cd \deployment_tools\tools\deployment_manager + .\deployment_manager.py -* `u. Provide (or change) path to folder with user data` (optional): the path to a directory with user data (OpenVINO IR, model, dataset, etc.) files and subdirectories required for inference, which will be added to the deployment archive. By default, it is set to `None`, which means that copying the user data to the target system need to be done separately. + .. tab:: macOS -* `t. Change archive name` (optional): the deployment archive name without extension. By default, it is set to `openvino_deployment_package`. - -After all the parameters are set, type **g** and press **Enter** to generate the package for the selected target devices. To interrupt the generation process and exit the program, type **q** and press **Enter**. + .. code-block:: sh -Once the script has successfully completed, the deployment package is generated in the specified output directory. + cd /tools/deployment_manager + ./deployment_manager.py -@sphinxdirective -.. raw:: html + The target device selection dialog is displayed: -
+ .. image:: _static/images/selection_dialog.png + :alt: Deployment Manager selection dialog -@endsphinxdirective + Use the options provided on the screen to complete the selection of the target devices, and press **Enter** to proceed to the package generation dialog. To interrupt the generation process and exit the program, type **q** and press **Enter**. -### Running Deployment Manager in Standard CLI Mode + Once the selection is accepted, the package generation dialog will appear: -@sphinxdirective + .. image:: _static/images/configuration_dialog.png + :alt: Deployment Manager configuration dialog -.. raw:: html + The target devices selected in the previous step appear on the screen. To go back and change the selection, type **b** and press **Enter**. Use the default settings, or use the following options to configure the generation process: -
+ * ``o. Change output directory`` (optional): the path to the output directory. By default, it is set to your home directory. -@endsphinxdirective + * ``u. Provide (or change) path to folder with user data`` (optional): the path to a directory with user data (OpenVINO IR, model, dataset, etc.) files and subdirectories required for inference, which will be added to the deployment archive. By default, it is set to ``None``, which means that copying the user data to the target system need to be done separately. -You can also run the Deployment Manager tool in the standard CLI mode. In this mode, specify the target devices and other parameters as command-line arguments of the Deployment Manager Python script. This mode facilitates integrating the tool in an automation pipeline. + * ``t. Change archive name`` (optional): the deployment archive name without extension. By default, it is set to ``openvino_deployment_package``. -To launch the Deployment Manager tool in the standard mode: open a new terminal window, go to the Deployment Manager tool directory, and run the tool command with the following syntax: + After all the parameters are set, type **g** and press **Enter** to generate the package for the selected target devices. To interrupt the generation process and exit the program, type **q** and press **Enter**. -@sphinxdirective + Once the script has successfully completed, the deployment package is generated in the specified output directory. -.. tab:: Linux - .. code-block:: sh +Running Deployment Manager in Standard CLI Mode ++++++++++++++++++++++++++++++++++++++++++++++++ - cd /tools/deployment_manager - ./deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] -.. tab:: Windows +.. dropdown:: Click to expand/collapse - .. code-block:: bat + You can also run the Deployment Manager tool in the standard CLI mode. In this mode, specify the target devices and other parameters as command-line arguments of the Deployment Manager Python script. This mode facilitates integrating the tool in an automation pipeline. - cd \tools\deployment_manager - .\deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] + To launch the Deployment Manager tool in the standard mode: open a new terminal window, go to the Deployment Manager tool directory, and run the tool command with the following syntax: -.. tab:: macOS + .. tab:: Linux - .. code-block:: sh + .. code-block:: sh - cd /tools/deployment_manager - ./deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] + cd /tools/deployment_manager + ./deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] -@endsphinxdirective + .. tab:: Windows -The following options are available: + .. code-block:: bat -* `<--targets>` (required): the list of target devices to run inference. To specify more than one target, separate them with spaces, for example, `--targets cpu gpu`. -To get a list of currently available targets, run the program with the `-h` option. + cd \tools\deployment_manager + .\deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] -* `[--output_dir]` (optional): the path to the output directory. By default, it is set to your home directory. + .. tab:: macOS -* `[--archive_name]` (optional): a deployment archive name without extension. By default, it is set to `openvino_deployment_package`. + .. code-block:: sh -* `[--user_data]` (optional): the path to a directory with user data (OpenVINO IR, model, dataset, etc.) 
files and subdirectories required for inference, which will be added to the deployment archive. By default, it is set to `None`, which means copying the user data to the target system need to be performed separately. + cd /tools/deployment_manager + ./deployment_manager.py <--targets> [--output_dir] [--archive_name] [--user_data] -Once the script has successfully completed, the deployment package is generated in the output directory specified. -@sphinxdirective + The following options are available: -.. raw:: html + * ``<--targets>`` (required): the list of target devices to run inference. To specify more than one target, separate them with spaces, for example, ``--targets cpu gpu``. + To get a list of currently available targets, run the program with the ``-h`` option. -
+ * ``[--output_dir]`` (optional): the path to the output directory. By default, it is set to your home directory. -@endsphinxdirective + * ``[--archive_name]`` (optional): a deployment archive name without extension. By default, it is set to ``openvino_deployment_package``. + + * ``[--user_data]`` (optional): the path to a directory with user data (OpenVINO IR, model, dataset, etc.) files and subdirectories required for inference, which will be added to the deployment archive. By default, it is set to ``None``, which means copying the user data to the target system need to be performed separately. -## Deploying Package on Target Systems + Once the script has successfully completed, the deployment package is generated in the output directory specified. -Once the Deployment Manager has successfully completed, the `.tar.gz` (on Linux or macOS) or `.zip` (on Windows) package is generated in the specified output directory. + +Deploying Package on Target Systems +################################### + +Once the Deployment Manager has successfully completed, the ``.tar.gz`` (on Linux or macOS) or ``.zip`` (on Windows) package is generated in the specified output directory. To deploy the OpenVINO Runtime components from the development machine to the target system, perform the following steps: 1. Copy the generated archive to the target system by using your preferred method. -2. Extract the archive to the destination directory on the target system. If the name of your archive is different from the default one shown below, replace `openvino_deployment_package` with your specified name. -@sphinxdirective - -.. tab:: Linux +2. Extract the archive to the destination directory on the target system. If the name of your archive is different from the default one shown below, replace ``openvino_deployment_package`` with your specified name. - .. code-block:: sh + .. tab:: Linux - tar xf openvino_deployment_package.tar.gz -C + .. code-block:: sh -.. tab:: Windows + tar xf openvino_deployment_package.tar.gz -C - .. code-block:: bat + .. tab:: Windows - Use the archiver of your choice to unzip the file. + .. code-block:: bat -.. tab:: macOS + Use the archiver of your choice to unzip the file. - .. code-block:: sh + .. tab:: macOS - tar xf openvino_deployment_package.tar.gz -C + .. code-block:: sh -@endsphinxdirective + tar xf openvino_deployment_package.tar.gz -C Now, the package is extracted to the destination directory. The following files and subdirectories are created: - - * `setupvars.sh` — a copy of `setupvars.sh`. - * `runtime` — contains the OpenVINO runtime binary files. - * `install_dependencies` — a snapshot of the `install_dependencies` directory from the OpenVINO installation directory. - * `` — the directory with the user data (OpenVINO IR, model, dataset, etc.) specified while configuring the package. - -3. On a target Linux system, to run inference install additional dependencies by running the `install_openvino_dependencies.sh` script: - ```sh - cd /openvino/install_dependencies - sudo -E ./install_openvino_dependencies.sh - ``` + + * ``setupvars.sh`` — a copy of ``setupvars.sh``. + * ``runtime`` — contains the OpenVINO runtime binary files. + * ``install_dependencies`` — a snapshot of the ``install_dependencies`` directory from the OpenVINO installation directory. + * ```` — the directory with the user data (OpenVINO IR, model, dataset, etc.) specified while configuring the package. + +3. 
On a target Linux system, to run inference install additional dependencies by running the ``install_openvino_dependencies.sh`` script: + + .. code-block: sh + + cd /openvino/install_dependencies + sudo -E ./install_openvino_dependencies.sh + 4. Set up the environment variables: -@sphinxdirective .. tab:: Linux @@ -219,7 +201,7 @@ To deploy the OpenVINO Runtime components from the development machine to the ta cd /openvino/ source ./setupvars.sh -@endsphinxdirective - Now, you have finished the deployment of the OpenVINO Runtime components to the target system. + +@endsphinxdirective diff --git a/docs/OV_Runtime_UG/img/configuration_dialog.png b/docs/_static/images/configuration_dialog.png similarity index 100% rename from docs/OV_Runtime_UG/img/configuration_dialog.png rename to docs/_static/images/configuration_dialog.png diff --git a/docs/OV_Runtime_UG/img/selection_dialog.png b/docs/_static/images/selection_dialog.png similarity index 100% rename from docs/OV_Runtime_UG/img/selection_dialog.png rename to docs/_static/images/selection_dialog.png From 90100451a31a2a81b2cbb1c2a45dc33ea8c3b57a Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Wed, 22 Mar 2023 09:43:44 +0100 Subject: [PATCH 035/296] DOCS shift to rst - Libraries for Local Distribution (#16469) --- .../deployment/local-distribution.md | 196 +++++++++--------- .../images}/deployment_full.svg | 0 2 files changed, 102 insertions(+), 94 deletions(-) rename docs/{img => _static/images}/deployment_full.svg (100%) diff --git a/docs/OV_Runtime_UG/deployment/local-distribution.md b/docs/OV_Runtime_UG/deployment/local-distribution.md index 1b4e02143ad1dc..cd68ac4bdbf085 100644 --- a/docs/OV_Runtime_UG/deployment/local-distribution.md +++ b/docs/OV_Runtime_UG/deployment/local-distribution.md @@ -1,155 +1,163 @@ # Libraries for Local Distribution {#openvino_docs_deploy_local_distribution} +@sphinxdirective + With a local distribution, each C or C++ application/installer will have its own copies of OpenVINO Runtime binaries. However, OpenVINO has a scalable plugin-based architecture, which means that some components can be loaded in runtime only when they are really needed. Therefore, it is important to understand which minimal set of libraries is really needed to deploy the application. This guide helps you to achieve that goal. +Local distribution is also appropriate for OpenVINO binaries built from sources using `Build instructions `__, +but the guide below supposes OpenVINO Runtime is built dynamically. For case of `Static OpenVINO Runtime `__ select the required OpenVINO capabilities on CMake configuration stage using `CMake Options for Custom Compilation `__, the build and link the OpenVINO components into the final application. -Local dsitribution is also appropriate for OpenVINO binaries built from sources using [Build instructions](https://github.com/openvinotoolkit/openvino/wiki#how-to-build), but the guide below supposes OpenVINO Runtime is built dynamically. For case of [Static OpenVINO Runtime](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/static_libaries.md) select the required OpenVINO capabilities on CMake configuration stage using [CMake Options for Custom Compilation](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/cmake_options_for_custom_comiplation.md), the build and link the OpenVINO components into the final application. +.. 
note:: -> **NOTE**: The steps below are operating system independent and refer to a library file name without any prefixes (like `lib` on Unix systems) or suffixes (like `.dll` on Windows OS). Do not put `.lib` files on Windows OS to the distribution, because such files are needed only on a linker stage. + The steps below are operating system independent and refer to a library file name without any prefixes (like ``lib`` on Unix systems) or suffixes (like ``.dll`` on Windows OS). Do not put ``.lib`` files on Windows OS to the distribution, because such files are needed only on a linker stage. -## Library Requirements for C++ and C Languages -Independent on the language used to write the application, the `openvino` library must always be put to the final distribution, since it's a core library which orchestrates with all the inference and frontend plugins. In Intel® Distribution of OpenVINO™ toolkit, `openvino` depends on the TBB libraries which are used by OpenVINO Runtime to optimally saturate the devices with computations, so it must be put to the distribution package. +Library Requirements for C++ and C Languages +############################################ -If your application is written with C language, you need to put the `openvino_c` library additionally. +Independent on the language used to write the application, the ``openvino`` library must always be put to the final distribution, since it's a core library which orchestrates with all the inference and frontend plugins. In Intel® Distribution of OpenVINO™ toolkit, ``openvino`` depends on the TBB libraries which are used by OpenVINO Runtime to optimally saturate the devices with computations, so it must be put to the distribution package. -The `plugins.xml` file with information about inference devices must also be taken as a support file for `openvino`. +If your application is written with C language, you need to put the ``openvino_c`` library additionally. +The ``plugins.xml`` file with information about inference devices must also be taken as a support file for ``openvino``. -## Libraries for Pluggable Components + +Libraries for Pluggable Components +################################## The picture below presents dependencies between the OpenVINO Runtime core and pluggable libraries: -![](../../img/deployment_full.svg) +.. image:: _static/images/deployment_full.svg -### Libraries for Compute Devices +Libraries for Compute Devices ++++++++++++++++++++++++++++++ For each inference device, OpenVINO Runtime has its own plugin library: -- `openvino_intel_cpu_plugin` for [Intel® CPU devices](../supported_plugins/CPU.md). -- `openvino_intel_gpu_plugin` for [Intel® GPU devices](../supported_plugins/GPU.md). -- `openvino_intel_gna_plugin` for [Intel® GNA devices](../supported_plugins/GNA.md). -- `openvino_arm_cpu_plugin` for [ARM CPU devices](../supported_plugins/ARM_CPU.md). + +- ``openvino_intel_cpu_plugin`` for :doc:`Intel® CPU devices `. +- ``openvino_intel_gpu_plugin`` for :doc:`Intel® GPU devices `. +- ``openvino_intel_gna_plugin`` for :doc:`Intel® GNA devices `. +- ``openvino_arm_cpu_plugin`` for :doc:`ARM CPU devices `. Depending on what devices are used in the app, the appropriate libraries need to be put to the distribution package. As it is shown on the picture above, some plugin libraries may have OS-specific dependencies which are either backend libraries or additional supports files with firmware, etc. Refer to the table below for details: -@sphinxdirective +.. dropdown:: Windows OS: -.. raw:: html + .. 
list-table:: + :header-rows: 1 -
+ * - Device + - Dependency + * - CPU + - ``-`` + * - GPU + - ``OpenCL.dll``, ``cache.json`` + * - GNA + - ``gna.dll`` + * - Arm® CPU + - ``-`` -@endsphinxdirective -| Device | Dependency | -|-------------|------------| -| CPU | `-` | -| GPU | `OpenCL.dll`, `cache.json` | -| GNA | `gna.dll` | -| Arm® CPU | `-` | +.. dropdown:: Linux OS: -@sphinxdirective + .. list-table:: + :header-rows: 1 -.. raw:: html + * - Device + - Dependency + * - CPU + - ``-`` + * - GPU + - ``libOpenCL.so``, ``cache.json`` + * - GNA + - ``gna.dll`` + * - Arm® CPU + - ``-`` -
-@endsphinxdirective -@sphinxdirective +.. dropdown:: MacOS: -.. raw:: html + .. list-table:: + :header-rows: 1 -
+ * - Device + - Dependency + * - CPU + - ``-`` + * - Arm® CPU + - ``-`` -@endsphinxdirective -| Device | Dependency | -|-------------|-------------| -| CPU | `-` | -| GPU | `libOpenCL.so`, `cache.json` | -| GNA | `gna.dll` | -| Arm® CPU | `-` | +Libraries for Execution Modes ++++++++++++++++++++++++++++++ -@sphinxdirective +The ``HETERO``, ``MULTI``, ``BATCH`` and ``AUTO`` execution modes can also be used explicitly or implicitly by the application. Use the following recommendations to decide whether to put the appropriate libraries to the distribution package: -.. raw:: html +- If :doc:`AUTO ` is used explicitly in the application or `ov::Core::compile_model `__ is used without specifying a device, put ``openvino_auto_plugin`` to the distribution. -
+ .. note:: -@endsphinxdirective -@sphinxdirective - -.. raw:: html + Automatic Device Selection relies on :doc:`inference device plugins `. If you are not sure which inference devices are available on the target system, put all the inference plugin libraries to the distribution. If `ov::device::priorities `__ is used for ``AUTO`` to specify a limited device list, grab the corresponding device plugins only. -
+- If :doc:`MULTI ` is used explicitly, put ``openvino_auto_plugin`` to the distribution. +- If :doc:`HETERO ` is either used explicitly or `ov::hint::performance_mode `__ is used with GPU, put ``openvino_hetero_plugin`` to the distribution. +- If :doc:`BATCH ` is either used explicitly or ``ov::hint::performance_mode`` is used with GPU, put ``openvino_batch_plugin`` to the distribution. -@endsphinxdirective +Frontend Libraries for Reading Models ++++++++++++++++++++++++++++++++++++++ -| Device | Dependency | -|-------------|-------------| -| CPU | `-` | -| Arm® CPU | `-` | - -@sphinxdirective - -.. raw:: html - -
- -@endsphinxdirective - -### Libraries for Execution Modes - -The `HETERO`, `MULTI`, `BATCH` and `AUTO` execution modes can also be used explicitly or implicitly by the application. Use the following recommendation scheme to decide whether to put the appropriate libraries to the distribution package: -- If [AUTO](../auto_device_selection.md) is used explicitly in the application or `ov::Core::compile_model` is used without specifying a device, put `openvino_auto_plugin` to the distribution. - > **NOTE**: Automatic Device Selection relies on [inference device plugins](../supported_plugins/Device_Plugins.md). If you are not sure about what inference devices are available on target system, put all the inference plugin libraries to the distribution. If `ov::device::priorities` is used for `AUTO` to specify a limited device list, grab the corresponding device plugins only. +OpenVINO Runtime uses frontend libraries dynamically to read models in different formats: -- If [MULTI](../multi_device.md) is used explicitly, put `openvino_auto_plugin` to the distribution. -- If [HETERO](../hetero_execution.md) is either used explicitly or `ov::hint::performance_mode` is used with GPU, put `openvino_hetero_plugin` to the distribution. -- If [BATCH](../automatic_batching.md) is either used explicitly or `ov::hint::performance_mode` is used with GPU, put `openvino_batch_plugin` to the distribution. +- ``openvino_ir_frontend`` is used to read OpenVINO IR. +- ``openvino_tensorflow_frontend`` is used to read the TensorFlow file format. +- ``openvino_onnx_frontend`` is used to read the ONNX file format. +- ``openvino_paddle_frontend`` is used to read the Paddle file format. -### Frontend Libraries for Reading Models +Depending on the model formats used by the application in `ov::Core::read_model `__, pick up the appropriate libraries. -OpenVINO Runtime uses frontend libraries dynamically to read models in different formats: -- `openvino_ir_frontend` is used to read OpenVINO IR. -- `openvino_tensorflow_frontend` is used to read TensorFlow file format. -- `openvino_onnx_frontend` is used to read ONNX file format. -- `openvino_paddle_frontend` is used to read Paddle file format. +.. note:: -Depending on the model format types that are used in the application in `ov::Core::read_model`, pick up the appropriate libraries. + To optimize the size of the final distribution package, it is recommended to convert models to OpenVINO IR by using :doc:`Model Optimizer `. This way you don't have to keep TensorFlow, ONNX, PaddlePaddle, and other frontend libraries in the distribution package. -> **NOTE**: To optimize the size of final distribution package, you are recommended to convert models to OpenVINO IR by using [Model Optimizer](../../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). This way you don't have to keep TensorFlow, ONNX, PaddlePaddle, and other frontend libraries in the distribution package. +(Legacy) Preprocessing via G-API +++++++++++++++++++++++++++++++++ -### (Legacy) Preprocessing via G-API +.. note:: -> **NOTE**: [G-API](../../gapi/gapi_intro.md) preprocessing is a legacy functionality, use [preprocessing capabilities from OpenVINO 2.0](../preprocessing_overview.md) which do not require any additional libraries. + :doc:`G-API ` preprocessing is a legacy functionality; use :doc:`preprocessing capabilities from OpenVINO 2.0 `, which do not require any additional libraries. 
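+
+For illustration, a minimal sketch of the OpenVINO 2.0 preprocessing API (the model path and the u8-to-f32 conversion are placeholders chosen for this sketch):
+
+.. code-block:: cpp
+
+   #include <openvino/openvino.hpp>
+
+   int main() {
+       ov::Core core;
+       auto model = core.read_model("model.xml");  // placeholder model path
+       ov::preprocess::PrePostProcessor ppp(model);
+       // The application supplies u8 input tensors...
+       ppp.input().tensor().set_element_type(ov::element::u8);
+       // ...and OpenVINO converts them to the model precision at inference time,
+       // with no extra preprocessing library in the distribution.
+       ppp.input().preprocess().convert_element_type(ov::element::f32);
+       model = ppp.build();
+       return 0;
+   }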
-If the application uses `InferenceEngine::PreProcessInfo::setColorFormat` or `InferenceEngine::PreProcessInfo::setResizeAlgorithm` methods, OpenVINO Runtime dynamically loads `openvino_gapi_preproc` plugin to perform preprocessing via G-API. +If the application uses `InferenceEngine::PreProcessInfo::setColorFormat `__ or `InferenceEngine::PreProcessInfo::setResizeAlgorithm `__ methods, OpenVINO Runtime dynamically loads the ``openvino_gapi_preproc`` plugin to perform preprocessing via G-API. -## Examples +Examples +#################### **CPU + OpenVINO IR in C application** In this example, the application is written in C, performs inference on CPU, and reads models stored in the OpenVINO IR format. The following libraries are used: -- The `openvino_c` library is a main dependency of the application. It links against this library. -- The `openvino` library is used as a private dependency for `openvino_c` and is also used in the deployment. -- `openvino_intel_cpu_plugin` is used for inference. -- `openvino_ir_frontend` is used to read source models. +- The ``openvino_c`` library is the main dependency of the application; the application links against this library. +- The ``openvino`` library is used as a private dependency for ``openvino_c`` and is also used in the deployment. +- ``openvino_intel_cpu_plugin`` is used for inference. +- ``openvino_ir_frontend`` is used to read source models. **MULTI execution on GPU and CPU in `tput` mode** -In this example, the application is written in C++, performs inference [simultaneously on GPU and CPU devices](../multi_device.md) with the `ov::hint::PerformanceMode::THROUGHPUT` property set, and reads models stored in the ONNX format. The following libraries are used: -- The `openvino` library is a main dependency of the application. It links against this library. -- `openvino_intel_gpu_plugin` and `openvino_intel_cpu_plugin` are used for inference. -- `openvino_auto_plugin` is used for Multi-Device Execution. -- `openvino_auto_batch_plugin` can be also put to the distribution to improve the saturation of [Intel® GPU](../supported_plugins/GPU.md) device. If there is no such plugin, [Automatic Batching](../automatic_batching.md) is turned off. -- `openvino_onnx_frontend` is used to read source models. +In this example, the application is written in C++, performs inference :doc:`simultaneously on GPU and CPU devices ` with the `ov::hint::PerformanceMode::THROUGHPUT `__ property set, and reads models stored in the ONNX format. The following libraries are used: + +- The ``openvino`` library is the main dependency of the application; the application links against this library. +- ``openvino_intel_gpu_plugin`` and ``openvino_intel_cpu_plugin`` are used for inference. +- ``openvino_auto_plugin`` is used for Multi-Device Execution. +- ``openvino_auto_batch_plugin`` can also be put to the distribution to improve the saturation of the :doc:`Intel® GPU ` device. If there is no such plugin, :doc:`Automatic Batching ` is turned off. +- ``openvino_onnx_frontend`` is used to read source models. 
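+
+A minimal sketch of such a configuration (the device names and ONNX model path are illustrative):
+
+.. code-block:: cpp
+
+   #include <openvino/openvino.hpp>
+
+   int main() {
+       ov::Core core;
+       // openvino_onnx_frontend is loaded dynamically here to read the ONNX file.
+       auto model = core.read_model("model.onnx");
+       // MULTI dispatches infer requests to both devices; the THROUGHPUT hint lets
+       // Automatic Batching engage on GPU when openvino_auto_batch_plugin is present.
+       auto compiled = core.compile_model(model, "MULTI:GPU,CPU",
+           ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
+       return 0;
+   }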
**Auto-Device Selection between GPU and CPU** -In this example, the application is written in C++, performs inference with the [Automatic Device Selection](../auto_device_selection.md) mode, limiting device list to GPU and CPU, and reads models [created using C++ code](../model_representation.md). The following libraries are used: -- The `openvino` library is a main dependency of the application. It links against this library. -`openvino_auto_plugin` is used to enable Automatic Device Selection. -- `openvino_intel_gpu_plugin` and `openvino_intel_cpu_plugin` are used for inference. AUTO selects between CPU and GPU devices according to their physical existence on the deployed machine. -- No frontend library is needed because `ov::Model` is created in code. +In this example, the application is written in C++, performs inference with the :doc:`Automatic Device Selection ` mode, limiting the device list to GPU and CPU, and reads models :doc:`created using C++ code `. The following libraries are used: + +- The ``openvino`` library is the main dependency of the application; the application links against this library. +- ``openvino_auto_plugin`` is used to enable Automatic Device Selection. +- ``openvino_intel_gpu_plugin`` and ``openvino_intel_cpu_plugin`` are used for inference. AUTO selects between CPU and GPU devices according to their availability on the deployed machine. +- No frontend library is needed because ``ov::Model`` is created in code. 
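+
+A possible sketch of this setup (the single-operation model below is only a stand-in for real graph-building code):
+
+.. code-block:: cpp
+
+   #include <openvino/openvino.hpp>
+   #include <openvino/opsets/opset8.hpp>
+
+   int main() {
+       // Build a trivial ov::Model in code, so no frontend library is required.
+       auto param = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::Shape{1, 3});
+       auto relu = std::make_shared<ov::opset8::Relu>(param);
+       auto model = std::make_shared<ov::Model>(ov::OutputVector{relu}, ov::ParameterVector{param});
+       ov::Core core;
+       // AUTO limited to GPU and CPU; openvino_auto_plugin plus both device plugins are deployed.
+       auto compiled = core.compile_model(model, "AUTO:GPU,CPU");
+       return 0;
+   }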
+ +@endsphinxdirective diff --git a/docs/img/deployment_full.svg b/docs/_static/images/deployment_full.svg similarity index 100% rename from docs/img/deployment_full.svg rename to docs/_static/images/deployment_full.svg From 57c91e0c5685921fdec4c25ff5c4df38e611ca28 Mon Sep 17 00:00:00 2001 From: Chen Xu Date: Wed, 22 Mar 2023 17:28:38 +0800 Subject: [PATCH 036/296] [CPU] Fix issue in reducing HW with small channel size in nspc layout (#16467) --- src/plugins/intel_cpu/src/nodes/reduce.cpp | 36 ++++++++++--------- .../single_layer_tests/reduce_ops.cpp | 33 +++++++++++++++++ 2 files changed, 52 insertions(+), 17 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/reduce.cpp b/src/plugins/intel_cpu/src/nodes/reduce.cpp index a5abc1ead506fb..0eefdf73146389 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/reduce.cpp @@ -2147,22 +2147,24 @@ void Reduce::reduce_PLN(const uint8_t *in_ptr, uint8_t *out_ptr) { } else if (!ReduceC && ReduceD && ReduceH && !ReduceW) { size_t IWB = IW / blk_size; if (ReduceDH_opt) { - // reduce parallelly in D dimension - // step1: !ReduceD && ReduceH && !ReduceW - uint8_t *prc_ptr_n = &vec_reduceDH_prc[0]; - init_dst_data(prc_ptr_n, prc_size); - parallel_for2d(ID, IWB, [&](size_t id, size_t iwb){ - size_t pd = id, pwb = iwb; - reduce_kernel_process(in_ptr_n + (id * IH * IW + iwb * blk_size) * src_data_size, - prc_ptr_n + (pd * PW + pwb * blk_size) * prc_data_size, blk_size, 0, IH); - }); - // step2: ReduceD - reduce_stride = PW; - parallel_for(IWB, [&](size_t iwb){ - size_t pwb = iwb, owb = iwb; - reduce_kernel_process(prc_ptr_n + pwb * blk_size * prc_data_size, - out_ptr_n + owb * blk_size * dst_data_size, blk_size, 0, ID); - }); + if (IWB > 0) { + // reduce in parallel along the D dimension + // step1: !ReduceD && ReduceH && !ReduceW + uint8_t *prc_ptr_n = &vec_reduceDH_prc[0]; + init_dst_data(prc_ptr_n, prc_size); + parallel_for2d(ID, IWB, [&](size_t id, size_t iwb){ + size_t pd = id, pwb = iwb; + reduce_kernel_process(in_ptr_n + (id * IH * IW + iwb * blk_size) * src_data_size, + prc_ptr_n + (pd * PW + pwb * blk_size) * prc_data_size, blk_size, 0, IH); + }); + // step2: ReduceD + reduce_stride = PW; + parallel_for(IWB, [&](size_t iwb){ + size_t pwb = iwb, owb = iwb; + reduce_kernel_process(prc_ptr_n + pwb * blk_size * prc_data_size, + out_ptr_n + owb * blk_size * dst_data_size, blk_size, 0, ID); + }); + } // reduce tail reduce_stride = IW; size_t tail_start = IWB * blk_size; @@ -2740,7 +2742,7 @@ inline void Reduce::set_reduce_dim_flags() { ReduceH = IH != OH && OH == 1; ReduceW = IW != OW && OW == 1; - // must be done before the above dimension change + // must be done after the above dimension change create_DH_working_memory(); // suit for parallel diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/reduce_ops.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/reduce_ops.cpp index 7fa7871b0243ad..f41e74dd4221e5 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/reduce_ops.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/reduce_ops.cpp @@ -241,6 +241,10 @@ const std::vector> axes5DFusing = { {0, 2, 4}, }; +const std::vector> axesHW = { + {2, 3} }; + std::vector opTypes = { CommonTestUtils::OpType::SCALAR, CommonTestUtils::OpType::VECTOR, @@ -294,6 +298,11 @@ std::vector> inputShapes_Int32 = { {{{{1, 5}, 19, {1, 5}, {1, 10}}, {{2, 19, 2, 2}, {2, 19, 2, 3}}}}, }; +std::vector> inputShapes_SmallChannel = { + {{{}, {{2, 3, 2, 9}}}}, + {{{{1, 5}, 3, {1, 5}, {1, 10}}, {{2, 3, 2, 2}, {2, 3, 2, 9}}}}, +}; + std::vector cpuParams_4D = { CPUSpecificParams({nChw16c}, {nChw16c}, {}, {}), CPUSpecificParams({nchw}, {nchw}, {}, {}), @@ -316,6 +325,10 @@ std::vector cpuParams_HybridLayout_5D = { CPUSpecificParams({ndhwc}, {}, {}, {}) }; +std::vector cpuParams_NHWC_4D = { + CPUSpecificParams({nhwc}, {nhwc}, {}, {}) +}; + const std::vector fusingParamsSet { /* activations */ fusingSwish, @@ -431,6 +444,19 @@ const auto params_Int32 = testing::Combine( testing::Values(emptyCPUSpec), testing::Values(emptyFusingSpec)); +const auto params_NHWC_SmallChannel = testing::Combine( + testing::Combine( + testing::ValuesIn(axesHW), + testing::Values(CommonTestUtils::OpType::VECTOR), + testing::Values(true), + testing::ValuesIn(reductionTypes), + testing::ValuesIn(inpOutPrc), + testing::Values(ElementType::undefined), + testing::Values(ElementType::undefined), + testing::ValuesIn(inputShapes_SmallChannel)), + testing::ValuesIn(filterCPUSpecificParams(cpuParams_NHWC_4D)), + testing::Values(emptyFusingSpec)); + INSTANTIATE_TEST_SUITE_P( smoke_Reduce_OneAxis_CPU, ReduceCPULayerTest, params_OneAxis, ReduceCPULayerTest::getTestCaseName ); Hmm INSTANTIATE blocks elided in source; keeping the surviving text verbatim: @@ -480,6 +506,13 @@ INSTANTIATE_TEST_SUITE_P( ReduceCPULayerTest::getTestCaseName ); +INSTANTIATE_TEST_SUITE_P( + smoke_Reduce_NHWC_SmallChannel_CPU, + ReduceCPULayerTest, + params_NHWC_SmallChannel, + ReduceCPULayerTest::getTestCaseName +); + /* ================================ 1.2 No fusion - Logical ================================ */ const auto params_OneAxis_Logical = testing::Combine( testing::Combine( From 1b72352f6f2348de31337b484b2e0830ee61748c Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 22 Mar 2023 14:20:03 +0400 Subject: [PATCH 037/296] Fixed CVS-93736 (#16471) --- src/core/src/descriptor/tensor.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/core/src/descriptor/tensor.cpp b/src/core/src/descriptor/tensor.cpp index 5a7a4c24a69f34..ed87ce606bf21a 100644 --- a/src/core/src/descriptor/tensor.cpp +++ b/src/core/src/descriptor/tensor.cpp @@ -100,10 +100,8 @@ const ov::Shape& ov::descriptor::Tensor::get_shape() const { size_t ov::descriptor::Tensor::size() const { const bool bitwidth_less_than_byte = m_element_type.bitwidth() < 8; - if (bitwidth_less_than_byte) { - return static_cast(ceil((1.0 * shape_size(get_shape()) * m_element_type.bitwidth()) / 8)); - } - return shape_size(get_shape()) * m_element_type.size(); + return bitwidth_less_than_byte ? 
(shape_size(get_shape()) * m_element_type.bitwidth() + 7) >> 3 + : (shape_size(get_shape()) * m_element_type.size()); } const std::unordered_set& ov::descriptor::Tensor::get_names() const { From 8509d0dd82cce761e06b88ce002a419e362aa333 Mon Sep 17 00:00:00 2001 From: Xuejun Zhai Date: Wed, 22 Mar 2023 23:09:14 +0800 Subject: [PATCH 038/296] [Deprecated API] remove `version` (#16426) * [Remove version] Remove version from py openvino Signed-off-by: Zhai, Xuejun * Modify caused by remove version Signed-off-by: Zhai, Xuejun * Fix clang format issue Signed-off-by: Zhai, Xuejun * Revert "Fix clang format issue" This reverts commit 132787286fdcf4865da3480655e328e3fb607249. * Fix CI format issue Signed-off-by: Zhai, Xuejun * Fix CI format issue Signed-off-by: Zhai, Xuejun * Fix merge conflict error Signed-off-by: Zhai, Xuejun --------- Signed-off-by: Zhai, Xuejun --- .../pyngraph/discrete_type_info.cpp | 6 +- .../src/compatibility/pyngraph/node.cpp | 11 ---- .../pyopenvino/graph/discrete_type_info.cpp | 9 +-- .../python/src/pyopenvino/graph/node.cpp | 9 --- .../python/tests/test_graph/test_basic.py | 7 --- .../python/tests/test_graph/test_core.py | 4 +- .../test_ngraph/test_basic.py | 6 -- .../test_ngraph/test_core.py | 4 +- .../low_precision/markup_precisions.hpp | 6 +- .../markup_quantization_granularity.hpp | 6 +- ...avg_pool_precision_preserved_attribute.hpp | 2 +- .../rt_info/intervals_alignment_attribute.hpp | 2 +- .../rt_info/precision_preserved_attribute.hpp | 2 +- .../rt_info/precisions_attribute.hpp | 2 +- .../quantization_alignment_attribute.hpp | 2 +- .../quantization_granularity_attribute.hpp | 2 +- .../rt_info/quantization_mode_attribute.hpp | 2 +- .../rt_info/skip_cleanup_attribute.hpp | 2 +- .../src/markup_precisions.cpp | 8 +-- .../src/markup_quantization_granularity.cpp | 8 +-- .../include/mask_attribute.hpp | 2 +- .../src/pass/common_optimizations.cpp | 2 +- .../include/ov_ops/nms_static_shape_ie.hpp | 7 +-- .../include/ov_ops/type_relaxed.hpp | 5 +- .../tests/utils/compare_functions_test.cpp | 4 +- src/core/include/ngraph/node.hpp | 20 +++--- src/core/include/openvino/core/model.hpp | 2 +- src/core/include/openvino/core/node.hpp | 8 --- src/core/include/openvino/core/rtti.hpp | 38 ++++++----- .../openvino/core/runtime_attribute.hpp | 2 +- src/core/include/openvino/core/type.hpp | 18 +----- src/core/include/openvino/op/acosh.hpp | 2 +- src/core/include/openvino/op/add.hpp | 2 +- src/core/include/openvino/op/asinh.hpp | 2 +- src/core/include/openvino/op/assign.hpp | 4 +- src/core/include/openvino/op/atanh.hpp | 2 +- src/core/include/openvino/op/avg_pool.hpp | 2 +- src/core/include/openvino/op/batch_norm.hpp | 2 +- .../include/openvino/op/batch_to_space.hpp | 2 +- .../openvino/op/binary_convolution.hpp | 2 +- src/core/include/openvino/op/broadcast.hpp | 4 +- src/core/include/openvino/op/bucketize.hpp | 2 +- src/core/include/openvino/op/convert_like.hpp | 2 +- src/core/include/openvino/op/convolution.hpp | 4 +- .../op/ctc_greedy_decoder_seq_len.hpp | 2 +- src/core/include/openvino/op/ctc_loss.hpp | 2 +- .../openvino/op/deformable_convolution.hpp | 2 +- .../openvino/op/deformable_psroi_pooling.hpp | 2 +- src/core/include/openvino/op/dft.hpp | 2 +- src/core/include/openvino/op/divide.hpp | 2 +- src/core/include/openvino/op/einsum.hpp | 2 +- .../openvino/op/embedding_segments_sum.hpp | 2 +- .../openvino/op/embeddingbag_offsets_sum.hpp | 2 +- .../openvino/op/embeddingbag_packedsum.hpp | 2 +- src/core/include/openvino/op/equal.hpp | 2 +- ...xperimental_detectron_detection_output.hpp | 2 
+- ...erimental_detectron_generate_proposals.hpp | 2 +- ...imental_detectron_prior_grid_generator.hpp | 2 +- .../op/experimental_detectron_roi_feature.hpp | 2 +- .../op/experimental_detectron_topkrois.hpp | 2 +- .../openvino/op/extractimagepatches.hpp | 2 +- src/core/include/openvino/op/floor_mod.hpp | 2 +- src/core/include/openvino/op/gather.hpp | 4 +- .../include/openvino/op/gather_elements.hpp | 2 +- src/core/include/openvino/op/gather_nd.hpp | 2 +- src/core/include/openvino/op/gather_tree.hpp | 2 +- src/core/include/openvino/op/gelu.hpp | 4 +- src/core/include/openvino/op/greater.hpp | 2 +- src/core/include/openvino/op/greater_eq.hpp | 2 +- src/core/include/openvino/op/group_conv.hpp | 4 +- src/core/include/openvino/op/gru_cell.hpp | 2 +- src/core/include/openvino/op/gru_sequence.hpp | 2 +- src/core/include/openvino/op/hsigmoid.hpp | 2 +- src/core/include/openvino/op/hswish.hpp | 2 +- src/core/include/openvino/op/idft.hpp | 2 +- src/core/include/openvino/op/interpolate.hpp | 4 +- src/core/include/openvino/op/less.hpp | 2 +- src/core/include/openvino/op/less_eq.hpp | 2 +- src/core/include/openvino/op/log_softmax.hpp | 2 +- src/core/include/openvino/op/logical_and.hpp | 2 +- src/core/include/openvino/op/logical_not.hpp | 2 +- src/core/include/openvino/op/logical_or.hpp | 2 +- src/core/include/openvino/op/logical_xor.hpp | 2 +- src/core/include/openvino/op/loop.hpp | 2 +- src/core/include/openvino/op/lstm_cell.hpp | 2 +- .../include/openvino/op/lstm_sequence.hpp | 2 +- src/core/include/openvino/op/max_pool.hpp | 2 +- src/core/include/openvino/op/maximum.hpp | 2 +- src/core/include/openvino/op/minimum.hpp | 2 +- src/core/include/openvino/op/mish.hpp | 2 +- src/core/include/openvino/op/mod.hpp | 2 +- src/core/include/openvino/op/multiply.hpp | 2 +- src/core/include/openvino/op/mvn.hpp | 2 +- .../openvino/op/non_max_suppression.hpp | 10 +-- src/core/include/openvino/op/non_zero.hpp | 2 +- src/core/include/openvino/op/not_equal.hpp | 2 +- src/core/include/openvino/op/one_hot.hpp | 2 +- src/core/include/openvino/op/op.hpp | 3 +- src/core/include/openvino/op/pad.hpp | 2 +- src/core/include/openvino/op/power.hpp | 2 +- src/core/include/openvino/op/proposal.hpp | 2 +- src/core/include/openvino/op/range.hpp | 2 +- src/core/include/openvino/op/read_value.hpp | 4 +- src/core/include/openvino/op/reduce_l1.hpp | 2 +- src/core/include/openvino/op/reduce_l2.hpp | 2 +- .../openvino/op/reduce_logical_and.hpp | 2 +- .../include/openvino/op/reduce_logical_or.hpp | 2 +- src/core/include/openvino/op/reduce_max.hpp | 2 +- src/core/include/openvino/op/reduce_mean.hpp | 2 +- src/core/include/openvino/op/reduce_min.hpp | 2 +- src/core/include/openvino/op/reduce_prod.hpp | 2 +- src/core/include/openvino/op/reduce_sum.hpp | 2 +- src/core/include/openvino/op/reshape.hpp | 2 +- src/core/include/openvino/op/reverse.hpp | 2 +- src/core/include/openvino/op/rnn_sequence.hpp | 2 +- src/core/include/openvino/op/roi_align.hpp | 2 +- src/core/include/openvino/op/roll.hpp | 2 +- src/core/include/openvino/op/round.hpp | 2 +- .../openvino/op/scatter_elements_update.hpp | 2 +- .../include/openvino/op/scatter_nd_update.hpp | 2 +- .../include/openvino/op/scatter_update.hpp | 2 +- src/core/include/openvino/op/select.hpp | 2 +- src/core/include/openvino/op/shape_of.hpp | 2 +- src/core/include/openvino/op/softmax.hpp | 2 +- src/core/include/openvino/op/softplus.hpp | 2 +- .../include/openvino/op/space_to_batch.hpp | 2 +- src/core/include/openvino/op/split.hpp | 2 +- .../include/openvino/op/strided_slice.hpp | 2 +- 
src/core/include/openvino/op/subtract.hpp | 2 +- src/core/include/openvino/op/swish.hpp | 2 +- src/core/include/openvino/op/topk.hpp | 6 +- src/core/include/openvino/op/transpose.hpp | 2 +- .../include/openvino/op/variadic_split.hpp | 2 +- src/core/src/node.cpp | 10 ++- src/core/src/pass/low_latency.cpp | 2 +- src/core/src/pass/pass.cpp | 2 +- src/core/src/pass/serialize.cpp | 4 +- src/core/src/type.cpp | 32 ++++------ src/core/tests/graph_rewrite.cpp | 10 +-- src/core/tests/opset.cpp | 8 +-- src/core/tests/pass_config.cpp | 12 ++-- src/core/tests/rtti.cpp | 6 +- src/core/tests/type_info.cpp | 63 +++++++++---------- src/core/tests/type_prop/broadcast.cpp | 1 - src/frontends/ir/src/ir_deserializer.cpp | 4 +- src/inference/src/ie_network_reader.cpp | 2 +- .../swap_convert_transpose.cpp | 2 +- src/plugins/intel_cpu/src/nodes/if.cpp | 2 +- .../src/nodes/non_max_suppression.cpp | 2 +- .../include/legacy/ngraph_ops/onehot_ie.hpp | 6 -- .../include/legacy/ngraph_ops/pad_ie.hpp | 6 -- .../convert_ngraph_to_cnn_network_tests.cpp | 8 +-- .../src/transformations/insert_copy_layer.cpp | 10 +-- .../src/transformations/pwl_approximation.cpp | 4 +- src/plugins/intel_gpu/src/plugin/program.cpp | 6 +- .../common_test_utils/graph_comparator.cpp | 8 +-- .../src/utils/ngraph_helpers.cpp | 8 +-- 157 files changed, 274 insertions(+), 385 deletions(-) diff --git a/src/bindings/python/src/compatibility/pyngraph/discrete_type_info.cpp b/src/bindings/python/src/compatibility/pyngraph/discrete_type_info.cpp index b7418def4d3acf..49f8bb97953ebf 100644 --- a/src/bindings/python/src/compatibility/pyngraph/discrete_type_info.cpp +++ b/src/bindings/python/src/compatibility/pyngraph/discrete_type_info.cpp @@ -28,14 +28,14 @@ void regclass_pyngraph_DiscreteTypeInfo(py::module m) { discrete_type_info.def(py::self != py::self); discrete_type_info.def_readonly("name", &ngraph::DiscreteTypeInfo::name); - discrete_type_info.def_readonly("version", &ngraph::DiscreteTypeInfo::version); + discrete_type_info.def_readonly("version_id", &ngraph::DiscreteTypeInfo::version_id); discrete_type_info.def_readonly("parent", &ngraph::DiscreteTypeInfo::parent); discrete_type_info.def("__repr__", [](const ngraph::DiscreteTypeInfo& self) { std::string name = std::string(self.name); - std::string version = std::to_string(self.version); + std::string version = std::string(self.version_id); if (self.parent != nullptr) { - std::string parent_version = std::to_string(self.parent->version); + std::string parent_version = std::string(self.parent->version_id); std::string parent_name = self.parent->name; return ""; diff --git a/src/bindings/python/src/compatibility/pyngraph/node.cpp b/src/bindings/python/src/compatibility/pyngraph/node.cpp index 02bfb3d1395630..f696a4297cad7f 100644 --- a/src/bindings/python/src/compatibility/pyngraph/node.cpp +++ b/src/bindings/python/src/compatibility/pyngraph/node.cpp @@ -277,16 +277,6 @@ void regclass_pyngraph_Node(py::module m) { get_rt_info : PyRTMap A dictionary of user defined data. )"); - node.def("get_version", - &ngraph::Node::get_version, - R"( - Returns operation's version of the node. - - Returns - ---------- - get_version : int - Operation version. 
- )"); node.def("set_argument", &ngraph::Node::set_argument); node.def("set_arguments", [](const std::shared_ptr& self, const ngraph::NodeVector& args) { @@ -301,7 +291,6 @@ void regclass_pyngraph_Node(py::module m) { node.def_property_readonly("rt_info", (PyRTMap & (ngraph::Node::*)()) & ngraph::Node::get_rt_info, py::return_value_policy::reference_internal); - node.def_property_readonly("version", &ngraph::Node::get_version); node.def_property_readonly("type_info", &ngraph::Node::get_type_info); node.def_property("friendly_name", &ngraph::Node::get_friendly_name, &ngraph::Node::set_friendly_name); diff --git a/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp b/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp index dca43473035be7..cd19fa2a121cbc 100644 --- a/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp +++ b/src/bindings/python/src/pyopenvino/graph/discrete_type_info.cpp @@ -26,23 +26,18 @@ void regclass_graph_DiscreteTypeInfo(py::module m) { discrete_type_info.def(py::self != py::self); discrete_type_info.def_readonly("name", &ov::DiscreteTypeInfo::name); - discrete_type_info.def_readonly("version", &ov::DiscreteTypeInfo::version); discrete_type_info.def_readonly("version_id", &ov::DiscreteTypeInfo::version_id); discrete_type_info.def_readonly("parent", &ov::DiscreteTypeInfo::parent); - discrete_type_info.def("get_version", []() { - Common::utils::deprecation_warning("get_version()", "2024.0", "Please use version attribute instead."); - return &ov::DiscreteTypeInfo::get_version; - }); discrete_type_info.def("hash", [](const ov::DiscreteTypeInfo& self) { return self.hash(); }); discrete_type_info.def("__repr__", [](const ov::DiscreteTypeInfo& self) { std::string name = std::string(self.name); - std::string version = std::to_string(self.version); + std::string version = std::string(self.version_id); if (self.parent != nullptr) { - std::string parent_version = std::to_string(self.parent->version); + std::string parent_version = std::string(self.parent->version_id); std::string parent_name = self.parent->name; return ""; diff --git a/src/bindings/python/src/pyopenvino/graph/node.cpp b/src/bindings/python/src/pyopenvino/graph/node.cpp index 599194ab9b96f0..1e99bff44210e0 100644 --- a/src/bindings/python/src/pyopenvino/graph/node.cpp +++ b/src/bindings/python/src/pyopenvino/graph/node.cpp @@ -304,14 +304,6 @@ void regclass_graph_Node(py::module m) { :return: A dictionary of user defined data. :rtype: openvino.runtime.RTMap )"); - node.def("get_version", - &ov::Node::get_version, - R"( - Returns operation's version of the node. - - :return: Operation version. 
- :rtype: int - )"); node.def("set_argument", &ov::Node::set_argument); node.def("set_arguments", [](const std::shared_ptr& self, const ov::NodeVector& args) { @@ -326,7 +318,6 @@ void regclass_graph_Node(py::module m) { node.def_property_readonly("rt_info", (PyRTMap & (ov::Node::*)()) & ov::Node::get_rt_info, py::return_value_policy::reference_internal); - node.def_property_readonly("version", &ov::Node::get_version); node.def_property_readonly("type_info", &ov::Node::get_type_info); node.def_property("friendly_name", &ov::Node::get_friendly_name, &ov::Node::set_friendly_name); diff --git a/src/bindings/python/tests/test_graph/test_basic.py b/src/bindings/python/tests/test_graph/test_basic.py index 67bb1f1afad7c5..b4cc21edb27150 100644 --- a/src/bindings/python/tests/test_graph/test_basic.py +++ b/src/bindings/python/tests/test_graph/test_basic.py @@ -537,13 +537,6 @@ def test_sink_function_ctor(): assert function.get_friendly_name() == "TestModel" -def test_node_version(): - node = ops.add([1], [2]) - - assert node.get_version() == 1 - assert node.version == 1 - - def test_strides_iteration_methods(): data = np.array([1, 2, 3]) strides = Strides(data) diff --git a/src/bindings/python/tests/test_graph/test_core.py b/src/bindings/python/tests/test_graph/test_core.py index bd02af0fe69089..57e0d26252eec4 100644 --- a/src/bindings/python/tests/test_graph/test_core.py +++ b/src/bindings/python/tests/test_graph/test_core.py @@ -369,10 +369,10 @@ def test_discrete_type_info(): assert n1.get_type_info().name == "TopK" assert n3.get_type_info().name == "Sin" assert n1.type_info.name == n2.type_info.name - assert n1.type_info.version == n2.type_info.version + assert n1.type_info.version_id == n2.type_info.version_id assert n1.type_info.parent == n2.type_info.parent assert n1.get_type_info().name == n2.get_type_info().name - assert n1.get_type_info().version == n2.get_type_info().version + assert n1.get_type_info().version_id == n2.get_type_info().version_id assert n1.get_type_info().parent == n2.get_type_info().parent assert n1.get_type_info().name != n3.get_type_info().name assert n1.get_type_info().name > n3.get_type_info().name diff --git a/src/bindings/python/tests_compatibility/test_ngraph/test_basic.py b/src/bindings/python/tests_compatibility/test_ngraph/test_basic.py index de83f6a77089bf..5acc1a29fd2d49 100644 --- a/src/bindings/python/tests_compatibility/test_ngraph/test_basic.py +++ b/src/bindings/python/tests_compatibility/test_ngraph/test_basic.py @@ -414,9 +414,3 @@ def test_sink_function_ctor(): assert len(function.get_results()) == 1 assert function.get_friendly_name() == "TestFunction" - -def test_node_version(): - node = ng.add([1], [2]) - - assert node.get_version() == 1 - assert node.version == 1 diff --git a/src/bindings/python/tests_compatibility/test_ngraph/test_core.py b/src/bindings/python/tests_compatibility/test_ngraph/test_core.py index 9b50732396e5a5..949946ef842bdc 100644 --- a/src/bindings/python/tests_compatibility/test_ngraph/test_core.py +++ b/src/bindings/python/tests_compatibility/test_ngraph/test_core.py @@ -250,10 +250,10 @@ def test_discrete_type_info(): assert n1.get_type_info().name == "TopK" assert n3.get_type_info().name == "Sin" assert n1.type_info.name == n2.type_info.name - assert n1.type_info.version == n2.type_info.version + assert n1.type_info.version_id == n2.type_info.version_id assert n1.type_info.parent == n2.type_info.parent assert n1.get_type_info().name == n2.get_type_info().name - assert n1.get_type_info().version == n2.get_type_info().version 
+ assert n1.get_type_info().version_id == n2.get_type_info().version_id assert n1.get_type_info().parent == n2.get_type_info().parent assert n1.get_type_info().name != n3.get_type_info().name assert n1.get_type_info().name > n3.get_type_info().name diff --git a/src/common/low_precision_transformations/include/low_precision/markup_precisions.hpp b/src/common/low_precision_transformations/include/low_precision/markup_precisions.hpp index 783dcafb13f678..97f3bc7569984e 100644 --- a/src/common/low_precision_transformations/include/low_precision/markup_precisions.hpp +++ b/src/common/low_precision_transformations/include/low_precision/markup_precisions.hpp @@ -39,12 +39,12 @@ class ngraph::pass::low_precision::MarkupPrecisions : public ngraph::pass::Funct class Restriction { public: explicit Restriction(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {} - void add(const uint64_t version, const ngraph::pass::low_precision::PrecisionsRestriction::PrecisionsByPorts& precisions) { - precisionsByVersion.emplace(version, precisions); + void add(const std::string version_id, const ngraph::pass::low_precision::PrecisionsRestriction::PrecisionsByPorts& precisions) { + precisionsByVersion.emplace(version_id, precisions); } bool versionIsRequired; - std::unordered_map precisionsByVersion; + std::unordered_map precisionsByVersion; }; OPENVINO_RTTI("MarkupPrecisions", "0"); diff --git a/src/common/low_precision_transformations/include/low_precision/markup_quantization_granularity.hpp b/src/common/low_precision_transformations/include/low_precision/markup_quantization_granularity.hpp index 23558ec81a26d5..098da265cee0b4 100644 --- a/src/common/low_precision_transformations/include/low_precision/markup_quantization_granularity.hpp +++ b/src/common/low_precision_transformations/include/low_precision/markup_quantization_granularity.hpp @@ -37,12 +37,12 @@ class ngraph::pass::low_precision::MarkupQuantizationGranularity : public ngraph class PerTensorQuantization { public: explicit PerTensorQuantization(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {} - void add(const uint64_t version, const std::vector& restrictions) { - portsByVersion.emplace(version, restrictions); + void add(const std::string version_id, const std::vector& restrictions) { + portsByVersion.emplace(version_id, restrictions); } bool versionIsRequired; - std::unordered_map> portsByVersion; + std::unordered_map> portsByVersion; }; OPENVINO_RTTI("MarkupPerTensorQuantization", "0"); diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp index 4064edabf03006..c7c84e2122960c 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp @@ -23,7 +23,7 @@ namespace ngraph { */ class LP_TRANSFORMATIONS_API AvgPoolPrecisionPreservedAttribute : public PrecisionPreservedAttribute { public: - OPENVINO_RTTI("LowPrecision::AvgPoolPrecisionPreserved", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::AvgPoolPrecisionPreserved", "", ov::RuntimeAttribute); using PrecisionPreservedAttribute::PrecisionPreservedAttribute; void merge_attributes(std::vector& attributes); bool is_skipped() const; diff --git 
a/src/common/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp index dcdf552856062f..99d6e814c2abe0 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp @@ -58,7 +58,7 @@ class LP_TRANSFORMATIONS_API IntervalsAlignmentSharedValue { */ class LP_TRANSFORMATIONS_API IntervalsAlignmentAttribute : public SharedAttribute { public: - OPENVINO_RTTI("LowPrecision::IntervalsAlignment", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::IntervalsAlignment", "", ov::RuntimeAttribute); IntervalsAlignmentAttribute() = default; IntervalsAlignmentAttribute(IntervalsAlignmentSharedValue::Interval combinedInterval, size_t levels); IntervalsAlignmentAttribute( diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp index 13baf73c3eb1b6..f5d14342a8f230 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp @@ -22,7 +22,7 @@ namespace ngraph { */ class LP_TRANSFORMATIONS_API PrecisionPreservedAttribute : public SharedAttribute { public: - OPENVINO_RTTI("LowPrecision::PrecisionPreserved", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::PrecisionPreserved", "", ov::RuntimeAttribute); PrecisionPreservedAttribute() = default; PrecisionPreservedAttribute(const bool value); diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp index 41f88a290b6a7c..0a69000b85e4e1 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp @@ -26,7 +26,7 @@ namespace ngraph { */ class LP_TRANSFORMATIONS_API PrecisionsAttribute : public SharedAttribute> { public: - OPENVINO_RTTI("LowPrecision::Precisions", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::Precisions", "", ov::RuntimeAttribute); PrecisionsAttribute(const std::vector& precisions); static ov::Any create( diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp index 0c766b71885ab5..30daa5f8073082 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp @@ -27,7 +27,7 @@ namespace ngraph { */ class LP_TRANSFORMATIONS_API QuantizationAlignmentAttribute : public SharedAttribute { public: - OPENVINO_RTTI("LowPrecision::QuantizationAlignment", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::QuantizationAlignment", "", ov::RuntimeAttribute); QuantizationAlignmentAttribute(const bool value = false); static ov::Any create( diff 
--git a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_granularity_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_granularity_attribute.hpp index 84f2bf474c82a2..df466ef041cf4a 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_granularity_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_granularity_attribute.hpp @@ -22,7 +22,7 @@ namespace ngraph { */ class LP_TRANSFORMATIONS_API QuantizationGranularityAttribute : public ov::RuntimeAttribute { public: - OPENVINO_RTTI("LowPrecision::QuantizationGranularity", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::QuantizationGranularity", "", ov::RuntimeAttribute); enum class Granularity { PerChannel, diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_mode_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_mode_attribute.hpp index a3658061510ee0..3c3a454e4b9519 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_mode_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/quantization_mode_attribute.hpp @@ -12,7 +12,7 @@ namespace ngraph { class LP_TRANSFORMATIONS_API QuantizationModeAttribute : public ov::RuntimeAttribute { public: - OPENVINO_RTTI("LowPrecision::QuantizationModeAttribute", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::QuantizationModeAttribute", "", ov::RuntimeAttribute); enum class Mode { Asymmetric, diff --git a/src/common/low_precision_transformations/include/low_precision/rt_info/skip_cleanup_attribute.hpp b/src/common/low_precision_transformations/include/low_precision/rt_info/skip_cleanup_attribute.hpp index 1a11bbc2983ea2..1b323ee9424794 100644 --- a/src/common/low_precision_transformations/include/low_precision/rt_info/skip_cleanup_attribute.hpp +++ b/src/common/low_precision_transformations/include/low_precision/rt_info/skip_cleanup_attribute.hpp @@ -11,7 +11,7 @@ namespace ngraph { class LP_TRANSFORMATIONS_API SkipCleanupAttribute : public ov::RuntimeAttribute { public: - OPENVINO_RTTI("LowPrecision::SkipCleanup", "", ov::RuntimeAttribute, 0); + OPENVINO_RTTI("LowPrecision::SkipCleanup", "", ov::RuntimeAttribute); static ov::Any create(const std::shared_ptr& node); }; } // namespace ngraph diff --git a/src/common/low_precision_transformations/src/markup_precisions.cpp b/src/common/low_precision_transformations/src/markup_precisions.cpp index d5c168d4502131..a1748036012f13 100644 --- a/src/common/low_precision_transformations/src/markup_precisions.cpp +++ b/src/common/low_precision_transformations/src/markup_precisions.cpp @@ -30,10 +30,10 @@ ngraph::pass::low_precision::MarkupPrecisions::MarkupPrecisions( OPENVINO_SUPPRESS_DEPRECATED_START if (it == restrictionsByOperation.end()) { Restriction r(restriction.specifyVersion); - r.precisionsByVersion.emplace(restriction.operationType.version, restriction.precisionsByPorts); + r.precisionsByVersion.emplace(restriction.operationType.version_id, restriction.precisionsByPorts); restrictionsByOperation.emplace(restriction.operationType.name, r); } else { - it->second.add(restriction.operationType.version, restriction.precisionsByPorts); + it->second.add(restriction.operationType.version_id, restriction.precisionsByPorts); } OPENVINO_SUPPRESS_DEPRECATED_END } @@ -108,9 +108,7 @@ bool 
ngraph::pass::low_precision::MarkupPrecisions::run_on_model(const std::shar if (it != restrictionsByOperation.end()) { const Restriction& r = it->second; if (r.versionIsRequired) { - OPENVINO_SUPPRESS_DEPRECATED_START - const auto it2 = r.precisionsByVersion.find(typeInfo.version); - OPENVINO_SUPPRESS_DEPRECATED_END + const auto it2 = r.precisionsByVersion.find(typeInfo.version_id); if (it2 == r.precisionsByVersion.end()) { continue; } diff --git a/src/common/low_precision_transformations/src/markup_quantization_granularity.cpp b/src/common/low_precision_transformations/src/markup_quantization_granularity.cpp index 7b86c1d2203c50..bbe448d83423ec 100644 --- a/src/common/low_precision_transformations/src/markup_quantization_granularity.cpp +++ b/src/common/low_precision_transformations/src/markup_quantization_granularity.cpp @@ -20,10 +20,10 @@ ngraph::pass::low_precision::MarkupQuantizationGranularity::MarkupQuantizationGr OPENVINO_SUPPRESS_DEPRECATED_START if (it == restrictionsByOperation.end()) { PerTensorQuantization r(restriction.specifyVersion); - r.portsByVersion.emplace(restriction.operationType.version, restriction.restrictions); + r.portsByVersion.emplace(restriction.operationType.version_id, restriction.restrictions); restrictionsByOperation.emplace(restriction.operationType.name, r); } else { - it->second.add(restriction.operationType.version, restriction.restrictions); + it->second.add(restriction.operationType.version_id, restriction.restrictions); } OPENVINO_SUPPRESS_DEPRECATED_END } @@ -74,9 +74,7 @@ bool ngraph::pass::low_precision::MarkupQuantizationGranularity::run_on_model(co } if (restriction.versionIsRequired) { - OPENVINO_SUPPRESS_DEPRECATED_START - const auto it2 = restriction.portsByVersion.find(node->get_type_info().version); - OPENVINO_SUPPRESS_DEPRECATED_END + const auto it2 = restriction.portsByVersion.find(node->get_type_info().version_id); if (it2 == restriction.portsByVersion.end()) { continue; } diff --git a/src/common/offline_transformations/include/mask_attribute.hpp b/src/common/offline_transformations/include/mask_attribute.hpp index 33ec516ded9c94..22ad9ba70d054a 100644 --- a/src/common/offline_transformations/include/mask_attribute.hpp +++ b/src/common/offline_transformations/include/mask_attribute.hpp @@ -28,7 +28,7 @@ namespace ngraph { class Mask : public std::vector>, public std::enable_shared_from_this { public: static const ::ov::DiscreteTypeInfo& get_type_info_static() { - static const ::ov::DiscreteTypeInfo type_info_static{"Mask", 0, "0"}; + static const ::ov::DiscreteTypeInfo type_info_static{"Mask", "0"}; return type_info_static; } diff --git a/src/common/snippets/src/pass/common_optimizations.cpp b/src/common/snippets/src/pass/common_optimizations.cpp index 787fb8f650d5be..04278526ce8c34 100644 --- a/src/common/snippets/src/pass/common_optimizations.cpp +++ b/src/common/snippets/src/pass/common_optimizations.cpp @@ -17,7 +17,7 @@ #include "snippets/utils.hpp" #include "snippets/itt.hpp" -NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::CommonOptimizations, "Snippets::CommonOptimizations", 0); +NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::CommonOptimizations, "Snippets::CommonOptimizations"); namespace ngraph { namespace snippets { diff --git a/src/common/transformations/include/ov_ops/nms_static_shape_ie.hpp b/src/common/transformations/include/ov_ops/nms_static_shape_ie.hpp index 0c4f38628dce1b..5da183db15a240 100644 --- a/src/common/transformations/include/ov_ops/nms_static_shape_ie.hpp +++ 
b/src/common/transformations/include/ov_ops/nms_static_shape_ie.hpp @@ -29,14 +29,9 @@ namespace internal { template class NmsStaticShapeIE : public BaseNmsOp { public: - OPENVINO_SUPPRESS_DEPRECATED_START // TODO: it should be std::string("NmsStaticShapeIE_") + BaseNmsOp::get_type_info_static().name, // but currently it does not pass conversion to Legacy Opset correctly - OPENVINO_RTTI(BaseNmsOp::get_type_info_static().name, - "ie_internal_opset", - BaseNmsOp, - BaseNmsOp::get_type_info_static().version); - OPENVINO_SUPPRESS_DEPRECATED_END + OPENVINO_RTTI(BaseNmsOp::get_type_info_static().name, "ie_internal_opset", BaseNmsOp); NmsStaticShapeIE() = default; diff --git a/src/common/transformations/include/ov_ops/type_relaxed.hpp b/src/common/transformations/include/ov_ops/type_relaxed.hpp index 47a791a4b0be42..fa84ab4eb7c945 100644 --- a/src/common/transformations/include/ov_ops/type_relaxed.hpp +++ b/src/common/transformations/include/ov_ops/type_relaxed.hpp @@ -189,10 +189,7 @@ OPENVINO_SUPPRESS_DEPRECATED_START template class TypeRelaxed : public BaseOp, public TypeRelaxedBase { public: - OPENVINO_OP(BaseOp::get_type_info_static().name, - BaseOp::get_type_info_static().version_id, - BaseOp, - BaseOp::get_type_info_static().version); + OPENVINO_OP(BaseOp::get_type_info_static().name, BaseOp::get_type_info_static().version_id, BaseOp); using BaseOp::BaseOp; diff --git a/src/common/transformations/tests/utils/compare_functions_test.cpp b/src/common/transformations/tests/utils/compare_functions_test.cpp index f27b30f18b933b..c89eddb97a4611 100644 --- a/src/common/transformations/tests/utils/compare_functions_test.cpp +++ b/src/common/transformations/tests/utils/compare_functions_test.cpp @@ -201,7 +201,7 @@ TEST(TransformationTests, CompareFunctoinsTINegative) { const auto fc = FunctionsComparator::with_default().enable(FunctionsComparator::ATTRIBUTES); auto res = fc(f, f_ref); EXPECT_FALSE(res.valid); - EXPECT_THAT(res.message, HasSubstr("LSTMCell/4 != Relu/0")); + EXPECT_THAT(res.message, HasSubstr("LSTMCell/opset4 != Relu/opset1")); } TEST(TransformationTests, CompareFunctoinsTINegativeDifferentElementTypeBetweenSubGraphsInputs) { @@ -514,7 +514,7 @@ class DummyConstant : public ngraph::op::Op { DummyConstant& operator=(const DummyConstant&) = delete; const NodeTypeInfo& get_type_info() const override { - static const NodeTypeInfo type_info{typeid(this).name(), static_cast(0)}; + static const NodeTypeInfo type_info{typeid(this).name(), "0"}; return type_info; } diff --git a/src/core/include/ngraph/node.hpp b/src/core/include/ngraph/node.hpp index 776dcb6bebc653..76090d3cca32b1 100644 --- a/src/core/include/ngraph/node.hpp +++ b/src/core/include/ngraph/node.hpp @@ -150,21 +150,19 @@ using ov::check_new_args_count; } #endif -#define _NGRAPH_RTTI_DEFINITION_WITH_PARENT(CLASS, TYPE_NAME, _VERSION_INDEX, PARENT_CLASS) \ - const ::ngraph::Node::type_info_t& CLASS::get_type_info_static() { \ - static const ::ngraph::Node::type_info_t type_info_static{TYPE_NAME, \ - static_cast(_VERSION_INDEX), \ - &PARENT_CLASS::get_type_info_static()}; \ - return type_info_static; \ - } \ - _NGRAPH_RTTI_DEFINITION_COMMON(CLASS) - -#define _NGRAPH_RTTI_DEFINITION_NO_PARENT(CLASS, TYPE_NAME, _VERSION_INDEX) \ +#define _NGRAPH_RTTI_DEFINITION_WITH_PARENT(CLASS, TYPE_NAME, PARENT_CLASS) \ const ::ngraph::Node::type_info_t& CLASS::get_type_info_static() { \ - static const ::ngraph::Node::type_info_t type_info_static{TYPE_NAME, static_cast(_VERSION_INDEX)}; \ + static const ::ngraph::Node::type_info_t 
type_info_static{TYPE_NAME, &PARENT_CLASS::get_type_info_static()}; \ return type_info_static; \ } \ _NGRAPH_RTTI_DEFINITION_COMMON(CLASS) + +#define _NGRAPH_RTTI_DEFINITION_NO_PARENT(CLASS, TYPE_NAME) \ + const ::ngraph::Node::type_info_t& CLASS::get_type_info_static() { \ + static const ::ngraph::Node::type_info_t type_info_static{TYPE_NAME}; \ + return type_info_static; \ + } \ + _NGRAPH_RTTI_DEFINITION_COMMON(CLASS) #define NGRAPH_RTTI_DEFINITION(...) \ _OPENVINO_RTTI_EXPAND(_OPENVINO_RTTI_DEFINITION_SELECTOR(__VA_ARGS__, \ _NGRAPH_RTTI_DEFINITION_WITH_PARENT, \ diff --git a/src/core/include/openvino/core/model.hpp b/src/core/include/openvino/core/model.hpp index 159a2c57533a77..e5d0158e538ff4 100644 --- a/src/core/include/openvino/core/model.hpp +++ b/src/core/include/openvino/core/model.hpp @@ -47,7 +47,7 @@ class OPENVINO_API Model : public std::enable_shared_from_this { public: _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() { - static const ::ov::DiscreteTypeInfo type_info_static{"Model", static_cast(0)}; + static const ::ov::DiscreteTypeInfo type_info_static{"Model"}; return type_info_static; } const ::ov::DiscreteTypeInfo& get_type_info() const { diff --git a/src/core/include/openvino/core/node.hpp b/src/core/include/openvino/core/node.hpp index 549fca57e2c73a..c2bb3fef23404f 100644 --- a/src/core/include/openvino/core/node.hpp +++ b/src/core/include/openvino/core/node.hpp @@ -409,14 +409,6 @@ class OPENVINO_API Node : public std::enable_shared_from_this { /// Get all the nodes that uses the current node NodeVector get_users(bool check_is_used = false) const; - /// \return Version of this node - OPENVINO_DEPRECATED("This method is deprecated and will be removed soon.") - virtual size_t get_version() const { - OPENVINO_SUPPRESS_DEPRECATED_START - return get_type_info().version; - OPENVINO_SUPPRESS_DEPRECATED_END - } - /// Use instance ids for comparison instead of memory addresses to improve determinism bool operator<(const Node& other) const { return m_instance_id < other.m_instance_id; diff --git a/src/core/include/openvino/core/rtti.hpp b/src/core/include/openvino/core/rtti.hpp index 505d6e687eb29b..06b541204d88c3 100644 --- a/src/core/include/openvino/core/rtti.hpp +++ b/src/core/include/openvino/core/rtti.hpp @@ -7,14 +7,14 @@ #include "openvino/core/type.hpp" #include "openvino/core/visibility.hpp" -#define _OPENVINO_RTTI_EXPAND(X) X -#define _OPENVINO_RTTI_DEFINITION_SELECTOR(_1, _2, _3, _4, NAME, ...) NAME +#define _OPENVINO_RTTI_EXPAND(X) X +#define _OPENVINO_RTTI_DEFINITION_SELECTOR(_1, _2, _3, NAME, ...) 
NAME #define _OPENVINO_RTTI_WITH_TYPE(TYPE_NAME) _OPENVINO_RTTI_WITH_TYPE_VERSION(TYPE_NAME, "util") #define _OPENVINO_RTTI_WITH_TYPE_VERSION(TYPE_NAME, VERSION_NAME) \ _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() { \ - static ::ov::DiscreteTypeInfo type_info_static{TYPE_NAME, 0, VERSION_NAME}; \ + static ::ov::DiscreteTypeInfo type_info_static{TYPE_NAME, VERSION_NAME}; \ type_info_static.hash(); \ return type_info_static; \ } \ @@ -23,19 +23,18 @@ } #define _OPENVINO_RTTI_WITH_TYPE_VERSION_PARENT(TYPE_NAME, VERSION_NAME, PARENT_CLASS) \ - _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT(TYPE_NAME, VERSION_NAME, PARENT_CLASS, 0) + _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT(TYPE_NAME, VERSION_NAME, PARENT_CLASS) -#define _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT(TYPE_NAME, VERSION_NAME, PARENT_CLASS, OLD_VERSION) \ - _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() { \ - static ::ov::DiscreteTypeInfo type_info_static{TYPE_NAME, \ - OLD_VERSION, \ - VERSION_NAME, \ - &PARENT_CLASS::get_type_info_static()}; \ - type_info_static.hash(); \ - return type_info_static; \ - } \ - const ::ov::DiscreteTypeInfo& get_type_info() const override { \ - return get_type_info_static(); \ +#define _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT(TYPE_NAME, VERSION_NAME, PARENT_CLASS) \ + _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() { \ + static ::ov::DiscreteTypeInfo type_info_static{TYPE_NAME, \ + VERSION_NAME, \ + &PARENT_CLASS::get_type_info_static()}; \ + type_info_static.hash(); \ + return type_info_static; \ + } \ + const ::ov::DiscreteTypeInfo& get_type_info() const override { \ + return get_type_info_static(); \ } /// Helper macro that puts necessary declarations of RTTI block inside a class definition. @@ -92,11 +91,10 @@ /// OPENVINO_RTTI(name, version_id) /// OPENVINO_RTTI(name, version_id, parent) /// OPENVINO_RTTI(name, version_id, parent, old_version) -#define OPENVINO_RTTI(...) \ - _OPENVINO_RTTI_EXPAND(_OPENVINO_RTTI_DEFINITION_SELECTOR(__VA_ARGS__, \ - _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT, \ - _OPENVINO_RTTI_WITH_TYPE_VERSION_PARENT, \ - _OPENVINO_RTTI_WITH_TYPE_VERSION, \ +#define OPENVINO_RTTI(...) 
\ + _OPENVINO_RTTI_EXPAND(_OPENVINO_RTTI_DEFINITION_SELECTOR(__VA_ARGS__, \ + _OPENVINO_RTTI_WITH_TYPE_VERSION_PARENT, \ + _OPENVINO_RTTI_WITH_TYPE_VERSION, \ _OPENVINO_RTTI_WITH_TYPE)(__VA_ARGS__)) /// Note: Please don't use this macros for new operations diff --git a/src/core/include/openvino/core/runtime_attribute.hpp b/src/core/include/openvino/core/runtime_attribute.hpp index 4aca1b00330fdb..5502c3eb8ff280 100644 --- a/src/core/include/openvino/core/runtime_attribute.hpp +++ b/src/core/include/openvino/core/runtime_attribute.hpp @@ -20,7 +20,7 @@ class Any; class OPENVINO_API RuntimeAttribute { public: _OPENVINO_HIDDEN_METHOD static const DiscreteTypeInfo& get_type_info_static() { - static const ::ov::DiscreteTypeInfo type_info_static{"RuntimeAttribute", static_cast(0)}; + static const ::ov::DiscreteTypeInfo type_info_static{"RuntimeAttribute"}; return type_info_static; } virtual const DiscreteTypeInfo& get_type_info() const { diff --git a/src/core/include/openvino/core/type.hpp b/src/core/include/openvino/core/type.hpp index 6fa3e3d56f6285..6ceaa39cbe08e0 100644 --- a/src/core/include/openvino/core/type.hpp +++ b/src/core/include/openvino/core/type.hpp @@ -30,14 +30,11 @@ namespace ov { */ struct OPENVINO_API DiscreteTypeInfo { const char* name; - OPENVINO_DEPRECATED("This member was deprecated. Please use version_id instead.") - uint64_t version; const char* version_id; // A pointer to a parent type info; used for casting and inheritance traversal, not for // exact type identification const DiscreteTypeInfo* parent; - OPENVINO_SUPPRESS_DEPRECATED_START DiscreteTypeInfo() = default; DiscreteTypeInfo(const DiscreteTypeInfo&) = default; DiscreteTypeInfo(DiscreteTypeInfo&&) = default; @@ -47,29 +44,16 @@ struct OPENVINO_API DiscreteTypeInfo { const char* _version_id, const DiscreteTypeInfo* _parent = nullptr) : name(_name), - version(0), version_id(_version_id), parent(_parent), hash_value(0) {} - constexpr DiscreteTypeInfo(const char* _name, uint64_t _version, const DiscreteTypeInfo* _parent = nullptr) + constexpr DiscreteTypeInfo(const char* _name, const DiscreteTypeInfo* _parent = nullptr) : name(_name), - version(_version), version_id(nullptr), parent(_parent), hash_value(0) {} - constexpr DiscreteTypeInfo(const char* _name, - uint64_t _version, - const char* _version_id, - const DiscreteTypeInfo* _parent = nullptr) - : name(_name), - version(_version), - version_id(_version_id), - parent(_parent), - hash_value(0) {} - OPENVINO_SUPPRESS_DEPRECATED_END - bool is_castable(const DiscreteTypeInfo& target_type) const; std::string get_version() const; diff --git a/src/core/include/openvino/op/acosh.hpp b/src/core/include/openvino/op/acosh.hpp index 97224708df1f50..59a2dd45e4b06c 100644 --- a/src/core/include/openvino/op/acosh.hpp +++ b/src/core/include/openvino/op/acosh.hpp @@ -16,7 +16,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Acosh : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Acosh", "opset4", util::UnaryElementwiseArithmetic, 3); + OPENVINO_OP("Acosh", "opset4", util::UnaryElementwiseArithmetic); /// \brief Constructs an Acosh operation. 
Acosh() = default; diff --git a/src/core/include/openvino/op/add.hpp b/src/core/include/openvino/op/add.hpp index 0e2a812a9dc938..054ec3302d846c 100644 --- a/src/core/include/openvino/op/add.hpp +++ b/src/core/include/openvino/op/add.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Add : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Add", "opset1", util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Add", "opset1", util::BinaryElementwiseArithmetic); /// \brief Constructs an uninitialized addition operation Add() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/asinh.hpp b/src/core/include/openvino/op/asinh.hpp index 8fa8eedd687dc4..96a7f02bfc7e12 100644 --- a/src/core/include/openvino/op/asinh.hpp +++ b/src/core/include/openvino/op/asinh.hpp @@ -15,7 +15,7 @@ namespace v3 { /// class OPENVINO_API Asinh : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Asinh", "opset4", util::UnaryElementwiseArithmetic, 3); + OPENVINO_OP("Asinh", "opset4", util::UnaryElementwiseArithmetic); /// \brief Constructs an Asinh operation. Asinh() = default; diff --git a/src/core/include/openvino/op/assign.hpp b/src/core/include/openvino/op/assign.hpp index 7c5f2b477a573e..2ba16d46195ffc 100644 --- a/src/core/include/openvino/op/assign.hpp +++ b/src/core/include/openvino/op/assign.hpp @@ -14,7 +14,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Assign : public util::AssignBase { public: - OPENVINO_OP("Assign", "opset3", util::AssignBase, 3); + OPENVINO_OP("Assign", "opset3", util::AssignBase); Assign() = default; /// \brief Constructs an Assign operation. @@ -44,7 +44,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Assign : public util::AssignBase { public: - OPENVINO_OP("Assign", "opset6", util::AssignBase, 6); + OPENVINO_OP("Assign", "opset6", util::AssignBase); Assign() = default; /// \brief Constructs an Assign operation. diff --git a/src/core/include/openvino/op/atanh.hpp b/src/core/include/openvino/op/atanh.hpp index 5fc62facb560af..4d4220b6fdcaa5 100644 --- a/src/core/include/openvino/op/atanh.hpp +++ b/src/core/include/openvino/op/atanh.hpp @@ -16,7 +16,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Atanh : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Atanh", "opset4", util::UnaryElementwiseArithmetic, 3); + OPENVINO_OP("Atanh", "opset4", util::UnaryElementwiseArithmetic); /// \brief Constructs an Atanh operation. Atanh() = default; diff --git a/src/core/include/openvino/op/avg_pool.hpp b/src/core/include/openvino/op/avg_pool.hpp index a69e6cc8502da0..24298c8602bbd2 100644 --- a/src/core/include/openvino/op/avg_pool.hpp +++ b/src/core/include/openvino/op/avg_pool.hpp @@ -14,7 +14,7 @@ namespace v1 { /// class OPENVINO_API AvgPool : public Op { public: - OPENVINO_OP("AvgPool", "opset1", op::Op, 1); + OPENVINO_OP("AvgPool", "opset1", op::Op); /// \brief Constructs a batched average pooling operation. 
AvgPool() = default; diff --git a/src/core/include/openvino/op/batch_norm.hpp b/src/core/include/openvino/op/batch_norm.hpp index 7131f0078d665d..3800090312cb7f 100644 --- a/src/core/include/openvino/op/batch_norm.hpp +++ b/src/core/include/openvino/op/batch_norm.hpp @@ -59,7 +59,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API BatchNormInference : public Op { public: - OPENVINO_OP("BatchNormInference", "opset5", op::Op, 5); + OPENVINO_OP("BatchNormInference", "opset5", op::Op); BatchNormInference() = default; /// \param input [., C, ...] /// \param gamma gamma scaling for normalized value. [C] diff --git a/src/core/include/openvino/op/batch_to_space.hpp b/src/core/include/openvino/op/batch_to_space.hpp index 4d28c16d067229..6609e539087628 100644 --- a/src/core/include/openvino/op/batch_to_space.hpp +++ b/src/core/include/openvino/op/batch_to_space.hpp @@ -23,7 +23,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API BatchToSpace : public Op { public: - OPENVINO_OP("BatchToSpace", "opset2", op::Op, 1); + OPENVINO_OP("BatchToSpace", "opset2", op::Op); BatchToSpace() = default; /// \brief Constructs a BatchToSpace operation. /// diff --git a/src/core/include/openvino/op/binary_convolution.hpp b/src/core/include/openvino/op/binary_convolution.hpp index 0dbb2d494bd6eb..8cdcd91e1b03a2 100644 --- a/src/core/include/openvino/op/binary_convolution.hpp +++ b/src/core/include/openvino/op/binary_convolution.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API BinaryConvolution : public Op { public: - OPENVINO_OP("BinaryConvolution", "opset1", op::Op, 1); + OPENVINO_OP("BinaryConvolution", "opset1", op::Op); enum class BinaryConvolutionMode { // Interpret input data and kernel values: 0 as -1, 1 as 1 diff --git a/src/core/include/openvino/op/broadcast.hpp b/src/core/include/openvino/op/broadcast.hpp index e41c7e6c601193..fccffc5ed1d22e 100644 --- a/src/core/include/openvino/op/broadcast.hpp +++ b/src/core/include/openvino/op/broadcast.hpp @@ -17,7 +17,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Broadcast : public util::BroadcastBase { public: - OPENVINO_OP("Broadcast", "opset3", op::util::BroadcastBase, 3); + OPENVINO_OP("Broadcast", "opset3", op::util::BroadcastBase); /// \brief Constructs a broadcast operation. Broadcast() = default; @@ -81,7 +81,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Broadcast : public util::BroadcastBase { public: - OPENVINO_OP("Broadcast", "opset1", op::util::BroadcastBase, 1); + OPENVINO_OP("Broadcast", "opset1", op::util::BroadcastBase); /// \brief Constructs a broadcast operation. 
Broadcast() = default; diff --git a/src/core/include/openvino/op/bucketize.hpp b/src/core/include/openvino/op/bucketize.hpp index fa8d34579e7273..33bbfbc8003862 100644 --- a/src/core/include/openvino/op/bucketize.hpp +++ b/src/core/include/openvino/op/bucketize.hpp @@ -13,7 +13,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Bucketize : public Op { public: - OPENVINO_OP("Bucketize", "opset3", op::Op, 3); + OPENVINO_OP("Bucketize", "opset3", op::Op); Bucketize() = default; /// \brief Constructs a Bucketize node diff --git a/src/core/include/openvino/op/convert_like.hpp b/src/core/include/openvino/op/convert_like.hpp index 27bd89c6e9f106..2621a1ce860131 100644 --- a/src/core/include/openvino/op/convert_like.hpp +++ b/src/core/include/openvino/op/convert_like.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ConvertLike : public Op { public: - OPENVINO_OP("ConvertLike", "opset1", op::Op, 1); + OPENVINO_OP("ConvertLike", "opset1", op::Op); /// \brief Constructs a conversion operation. ConvertLike() = default; diff --git a/src/core/include/openvino/op/convolution.hpp b/src/core/include/openvino/op/convolution.hpp index 87c17f8167c41d..f340fbb544c8c5 100644 --- a/src/core/include/openvino/op/convolution.hpp +++ b/src/core/include/openvino/op/convolution.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Convolution : public Op { public: - OPENVINO_OP("Convolution", "opset1", op::Op, 1); + OPENVINO_OP("Convolution", "opset1", op::Op); /// \brief Constructs a batched convolution operation. Convolution() = default; @@ -129,7 +129,7 @@ class OPENVINO_API Convolution : public Op { /// \ingroup ov_ops_cpp_api class OPENVINO_API ConvolutionBackpropData : public Op { public: - OPENVINO_OP("ConvolutionBackpropData", "opset1", op::Op, 1); + OPENVINO_OP("ConvolutionBackpropData", "opset1", op::Op); /// \brief Constructs a batched-convolution data batch-backprop operation. 
ConvolutionBackpropData() = default; diff --git a/src/core/include/openvino/op/ctc_greedy_decoder_seq_len.hpp b/src/core/include/openvino/op/ctc_greedy_decoder_seq_len.hpp index 4846f750ceaf6b..e07493a781128c 100644 --- a/src/core/include/openvino/op/ctc_greedy_decoder_seq_len.hpp +++ b/src/core/include/openvino/op/ctc_greedy_decoder_seq_len.hpp @@ -14,7 +14,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API CTCGreedyDecoderSeqLen : public Op { public: - OPENVINO_OP("CTCGreedyDecoderSeqLen", "opset6", op::Op, 6); + OPENVINO_OP("CTCGreedyDecoderSeqLen", "opset6", op::Op); CTCGreedyDecoderSeqLen() = default; /// \brief Constructs a CTCGreedyDecoderSeqLen operation /// diff --git a/src/core/include/openvino/op/ctc_loss.hpp b/src/core/include/openvino/op/ctc_loss.hpp index 21f143608fc90f..3a1077d9394e22 100644 --- a/src/core/include/openvino/op/ctc_loss.hpp +++ b/src/core/include/openvino/op/ctc_loss.hpp @@ -14,7 +14,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API CTCLoss : public Op { public: - OPENVINO_OP("CTCLoss", "opset4", op::Op, 4); + OPENVINO_OP("CTCLoss", "opset4", op::Op); CTCLoss() = default; /// \brief Constructs a CTCLoss operation diff --git a/src/core/include/openvino/op/deformable_convolution.hpp b/src/core/include/openvino/op/deformable_convolution.hpp index 3eb673cd340ad6..993c779e5cb2bd 100644 --- a/src/core/include/openvino/op/deformable_convolution.hpp +++ b/src/core/include/openvino/op/deformable_convolution.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API DeformableConvolution : public op::util::DeformableConvolutionBase { public: - OPENVINO_OP("DeformableConvolution", "opset1", op::util::DeformableConvolutionBase, 1); + OPENVINO_OP("DeformableConvolution", "opset1", op::util::DeformableConvolutionBase); /// \brief Constructs a conversion operation. DeformableConvolution() = default; diff --git a/src/core/include/openvino/op/deformable_psroi_pooling.hpp b/src/core/include/openvino/op/deformable_psroi_pooling.hpp index 60bc07f3dd2861..3e3315a95b93de 100644 --- a/src/core/include/openvino/op/deformable_psroi_pooling.hpp +++ b/src/core/include/openvino/op/deformable_psroi_pooling.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API DeformablePSROIPooling : public Op { public: - OPENVINO_OP("DeformablePSROIPooling", "opset1", op::Op, 1); + OPENVINO_OP("DeformablePSROIPooling", "opset1", op::Op); DeformablePSROIPooling() = default; /// \brief Constructs a DeformablePSROIPooling operation diff --git a/src/core/include/openvino/op/dft.hpp b/src/core/include/openvino/op/dft.hpp index 6072f711650b9c..b87262fd78b919 100644 --- a/src/core/include/openvino/op/dft.hpp +++ b/src/core/include/openvino/op/dft.hpp @@ -29,7 +29,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API DFT : public util::FFTBase { public: - OPENVINO_OP("DFT", "opset7", util::FFTBase, 7); + OPENVINO_OP("DFT", "opset7", util::FFTBase); DFT() = default; /// \brief Constructs a DFT operation. DFT is performed for full size axes. 
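The hunks in this patch are one mechanical edit repeated across the opset headers: the deprecated trailing version integer is dropped from OPENVINO_OP, leaving the type name, the opset string, and the parent class. For illustration only, a minimal downstream operation written against the new macro shape; MyCustomOp and its "extension" opset string are hypothetical names, not part of this patch:

    // Sketch, assuming only the public op API.
    #include <openvino/op/op.hpp>

    class MyCustomOp : public ov::op::Op {
    public:
        // Old 4-argument form, removed by this patch:
        //     OPENVINO_OP("MyCustomOp", "extension", ov::op::Op, 1);
        // Surviving 3-argument form:
        OPENVINO_OP("MyCustomOp", "extension", ov::op::Op);

        MyCustomOp() = default;

        std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector&) const override {
            return std::make_shared<MyCustomOp>();
        }
    };
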
diff --git a/src/core/include/openvino/op/divide.hpp b/src/core/include/openvino/op/divide.hpp index c4ef648683c293..4d83d0043f4a64 100644 --- a/src/core/include/openvino/op/divide.hpp +++ b/src/core/include/openvino/op/divide.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Divide : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Divide", "opset1", util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Divide", "opset1", util::BinaryElementwiseArithmetic); /// \brief Constructs a division operation. Divide() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/einsum.hpp b/src/core/include/openvino/op/einsum.hpp index e42cdc0a94f806..9d1c179c2c5503 100644 --- a/src/core/include/openvino/op/einsum.hpp +++ b/src/core/include/openvino/op/einsum.hpp @@ -13,7 +13,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Einsum : public Op { public: - OPENVINO_OP("Einsum", "opset7", op::Op, 7); + OPENVINO_OP("Einsum", "opset7", op::Op); Einsum() = default; diff --git a/src/core/include/openvino/op/embedding_segments_sum.hpp b/src/core/include/openvino/op/embedding_segments_sum.hpp index 8601bd9ac4ec12..55108f6ccb9d5c 100644 --- a/src/core/include/openvino/op/embedding_segments_sum.hpp +++ b/src/core/include/openvino/op/embedding_segments_sum.hpp @@ -14,7 +14,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API EmbeddingSegmentsSum : public Op { public: - OPENVINO_OP("EmbeddingSegmentsSum", "opset3", op::Op, 3); + OPENVINO_OP("EmbeddingSegmentsSum", "opset3", op::Op); /// \brief Constructs a EmbeddingSegmentsSum operation. EmbeddingSegmentsSum() = default; /// \brief Constructs a EmbeddingSegmentsSum operation. diff --git a/src/core/include/openvino/op/embeddingbag_offsets_sum.hpp b/src/core/include/openvino/op/embeddingbag_offsets_sum.hpp index 0d88bdbd21af16..7c3ad7a7b74e7e 100644 --- a/src/core/include/openvino/op/embeddingbag_offsets_sum.hpp +++ b/src/core/include/openvino/op/embeddingbag_offsets_sum.hpp @@ -15,7 +15,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API EmbeddingBagOffsetsSum : public util::EmbeddingBagOffsetsBase { public: - OPENVINO_OP("EmbeddingBagOffsetsSum", "opset3", util::EmbeddingBagOffsetsBase, 3); + OPENVINO_OP("EmbeddingBagOffsetsSum", "opset3", util::EmbeddingBagOffsetsBase); /// \brief Constructs a EmbeddingBagOffsetsSum operation. EmbeddingBagOffsetsSum() = default; /// \brief Constructs a EmbeddingBagOffsetsSum operation. diff --git a/src/core/include/openvino/op/embeddingbag_packedsum.hpp b/src/core/include/openvino/op/embeddingbag_packedsum.hpp index 169b44484cbeab..b095b226a9e86d 100644 --- a/src/core/include/openvino/op/embeddingbag_packedsum.hpp +++ b/src/core/include/openvino/op/embeddingbag_packedsum.hpp @@ -15,7 +15,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API EmbeddingBagPackedSum : public util::EmbeddingBagPackedBase { public: - OPENVINO_OP("EmbeddingBagPackedSum", "opset3", util::EmbeddingBagPackedBase, 3); + OPENVINO_OP("EmbeddingBagPackedSum", "opset3", util::EmbeddingBagPackedBase); /// \brief Constructs a EmbeddingBagPackedSum operation. EmbeddingBagPackedSum() = default; /// \brief Constructs a EmbeddingBagPackedSum operation. 
diff --git a/src/core/include/openvino/op/equal.hpp b/src/core/include/openvino/op/equal.hpp index c8e2af9cd575b5..ae16f6c6d52703 100644 --- a/src/core/include/openvino/op/equal.hpp +++ b/src/core/include/openvino/op/equal.hpp @@ -29,7 +29,7 @@ namespace v1 { // clang-format on class OPENVINO_API Equal : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("Equal", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("Equal", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs an equal operation. Equal() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} /// \brief Constructs an equal operation. diff --git a/src/core/include/openvino/op/experimental_detectron_detection_output.hpp b/src/core/include/openvino/op/experimental_detectron_detection_output.hpp index c8e84bd6f09622..17221d907cb27e 100644 --- a/src/core/include/openvino/op/experimental_detectron_detection_output.hpp +++ b/src/core/include/openvino/op/experimental_detectron_detection_output.hpp @@ -20,7 +20,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExperimentalDetectronDetectionOutput : public Op { public: - OPENVINO_OP("ExperimentalDetectronDetectionOutput", "opset6", op::Op, 6); + OPENVINO_OP("ExperimentalDetectronDetectionOutput", "opset6", op::Op); /// \brief Structure that specifies attributes of the operation struct Attributes { diff --git a/src/core/include/openvino/op/experimental_detectron_generate_proposals.hpp b/src/core/include/openvino/op/experimental_detectron_generate_proposals.hpp index 83bf6f769c73e1..af2bfe1511f9fe 100644 --- a/src/core/include/openvino/op/experimental_detectron_generate_proposals.hpp +++ b/src/core/include/openvino/op/experimental_detectron_generate_proposals.hpp @@ -19,7 +19,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExperimentalDetectronGenerateProposalsSingleImage : public Op { public: - OPENVINO_OP("ExperimentalDetectronGenerateProposalsSingleImage", "opset6", op::Op, 6); + OPENVINO_OP("ExperimentalDetectronGenerateProposalsSingleImage", "opset6", op::Op); /// \brief Structure that specifies attributes of the operation struct Attributes { diff --git a/src/core/include/openvino/op/experimental_detectron_prior_grid_generator.hpp b/src/core/include/openvino/op/experimental_detectron_prior_grid_generator.hpp index 0dd697aec211a2..0865654a3dc358 100644 --- a/src/core/include/openvino/op/experimental_detectron_prior_grid_generator.hpp +++ b/src/core/include/openvino/op/experimental_detectron_prior_grid_generator.hpp @@ -19,7 +19,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExperimentalDetectronPriorGridGenerator : public Op { public: - OPENVINO_OP("ExperimentalDetectronPriorGridGenerator", "opset6", op::Op, 6); + OPENVINO_OP("ExperimentalDetectronPriorGridGenerator", "opset6", op::Op); /// \brief Structure that specifies attributes of the operation struct Attributes { diff --git a/src/core/include/openvino/op/experimental_detectron_roi_feature.hpp b/src/core/include/openvino/op/experimental_detectron_roi_feature.hpp index 109115d08abec9..b6b3d73b47ddbe 100644 --- a/src/core/include/openvino/op/experimental_detectron_roi_feature.hpp +++ b/src/core/include/openvino/op/experimental_detectron_roi_feature.hpp @@ -20,7 +20,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExperimentalDetectronROIFeatureExtractor : public Op { public: - OPENVINO_OP("ExperimentalDetectronROIFeatureExtractor", "opset6", op::Op, 6); + 
OPENVINO_OP("ExperimentalDetectronROIFeatureExtractor", "opset6", op::Op); /// \brief Structure that specifies attributes of the operation struct Attributes { diff --git a/src/core/include/openvino/op/experimental_detectron_topkrois.hpp b/src/core/include/openvino/op/experimental_detectron_topkrois.hpp index c12e1dcb374fc2..90bb99ebc5c89c 100644 --- a/src/core/include/openvino/op/experimental_detectron_topkrois.hpp +++ b/src/core/include/openvino/op/experimental_detectron_topkrois.hpp @@ -19,7 +19,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExperimentalDetectronTopKROIs : public Op { public: - OPENVINO_OP("ExperimentalDetectronTopKROIs", "opset6", op::Op, 6); + OPENVINO_OP("ExperimentalDetectronTopKROIs", "opset6", op::Op); ExperimentalDetectronTopKROIs() = default; /// \brief Constructs a ExperimentalDetectronTopKROIs operation. diff --git a/src/core/include/openvino/op/extractimagepatches.hpp b/src/core/include/openvino/op/extractimagepatches.hpp index d9a0ca6ff6b89d..37d506fe1895b8 100644 --- a/src/core/include/openvino/op/extractimagepatches.hpp +++ b/src/core/include/openvino/op/extractimagepatches.hpp @@ -14,7 +14,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ExtractImagePatches : public Op { public: - OPENVINO_OP("ExtractImagePatches", "opset3", op::Op, 3); + OPENVINO_OP("ExtractImagePatches", "opset3", op::Op); ExtractImagePatches() = default; /// \brief Constructs a ExtractImagePatches operation diff --git a/src/core/include/openvino/op/floor_mod.hpp b/src/core/include/openvino/op/floor_mod.hpp index 2216de28b617e4..4df54f3bcd7334 100644 --- a/src/core/include/openvino/op/floor_mod.hpp +++ b/src/core/include/openvino/op/floor_mod.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API FloorMod : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("FloorMod", "opset1", op::util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("FloorMod", "opset1", op::util::BinaryElementwiseArithmetic); /// \brief Constructs an uninitialized addition operation FloorMod() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/gather.hpp b/src/core/include/openvino/op/gather.hpp index 55564080af1651..e752259271cfb4 100644 --- a/src/core/include/openvino/op/gather.hpp +++ b/src/core/include/openvino/op/gather.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Gather : public op::util::GatherBase { public: - OPENVINO_OP("Gather", "opset1", op::util::GatherBase, 1); + OPENVINO_OP("Gather", "opset1", op::util::GatherBase); static constexpr int64_t AXIS_NOT_SET_VALUE = std::numeric_limits::max(); Gather() = default; /// \param data The tensor from which slices are gathered @@ -33,7 +33,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Gather : public op::util::GatherBase { public: - OPENVINO_OP("Gather", "opset7", op::util::GatherBase, 7); + OPENVINO_OP("Gather", "opset7", op::util::GatherBase); Gather() = default; /// \param data The tensor from which slices are gathered diff --git a/src/core/include/openvino/op/gather_elements.hpp b/src/core/include/openvino/op/gather_elements.hpp index c184d86e3628b4..4d8c419e616a10 100644 --- a/src/core/include/openvino/op/gather_elements.hpp +++ b/src/core/include/openvino/op/gather_elements.hpp @@ -14,7 +14,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GatherElements : public Op { public: - OPENVINO_OP("GatherElements", "opset6", op::Op, 6); + 
OPENVINO_OP("GatherElements", "opset6", op::Op); GatherElements() = default; /// \brief Constructs a GatherElements operation. diff --git a/src/core/include/openvino/op/gather_nd.hpp b/src/core/include/openvino/op/gather_nd.hpp index 146a1511664968..59680a37d4e4c6 100644 --- a/src/core/include/openvino/op/gather_nd.hpp +++ b/src/core/include/openvino/op/gather_nd.hpp @@ -13,7 +13,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GatherND : public op::util::GatherNDBase { public: - OPENVINO_OP("GatherND", "opset5", op::util::GatherNDBase, 5); + OPENVINO_OP("GatherND", "opset5", op::util::GatherNDBase); GatherND() = default; /// \brief Constructs a GatherND operation. diff --git a/src/core/include/openvino/op/gather_tree.hpp b/src/core/include/openvino/op/gather_tree.hpp index 67f455be8a2aa6..bc2169019a8cea 100644 --- a/src/core/include/openvino/op/gather_tree.hpp +++ b/src/core/include/openvino/op/gather_tree.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GatherTree : public Op { public: - OPENVINO_OP("GatherTree", "opset1", op::Op, 1); + OPENVINO_OP("GatherTree", "opset1", op::Op); GatherTree() = default; /// \param step_ids Tensor of shape [MAX_TIME, BATCH_SIZE, BEAM_WIDTH] with diff --git a/src/core/include/openvino/op/gelu.hpp b/src/core/include/openvino/op/gelu.hpp index 021a7e0ef8bd02..ae868e3909bbfd 100644 --- a/src/core/include/openvino/op/gelu.hpp +++ b/src/core/include/openvino/op/gelu.hpp @@ -15,7 +15,7 @@ namespace v0 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Gelu : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Gelu", "opset2", util::UnaryElementwiseArithmetic, 0); + OPENVINO_OP("Gelu", "opset2", util::UnaryElementwiseArithmetic); Gelu(); /// \brief Constructs a Gelu operation. @@ -43,7 +43,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Gelu : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Gelu", "opset7", util::UnaryElementwiseArithmetic, 7); + OPENVINO_OP("Gelu", "opset7", util::UnaryElementwiseArithmetic); Gelu() = default; /// \brief Constructs a Gelu operation. diff --git a/src/core/include/openvino/op/greater.hpp b/src/core/include/openvino/op/greater.hpp index 1693c09579b1e7..de889a0acae370 100644 --- a/src/core/include/openvino/op/greater.hpp +++ b/src/core/include/openvino/op/greater.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Greater : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("Greater", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("Greater", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs a greater-than operation. Greater() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} /// \brief Constructs a greater-than operation. diff --git a/src/core/include/openvino/op/greater_eq.hpp b/src/core/include/openvino/op/greater_eq.hpp index f4731a2da74a50..1f5fe1f984c95d 100644 --- a/src/core/include/openvino/op/greater_eq.hpp +++ b/src/core/include/openvino/op/greater_eq.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GreaterEqual : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("GreaterEqual", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("GreaterEqual", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs a greater-than-or-equal operation. 
GreaterEqual() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} /// \brief Constructs a greater-than-or-equal operation. diff --git a/src/core/include/openvino/op/group_conv.hpp b/src/core/include/openvino/op/group_conv.hpp index f01c2a7f3dc891..a37a26e480e8de 100644 --- a/src/core/include/openvino/op/group_conv.hpp +++ b/src/core/include/openvino/op/group_conv.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \brief Batched convolution operation, with optional window dilation and stride. class OPENVINO_API GroupConvolution : public Op { public: - OPENVINO_OP("GroupConvolution", "opset1", op::Op, 1); + OPENVINO_OP("GroupConvolution", "opset1", op::Op); /// \brief Constructs a batched convolution operation. GroupConvolution() = default; @@ -126,7 +126,7 @@ class OPENVINO_API GroupConvolution : public Op { /// \brief Data batch backprop for batched convolution operation. class OPENVINO_API GroupConvolutionBackpropData : public Op { public: - OPENVINO_OP("GroupConvolutionBackpropData", "opset1", op::Op, 1); + OPENVINO_OP("GroupConvolutionBackpropData", "opset1", op::Op); /// \brief Constructs a batched-convolution data batch-backprop operation. GroupConvolutionBackpropData(); diff --git a/src/core/include/openvino/op/gru_cell.hpp b/src/core/include/openvino/op/gru_cell.hpp index 2610c4731ae139..15543eec2b943d 100644 --- a/src/core/include/openvino/op/gru_cell.hpp +++ b/src/core/include/openvino/op/gru_cell.hpp @@ -24,7 +24,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GRUCell : public util::RNNCellBase { public: - OPENVINO_OP("GRUCell", "opset3", op::util::RNNCellBase, 3); + OPENVINO_OP("GRUCell", "opset3", op::util::RNNCellBase); GRUCell(); /// /// \brief Constructs GRUCell node. diff --git a/src/core/include/openvino/op/gru_sequence.hpp b/src/core/include/openvino/op/gru_sequence.hpp index 1fc9e7c9147fcf..fae54509ad0c64 100644 --- a/src/core/include/openvino/op/gru_sequence.hpp +++ b/src/core/include/openvino/op/gru_sequence.hpp @@ -19,7 +19,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API GRUSequence : public util::RNNCellBase { public: - OPENVINO_OP("GRUSequence", "opset5", op::Op, 5); + OPENVINO_OP("GRUSequence", "opset5", op::Op); GRUSequence(); GRUSequence(const Output<Node>& X, diff --git a/src/core/include/openvino/op/hsigmoid.hpp b/src/core/include/openvino/op/hsigmoid.hpp index abf8d2e1cf87a9..71b07ada902617 100644 --- a/src/core/include/openvino/op/hsigmoid.hpp +++ b/src/core/include/openvino/op/hsigmoid.hpp @@ -17,7 +17,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API HSigmoid : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("HSigmoid", "opset5", op::util::UnaryElementwiseArithmetic, 5); + OPENVINO_OP("HSigmoid", "opset5", op::util::UnaryElementwiseArithmetic); HSigmoid() = default; /// \brief Constructs a HSigmoid operation. diff --git a/src/core/include/openvino/op/hswish.hpp b/src/core/include/openvino/op/hswish.hpp index b20e3498b90dd0..34cff2955b5ab6 100644 --- a/src/core/include/openvino/op/hswish.hpp +++ b/src/core/include/openvino/op/hswish.hpp @@ -17,7 +17,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API HSwish : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("HSwish", "opset4", op::util::UnaryElementwiseArithmetic, 4); + OPENVINO_OP("HSwish", "opset4", op::util::UnaryElementwiseArithmetic); HSwish() = default; /// \brief Constructs a HSwish (hard version of Swish) operation.
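With the numeric field gone from DiscreteTypeInfo (the type.hpp hunk earlier in this patch), only name, version_id and parent identify a type. A rough sketch of the two surviving constructors, with made-up type names; not part of the patch:

    // Sketch against the post-patch openvino/core/type.hpp.
    #include <openvino/core/type.hpp>
    #include <iostream>

    int main() {
        ov::DiscreteTypeInfo a{"MyOp", "opset_a"};  // name + version_id
        ov::DiscreteTypeInfo b{"MyOp", "opset_b"};  // same name, different opset
        std::cout << a.name << " / " << a.version_id << "\n";
        // Identity is decided by name and version_id, no integer involved:
        std::cout << (a == b ? "same" : "different") << "\n";  // prints "different"
    }
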
diff --git a/src/core/include/openvino/op/idft.hpp b/src/core/include/openvino/op/idft.hpp index ea6b0a737a44ba..1fd0948bc9c8a2 100644 --- a/src/core/include/openvino/op/idft.hpp +++ b/src/core/include/openvino/op/idft.hpp @@ -17,7 +17,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API IDFT : public util::FFTBase { public: - OPENVINO_OP("IDFT", "opset7", util::FFTBase, 7); + OPENVINO_OP("IDFT", "opset7", util::FFTBase); IDFT() = default; /// \brief Constructs a IDFT operation. IDFT is performed for full size axes. diff --git a/src/core/include/openvino/op/interpolate.hpp b/src/core/include/openvino/op/interpolate.hpp index cec3a88e3f5f3f..d6e32cc28cb3c2 100644 --- a/src/core/include/openvino/op/interpolate.hpp +++ b/src/core/include/openvino/op/interpolate.hpp @@ -83,7 +83,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Interpolate : public util::InterpolateBase { public: - OPENVINO_OP("Interpolate", "opset4", util::InterpolateBase, 4); + OPENVINO_OP("Interpolate", "opset4", util::InterpolateBase); Interpolate() = default; /// \brief Constructs a Interpolate operation without 'axes' input. @@ -190,7 +190,7 @@ namespace v11 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Interpolate : public util::InterpolateBase { public: - OPENVINO_OP("Interpolate", "opset11", util::InterpolateBase, 11); + OPENVINO_OP("Interpolate", "opset11", util::InterpolateBase); Interpolate() = default; /// \brief Constructs a Interpolate operation without 'axes' input. /// diff --git a/src/core/include/openvino/op/less.hpp b/src/core/include/openvino/op/less.hpp index d11285b871e392..3d87ab9b6ffbe3 100644 --- a/src/core/include/openvino/op/less.hpp +++ b/src/core/include/openvino/op/less.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Less : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("Less", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("Less", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs a less-than operation. Less() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} /// \brief Constructs a less-than operation. diff --git a/src/core/include/openvino/op/less_eq.hpp b/src/core/include/openvino/op/less_eq.hpp index 6b725f749c23ae..111f4c07140af5 100644 --- a/src/core/include/openvino/op/less_eq.hpp +++ b/src/core/include/openvino/op/less_eq.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LessEqual : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("LessEqual", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("LessEqual", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs a less-than-or-equal operation. LessEqual() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/log_softmax.hpp b/src/core/include/openvino/op/log_softmax.hpp index d441e645998aea..b4bca830a0832f 100644 --- a/src/core/include/openvino/op/log_softmax.hpp +++ b/src/core/include/openvino/op/log_softmax.hpp @@ -14,7 +14,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LogSoftmax : public Op { public: - OPENVINO_OP("LogSoftmax", "opset5", op::Op, 5); + OPENVINO_OP("LogSoftmax", "opset5", op::Op); LogSoftmax() = default; /// \brief Constructs a LogSoftmax operation. 
/// diff --git a/src/core/include/openvino/op/logical_and.hpp b/src/core/include/openvino/op/logical_and.hpp index 8580accfe46df7..6d55f8f3585e0f 100644 --- a/src/core/include/openvino/op/logical_and.hpp +++ b/src/core/include/openvino/op/logical_and.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LogicalAnd : public util::BinaryElementwiseLogical { public: - OPENVINO_OP("LogicalAnd", "opset1", util::BinaryElementwiseLogical, 1); + OPENVINO_OP("LogicalAnd", "opset1", util::BinaryElementwiseLogical); /// \brief Constructs a logical-and operation. LogicalAnd() = default; diff --git a/src/core/include/openvino/op/logical_not.hpp b/src/core/include/openvino/op/logical_not.hpp index 9b50a8e2ecce1e..c5421b8db14a47 100644 --- a/src/core/include/openvino/op/logical_not.hpp +++ b/src/core/include/openvino/op/logical_not.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LogicalNot : public Op { public: - OPENVINO_OP("LogicalNot", "opset1", op::Op, 1); + OPENVINO_OP("LogicalNot", "opset1", op::Op); /// \brief Constructs a logical negation operation. LogicalNot() = default; /// \brief Constructs a logical negation operation. diff --git a/src/core/include/openvino/op/logical_or.hpp b/src/core/include/openvino/op/logical_or.hpp index 379b773d37617f..15c00eea04baf3 100644 --- a/src/core/include/openvino/op/logical_or.hpp +++ b/src/core/include/openvino/op/logical_or.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LogicalOr : public util::BinaryElementwiseLogical { public: - OPENVINO_OP("LogicalOr", "opset1", util::BinaryElementwiseLogical, 1); + OPENVINO_OP("LogicalOr", "opset1", util::BinaryElementwiseLogical); LogicalOr() = default; /// \brief Constructs a logical-or operation. /// diff --git a/src/core/include/openvino/op/logical_xor.hpp b/src/core/include/openvino/op/logical_xor.hpp index 9e94a1756f98c0..41ad89abca2638 100644 --- a/src/core/include/openvino/op/logical_xor.hpp +++ b/src/core/include/openvino/op/logical_xor.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LogicalXor : public util::BinaryElementwiseLogical { public: - OPENVINO_OP("LogicalXor", "opset2", util::BinaryElementwiseLogical, 1); + OPENVINO_OP("LogicalXor", "opset2", util::BinaryElementwiseLogical); LogicalXor() = default; /// \brief Constructs a logical-xor operation. /// diff --git a/src/core/include/openvino/op/loop.hpp b/src/core/include/openvino/op/loop.hpp index 7bbc00dc75c7fb..cb174d588b7bc3 100644 --- a/src/core/include/openvino/op/loop.hpp +++ b/src/core/include/openvino/op/loop.hpp @@ -31,7 +31,7 @@ class OPENVINO_API Loop : public op::util::SubGraphOp { int64_t body_condition_output_idx = -1; }; - OPENVINO_OP("Loop", "opset5", op::util::SubGraphOp, 5); + OPENVINO_OP("Loop", "opset5", op::util::SubGraphOp); /// \brief Constructs a Loop operation. 
Loop() = default; diff --git a/src/core/include/openvino/op/lstm_cell.hpp b/src/core/include/openvino/op/lstm_cell.hpp index 3c9e53be10c46b..249b3dccdc2bf6 100644 --- a/src/core/include/openvino/op/lstm_cell.hpp +++ b/src/core/include/openvino/op/lstm_cell.hpp @@ -278,7 +278,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LSTMCell : public util::RNNCellBase { public: - OPENVINO_OP("LSTMCell", "opset4", op::util::RNNCellBase, 4); + OPENVINO_OP("LSTMCell", "opset4", op::util::RNNCellBase); LSTMCell(); /// diff --git a/src/core/include/openvino/op/lstm_sequence.hpp b/src/core/include/openvino/op/lstm_sequence.hpp index 3296bd54208134..1e7599a35ba982 100644 --- a/src/core/include/openvino/op/lstm_sequence.hpp +++ b/src/core/include/openvino/op/lstm_sequence.hpp @@ -127,7 +127,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API LSTMSequence : public util::RNNCellBase { public: - OPENVINO_OP("LSTMSequence", "opset5", util::RNNCellBase, 5); + OPENVINO_OP("LSTMSequence", "opset5", util::RNNCellBase); LSTMSequence() = default; using direction = RecurrentSequenceDirection; diff --git a/src/core/include/openvino/op/max_pool.hpp b/src/core/include/openvino/op/max_pool.hpp index 2acfdb9d12b6ce..c1741eef6cb717 100644 --- a/src/core/include/openvino/op/max_pool.hpp +++ b/src/core/include/openvino/op/max_pool.hpp @@ -15,7 +15,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API MaxPool : public op::util::MaxPoolBase { public: - OPENVINO_OP("MaxPool", "opset1", op::util::MaxPoolBase, 1); + OPENVINO_OP("MaxPool", "opset1", op::util::MaxPoolBase); /// \brief Constructs a batched max pooling operation. MaxPool() = default; diff --git a/src/core/include/openvino/op/maximum.hpp b/src/core/include/openvino/op/maximum.hpp index 5c21463c2ec727..742878b09c4eba 100644 --- a/src/core/include/openvino/op/maximum.hpp +++ b/src/core/include/openvino/op/maximum.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Maximum : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Maximum", "opset1", op::util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Maximum", "opset1", op::util::BinaryElementwiseArithmetic); /// \brief Constructs a maximum operation. Maximum() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/minimum.hpp b/src/core/include/openvino/op/minimum.hpp index afd75c41ea577b..c8cfc5c9d7c999 100644 --- a/src/core/include/openvino/op/minimum.hpp +++ b/src/core/include/openvino/op/minimum.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Minimum : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Minimum", "opset1", op::util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Minimum", "opset1", op::util::BinaryElementwiseArithmetic); /// \brief Constructs a minimum operation. Minimum() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/mish.hpp b/src/core/include/openvino/op/mish.hpp index 1d3a53ca7534c4..455bd713166f95 100644 --- a/src/core/include/openvino/op/mish.hpp +++ b/src/core/include/openvino/op/mish.hpp @@ -15,7 +15,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Mish : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("Mish", "opset4", util::UnaryElementwiseArithmetic, 4); + OPENVINO_OP("Mish", "opset4", util::UnaryElementwiseArithmetic); Mish() = default; /// \brief Constructs an Mish operation. 
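None of these hunks change runtime dispatch; what changes is how code asks which opset an operation belongs to. A check such as get_type_info().version == 1 no longer compiles once the member is removed, so the string version_id is what remains to compare. A sketch, assuming only the public headers:

    // Sketch: opset checks after the removal of the numeric `version` member.
    #include <openvino/core/type.hpp>
    #include <openvino/op/multiply.hpp>
    #include <cstring>
    #include <memory>

    bool is_opset1_multiply(const std::shared_ptr<ov::Node>& node) {
        if (!ov::is_type<ov::op::v1::Multiply>(node))
            return false;
        // Previously: node->get_type_info().version == 1.
        return std::strcmp(node->get_type_info().version_id, "opset1") == 0;
    }
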
diff --git a/src/core/include/openvino/op/mod.hpp b/src/core/include/openvino/op/mod.hpp index 749a7ae2b7e5d4..5e58a2ec03d733 100644 --- a/src/core/include/openvino/op/mod.hpp +++ b/src/core/include/openvino/op/mod.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Mod : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Mod", "opset1", op::util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Mod", "opset1", op::util::BinaryElementwiseArithmetic); /// \brief Constructs a Mod node. Mod() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/multiply.hpp b/src/core/include/openvino/op/multiply.hpp index 259c0b9f03a117..2e2f3bd4c73000 100644 --- a/src/core/include/openvino/op/multiply.hpp +++ b/src/core/include/openvino/op/multiply.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Multiply : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Multiply", "opset1", util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Multiply", "opset1", util::BinaryElementwiseArithmetic); /// \brief Constructs a multiplication operation. Multiply() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/mvn.hpp b/src/core/include/openvino/op/mvn.hpp index c9a3920aedbb4b..7f198ec1444047 100644 --- a/src/core/include/openvino/op/mvn.hpp +++ b/src/core/include/openvino/op/mvn.hpp @@ -99,7 +99,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API MVN : public Op { public: - OPENVINO_OP("MVN", "opset6", op::Op, 6); + OPENVINO_OP("MVN", "opset6", op::Op); MVN() = default; /// \brief Constructs an MVN operation. diff --git a/src/core/include/openvino/op/non_max_suppression.hpp b/src/core/include/openvino/op/non_max_suppression.hpp index 143df579e0ac69..5216c53700684f 100644 --- a/src/core/include/openvino/op/non_max_suppression.hpp +++ b/src/core/include/openvino/op/non_max_suppression.hpp @@ -16,7 +16,7 @@ class OPENVINO_API NonMaxSuppression : public Op { public: enum class BoxEncodingType { CORNER, CENTER }; - OPENVINO_OP("NonMaxSuppression", "opset1", op::Op, 1); + OPENVINO_OP("NonMaxSuppression", "opset1", op::Op); NonMaxSuppression() = default; @@ -86,7 +86,7 @@ class OPENVINO_API NonMaxSuppression : public Op { public: enum class BoxEncodingType { CORNER, CENTER }; - OPENVINO_OP("NonMaxSuppression", "opset3", op::Op, 3); + OPENVINO_OP("NonMaxSuppression", "opset3", op::Op); NonMaxSuppression() = default; /// \brief Constructs a NonMaxSuppression operation. @@ -166,7 +166,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API NonMaxSuppression : public op::v3::NonMaxSuppression { public: - OPENVINO_OP("NonMaxSuppression", "opset4", op::v3::NonMaxSuppression, 4); + OPENVINO_OP("NonMaxSuppression", "opset4", op::v3::NonMaxSuppression); NonMaxSuppression() = default; /// \brief Constructs a NonMaxSuppression operation. 
@@ -217,7 +217,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API NonMaxSuppression : public Op { public: - OPENVINO_OP("NonMaxSuppression", "opset5", op::Op, 5); + OPENVINO_OP("NonMaxSuppression", "opset5", op::Op); enum class BoxEncodingType { CORNER, CENTER }; NonMaxSuppression() = default; @@ -365,7 +365,7 @@ namespace v9 { /// class OPENVINO_API NonMaxSuppression : public Op { public: - OPENVINO_OP("NonMaxSuppression", "opset9", op::Op, 9); + OPENVINO_OP("NonMaxSuppression", "opset9", op::Op); enum class BoxEncodingType { CORNER, CENTER }; NonMaxSuppression() = default; diff --git a/src/core/include/openvino/op/non_zero.hpp b/src/core/include/openvino/op/non_zero.hpp index f45ae824a47cd5..e14d757e5ff341 100644 --- a/src/core/include/openvino/op/non_zero.hpp +++ b/src/core/include/openvino/op/non_zero.hpp @@ -20,7 +20,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API NonZero : public Op { public: - OPENVINO_OP("NonZero", "opset3", op::Op, 3); + OPENVINO_OP("NonZero", "opset3", op::Op); /// \brief Constructs a NonZero operation. NonZero() = default; /// \brief Constructs a NonZero operation. diff --git a/src/core/include/openvino/op/not_equal.hpp b/src/core/include/openvino/op/not_equal.hpp index 930244094d37a6..dfae8b59a8fd0e 100644 --- a/src/core/include/openvino/op/not_equal.hpp +++ b/src/core/include/openvino/op/not_equal.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API NotEqual : public util::BinaryElementwiseComparison { public: - OPENVINO_OP("NotEqual", "opset1", op::util::BinaryElementwiseComparison, 1); + OPENVINO_OP("NotEqual", "opset1", op::util::BinaryElementwiseComparison); /// \brief Constructs a not-equal operation. NotEqual() : util::BinaryElementwiseComparison(AutoBroadcastType::NUMPY) {} /// \brief Constructs a not-equal operation. diff --git a/src/core/include/openvino/op/one_hot.hpp b/src/core/include/openvino/op/one_hot.hpp index e911d838a52baa..621fd8483c0649 100644 --- a/src/core/include/openvino/op/one_hot.hpp +++ b/src/core/include/openvino/op/one_hot.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API OneHot : public Op { public: - OPENVINO_OP("OneHot", "opset1", op::Op, 1); + OPENVINO_OP("OneHot", "opset1", op::Op); /// \brief Constructs a one-hot operation. OneHot() = default; diff --git a/src/core/include/openvino/op/op.hpp b/src/core/include/openvino/op/op.hpp index c0a57a90323660..7964007180775f 100644 --- a/src/core/include/openvino/op/op.hpp +++ b/src/core/include/openvino/op/op.hpp @@ -16,7 +16,6 @@ #define OPENVINO_OP(...) 
\ _OPENVINO_RTTI_EXPAND(_OPENVINO_RTTI_DEFINITION_SELECTOR(__VA_ARGS__, \ - _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT, \ _OPENVINO_RTTI_WITH_TYPE_VERSION_PARENT, \ _OPENVINO_RTTI_OP_WITH_TYPE_VERSION, \ _OPENVINO_RTTI_OP_WITH_TYPE)(__VA_ARGS__)) \ @@ -40,7 +39,7 @@ class OPENVINO_API Op : public Node { public: _OPENVINO_HIDDEN_METHOD static const ::ov::Node::type_info_t& get_type_info_static() { - static ::ov::Node::type_info_t info{"Op", 0, "util"}; + static ::ov::Node::type_info_t info{"Op", "util"}; info.hash(); return info; } diff --git a/src/core/include/openvino/op/pad.hpp b/src/core/include/openvino/op/pad.hpp index 4f8779f6d16b4b..a45c1f33dd52ef 100644 --- a/src/core/include/openvino/op/pad.hpp +++ b/src/core/include/openvino/op/pad.hpp @@ -15,7 +15,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Pad : public Op { public: - OPENVINO_OP("Pad", "opset1", op::Op, 1); + OPENVINO_OP("Pad", "opset1", op::Op); /// \brief Constructs a generic padding operation. /// diff --git a/src/core/include/openvino/op/power.hpp b/src/core/include/openvino/op/power.hpp index c2bb08f4154bcf..c89a98c61a203b 100644 --- a/src/core/include/openvino/op/power.hpp +++ b/src/core/include/openvino/op/power.hpp @@ -28,7 +28,7 @@ namespace v1 { // clang-format on class OPENVINO_API Power : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Power", "opset1", op::util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Power", "opset1", op::util::BinaryElementwiseArithmetic); Power() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/proposal.hpp b/src/core/include/openvino/op/proposal.hpp index 6f7960133d3171..c09282594d3bcb 100644 --- a/src/core/include/openvino/op/proposal.hpp +++ b/src/core/include/openvino/op/proposal.hpp @@ -78,7 +78,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Proposal : public op::v0::Proposal { public: - OPENVINO_OP("Proposal", "opset4", op::Op, 4); + OPENVINO_OP("Proposal", "opset4", op::Op); Proposal() = default; /// \brief Constructs a Proposal operation /// diff --git a/src/core/include/openvino/op/range.hpp b/src/core/include/openvino/op/range.hpp index 1cd44aed49f2de..1222d14874dba0 100644 --- a/src/core/include/openvino/op/range.hpp +++ b/src/core/include/openvino/op/range.hpp @@ -13,7 +13,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Range : public Op { public: - OPENVINO_OP("Range", "opset4", op::Op, 4); + OPENVINO_OP("Range", "opset4", op::Op); /// \brief Constructs an unitialized range operation. Range() = default; diff --git a/src/core/include/openvino/op/read_value.hpp b/src/core/include/openvino/op/read_value.hpp index 87a861b4a1e3bf..38c539427b0c4c 100644 --- a/src/core/include/openvino/op/read_value.hpp +++ b/src/core/include/openvino/op/read_value.hpp @@ -15,7 +15,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReadValue : public util::ReadValueBase { public: - OPENVINO_OP("ReadValue", "opset3", util::ReadValueBase, 3); + OPENVINO_OP("ReadValue", "opset3", util::ReadValueBase); ReadValue() = default; /// \brief Constructs a ReadValue operation. @@ -45,7 +45,7 @@ namespace v6 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReadValue : public util::ReadValueBase { public: - OPENVINO_OP("ReadValue", "opset6", util::ReadValueBase, 6); + OPENVINO_OP("ReadValue", "opset6", util::ReadValueBase); ReadValue() = default; /// \brief Constructs a ReadValue operation. 
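The OPENVINO_OP and OPENVINO_RTTI selectors edited in rtti.hpp and op.hpp above dispatch on argument count: the caller's arguments push the candidate macro names rightward and a fixed slot picks the winner, which is why deleting the _OPENVINO_RTTI_WITH_TYPE_VERSIONS_PARENT entry is what retires the 4-argument form. The same trick in isolation, with made-up names:

    // Self-contained demo of macro overloading by arity; names are illustrative.
    #include <cstdio>

    #define PICK_4TH(_1, _2, _3, NAME, ...) NAME
    #define GREET1(a)       std::printf("hi %s\n", a)
    #define GREET2(a, b)    std::printf("hi %s and %s\n", a, b)
    #define GREET3(a, b, c) std::printf("hi %s, %s and %s\n", a, b, c)
    // EXPAND mirrors _OPENVINO_RTTI_EXPAND, a workaround for MSVC's
    // eager __VA_ARGS__ expansion.
    #define EXPAND(x) x
    #define GREET(...) EXPAND(PICK_4TH(__VA_ARGS__, GREET3, GREET2, GREET1)(__VA_ARGS__))

    int main() {
        GREET("a");            // resolves to GREET1
        GREET("a", "b");       // resolves to GREET2
        GREET("a", "b", "c");  // resolves to GREET3
    }
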
diff --git a/src/core/include/openvino/op/reduce_l1.hpp b/src/core/include/openvino/op/reduce_l1.hpp index a9f5024c6aaf06..4889e5c52a6aba 100644 --- a/src/core/include/openvino/op/reduce_l1.hpp +++ b/src/core/include/openvino/op/reduce_l1.hpp @@ -16,7 +16,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceL1 : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceL1", "opset4", util::ArithmeticReductionKeepDims, 4); + OPENVINO_OP("ReduceL1", "opset4", util::ArithmeticReductionKeepDims); /// \brief Constructs a reducet L1-norm operation. ReduceL1() = default; /// \brief Constructs a reduce L1-norm operation. diff --git a/src/core/include/openvino/op/reduce_l2.hpp b/src/core/include/openvino/op/reduce_l2.hpp index c2a18ac3668483..9f9b38b7dc5747 100644 --- a/src/core/include/openvino/op/reduce_l2.hpp +++ b/src/core/include/openvino/op/reduce_l2.hpp @@ -15,7 +15,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceL2 : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceL2", "opset4", util::ArithmeticReductionKeepDims, 4); + OPENVINO_OP("ReduceL2", "opset4", util::ArithmeticReductionKeepDims); /// \brief Constructs a reducet L2-norm operation. ReduceL2() = default; /// \brief Constructs a reduce L2-norm operation. diff --git a/src/core/include/openvino/op/reduce_logical_and.hpp b/src/core/include/openvino/op/reduce_logical_and.hpp index b7e839ab1069da..1358702a1fd39a 100644 --- a/src/core/include/openvino/op/reduce_logical_and.hpp +++ b/src/core/include/openvino/op/reduce_logical_and.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceLogicalAnd : public util::LogicalReductionKeepDims { public: - OPENVINO_OP("ReduceLogicalAnd", "opset1", util::LogicalReductionKeepDims, 1); + OPENVINO_OP("ReduceLogicalAnd", "opset1", util::LogicalReductionKeepDims); ReduceLogicalAnd() = default; /// \brief Constructs a ReduceLogicalAnd node. /// diff --git a/src/core/include/openvino/op/reduce_logical_or.hpp b/src/core/include/openvino/op/reduce_logical_or.hpp index 67fe065db4585b..36a3fd34759b24 100644 --- a/src/core/include/openvino/op/reduce_logical_or.hpp +++ b/src/core/include/openvino/op/reduce_logical_or.hpp @@ -16,7 +16,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceLogicalOr : public util::LogicalReductionKeepDims { public: - OPENVINO_OP("ReduceLogicalOr", "opset1", util::LogicalReductionKeepDims, 1); + OPENVINO_OP("ReduceLogicalOr", "opset1", util::LogicalReductionKeepDims); ReduceLogicalOr() = default; /// \brief Constructs a ReduceLogicalOr node. /// diff --git a/src/core/include/openvino/op/reduce_max.hpp b/src/core/include/openvino/op/reduce_max.hpp index b1579ad5ccbf03..499dec82bb9f77 100644 --- a/src/core/include/openvino/op/reduce_max.hpp +++ b/src/core/include/openvino/op/reduce_max.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceMax : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceMax", "opset1", util::ArithmeticReductionKeepDims, 1); + OPENVINO_OP("ReduceMax", "opset1", util::ArithmeticReductionKeepDims); /// \brief Constructs a summation operation. ReduceMax() = default; /// \brief Constructs a summation operation. 
diff --git a/src/core/include/openvino/op/reduce_mean.hpp b/src/core/include/openvino/op/reduce_mean.hpp index 41459857e081da..7b50dd57b7dafc 100644 --- a/src/core/include/openvino/op/reduce_mean.hpp +++ b/src/core/include/openvino/op/reduce_mean.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceMean : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceMean", "opset1", util::ArithmeticReductionKeepDims, 1); + OPENVINO_OP("ReduceMean", "opset1", util::ArithmeticReductionKeepDims); ReduceMean() = default; /// \param arg The tensor to be summed. diff --git a/src/core/include/openvino/op/reduce_min.hpp b/src/core/include/openvino/op/reduce_min.hpp index 464b232ed5fbf9..830021a0bb2ae0 100644 --- a/src/core/include/openvino/op/reduce_min.hpp +++ b/src/core/include/openvino/op/reduce_min.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceMin : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceMin", "opset1", util::ArithmeticReductionKeepDims, 1); + OPENVINO_OP("ReduceMin", "opset1", util::ArithmeticReductionKeepDims); /// \brief Constructs a summation operation. ReduceMin() = default; /// \brief Constructs a summation operation. diff --git a/src/core/include/openvino/op/reduce_prod.hpp b/src/core/include/openvino/op/reduce_prod.hpp index c8697c81bcdc2a..4a9af6339b6797 100644 --- a/src/core/include/openvino/op/reduce_prod.hpp +++ b/src/core/include/openvino/op/reduce_prod.hpp @@ -15,7 +15,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ReduceProd : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceProd", "opset1", util::ArithmeticReductionKeepDims, 1); + OPENVINO_OP("ReduceProd", "opset1", util::ArithmeticReductionKeepDims); /// \brief Constructs a product reduction operation. ReduceProd() = default; /// \brief Constructs a product reduction operation. diff --git a/src/core/include/openvino/op/reduce_sum.hpp b/src/core/include/openvino/op/reduce_sum.hpp index 60622fd2b7e3ea..7a3221c68e52ef 100644 --- a/src/core/include/openvino/op/reduce_sum.hpp +++ b/src/core/include/openvino/op/reduce_sum.hpp @@ -61,7 +61,7 @@ namespace v1 { // clang-format on class OPENVINO_API ReduceSum : public util::ArithmeticReductionKeepDims { public: - OPENVINO_OP("ReduceSum", "opset1", util::ArithmeticReductionKeepDims, 1); + OPENVINO_OP("ReduceSum", "opset1", util::ArithmeticReductionKeepDims); /// \brief Constructs a summation operation. ReduceSum() = default; /// \brief Constructs a summation operation. diff --git a/src/core/include/openvino/op/reshape.hpp b/src/core/include/openvino/op/reshape.hpp index 2905e4ad5ec182..9d4ecc18da1cc0 100644 --- a/src/core/include/openvino/op/reshape.hpp +++ b/src/core/include/openvino/op/reshape.hpp @@ -17,7 +17,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Reshape : public Op { public: - OPENVINO_OP("Reshape", "opset1", op::Op, 1); + OPENVINO_OP("Reshape", "opset1", op::Op); Reshape() = default; /// \brief Constructs a dynamic reshape operation. This operation does not perform /// transpose. 
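Every OPENVINO_OP line touched here expands to a get_type_info_static() definition, and the resulting DiscreteTypeInfo still hashes and compares after losing the integer, so type-keyed lookup tables are unaffected. A sketch, assuming the std::hash<ov::DiscreteTypeInfo> specialization that type.hpp provides:

    // Sketch: DiscreteTypeInfo as a dispatch-table key after this patch.
    #include <openvino/core/type.hpp>
    #include <openvino/op/reduce_mean.hpp>
    #include <openvino/op/reduce_sum.hpp>
    #include <string>
    #include <unordered_map>

    int main() {
        const std::unordered_map<ov::DiscreteTypeInfo, std::string> kind{
            {ov::op::v1::ReduceSum::get_type_info_static(), "sum"},
            {ov::op::v1::ReduceMean::get_type_info_static(), "mean"},
        };
        // hash() now folds name and version_id only; lookups behave as before.
        return kind.count(ov::op::v1::ReduceMean::get_type_info_static()) ? 0 : 1;
    }
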
diff --git a/src/core/include/openvino/op/reverse.hpp b/src/core/include/openvino/op/reverse.hpp index 37266573f7232d..7b1a904aacf201 100644 --- a/src/core/include/openvino/op/reverse.hpp +++ b/src/core/include/openvino/op/reverse.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Reverse : public Op { public: - OPENVINO_OP("Reverse", "opset1", op::Op, 1); + OPENVINO_OP("Reverse", "opset1", op::Op); enum class Mode { INDEX, MASK }; diff --git a/src/core/include/openvino/op/rnn_sequence.hpp b/src/core/include/openvino/op/rnn_sequence.hpp index 44eaf247281a6a..0ee5543687422f 100644 --- a/src/core/include/openvino/op/rnn_sequence.hpp +++ b/src/core/include/openvino/op/rnn_sequence.hpp @@ -18,7 +18,7 @@ namespace v5 { /// \ingroup ov_ops_cpp_api class OPENVINO_API RNNSequence : public util::RNNCellBase { public: - OPENVINO_OP("RNNSequence", "opset5", util::RNNCellBase, 4); + OPENVINO_OP("RNNSequence", "opset5", util::RNNCellBase); RNNSequence(); diff --git a/src/core/include/openvino/op/roi_align.hpp b/src/core/include/openvino/op/roi_align.hpp index 2922a7bc001df3..ba1ceefe09d3ed 100644 --- a/src/core/include/openvino/op/roi_align.hpp +++ b/src/core/include/openvino/op/roi_align.hpp @@ -14,7 +14,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ROIAlign : public Op { public: - OPENVINO_OP("ROIAlign", "opset3", op::Op, 3); + OPENVINO_OP("ROIAlign", "opset3", op::Op); enum class PoolingMode { AVG, MAX }; ROIAlign() = default; diff --git a/src/core/include/openvino/op/roll.hpp b/src/core/include/openvino/op/roll.hpp index 35344dffbad668..844a39b19cfd68 100644 --- a/src/core/include/openvino/op/roll.hpp +++ b/src/core/include/openvino/op/roll.hpp @@ -13,7 +13,7 @@ namespace v7 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Roll : public Op { public: - OPENVINO_OP("Roll", "opset7", op::Op, 7); + OPENVINO_OP("Roll", "opset7", op::Op); Roll() = default; diff --git a/src/core/include/openvino/op/round.hpp b/src/core/include/openvino/op/round.hpp index 994b5507ca944c..c63cee3738464a 100644 --- a/src/core/include/openvino/op/round.hpp +++ b/src/core/include/openvino/op/round.hpp @@ -18,7 +18,7 @@ namespace v5 { class OPENVINO_API Round : public util::UnaryElementwiseArithmetic { public: enum class RoundMode { HALF_TO_EVEN, HALF_AWAY_FROM_ZERO }; - OPENVINO_OP("Round", "opset5", util::UnaryElementwiseArithmetic, 5); + OPENVINO_OP("Round", "opset5", util::UnaryElementwiseArithmetic); /// \brief Constructs a round operation. 
Round() = default; diff --git a/src/core/include/openvino/op/scatter_elements_update.hpp b/src/core/include/openvino/op/scatter_elements_update.hpp index 903b1fb9bab0cc..4172e99afc50df 100644 --- a/src/core/include/openvino/op/scatter_elements_update.hpp +++ b/src/core/include/openvino/op/scatter_elements_update.hpp @@ -14,7 +14,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ScatterElementsUpdate : public Op { public: - OPENVINO_OP("ScatterElementsUpdate", "opset3", op::Op, 3); + OPENVINO_OP("ScatterElementsUpdate", "opset3", op::Op); ScatterElementsUpdate() = default; /// \brief Constructs a ScatterElementsUpdate node diff --git a/src/core/include/openvino/op/scatter_nd_update.hpp b/src/core/include/openvino/op/scatter_nd_update.hpp index 1c7ac4355e1009..ab28cd374dfc6b 100644 --- a/src/core/include/openvino/op/scatter_nd_update.hpp +++ b/src/core/include/openvino/op/scatter_nd_update.hpp @@ -13,7 +13,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ScatterNDUpdate : public util::ScatterNDBase { public: - OPENVINO_OP("ScatterNDUpdate", "opset4", util::ScatterNDBase, 3); + OPENVINO_OP("ScatterNDUpdate", "opset4", util::ScatterNDBase); ScatterNDUpdate() = default; /// \param inputs Tensor /// \param indices Index tensor: Data type must be `element::i32` or `element::i64` diff --git a/src/core/include/openvino/op/scatter_update.hpp b/src/core/include/openvino/op/scatter_update.hpp index 66936cf2f73654..4e29bf9ab2ac2a 100644 --- a/src/core/include/openvino/op/scatter_update.hpp +++ b/src/core/include/openvino/op/scatter_update.hpp @@ -15,7 +15,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ScatterUpdate : public util::ScatterBase { public: - OPENVINO_OP("ScatterUpdate", "opset3", util::ScatterBase, 3); + OPENVINO_OP("ScatterUpdate", "opset3", util::ScatterBase); ScatterUpdate() = default; /// /// \brief Constructs ScatterUpdate operator object. diff --git a/src/core/include/openvino/op/select.hpp b/src/core/include/openvino/op/select.hpp index 5d2bec70d6779f..78e8e802ab94d3 100644 --- a/src/core/include/openvino/op/select.hpp +++ b/src/core/include/openvino/op/select.hpp @@ -30,7 +30,7 @@ namespace v1 { // clang-format on class OPENVINO_API Select : public Op { public: - OPENVINO_OP("Select", "opset1", op::Op, 1); + OPENVINO_OP("Select", "opset1", op::Op); /// \brief Constructs a selection operation. Select() : m_auto_broadcast(AutoBroadcastSpec(AutoBroadcastType::NUMPY)) {} diff --git a/src/core/include/openvino/op/shape_of.hpp b/src/core/include/openvino/op/shape_of.hpp index 5e2cace77f9d51..e7ec34c1c87d86 100644 --- a/src/core/include/openvino/op/shape_of.hpp +++ b/src/core/include/openvino/op/shape_of.hpp @@ -13,7 +13,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API ShapeOf : public util::ShapeOfBase { public: - OPENVINO_OP("ShapeOf", "opset3", util::ShapeOfBase, 3); + OPENVINO_OP("ShapeOf", "opset3", util::ShapeOfBase); ShapeOf() = default; /// \brief Constructs a shape-of operation. 
ShapeOf(const Output& arg, const element::Type output_type = element::i64); diff --git a/src/core/include/openvino/op/softmax.hpp b/src/core/include/openvino/op/softmax.hpp index fc9c414df5af02..8a43c6dae7bdef 100644 --- a/src/core/include/openvino/op/softmax.hpp +++ b/src/core/include/openvino/op/softmax.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Softmax : public Op { public: - OPENVINO_OP("Softmax", "opset1", op::Op, 1); + OPENVINO_OP("Softmax", "opset1", op::Op); Softmax() = default; /// \brief Constructs a softmax operation. diff --git a/src/core/include/openvino/op/softplus.hpp b/src/core/include/openvino/op/softplus.hpp index d3358268ac326c..aaff04caa53471 100644 --- a/src/core/include/openvino/op/softplus.hpp +++ b/src/core/include/openvino/op/softplus.hpp @@ -15,7 +15,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API SoftPlus : public util::UnaryElementwiseArithmetic { public: - OPENVINO_OP("SoftPlus", "opset4", util::UnaryElementwiseArithmetic, 4); + OPENVINO_OP("SoftPlus", "opset4", util::UnaryElementwiseArithmetic); SoftPlus() = default; /// \brief Constructs an SoftPlus operation. diff --git a/src/core/include/openvino/op/space_to_batch.hpp b/src/core/include/openvino/op/space_to_batch.hpp index ceaac33345b2c3..83d47b96ba4c45 100644 --- a/src/core/include/openvino/op/space_to_batch.hpp +++ b/src/core/include/openvino/op/space_to_batch.hpp @@ -23,7 +23,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API SpaceToBatch : public Op { public: - OPENVINO_OP("SpaceToBatch", "opset2", op::Op, 1); + OPENVINO_OP("SpaceToBatch", "opset2", op::Op); SpaceToBatch() = default; diff --git a/src/core/include/openvino/op/split.hpp b/src/core/include/openvino/op/split.hpp index 6a2b70434af549..918457c0d84a05 100644 --- a/src/core/include/openvino/op/split.hpp +++ b/src/core/include/openvino/op/split.hpp @@ -17,7 +17,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Split : public Op { public: - OPENVINO_OP("Split", "opset1", op::Op, 1); + OPENVINO_OP("Split", "opset1", op::Op); /// \brief Constructs a split operation. 
Split() = default; diff --git a/src/core/include/openvino/op/strided_slice.hpp b/src/core/include/openvino/op/strided_slice.hpp index f52b095424c196..5535e2925800c9 100644 --- a/src/core/include/openvino/op/strided_slice.hpp +++ b/src/core/include/openvino/op/strided_slice.hpp @@ -18,7 +18,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API StridedSlice : public Op { public: - OPENVINO_OP("StridedSlice", "opset1", op::Op, 1); + OPENVINO_OP("StridedSlice", "opset1", op::Op); StridedSlice() = default; diff --git a/src/core/include/openvino/op/subtract.hpp b/src/core/include/openvino/op/subtract.hpp index 3c129cb1f9b233..5fd58da3bd6ff5 100644 --- a/src/core/include/openvino/op/subtract.hpp +++ b/src/core/include/openvino/op/subtract.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Subtract : public util::BinaryElementwiseArithmetic { public: - OPENVINO_OP("Subtract", "opset1", util::BinaryElementwiseArithmetic, 1); + OPENVINO_OP("Subtract", "opset1", util::BinaryElementwiseArithmetic); Subtract() : util::BinaryElementwiseArithmetic(AutoBroadcastType::NUMPY) {} diff --git a/src/core/include/openvino/op/swish.hpp b/src/core/include/openvino/op/swish.hpp index 1395e62e409522..bc9935d7f3e88f 100644 --- a/src/core/include/openvino/op/swish.hpp +++ b/src/core/include/openvino/op/swish.hpp @@ -16,7 +16,7 @@ namespace v4 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Swish : public Op { public: - OPENVINO_OP("Swish", "opset4", op::Op, 4); + OPENVINO_OP("Swish", "opset4", op::Op); Swish() = default; /// \brief Constructs an Swish operation. diff --git a/src/core/include/openvino/op/topk.hpp b/src/core/include/openvino/op/topk.hpp index 2af15c7baefb92..9c2ec7a9ce1492 100644 --- a/src/core/include/openvino/op/topk.hpp +++ b/src/core/include/openvino/op/topk.hpp @@ -18,7 +18,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API TopK : public util::TopKBase { public: - OPENVINO_OP("TopK", "opset1", op::util::TopKBase, 1); + OPENVINO_OP("TopK", "opset1", op::util::TopKBase); using SortType = TopKSortType; using Mode = TopKMode; @@ -69,7 +69,7 @@ namespace v3 { /// \ingroup ov_ops_cpp_api class OPENVINO_API TopK : public util::TopKBase { public: - OPENVINO_OP("TopK", "opset3", op::util::TopKBase, 3); + OPENVINO_OP("TopK", "opset3", op::util::TopKBase); /// \brief Constructs a TopK operation TopK() = default; /// \brief Constructs a TopK operation with two outputs: values and indices. @@ -111,7 +111,7 @@ namespace v11 { /// \ingroup ov_ops_cpp_api class OPENVINO_API TopK : public util::TopKBase { public: - OPENVINO_OP("TopK", "opset11", op::util::TopKBase, 11); + OPENVINO_OP("TopK", "opset11", op::util::TopKBase); /// \brief Constructs a TopK operation TopK() = default; /// \brief Constructs a TopK operation with two outputs: values and indices. 
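All of the header hunks above make the same mechanical change: the trailing numeric version argument is dropped from OPENVINO_OP, leaving the opset string (version_id) as the only version marker, so coexisting revisions such as TopK v1/v3/v11 are now told apart purely by their "opset1"/"opset3"/"opset11" strings. A minimal Python sketch of the user-visible effect, assuming the pyopenvino bindings that expose DiscreteTypeInfo.name and DiscreteTypeInfo.version_id:

import openvino.runtime.opset10 as ops

param = ops.parameter([1, 3, 224, 224], name="data")
node = ops.shape_of(param)

# Type identity is now (name, version_id); there is no numeric version field.
info = node.get_type_info()
print(info.name)        # "ShapeOf"
print(info.version_id)  # "opset3" - the string passed to OPENVINO_OP above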
diff --git a/src/core/include/openvino/op/transpose.hpp b/src/core/include/openvino/op/transpose.hpp index 341906128d3273..2b4af853893270 100644 --- a/src/core/include/openvino/op/transpose.hpp +++ b/src/core/include/openvino/op/transpose.hpp @@ -13,7 +13,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API Transpose : public Op { public: - OPENVINO_OP("Transpose", "opset1", op::Op, 1); + OPENVINO_OP("Transpose", "opset1", op::Op); Transpose() = default; /// diff --git a/src/core/include/openvino/op/variadic_split.hpp b/src/core/include/openvino/op/variadic_split.hpp index 8c5034cf031cc9..2d6f751d48d3ba 100644 --- a/src/core/include/openvino/op/variadic_split.hpp +++ b/src/core/include/openvino/op/variadic_split.hpp @@ -14,7 +14,7 @@ namespace v1 { /// \ingroup ov_ops_cpp_api class OPENVINO_API VariadicSplit : public Op { public: - OPENVINO_OP("VariadicSplit", "opset1", op::Op, 1); + OPENVINO_OP("VariadicSplit", "opset1", op::Op); /// \brief Constructs a variadic split operation. VariadicSplit() = default; diff --git a/src/core/src/node.cpp b/src/core/src/node.cpp index 70b18e710f46e3..2a9baaeb90d144 100644 --- a/src/core/src/node.cpp +++ b/src/core/src/node.cpp @@ -385,9 +385,13 @@ std::ostream& ov::Node::write_description(std::ostream& out, uint32_t depth) con if (depth == 0) { out << get_friendly_name(); } else { - OPENVINO_SUPPRESS_DEPRECATED_START - out << "v" << get_type_info().version << "::" << get_type_info().name << " " << get_friendly_name() << " ("; - OPENVINO_SUPPRESS_DEPRECATED_END + auto version = get_type_info().version_id; + if (version) + out << "v" << version << "::" << get_type_info().name << " " << get_friendly_name() << " ("; + else + out << "v" + << " " + << "::" << get_type_info().name << " " << get_friendly_name() << " ("; string sep = ""; for (const auto& arg : input_values()) { out << sep << arg; diff --git a/src/core/src/pass/low_latency.cpp b/src/core/src/pass/low_latency.cpp index 455adc024cd382..5364b08fd8a88e 100644 --- a/src/core/src/pass/low_latency.cpp +++ b/src/core/src/pass/low_latency.cpp @@ -18,7 +18,7 @@ #include NGRAPH_SUPPRESS_DEPRECATED_START -NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency, "LowLatency", 0); +NGRAPH_RTTI_DEFINITION(ngraph::pass::LowLatency, "LowLatency"); using namespace std; diff --git a/src/core/src/pass/pass.cpp b/src/core/src/pass/pass.cpp index 01f24f05fba1d3..6b246959ac3f13 100644 --- a/src/core/src/pass/pass.cpp +++ b/src/core/src/pass/pass.cpp @@ -57,6 +57,6 @@ ov::pass::ModelPass::~ModelPass() = default; OPENVINO_SUPPRESS_DEPRECATED_START -NGRAPH_RTTI_DEFINITION(ngraph::pass::NodePass, "ngraph::pass::NodePass", 0); +NGRAPH_RTTI_DEFINITION(ngraph::pass::NodePass, "ngraph::pass::NodePass"); ngraph::pass::NodePass::~NodePass() = default; diff --git a/src/core/src/pass/serialize.cpp b/src/core/src/pass/serialize.cpp index 81d2618e122084..dc7f634488bf5d 100644 --- a/src/core/src/pass/serialize.cpp +++ b/src/core/src/pass/serialize.cpp @@ -1002,11 +1002,9 @@ void ngfunction_2_ir(pugi::xml_node& netXml, // WA for LSTMCellv0, peephole input shall not be serialized if (e.to_port == 6) { const auto& type_info = ordered_ops[e.to_layer]->get_type_info(); - OPENVINO_SUPPRESS_DEPRECATED_START - if (!strcmp(type_info.name, "LSTMCell") && type_info.version == 0) { + if (!strcmp(type_info.name, "LSTMCell")) { continue; } - OPENVINO_SUPPRESS_DEPRECATED_END } pugi::xml_node edge = edges.append_child("edge"); edge.append_attribute("from-layer").set_value(e.from_layer); diff --git a/src/core/src/type.cpp b/src/core/src/type.cpp 
index 3def4c0adde5f2..be48b68f1feb40 100644 --- a/src/core/src/type.cpp +++ b/src/core/src/type.cpp @@ -18,12 +18,9 @@ size_t DiscreteTypeInfo::hash() const { if (hash_value != 0) return hash_value; size_t name_hash = name ? std::hash<std::string>()(std::string(name)) : 0; - OPENVINO_SUPPRESS_DEPRECATED_START - size_t version_hash = std::hash<uint64_t>()(version); - OPENVINO_SUPPRESS_DEPRECATED_END size_t version_id_hash = version_id ? std::hash<std::string>()(std::string(version_id)) : 0; - return ov::util::hash_combine(std::vector<size_t>{name_hash, version_hash, version_id_hash}); + return ov::util::hash_combine(std::vector<size_t>{name_hash, version_id_hash}); } size_t DiscreteTypeInfo::hash() { @@ -40,9 +37,7 @@ std::string DiscreteTypeInfo::get_version() const { if (version_id) { return std::string(version_id); } - OPENVINO_SUPPRESS_DEPRECATED_START - return std::to_string(version); - OPENVINO_SUPPRESS_DEPRECATED_END + return {}; } DiscreteTypeInfo::operator std::string() const { @@ -51,10 +46,7 @@ DiscreteTypeInfo::operator std::string() const { std::ostream& operator<<(std::ostream& s, const DiscreteTypeInfo& info) { std::string version_id = info.version_id ? info.version_id : "(empty)"; - OPENVINO_SUPPRESS_DEPRECATED_START - s << "DiscreteTypeInfo{name: " << info.name << ", version_id: " << version_id << ", old_version: " << info.version - << ", parent: "; - OPENVINO_SUPPRESS_DEPRECATED_END + s << "DiscreteTypeInfo{name: " << info.name << ", version_id: " << version_id << ", parent: "; if (!info.parent) s << info.parent; else @@ -66,10 +58,7 @@ std::ostream& operator<<(std::ostream& s, const DiscreteTypeInfo& info) { // parent is commented to fix type relaxed operations bool DiscreteTypeInfo::operator<(const DiscreteTypeInfo& b) const { - OPENVINO_SUPPRESS_DEPRECATED_START - if (version < b.version) - return true; - if (version == b.version && name != nullptr && b.name != nullptr) { + if (name != nullptr && b.name != nullptr) { int cmp_status = strcmp(name, b.name); if (cmp_status < 0) return true; @@ -81,15 +70,20 @@ bool DiscreteTypeInfo::operator<(const DiscreteTypeInfo& b) const { } } - OPENVINO_SUPPRESS_DEPRECATED_END return false; } bool DiscreteTypeInfo::operator==(const DiscreteTypeInfo& b) const { if (hash_value != 0 && b.hash_value != 0) return hash() == b.hash(); - OPENVINO_SUPPRESS_DEPRECATED_START - return version == b.version && strcmp(name, b.name) == 0; - OPENVINO_SUPPRESS_DEPRECATED_END + if (name != nullptr && b.name != nullptr) { + if (strcmp(name, b.name) == 0) { + std::string v_id(version_id == nullptr ?
"" : b.version_id); + if (v_id == bv_id) + return true; + } + } + return false; } bool DiscreteTypeInfo::operator<=(const DiscreteTypeInfo& b) const { return *this == b || *this < b; diff --git a/src/core/tests/graph_rewrite.cpp b/src/core/tests/graph_rewrite.cpp index c7fb67d243c4c3..d85146b1ffd2d0 100644 --- a/src/core/tests/graph_rewrite.cpp +++ b/src/core/tests/graph_rewrite.cpp @@ -55,9 +55,9 @@ class Anchor : public ngraph::pass::GraphRewrite { Anchor() : GraphRewrite() {} }; -NGRAPH_RTTI_DEFINITION(TestPass, "TestPass", 0); -NGRAPH_RTTI_DEFINITION(Anchor, "Anchor", 0); -NGRAPH_RTTI_DEFINITION(GatherNodesPass, "GatherNodesPass", 0); +NGRAPH_RTTI_DEFINITION(TestPass, "TestPass"); +NGRAPH_RTTI_DEFINITION(Anchor, "Anchor"); +NGRAPH_RTTI_DEFINITION(GatherNodesPass, "GatherNodesPass"); std::shared_ptr get_function() { auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{3, 1, 2}); @@ -165,7 +165,7 @@ class PrivateDivide : public ngraph::opset3::Divide { using ngraph::opset3::Divide::Divide; }; -NGRAPH_RTTI_DEFINITION(PrivateDivide, "PrivateDivide", 0, ngraph::opset3::Divide); +NGRAPH_RTTI_DEFINITION(PrivateDivide, "PrivateDivide", ngraph::opset3::Divide); std::shared_ptr get_derived_function() { auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{3, 1, 2}); @@ -434,7 +434,7 @@ class CheckConsumers : public ngraph::pass::MatcherPass { } }; -NGRAPH_RTTI_DEFINITION(CheckConsumers, "CheckConsumers", 0); +NGRAPH_RTTI_DEFINITION(CheckConsumers, "CheckConsumers"); TEST(GraphRewriteTest, nodes_use_count) { auto f = get_function(); diff --git a/src/core/tests/opset.cpp b/src/core/tests/opset.cpp index cfc5cb7250b41a..289ae1188cea53 100644 --- a/src/core/tests/opset.cpp +++ b/src/core/tests/opset.cpp @@ -72,7 +72,7 @@ INSTANTIATE_TEST_SUITE_P(opset, class MyOpOld : public ov::op::Op { public: - static constexpr ov::DiscreteTypeInfo type_info{"MyOpOld", static_cast(0)}; + static constexpr ov::DiscreteTypeInfo type_info{"MyOpOld"}; const ov::DiscreteTypeInfo& get_type_info() const override { return type_info; } @@ -121,7 +121,7 @@ TEST(opset, custom_opset) { opset.insert(); opset.insert(); EXPECT_EQ(opset.get_types_info().size(), 3); - EXPECT_TRUE(opset.contains_type("MyOpNewFromOld")); - EXPECT_TRUE(opset.contains_type("MyOpNew")); - EXPECT_TRUE(opset.contains_type("MyOpIncorrect")); + EXPECT_TRUE(opset.contains_type(std::string("MyOpNewFromOld"))); + EXPECT_TRUE(opset.contains_type(std::string("MyOpNew"))); + EXPECT_TRUE(opset.contains_type(std::string("MyOpIncorrect"))); } diff --git a/src/core/tests/pass_config.cpp b/src/core/tests/pass_config.cpp index 3c9395aedc2bd2..e2c0d15e943eae 100644 --- a/src/core/tests/pass_config.cpp +++ b/src/core/tests/pass_config.cpp @@ -32,7 +32,7 @@ class RenameReLU : public ngraph::pass::MatcherPass { } }; -NGRAPH_RTTI_DEFINITION(RenameReLU, "RenameReLU", 0); +NGRAPH_RTTI_DEFINITION(RenameReLU, "RenameReLU"); class RenameSigmoid : public ngraph::pass::MatcherPass { public: @@ -50,7 +50,7 @@ class RenameSigmoid : public ngraph::pass::MatcherPass { } }; -NGRAPH_RTTI_DEFINITION(RenameSigmoid, "RenameSigmoid", 0); +NGRAPH_RTTI_DEFINITION(RenameSigmoid, "RenameSigmoid"); class TestFunctionPass : public ngraph::pass::FunctionPass { public: @@ -67,7 +67,7 @@ class TestFunctionPass : public ngraph::pass::FunctionPass { } }; -NGRAPH_RTTI_DEFINITION(TestFunctionPass, "TestFunctionPass", 0); +NGRAPH_RTTI_DEFINITION(TestFunctionPass, "TestFunctionPass"); class TestGraphRewritePass : public ngraph::pass::GraphRewrite { public: @@ -78,7 +78,7 @@ class 
TestGraphRewritePass : public ngraph::pass::GraphRewrite { } }; -NGRAPH_RTTI_DEFINITION(TestGraphRewritePass, "TestGraphRewritePass", 0); +NGRAPH_RTTI_DEFINITION(TestGraphRewritePass, "TestGraphRewritePass"); std::tuple, std::shared_ptr, std::shared_ptr> get_test_function() { auto data = std::make_shared(ngraph::element::f32, ngraph::Shape{3, 1, 2}); @@ -289,7 +289,7 @@ class TestNestedMatcher : public ngraph::pass::MatcherPass { } }; -NGRAPH_RTTI_DEFINITION(TestNestedMatcher, "TestNestedMatcher", 0); +NGRAPH_RTTI_DEFINITION(TestNestedMatcher, "TestNestedMatcher"); class TestNestedGraphRewrite : public pass::GraphRewrite { public: @@ -299,7 +299,7 @@ class TestNestedGraphRewrite : public pass::GraphRewrite { } }; -NGRAPH_RTTI_DEFINITION(TestNestedGraphRewrite, "TestNestedGraphRewrite", 0); +NGRAPH_RTTI_DEFINITION(TestNestedGraphRewrite, "TestNestedGraphRewrite"); TEST(PassConfig, EnableDisablePasses10) { std::shared_ptr f; diff --git a/src/core/tests/rtti.cpp b/src/core/tests/rtti.cpp index bb0362f52a366f..e5a1619e75d3ce 100644 --- a/src/core/tests/rtti.cpp +++ b/src/core/tests/rtti.cpp @@ -42,7 +42,7 @@ class OpTypeVersionParent : public OpType { class OpTypeVersionParentOld : public OpType { public: - OPENVINO_OP("OpTypeVersionParentOld", "my_version1", OpType, 1); + OPENVINO_OP("OpTypeVersionParentOld", "my_version1", OpType); OpTypeVersionParentOld() = default; std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override { @@ -56,7 +56,6 @@ TEST(rtti, op_with_type) { auto type_info = op.get_type_info(); ASSERT_EQ(type_info, OpType::get_type_info_static()); ASSERT_EQ(strcmp(type_info.name, "OpType"), 0); - ASSERT_EQ(type_info.version, 0); ASSERT_EQ(strcmp(type_info.version_id, "extension"), 0); ASSERT_NE(type_info.parent, nullptr); ASSERT_EQ(*type_info.parent, ngraph::op::Op::get_type_info_static()); @@ -67,7 +66,6 @@ TEST(rtti, op_with_type_version) { auto type_info = op.get_type_info(); ASSERT_EQ(type_info, OpTypeVersion::get_type_info_static()); ASSERT_EQ(strcmp(type_info.name, "OpTypeVersion"), 0); - ASSERT_EQ(type_info.version, 0); ASSERT_EQ(strcmp(type_info.version_id, "my_version"), 0); ASSERT_NE(type_info.parent, nullptr); ASSERT_EQ(*type_info.parent, ngraph::op::Op::get_type_info_static()); @@ -78,7 +76,6 @@ TEST(rtti, op_with_type_version_parent) { auto type_info = op.get_type_info(); ASSERT_EQ(type_info, OpTypeVersionParent::get_type_info_static()); ASSERT_EQ(strcmp(type_info.name, "OpTypeVersionParent"), 0); - ASSERT_EQ(type_info.version, 0); ASSERT_EQ(strcmp(type_info.version_id, "my_version"), 0); ASSERT_NE(type_info.parent, nullptr); ASSERT_EQ(*type_info.parent, OpType::get_type_info_static()); @@ -90,7 +87,6 @@ TEST(rtti, op_with_type_version_parent_old) { ASSERT_EQ(type_info, OpTypeVersionParentOld::get_type_info_static()); ASSERT_EQ(strcmp(type_info.name, "OpTypeVersionParentOld"), 0); ASSERT_EQ(strcmp(type_info.version_id, "my_version1"), 0); - ASSERT_EQ(type_info.version, 1); ASSERT_NE(type_info.parent, nullptr); ASSERT_EQ(*type_info.parent, OpType::get_type_info_static()); } diff --git a/src/core/tests/type_info.cpp b/src/core/tests/type_info.cpp index 1beac34b3055c2..98c1f9c8e146ed 100644 --- a/src/core/tests/type_info.cpp +++ b/src/core/tests/type_info.cpp @@ -10,11 +10,11 @@ OPENVINO_SUPPRESS_DEPRECATED_START TEST(type_info, compare_old_type) { - ov::DiscreteTypeInfo type1("type1", static_cast(0)); - ov::DiscreteTypeInfo type2("type2", static_cast(0)); - ov::DiscreteTypeInfo type3("type1", 1ul); - ov::DiscreteTypeInfo type4("type3", 
static_cast(0), &type1); - ov::DiscreteTypeInfo type5("type3", static_cast(0), &type2); + ov::DiscreteTypeInfo type1("type1"); + ov::DiscreteTypeInfo type2("type2"); + ov::DiscreteTypeInfo type3("type1"); + ov::DiscreteTypeInfo type4("type3", &type1); + ov::DiscreteTypeInfo type5("type3", &type2); ASSERT_TRUE(type1 != type2); ASSERT_TRUE(type1 == type1); ASSERT_TRUE(type1 < type2); @@ -46,40 +46,37 @@ TEST(type_info, compare_new_type) { } TEST(type_info, compare_new_with_old_type) { - ov::DiscreteTypeInfo type1("type1", static_cast(0), "version1"); - ov::DiscreteTypeInfo type1_o("type1", static_cast(0)); + ov::DiscreteTypeInfo type1("type1", "version1"); + ov::DiscreteTypeInfo type1_o("type1", "version1"); ASSERT_TRUE(type1 == type1_o); } TEST(type_info, check_hash_value) { - const auto& hash_val = [](const char* name, const char* version_id, uint64_t version) -> size_t { + const auto& hash_val = [](const char* name, const char* version_id) -> size_t { size_t name_hash = name ? std::hash()(std::string(name)) : 0; - size_t version_hash = std::hash()(version); size_t version_id_hash = version_id ? std::hash()(std::string(version_id)) : 0; // don't use parent for hash calculation, it is not a part of type (yet) - return ov::util::hash_combine(std::vector{name_hash, version_hash, version_id_hash}); + return ov::util::hash_combine(std::vector{name_hash, version_id_hash}); }; - ov::DiscreteTypeInfo type("type1", 0, "version1"); - ov::DiscreteTypeInfo type_old("type1", 1); - ov::DiscreteTypeInfo type_with_version("type1", 1, "version1"); - ov::DiscreteTypeInfo type_empty_name("", static_cast(0)); - ov::DiscreteTypeInfo type_empty_ver("type", static_cast(0), ""); - EXPECT_EQ(hash_val(type.name, type.version_id, type.version), type.hash()); - EXPECT_EQ(hash_val(type_old.name, type_old.version_id, type_old.version), type_old.hash()); - EXPECT_EQ(hash_val(type_with_version.name, type_with_version.version_id, type_with_version.version), - type_with_version.hash()); - EXPECT_EQ(hash_val(type_empty_name.name, type_empty_name.version_id, type_empty_name.version), - type_empty_name.hash()); - EXPECT_EQ(hash_val(type_empty_ver.name, type_empty_ver.version_id, type_empty_ver.version), type_empty_ver.hash()); + ov::DiscreteTypeInfo type("type1", "version1"); + ov::DiscreteTypeInfo type_old("type1"); + ov::DiscreteTypeInfo type_with_version("type1", "version1"); + ov::DiscreteTypeInfo type_empty_name(""); + ov::DiscreteTypeInfo type_empty_ver("type", ""); + EXPECT_EQ(hash_val(type.name, type.version_id), type.hash()); + EXPECT_EQ(hash_val(type_old.name, type_old.version_id), type_old.hash()); + EXPECT_EQ(hash_val(type_with_version.name, type_with_version.version_id), type_with_version.hash()); + EXPECT_EQ(hash_val(type_empty_name.name, type_empty_name.version_id), type_empty_name.hash()); + EXPECT_EQ(hash_val(type_empty_ver.name, type_empty_ver.version_id), type_empty_ver.hash()); } TEST(type_info, find_in_map) { std::vector vector_names; - ov::DiscreteTypeInfo a("Mod", 1ul, "opset1"); - ov::DiscreteTypeInfo b("Prelu", static_cast(0), "opset1"); - ov::DiscreteTypeInfo c("Vector", static_cast(0)); - ov::DiscreteTypeInfo d("Mod", 1ul, "opset3"); - ov::DiscreteTypeInfo f("Mod", 2ul); + ov::DiscreteTypeInfo a("Mod", "opset1"); + ov::DiscreteTypeInfo b("Prelu", "opset1"); + ov::DiscreteTypeInfo c("Vector"); + ov::DiscreteTypeInfo d("Mod", "opset3"); + ov::DiscreteTypeInfo f("Mod", "opset2"); std::map test_map; test_map[a] = 1; @@ -94,20 +91,20 @@ TEST(type_info, find_in_map) { test_map[type] = 2; std::string name 
= type.name; vector_names.emplace_back(name); - ov::DiscreteTypeInfo t(vector_names.rbegin()->c_str(), 1000); - ov::DiscreteTypeInfo t2(vector_names.rbegin()->c_str(), static_cast(0)); + ov::DiscreteTypeInfo t(vector_names.rbegin()->c_str()); + ov::DiscreteTypeInfo t2(vector_names.rbegin()->c_str()); test_map[t] = 3; test_map[t2] = 4; std::string name1 = "a" + name; vector_names.emplace_back(name1); - ov::DiscreteTypeInfo t3(vector_names.rbegin()->c_str(), 1000); - ov::DiscreteTypeInfo t4(vector_names.rbegin()->c_str(), static_cast(0)); + ov::DiscreteTypeInfo t3(vector_names.rbegin()->c_str()); + ov::DiscreteTypeInfo t4(vector_names.rbegin()->c_str()); test_map[t3] = 5; test_map[t4] = 6; std::string name2 = name + "z"; vector_names.emplace_back(name2); - ov::DiscreteTypeInfo t5(vector_names.rbegin()->c_str(), 1000); - ov::DiscreteTypeInfo t6(vector_names.rbegin()->c_str(), static_cast(0)); + ov::DiscreteTypeInfo t5(vector_names.rbegin()->c_str()); + ov::DiscreteTypeInfo t6(vector_names.rbegin()->c_str()); test_map[t5] = 7; test_map[t6] = 8; } diff --git a/src/core/tests/type_prop/broadcast.cpp b/src/core/tests/type_prop/broadcast.cpp index e4a82aefb31285..e587512b4e7a70 100644 --- a/src/core/tests/type_prop/broadcast.cpp +++ b/src/core/tests/type_prop/broadcast.cpp @@ -613,7 +613,6 @@ TEST(type_prop, broadcast_v3_bidirectional_mode_string) { const auto broadcast_v3 = make_shared(arg, shape, "BIDIRECTIONAL"); ASSERT_EQ(broadcast_v3->get_broadcast_spec(), op::BroadcastType::BIDIRECTIONAL); - ASSERT_EQ(broadcast_v3->get_version(), 3); } TEST(type_prop, broadcast_v3_shape_unexpected_axes_mapping_input) { diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index 72eaeb9b07acf4..0a4ece0aae5e63 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -771,7 +771,7 @@ std::shared_ptr XmlDeserializer::create_node( const std::string& type_name = translate_type_name(params.type); std::shared_ptr ngraphNode; - ov::DiscreteTypeInfo type(type_name.c_str(), 0, params.version.c_str()); + ov::DiscreteTypeInfo type(type_name.c_str(), params.version.c_str()); auto extensionIt = m_extensions.find(type); if (extensionIt != m_extensions.end()) { @@ -885,7 +885,7 @@ std::shared_ptr XmlDeserializer::create_node( item.print(ss); IE_THROW() << "rt_info attribute: " << attribute_name << " has no \"version\" field: " << ss.str(); } - const auto& type_info = ov::DiscreteTypeInfo(attribute_name.c_str(), 0, attribute_version.c_str()); + const auto& type_info = ov::DiscreteTypeInfo(attribute_name.c_str(), attribute_version.c_str()); auto attr = attrs_factory.create_by_type_info(type_info); if (!attr.empty()) { if (attr.is()) { diff --git a/src/inference/src/ie_network_reader.cpp b/src/inference/src/ie_network_reader.cpp index 6baf232df80f57..16bfdc7d883f98 100644 --- a/src/inference/src/ie_network_reader.cpp +++ b/src/inference/src/ie_network_reader.cpp @@ -47,7 +47,7 @@ class ExtensionWrapper : public ov::LegacyOpExtension { : m_ext(ext), m_opset_name(opset), m_type(name), - m_ext_type(m_type.c_str(), 0, m_opset_name.c_str()) {} + m_ext_type(m_type.c_str(), m_opset_name.c_str()) {} const ov::DiscreteTypeInfo& get_type_info() const override { return m_ext_type; diff --git a/src/plugins/intel_cpu/src/ngraph_transformations/swap_convert_transpose.cpp b/src/plugins/intel_cpu/src/ngraph_transformations/swap_convert_transpose.cpp index f03806a7adee38..8a836b46f9c186 100755 --- 
a/src/plugins/intel_cpu/src/ngraph_transformations/swap_convert_transpose.cpp +++ b/src/plugins/intel_cpu/src/ngraph_transformations/swap_convert_transpose.cpp @@ -10,7 +10,7 @@ #include "itt.hpp" -NGRAPH_RTTI_DEFINITION(ov::intel_cpu::SwapConvertTranspose, "SwapConvertTranspose", 0); +NGRAPH_RTTI_DEFINITION(ov::intel_cpu::SwapConvertTranspose, "SwapConvertTranspose"); ov::intel_cpu::SwapConvertTranspose::SwapConvertTranspose() { MATCHER_SCOPE(SwapConvertTranspose); diff --git a/src/plugins/intel_cpu/src/nodes/if.cpp b/src/plugins/intel_cpu/src/nodes/if.cpp index b1f93795cf874c..84856a3e6a90ef 100644 --- a/src/plugins/intel_cpu/src/nodes/if.cpp +++ b/src/plugins/intel_cpu/src/nodes/if.cpp @@ -48,7 +48,7 @@ void If::PortMapHelper::redefineTo() { bool If::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (!one_of(op->get_type_info(), ov::op::v8::If::get_type_info_static())) { - errorMessage = "Not supported If operation version " + std::to_string(op->get_type_info().version) + + errorMessage = "Not supported If operation version " + std::string(op->get_type_info().version_id) + " with name '" + op->get_friendly_name() + "'. Node If supports only opset8 version."; return false; } diff --git a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp index 780e6eb4607f36..67400590ce40a4 100644 --- a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp +++ b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp @@ -600,7 +600,7 @@ NonMaxSuppression::NonMaxSuppression(const std::shared_ptr& op, co sortResultDescending = nmsIe->m_sort_result_descending; } else { const auto &typeInfo = op->get_type_info(); - IE_THROW() << errorPrefix << " doesn't support NMS: " << typeInfo.name << " v" << typeInfo.version; + IE_THROW() << errorPrefix << " doesn't support NMS: " << typeInfo.name << " v" << typeInfo.version_id; } const auto &boxes_dims = getInputShapeAtPort(NMS_BOXES).getDims(); diff --git a/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/onehot_ie.hpp b/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/onehot_ie.hpp index e295e25e06ec6c..e06f8c5a528adc 100644 --- a/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/onehot_ie.hpp +++ b/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/onehot_ie.hpp @@ -32,12 +32,6 @@ class ngraph::op::OneHotIE : public Op { float off_value, element::Type type); - OPENVINO_SUPPRESS_DEPRECATED_START - size_t get_version() const override { - return 1; - } - OPENVINO_SUPPRESS_DEPRECATED_END - void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; bool visit_attributes(AttributeVisitor& visitor) override; diff --git a/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/pad_ie.hpp b/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/pad_ie.hpp index 4f6b98c27bef73..076280819e3ad3 100644 --- a/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/pad_ie.hpp +++ b/src/plugins/intel_gna/legacy/include/legacy/ngraph_ops/pad_ie.hpp @@ -29,12 +29,6 @@ class PadIE : public Op { Shape output_shape, float pad_value); - OPENVINO_SUPPRESS_DEPRECATED_START - size_t get_version() const override { - return 1; - } - OPENVINO_SUPPRESS_DEPRECATED_END - void validate_and_infer_types() override; bool visit_attributes(AttributeVisitor& visitor) override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; diff --git 
a/src/plugins/intel_gna/legacy/tests/convert_ngraph_to_cnn_network_tests.cpp b/src/plugins/intel_gna/legacy/tests/convert_ngraph_to_cnn_network_tests.cpp index 295f75ccaeb7fc..74d73a777c9859 100644 --- a/src/plugins/intel_gna/legacy/tests/convert_ngraph_to_cnn_network_tests.cpp +++ b/src/plugins/intel_gna/legacy/tests/convert_ngraph_to_cnn_network_tests.cpp @@ -234,10 +234,10 @@ TEST(ConvertFunctionToCNNNetworkTests, UnsupportedDynamicOps) { } catch (InferenceEngine::Exception& e) { EXPECT_THAT(e.what(), testing::HasSubstr(std::string("Unsupported dynamic ops: \n" - "v0::Parameter param () -> (f32[...])\n" - "v0::Relu relu (param[0]:f32[...]) -> (f32[...])\n" - "v3::NonZero non_zero (relu[0]:f32[...]) -> (i64[?,?])\n" - "v0::Result result (non_zero[0]:i64[?,?]) -> (i64[?,?])"))); + "vopset1::Parameter param () -> (f32[...])\n" + "vopset1::Relu relu (param[0]:f32[...]) -> (f32[...])\n" + "vopset3::NonZero non_zero (relu[0]:f32[...]) -> (i64[?,?])\n" + "vopset1::Result result (non_zero[0]:i64[?,?]) -> (i64[?,?])"))); } } diff --git a/src/plugins/intel_gna/src/transformations/insert_copy_layer.cpp b/src/plugins/intel_gna/src/transformations/insert_copy_layer.cpp index 809af60bc6017f..c4c1fd18f74304 100644 --- a/src/plugins/intel_gna/src/transformations/insert_copy_layer.cpp +++ b/src/plugins/intel_gna/src/transformations/insert_copy_layer.cpp @@ -20,11 +20,11 @@ using namespace ov::intel_gna::pass; using namespace ov::intel_gna::ngraph_util; using namespace ov::opset9; -NGRAPH_RTTI_DEFINITION(InsertCopyBeforeAssignLayer, "InsertCopyBeforeAssignLayer", 0); -NGRAPH_RTTI_DEFINITION(InsertCopyBeforeConcatLayer, "InsertCopyBeforeConcatLayer", 0); -NGRAPH_RTTI_DEFINITION(HandleMultiConnectedLayerToConcatAndMemory, "HandleMultiConnectedLayerToConcatAndMemory", 0); -NGRAPH_RTTI_DEFINITION(MatchNonComputationalLayers, "MatchNonComputationalLayers", 0); -NGRAPH_RTTI_DEFINITION(HandleNonFunctionalSubgraphs, "HandleNonFunctionalSubgraphs", 0); +NGRAPH_RTTI_DEFINITION(InsertCopyBeforeAssignLayer, "InsertCopyBeforeAssignLayer"); +NGRAPH_RTTI_DEFINITION(InsertCopyBeforeConcatLayer, "InsertCopyBeforeConcatLayer"); +NGRAPH_RTTI_DEFINITION(HandleMultiConnectedLayerToConcatAndMemory, "HandleMultiConnectedLayerToConcatAndMemory"); +NGRAPH_RTTI_DEFINITION(MatchNonComputationalLayers, "MatchNonComputationalLayers"); +NGRAPH_RTTI_DEFINITION(HandleNonFunctionalSubgraphs, "HandleNonFunctionalSubgraphs"); namespace { void insert_copy_layer_between(std::shared_ptr input_op, diff --git a/src/plugins/intel_gna/src/transformations/pwl_approximation.cpp b/src/plugins/intel_gna/src/transformations/pwl_approximation.cpp index 100e0c00ba5aed..d2bcfa13c7202f 100644 --- a/src/plugins/intel_gna/src/transformations/pwl_approximation.cpp +++ b/src/plugins/intel_gna/src/transformations/pwl_approximation.cpp @@ -25,8 +25,8 @@ using namespace ov::intel_gna; using namespace ov::intel_gna::pass; using namespace ov::intel_gna::common; -NGRAPH_RTTI_DEFINITION(PWLApproximation, "PWLApproximation", 0); -NGRAPH_RTTI_DEFINITION(PWLApproximationWithFq, "PWLApproximationWithFq", 0); +NGRAPH_RTTI_DEFINITION(PWLApproximation, "PWLApproximation"); +NGRAPH_RTTI_DEFINITION(PWLApproximationWithFq, "PWLApproximationWithFq"); template double get_break_bound() { diff --git a/src/plugins/intel_gpu/src/plugin/program.cpp b/src/plugins/intel_gpu/src/plugin/program.cpp index cffe8a8c7b1622..6e75d68d012b7f 100644 --- a/src/plugins/intel_gpu/src/plugin/program.cpp +++ b/src/plugins/intel_gpu/src/plugin/program.cpp @@ -418,7 +418,7 @@ bool 
Program::IsOpSupported(const InferenceEngine::CNNNetwork& network, const st void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, const std::shared_ptr& op) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Program::CreateSingleLayerPrimitive"); - GPU_DEBUG_LOG << "Process " << "op::v" << op->get_type_info().version << "::" << op->get_type_name() << " operation " + GPU_DEBUG_LOG << "Process " << "op::v" << op->get_type_info().version_id << "::" << op->get_type_name() << " operation " << "(friendly_name=" << op->get_friendly_name() << ")" << std::endl; bool is_created = false; @@ -442,7 +442,7 @@ void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, const std::s if (!is_created) { IE_THROW() << "Operation: " << op->get_friendly_name() << " of type " << op->get_type_name() - << "(op::v" << op->get_type_info().version << ") is not supported"; + << "(op::v" << op->get_type_info().version_id << ") is not supported"; } } @@ -560,7 +560,7 @@ void validate_inputs_count(const std::shared_ptr& op, std::vector< IE_THROW() << "Invalid inputs count (" << op->get_input_size() << ") in " << op->get_friendly_name() << " (" << op->get_type_name() - << " op::v" << op->get_type_info().version << ")"; + << " op::v" << op->get_type_info().version_id << ")"; } } // namespace intel_gpu diff --git a/src/tests/ie_test_utils/common_test_utils/graph_comparator.cpp b/src/tests/ie_test_utils/common_test_utils/graph_comparator.cpp index 54b2c4abdc0dd5..00636bca0d8182 100644 --- a/src/tests/ie_test_utils/common_test_utils/graph_comparator.cpp +++ b/src/tests/ie_test_utils/common_test_utils/graph_comparator.cpp @@ -34,11 +34,9 @@ bool is_type_relaxed(const std::string& type) { } bool compare_type_info(const ngraph::DiscreteTypeInfo& info1, const ngraph::DiscreteTypeInfo& info2) { - OPENVINO_SUPPRESS_DEPRECATED_START - if (!is_type_relaxed(info1.name) && !is_type_relaxed(info2.name) && (info1.version != info2.version)) { + if (!is_type_relaxed(info1.name) && !is_type_relaxed(info2.name) && (std::strcmp(info1.version_id, info2.version_id) != 0)) { return false; } - OPENVINO_SUPPRESS_DEPRECATED_END const std::string info1Name = is_type_relaxed(info1.name) && (info1.parent != nullptr) ? info1.parent->name : info1.name; @@ -89,9 +87,7 @@ bool less_by_parent_name(const std::shared_ptr& l, } std::string typeInfoToStr(const ngraph::Node::type_info_t& typeInfo) { - OPENVINO_SUPPRESS_DEPRECATED_START - return std::string(typeInfo.name) + "/" + to_str(typeInfo.version); - OPENVINO_SUPPRESS_DEPRECATED_END + return std::string(typeInfo.name) + "/" + std::string(typeInfo.version_id); } std::string tensor_names(const ngraph::descriptor::Tensor& t) { diff --git a/src/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp b/src/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp index c6a7caefeb5934..b6cd1dc3c15c47 100644 --- a/src/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp +++ b/src/tests/ngraph_helpers/ngraph_functions/src/utils/ngraph_helpers.cpp @@ -274,9 +274,7 @@ std::vector>> getCons namespace { std::string toString(const NodeTypeInfo& typeInfo) { - OPENVINO_SUPPRESS_DEPRECATED_START - return std::string(typeInfo.name) + " ver. " + std::to_string(typeInfo.version); - OPENVINO_SUPPRESS_DEPRECATED_END + return std::string(typeInfo.name) + " ver. 
" + std::string(typeInfo.version_id); } void CompareShapes(const PartialShape& actual, const PartialShape& expected) { @@ -337,9 +335,7 @@ std::shared_ptr getNodeSharedPtr(const ngraph::NodeTypeInfo &type_ ngraphNode->validate_and_infer_types(); return ngraphNode; } - OPENVINO_SUPPRESS_DEPRECATED_START - NGRAPH_UNREACHABLE("supported opsets does not contain op with name: ", type_info.name, " version: ", type_info.version); - OPENVINO_SUPPRESS_DEPRECATED_END + NGRAPH_UNREACHABLE("supported opsets does not contain op with name: ", type_info.name, " version: ", type_info.version_id); } bool is_tensor_iterator_exist(const std::shared_ptr & func) { From 4561aa7109b12a03664da9f580a2d5f9daf8c8af Mon Sep 17 00:00:00 2001 From: Jan Iwaszkiewicz Date: Wed, 22 Mar 2023 16:12:07 +0100 Subject: [PATCH 039/296] [PyOV] OVDict class - new return value from inference (#16370) --- src/bindings/python/requirements.txt | 1 + src/bindings/python/requirements_test.txt | 1 + .../python/src/openvino/runtime/ie_api.py | 33 ++- .../runtime/utils/data_helpers/__init__.py | 1 + .../runtime/utils/data_helpers/wrappers.py | 118 ++++++++- .../python/src/pyopenvino/core/common.cpp | 177 ++++++------- .../python/src/pyopenvino/core/common.hpp | 16 +- .../src/pyopenvino/core/compiled_model.cpp | 4 - .../python/src/pyopenvino/core/containers.cpp | 23 -- .../python/src/pyopenvino/core/containers.hpp | 23 -- .../src/pyopenvino/core/infer_request.cpp | 12 +- .../python/src/pyopenvino/pyopenvino.cpp | 4 - .../python/tests/test_runtime/test_ovdict.py | 249 ++++++++++++++++++ 13 files changed, 489 insertions(+), 173 deletions(-) delete mode 100644 src/bindings/python/src/pyopenvino/core/containers.cpp delete mode 100644 src/bindings/python/src/pyopenvino/core/containers.hpp create mode 100644 src/bindings/python/tests/test_runtime/test_ovdict.py diff --git a/src/bindings/python/requirements.txt b/src/bindings/python/requirements.txt index e83f59eb8b3ae7..968d95b8760bed 100644 --- a/src/bindings/python/requirements.txt +++ b/src/bindings/python/requirements.txt @@ -1 +1,2 @@ numpy>=1.16.6 +singledispatchmethod; python_version<'3.8' diff --git a/src/bindings/python/requirements_test.txt b/src/bindings/python/requirements_test.txt index 530a28b3bf1e16..2bd82fb628bc26 100644 --- a/src/bindings/python/requirements_test.txt +++ b/src/bindings/python/requirements_test.txt @@ -40,3 +40,4 @@ types-pkg_resources wheel>=0.38.1 protobuf~=3.18.1 numpy>=1.16.6,<=1.23.4 +singledispatchmethod; python_version<'3.8' diff --git a/src/bindings/python/src/openvino/runtime/ie_api.py b/src/bindings/python/src/openvino/runtime/ie_api.py index 7bab65a0382113..90099609a1a313 100644 --- a/src/bindings/python/src/openvino/runtime/ie_api.py +++ b/src/bindings/python/src/openvino/runtime/ie_api.py @@ -2,7 +2,6 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from functools import singledispatch from typing import Any, Iterable, Union, Dict, Optional from pathlib import Path @@ -16,6 +15,7 @@ from openvino._pyopenvino import Tensor from openvino.runtime.utils.data_helpers import ( + OVDict, _InferRequestWrapper, _data_dispatch, tensor_from_file, @@ -25,7 +25,7 @@ class InferRequest(_InferRequestWrapper): """InferRequest class represents infer request which can be run in asynchronous or synchronous manners.""" - def infer(self, inputs: Any = None, shared_memory: bool = False) -> dict: + def infer(self, inputs: Any = None, shared_memory: bool = False) -> OVDict: """Infers specified input(s) in synchronous mode. 
Blocks all methods of InferRequest while request is running. @@ -68,14 +68,14 @@ def infer(self, inputs: Any = None, shared_memory: bool = False) -> dict: Default value: False :type shared_memory: bool, optional - :return: Dictionary of results from output tensors with ports as keys. - :rtype: Dict[openvino.runtime.ConstOutput, numpy.ndarray] + :return: Dictionary of results from output tensors with port/int/str keys. + :rtype: OVDict """ - return super().infer(_data_dispatch( + return OVDict(super().infer(_data_dispatch( self, inputs, is_shared=shared_memory, - )) + ))) def start_async( self, @@ -138,6 +138,15 @@ def start_async( userdata, ) + @property + def results(self) -> OVDict: + """Gets all outputs tensors of this InferRequest. + + :return: Dictionary of results from output tensors with ports as keys. + :rtype: Dict[openvino.runtime.ConstOutput, numpy.array] + """ + return OVDict(super().results) + class CompiledModel(CompiledModelBase): """CompiledModel class. @@ -161,7 +170,7 @@ def create_infer_request(self) -> InferRequest: """ return InferRequest(super().create_infer_request()) - def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None) -> dict: + def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None) -> OVDict: """Infers specified input(s) in synchronous mode. Blocks all methods of CompiledModel while request is running. @@ -187,8 +196,8 @@ def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] :param inputs: Data to be set on input tensors. :type inputs: Union[Dict[keys, values], List[values], Tuple[values], Tensor, numpy.ndarray], optional - :return: Dictionary of results from output tensors with ports as keys. - :rtype: Dict[openvino.runtime.ConstOutput, numpy.array] + :return: Dictionary of results from output tensors with port/int/str keys. + :rtype: OVDict """ # It returns wrapped python InferReqeust and then call upon # overloaded functions of InferRequest class @@ -196,7 +205,7 @@ def infer_new_request(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] def __call__(self, inputs: Union[dict, list, tuple, Tensor, np.ndarray] = None, - shared_memory: bool = True) -> dict: + shared_memory: bool = True) -> OVDict: """Callable infer wrapper for CompiledModel. Infers specified input(s) in synchronous mode. @@ -248,8 +257,8 @@ def __call__(self, Default value: True :type shared_memory: bool, optional - :return: Dictionary of results from output tensors with ports as keys. - :rtype: Dict[openvino.runtime.ConstOutput, numpy.ndarray] + :return: Dictionary of results from output tensors with port/int/str as keys. 
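In practice the three documented key types can be mixed freely on a single result object. A short end-to-end sketch, assuming a CPU device is available (the tensor name and shapes are illustrative):

import numpy as np
import openvino.runtime.opset10 as ops
from openvino.runtime import Core, Model

param = ops.parameter([1, 20], np.float32, name="data")
model = Model(ops.abs(param), [param])
model.output(0).tensor.names = {"output_0"}

compiled = Core().compile_model(model, "CPU")
result = compiled(np.zeros([1, 20], dtype=np.float32))

# One OVDict, three equivalent ways to address the same tensor:
assert np.array_equal(result[compiled.output(0)], result[0])
assert np.array_equal(result[0], result["output_0"])

# Escape hatches back to plain containers:
plain = result.to_dict()    # {ConstOutput: np.ndarray}, no int/str addressing
values = result.to_tuple()  # (np.ndarray, ...)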
+ :rtype: OVDict """ if self._infer_request is None: self._infer_request = self.create_infer_request() diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py index e49265ccca987f..829a77af96a04c 100644 --- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py +++ b/src/bindings/python/src/openvino/runtime/utils/data_helpers/__init__.py @@ -5,3 +5,4 @@ from openvino.runtime.utils.data_helpers.data_dispatcher import _data_dispatch from openvino.runtime.utils.data_helpers.wrappers import tensor_from_file from openvino.runtime.utils.data_helpers.wrappers import _InferRequestWrapper +from openvino.runtime.utils.data_helpers.wrappers import OVDict diff --git a/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py b/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py index 24b09d40de9555..e2849b8d5e01bd 100644 --- a/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py +++ b/src/bindings/python/src/openvino/runtime/utils/data_helpers/wrappers.py @@ -4,7 +4,17 @@ import numpy as np -from openvino._pyopenvino import Tensor +# TODO: remove this WA and refactor OVDict when Python 3.8 +# becomes the minimal supported version. +try: + from functools import singledispatchmethod +except ImportError: + from singledispatchmethod import singledispatchmethod # type: ignore[no-redef] + +from collections.abc import Mapping +from typing import Union, Dict, List, Iterator, KeysView, ItemsView, ValuesView + +from openvino._pyopenvino import Tensor, ConstOutput +from openvino._pyopenvino import InferRequest as InferRequestBase @@ -20,3 +30,109 @@ def __init__(self, other: InferRequestBase) -> None: # Private memeber to store newly created shared memory data self._inputs_data = None super().__init__(other) + + +class OVDict(Mapping): + """Custom OpenVINO dictionary with inference results. + + This class is a dict-like object. It provides the possibility to + address data tensors with three key types: + + * `openvino.runtime.ConstOutput` - port of the output + * `int` - index of the output + * `str` - names of the output + + This class follows the `frozenset`/`tuple` concept of immutability. + It is prohibited to assign new items or edit them. + + To revert to the previous behavior, use the `to_dict` method, which + returns a shallow copy of the underlying dictionary. + Note: It removes the addressing feature! The new dictionary keeps + only `ConstOutput` keys. + + If a tuple return value is needed, use the `to_tuple` method, which + converts the values to a tuple. + + :Example: + + .. code-block:: python + + # Reverts to the previous behavior of the native dict + result = request.infer(inputs).to_dict() + # or alternatively: + result = dict(request.infer(inputs)) + + ..
code-block:: python + + # To dispatch outputs of multi-output inference: + out1, out2, out3, _ = request.infer(inputs).values() + # or alternatively: + out1, out2, out3, _ = request.infer(inputs).to_tuple() + """ + def __init__(self, _dict: Dict[ConstOutput, np.ndarray]) -> None: + self._dict = _dict + + def __iter__(self) -> Iterator: + return self._dict.__iter__() + + def __len__(self) -> int: + return len(self._dict) + + def __repr__(self) -> str: + return self._dict.__repr__() + + def __get_key(self, index: int) -> ConstOutput: + return list(self._dict.keys())[index] + + @singledispatchmethod + def __getitem_impl(self, key: Union[ConstOutput, int, str]) -> np.ndarray: + raise TypeError("Unknown key type!") + + @__getitem_impl.register + def _(self, key: ConstOutput) -> np.ndarray: + return self._dict[key] + + @__getitem_impl.register + def _(self, key: int) -> np.ndarray: + try: + return self._dict[self.__get_key(key)] + except IndexError: + raise KeyError(key) + + @__getitem_impl.register + def _(self, key: str) -> np.ndarray: + try: + return self._dict[self.__get_key(self.names().index(key))] + except ValueError: + raise KeyError(key) + + def __getitem__(self, key: Union[ConstOutput, int, str]) -> np.ndarray: + return self.__getitem_impl(key) + + def keys(self) -> KeysView[ConstOutput]: + return self._dict.keys() + + def values(self) -> ValuesView[np.ndarray]: + return self._dict.values() + + def items(self) -> ItemsView[ConstOutput, np.ndarray]: + return self._dict.items() + + def names(self) -> List[str]: + """Return the name of every output key. + + Throws RuntimeError if any of the ConstOutput keys has no name. + """ + return [key.get_any_name() for key in self._dict.keys()] + + def to_dict(self) -> Dict[ConstOutput, np.ndarray]: + """Return the underlying native dictionary. + + The function performs a shallow copy, thus any modifications to + returned values may affect this class as well.
+ """ + return self._dict + + def to_tuple(self) -> tuple: + """Convert values of this dictionary to a tuple.""" + return tuple(self._dict.values()) diff --git a/src/bindings/python/src/pyopenvino/core/common.cpp b/src/bindings/python/src/pyopenvino/core/common.cpp index 2ad7e395a92895..ef5313cec0185d 100644 --- a/src/bindings/python/src/pyopenvino/core/common.cpp +++ b/src/bindings/python/src/pyopenvino/core/common.cpp @@ -53,6 +53,27 @@ const std::map& dtype_to_ov_type() { return dtype_to_ov_type_mapping; } +namespace containers { +const TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs) { + TensorIndexMap result_map; + for (auto&& input : inputs) { + int idx; + if (py::isinstance(input.first)) { + idx = input.first.cast(); + } else { + throw py::type_error("incompatible function arguments!"); + } + if (py::isinstance(input.second)) { + auto tensor = Common::cast_to_tensor(input.second); + result_map[idx] = tensor; + } else { + throw ov::Exception("Unable to cast tensor " + std::to_string(idx) + "!"); + } + } + return result_map; +} +}; // namespace containers + namespace array_helpers { bool is_contiguous(const py::array& array) { @@ -110,6 +131,67 @@ py::array as_contiguous(py::array& array, ov::element::Type type) { } } +py::array array_from_tensor(ov::Tensor&& t) { + switch (t.get_element_type()) { + case ov::element::Type_t::f32: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::f64: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::bf16: { + return py::array(py::dtype("float16"), t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::f16: { + return py::array(py::dtype("float16"), t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::i8: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::i16: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::i32: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::i64: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::u8: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::u16: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::u32: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::u64: { + return py::array_t(t.get_shape(), t.data()); + break; + } + case ov::element::Type_t::boolean: { + return py::array_t(t.get_shape(), t.data()); + break; + } + default: { + throw ov::Exception("Numpy array cannot be created from given OV Tensor!"); + break; + } + } +} + }; // namespace array_helpers template <> @@ -226,38 +308,6 @@ const ov::Tensor& cast_to_tensor(const py::handle& tensor) { return tensor.cast(); } -const Containers::TensorNameMap cast_to_tensor_name_map(const py::dict& inputs) { - Containers::TensorNameMap result_map; - for (auto&& input : inputs) { - std::string name; - if (py::isinstance(input.first)) { - name = input.first.cast(); - } else { - throw py::type_error("incompatible function arguments!"); - } - OPENVINO_ASSERT(py::isinstance(input.second), "Unable to cast tensor ", name, "!"); - auto tensor = Common::cast_to_tensor(input.second); - result_map[name] = tensor; - } - return result_map; -} - -const Containers::TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs) { - Containers::TensorIndexMap result_map; - for (auto&& input : inputs) { - 
int idx; - if (py::isinstance(input.first)) { - idx = input.first.cast(); - } else { - throw py::type_error("incompatible function arguments!"); - } - OPENVINO_ASSERT(py::isinstance(input.second), "Unable to cast tensor ", idx, "!"); - auto tensor = Common::cast_to_tensor(input.second); - result_map[idx] = tensor; - } - return result_map; -} - void set_request_tensors(ov::InferRequest& request, const py::dict& inputs) { if (!inputs.empty()) { for (auto&& input : inputs) { @@ -293,67 +343,10 @@ uint32_t get_optimal_number_of_requests(const ov::CompiledModel& actual) { } } -py::dict outputs_to_dict(const std::vector>& outputs, ov::InferRequest& request) { +py::dict outputs_to_dict(InferRequestWrapper& request) { py::dict res; - for (const auto& out : outputs) { - ov::Tensor t{request.get_tensor(out)}; - switch (t.get_element_type()) { - case ov::element::Type_t::i8: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::i16: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::i32: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::i64: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::u8: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::u16: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::u32: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::u64: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::bf16: { - res[py::cast(out)] = py::array(py::dtype("float16"), t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::f16: { - res[py::cast(out)] = py::array(py::dtype("float16"), t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::f32: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::f64: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - case ov::element::Type_t::boolean: { - res[py::cast(out)] = py::array_t(t.get_shape(), t.data()); - break; - } - default: { - break; - } - } + for (const auto& out : request.m_outputs) { + res[py::cast(out)] = array_helpers::array_from_tensor(request.m_request.get_tensor(out)); } return res; } diff --git a/src/bindings/python/src/pyopenvino/core/common.hpp b/src/bindings/python/src/pyopenvino/core/common.hpp index 910d9e55e966ed..de033c3ddf383c 100644 --- a/src/bindings/python/src/pyopenvino/core/common.hpp +++ b/src/bindings/python/src/pyopenvino/core/common.hpp @@ -20,14 +20,20 @@ #include "openvino/runtime/infer_request.hpp" #include "openvino/runtime/tensor.hpp" #include "openvino/pass/serialize.hpp" -#include "pyopenvino/core/containers.hpp" #include "pyopenvino/graph/any.hpp" #include "pyopenvino/graph/ops/constant.hpp" +#include "pyopenvino/core/infer_request.hpp" namespace py = pybind11; namespace Common { +namespace containers { + using TensorIndexMap = std::map; + + const TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs); +}; // namespace containers + namespace values { // Minimum amount of bits for common numpy types. Used to perform checks against OV types. 
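With the name-keyed TensorNameMap conversion removed above, cast_to_tensor_index_map is the only dict-to-map conversion left in common.cpp; it backs the index-keyed set_input_tensors/set_output_tensors bindings shown further below. A sketch of the call shape that still routes through it, assuming an existing InferRequest named request and matching tensor shapes (both illustrative):

import numpy as np
from openvino.runtime import Tensor

# Keys must be plain Python ints; any other key type raises
# py::type_error("incompatible function arguments!") inside
# cast_to_tensor_index_map.
request.set_input_tensors({0: Tensor(np.zeros([1, 20], dtype=np.float32))})
request.set_output_tensors({0: Tensor(np.zeros([1, 20], dtype=np.float32))})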
@@ -52,6 +58,8 @@ std::vector get_strides(const py::array& array); py::array as_contiguous(py::array& array, ov::element::Type type); +py::array array_from_tensor(ov::Tensor&& t); + }; // namespace array_helpers template @@ -80,15 +88,11 @@ ov::PartialShape partial_shape_from_list(const py::list& shape); const ov::Tensor& cast_to_tensor(const py::handle& tensor); -const Containers::TensorNameMap cast_to_tensor_name_map(const py::dict& inputs); - -const Containers::TensorIndexMap cast_to_tensor_index_map(const py::dict& inputs); - void set_request_tensors(ov::InferRequest& request, const py::dict& inputs); uint32_t get_optimal_number_of_requests(const ov::CompiledModel& actual); -py::dict outputs_to_dict(const std::vector>& outputs, ov::InferRequest& request); +py::dict outputs_to_dict(InferRequestWrapper& request); ov::pass::Serialize::Version convert_to_version(const std::string& version); diff --git a/src/bindings/python/src/pyopenvino/core/compiled_model.cpp b/src/bindings/python/src/pyopenvino/core/compiled_model.cpp index 9cd0202f32f415..7cca9af077e15a 100644 --- a/src/bindings/python/src/pyopenvino/core/compiled_model.cpp +++ b/src/bindings/python/src/pyopenvino/core/compiled_model.cpp @@ -9,13 +9,9 @@ #include "common.hpp" #include "pyopenvino/core/compiled_model.hpp" -#include "pyopenvino/core/containers.hpp" #include "pyopenvino/core/infer_request.hpp" #include "pyopenvino/utils/utils.hpp" -PYBIND11_MAKE_OPAQUE(Containers::TensorIndexMap); -PYBIND11_MAKE_OPAQUE(Containers::TensorNameMap); - namespace py = pybind11; void regclass_CompiledModel(py::module m) { diff --git a/src/bindings/python/src/pyopenvino/core/containers.cpp b/src/bindings/python/src/pyopenvino/core/containers.cpp deleted file mode 100644 index 8ee414e007a14f..00000000000000 --- a/src/bindings/python/src/pyopenvino/core/containers.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "pyopenvino/core/containers.hpp" - -#include - -PYBIND11_MAKE_OPAQUE(Containers::TensorIndexMap); -PYBIND11_MAKE_OPAQUE(Containers::TensorNameMap); - -namespace py = pybind11; - -namespace Containers { - -void regclass_TensorIndexMap(py::module m) { - py::bind_map(m, "TensorIndexMap"); -} - -void regclass_TensorNameMap(py::module m) { - py::bind_map(m, "TensorNameMap"); -} -} // namespace Containers diff --git a/src/bindings/python/src/pyopenvino/core/containers.hpp b/src/bindings/python/src/pyopenvino/core/containers.hpp deleted file mode 100644 index becf2f717847de..00000000000000 --- a/src/bindings/python/src/pyopenvino/core/containers.hpp +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include - -#include - -#include - -namespace py = pybind11; - -namespace Containers { - using TensorIndexMap = std::map; - using TensorNameMap = std::map; - - void regclass_TensorIndexMap(py::module m); - void regclass_TensorNameMap(py::module m); -} diff --git a/src/bindings/python/src/pyopenvino/core/infer_request.cpp b/src/bindings/python/src/pyopenvino/core/infer_request.cpp index 585441569f9e77..8be02e8adb86bb 100644 --- a/src/bindings/python/src/pyopenvino/core/infer_request.cpp +++ b/src/bindings/python/src/pyopenvino/core/infer_request.cpp @@ -11,12 +11,8 @@ #include #include "pyopenvino/core/common.hpp" -#include "pyopenvino/core/containers.hpp" #include "pyopenvino/utils/utils.hpp" -PYBIND11_MAKE_OPAQUE(Containers::TensorIndexMap); 
-PYBIND11_MAKE_OPAQUE(Containers::TensorNameMap); - namespace py = pybind11; inline py::dict run_sync_infer(InferRequestWrapper& self) { @@ -26,7 +22,7 @@ inline py::dict run_sync_infer(InferRequestWrapper& self) { self.m_request.infer(); *self.m_end_time = Time::now(); } - return Common::outputs_to_dict(self.m_outputs, self.m_request); + return Common::outputs_to_dict(self); } void regclass_InferRequest(py::module m) { @@ -103,7 +99,7 @@ void regclass_InferRequest(py::module m) { cls.def( "set_output_tensors", [](InferRequestWrapper& self, const py::dict& outputs) { - auto outputs_map = Common::cast_to_tensor_index_map(outputs); + auto outputs_map = Common::containers::cast_to_tensor_index_map(outputs); for (auto&& output : outputs_map) { self.m_request.set_output_tensor(output.first, output.second); } @@ -120,7 +116,7 @@ void regclass_InferRequest(py::module m) { cls.def( "set_input_tensors", [](InferRequestWrapper& self, const py::dict& inputs) { - auto inputs_map = Common::cast_to_tensor_index_map(inputs); + auto inputs_map = Common::containers::cast_to_tensor_index_map(inputs); for (auto&& input : inputs_map) { self.m_request.set_input_tensor(input.first, input.second); } @@ -719,7 +715,7 @@ void regclass_InferRequest(py::module m) { cls.def_property_readonly( "results", [](InferRequestWrapper& self) { - return Common::outputs_to_dict(self.m_outputs, self.m_request); + return Common::outputs_to_dict(self); }, R"( Gets all outputs tensors of this InferRequest. diff --git a/src/bindings/python/src/pyopenvino/pyopenvino.cpp b/src/bindings/python/src/pyopenvino/pyopenvino.cpp index a229f9eaa7d72e..0f2cdf38278010 100644 --- a/src/bindings/python/src/pyopenvino/pyopenvino.cpp +++ b/src/bindings/python/src/pyopenvino/pyopenvino.cpp @@ -24,7 +24,6 @@ #endif #include "pyopenvino/core/async_infer_queue.hpp" #include "pyopenvino/core/compiled_model.hpp" -#include "pyopenvino/core/containers.hpp" #include "pyopenvino/core/core.hpp" #include "pyopenvino/core/extension.hpp" #include "pyopenvino/core/infer_request.hpp" @@ -210,9 +209,6 @@ PYBIND11_MODULE(_pyopenvino, m) { regclass_Core(m); regclass_Tensor(m); - // Registering specific types of containers - Containers::regclass_TensorIndexMap(m); - Containers::regclass_TensorNameMap(m); regclass_CompiledModel(m); regclass_InferRequest(m); diff --git a/src/bindings/python/tests/test_runtime/test_ovdict.py b/src/bindings/python/tests/test_runtime/test_ovdict.py new file mode 100644 index 00000000000000..e8c76a6d8d3bf7 --- /dev/null +++ b/src/bindings/python/tests/test_runtime/test_ovdict.py @@ -0,0 +1,249 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from collections.abc import Mapping +import numpy as np +import pytest + +import openvino.runtime.opset10 as ops +from openvino.runtime import Core, ConstOutput, CompiledModel, InferRequest, Model +from openvino.runtime.ie_api import OVDict + + +def _get_ovdict( + device, + input_shape=None, + data_type=np.float32, + input_names=None, + output_names=None, + multi_output=False, + direct_infer=False, + split_num=5, +): + # Create model + # If model is multi-output (multi_output=True), input_shape must match + # requirements of split operation. 
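# For illustration (not part of the committed test): ops.split(param, 1, split_num)
# requires the axis-1 length of input_shape to be divisible by split_num; the
# default input_shape=[1, 20] with split_num=5 yields five [1, 4] outputs.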
+ # TODO OpenSource: refactor it to be more generic + if input_shape is None: + input_shape = [1, 20] + if input_names is None: + input_names = ["data_0"] + if output_names is None: + output_names = ["output_0"] + if multi_output: + assert isinstance(output_names, (list, tuple)) + assert len(output_names) > 1 + assert len(output_names) == split_num + param = ops.parameter(input_shape, data_type, name=input_names[0]) + model = Model( + ops.split(param, 1, split_num) if multi_output else ops.abs(param), [param], + ) + # Manually name outputs + for i in range(len(output_names)): + model.output(i).tensor.names = {output_names[i]} + # Compile model + core = Core() + compiled_model = core.compile_model(model, device) + # Create test data + input_data = np.random.random(input_shape).astype(data_type) + # Two ways of infering + if direct_infer: + result = compiled_model(input_data) + assert result is not None + return result, compiled_model + + request = compiled_model.create_infer_request() + result = request.infer(input_data) + assert result is not None + return result, request + + +def _check_keys(keys, outs): + outs_iter = iter(outs) + for key in keys: + assert isinstance(key, ConstOutput) + assert key == next(outs_iter) + return True + + +def _check_values(result): + for value in result.values(): + assert isinstance(value, np.ndarray) + return True + + +def _check_items(result, outs, output_names): + i = 0 + for key, value in result.items(): + assert isinstance(key, ConstOutput) + assert isinstance(value, np.ndarray) + # Check values + assert np.equal(result[outs[i]], result[key]).all() + assert np.equal(result[outs[i]], result[i]).all() + assert np.equal(result[outs[i]], result[output_names[i]]).all() + i += 1 + return True + + +def _check_dict(result, obj, output_names=None): + if output_names is None: + output_names = ["output_0"] + + outs = obj.model_outputs if isinstance(obj, InferRequest) else obj.outputs + assert len(outs) == len(result) + assert len(outs) == len(output_names) + # Check for __iter__ + assert _check_keys(result, outs) + # Check for keys function + assert _check_keys(result.keys(), outs) + assert _check_values(result) + assert _check_items(result, outs, output_names) + assert result.names() == output_names + + return True + + +@pytest.mark.parametrize("is_direct", [True, False]) +def test_ovdict_assign(device, is_direct): + result, _ = _get_ovdict(device, multi_output=False, direct_infer=is_direct) + + with pytest.raises(TypeError) as e: + result["some_name"] = 99 + assert "'OVDict' object does not support item assignment" in str(e.value) + + +@pytest.mark.parametrize("is_direct", [True, False]) +def test_ovdict_single_output_basic(device, is_direct): + result, obj = _get_ovdict(device, multi_output=False, direct_infer=is_direct) + + assert isinstance(result, OVDict) + if isinstance(obj, (InferRequest, CompiledModel)): + assert _check_dict(result, obj) + else: + raise TypeError("Unknown `obj` type!") + + +@pytest.mark.parametrize("is_direct", [True, False]) +def test_ovdict_single_output_noname(device, is_direct): + result, obj = _get_ovdict( + device, + multi_output=False, + direct_infer=is_direct, + output_names=[], + ) + + assert isinstance(result, OVDict) + + outs = obj.model_outputs if isinstance(obj, InferRequest) else obj.outputs + + assert isinstance(result[outs[0]], np.ndarray) + assert isinstance(result[0], np.ndarray) + + with pytest.raises(RuntimeError) as e0: + _ = result["some_name"] + assert "Attempt to get a name for a Tensor without names" in 
str(e0.value) + + with pytest.raises(RuntimeError) as e1: + _ = result.names() + assert "Attempt to get a name for a Tensor without names" in str(e1.value) + + +@pytest.mark.parametrize("is_direct", [True, False]) +def test_ovdict_single_output_wrongname(device, is_direct): + result, obj = _get_ovdict( + device, + multi_output=False, + direct_infer=is_direct, + output_names=["output_21"], + ) + + assert isinstance(result, OVDict) + + outs = obj.model_outputs if isinstance(obj, InferRequest) else obj.outputs + + assert isinstance(result[outs[0]], np.ndarray) + assert isinstance(result[0], np.ndarray) + + with pytest.raises(KeyError) as e: + _ = result["output_37"] + assert "output_37" in str(e.value) + + with pytest.raises(KeyError) as e: + _ = result[6] + assert "6" in str(e.value) + + +@pytest.mark.parametrize("is_direct", [True, False]) +@pytest.mark.parametrize("use_function", [True, False]) +def test_ovdict_single_output_dict(device, is_direct, use_function): + result, obj = _get_ovdict( + device, + multi_output=False, + direct_infer=is_direct, + ) + + assert isinstance(result, OVDict) + + outs = obj.model_outputs if isinstance(obj, InferRequest) else obj.outputs + native_dict = result.to_dict() if use_function else dict(result) + + assert issubclass(type(native_dict), dict) + assert not isinstance(native_dict, OVDict) + assert isinstance(native_dict[outs[0]], np.ndarray) + + with pytest.raises(KeyError) as e: + _ = native_dict["output_0"] + assert "output_0" in str(e.value) + + with pytest.raises(KeyError) as e: + _ = native_dict[0] + assert "0" in str(e.value) + + +@pytest.mark.parametrize("is_direct", [True, False]) +def test_ovdict_multi_output_basic(device, is_direct): + output_names = ["output_0", "output_1", "output_2", "output_3", "output_4"] + result, obj = _get_ovdict( + device, + multi_output=True, + direct_infer=is_direct, + output_names=output_names, + ) + + assert isinstance(result, OVDict) + if isinstance(obj, (InferRequest, CompiledModel)): + assert _check_dict(result, obj, output_names) + else: + raise TypeError("Unknown `obj` type!") + + +@pytest.mark.parametrize("is_direct", [True, False]) +@pytest.mark.parametrize("use_function", [True, False]) +def test_ovdict_multi_output_tuple0(device, is_direct, use_function): + output_names = ["output_0", "output_1"] + result, obj = _get_ovdict( + device, + input_shape=(1, 10), + multi_output=True, + direct_infer=is_direct, + split_num=2, + output_names=output_names, + ) + + out0, out1 = None, None + if use_function: + assert isinstance(result.to_tuple(), tuple) + out0, out1 = result.to_tuple() + else: + out0, out1 = result.values() + + assert out0 is not None + assert out1 is not None + assert isinstance(out0, np.ndarray) + assert isinstance(out1, np.ndarray) + + outs = obj.model_outputs if isinstance(obj, InferRequest) else obj.outputs + + assert np.equal(result[outs[0]], out0).all() + assert np.equal(result[outs[1]], out1).all() From c23a1170ba5a494be4993c475273e01d43c02709 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Wed, 22 Mar 2023 19:51:07 +0400 Subject: [PATCH 040/296] Remove plugins xml (#16470) * Update core_impl.cpp Add first implementation of register_compile_time_plugins (needs to depend on the actual CMake configuration as a next step). 
* Update core.cpp Check for missing plugins.xml * Update core_impl.cpp Avoid exception for missing plugins.xml * Update core_impl.hpp Add register_compile_time_plugins function definition * Plugin loading based on CMake configuration * Remove debug output command * Unify static/dynamic plugin loading * Add CMake option for plugins.xml that defaults to off * Move GENERATE_PLUGINS_XML option to features.cmake * Add missing brace * Remove unnecessary #ifdef check * Prepare to resolve conflicts * Fix compile error * Activate generation of plugins.xml in OpenVINODeveloperPackageConfig.cmake * Fix CMake installation * Plugin loading logic implemented in ie_core.cpp as well * Fix format * Small fixes * Fixed code style * Skip if xml file wasn't found * Added function to find compiled plugins * Generalize plugins hpp * Use new API * Fixed old core * Fixed static build --------- Co-authored-by: CSBVision --- .../plugins/create_plugins_hpp.cmake | 38 +++++----- cmake/developer_package/plugins/plugins.cmake | 53 +++++++------- .../developer_package/plugins/plugins.hpp.in | 17 ++++- cmake/features.cmake | 2 + .../OpenVINODeveloperPackageConfig.cmake.in | 3 + src/cmake/openvino.cmake | 2 +- src/common/util/CMakeLists.txt | 7 ++ .../util/include/openvino/util/file_util.hpp | 8 +++ src/common/util/src/file_util.cpp | 32 +++++++++ src/inference/src/core.cpp | 25 +++---- src/inference/src/dev/core_impl.cpp | 34 +++++++++ src/inference/src/dev/core_impl.hpp | 70 ++++++------------- src/inference/src/ie_core.cpp | 18 ++--- 13 files changed, 191 insertions(+), 118 deletions(-) diff --git a/cmake/developer_package/plugins/create_plugins_hpp.cmake b/cmake/developer_package/plugins/create_plugins_hpp.cmake index cddcad738470ca..10adcac6c28f1f 100644 --- a/cmake/developer_package/plugins/create_plugins_hpp.cmake +++ b/cmake/developer_package/plugins/create_plugins_hpp.cmake @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # -foreach(var IE_DEVICE_MAPPING IE_PLUGINS_HPP_HEADER IE_PLUGINS_HPP_HEADER_IN) +foreach(var IE_DEVICE_MAPPING OV_DYNAMIC IE_PLUGINS_HPP_HEADER IE_PLUGINS_HPP_HEADER_IN) if(NOT DEFINED ${var}) message(FATAL_ERROR "${var} is required, but not defined") endif() @@ -19,20 +19,6 @@ foreach(dev_map IN LISTS IE_DEVICE_MAPPING) list(GET dev_map 0 mapped_dev_name) list(GET dev_map 1 actual_dev_name) - # common - set(_IE_CREATE_PLUGIN_FUNC "CreatePluginEngine${actual_dev_name}") - set(_IE_CREATE_EXTENSION_FUNC "CreateExtensionShared${actual_dev_name}") - - # declarations - set(IE_PLUGINS_DECLARATIONS "${IE_PLUGINS_DECLARATIONS} -IE_DEFINE_PLUGIN_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_PLUGIN_FUNC});") - if(${actual_dev_name}_AS_EXTENSION) - set(IE_PLUGINS_DECLARATIONS "${IE_PLUGINS_DECLARATIONS} -IE_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_EXTENSION_FUNC});") - else() - set(_IE_CREATE_EXTENSION_FUNC "nullptr") - endif() - # definitions set(dev_config "{") if(${mapped_dev_name}_CONFIG) @@ -48,8 +34,28 @@ IE_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_EXTENSION_FUNC});") endif() set(dev_config "${dev_config}}") - set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} + + if(NOT OV_DYNAMIC) + # common + set(_IE_CREATE_PLUGIN_FUNC "CreatePluginEngine${actual_dev_name}") + set(_IE_CREATE_EXTENSION_FUNC "CreateExtensionShared${actual_dev_name}") + + # declarations + set(IE_PLUGINS_DECLARATIONS "${IE_PLUGINS_DECLARATIONS} + IE_DEFINE_PLUGIN_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_PLUGIN_FUNC});") + if(${actual_dev_name}_AS_EXTENSION) + set(IE_PLUGINS_DECLARATIONS 
"${IE_PLUGINS_DECLARATIONS} + IE_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_EXTENSION_FUNC});") + else() + set(_IE_CREATE_EXTENSION_FUNC "nullptr") + endif() + + set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} { \"${mapped_dev_name}\", Value { ${_IE_CREATE_PLUGIN_FUNC}, ${_IE_CREATE_EXTENSION_FUNC}, ${dev_config} } },") + else() + set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} + { \"${mapped_dev_name}\", Value { \"${actual_dev_name}\", ${dev_config} } },") + endif() endforeach() set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} diff --git a/cmake/developer_package/plugins/plugins.cmake b/cmake/developer_package/plugins/plugins.cmake index b4cfe20bd024e5..7f00cc70269861 100644 --- a/cmake/developer_package/plugins/plugins.cmake +++ b/cmake/developer_package/plugins/plugins.cmake @@ -113,7 +113,7 @@ function(ie_add_plugin) if(IE_PLUGIN_PSEUDO_DEVICE) set(plugin_hidden HIDDEN) endif() - ie_cpack_add_component(${install_component} + ie_cpack_add_component(${install_component} DISPLAY_NAME "${IE_PLUGIN_DEVICE_NAME} runtime" DESCRIPTION "${IE_PLUGIN_DEVICE_NAME} runtime" ${plugin_hidden} @@ -227,16 +227,18 @@ macro(ie_register_plugins_dynamic) # Combine all .xml files into plugins.xml - add_custom_command(TARGET ${IE_REGISTER_MAIN_TARGET} POST_BUILD - COMMAND - "${CMAKE_COMMAND}" - -D "CMAKE_SHARED_MODULE_PREFIX=${CMAKE_SHARED_MODULE_PREFIX}" - -D "IE_CONFIG_OUTPUT_FILE=${config_output_file}" - -D "IE_CONFIGS_DIR=${CMAKE_BINARY_DIR}/plugins" - -P "${IEDevScripts_DIR}/plugins/register_plugin_cmake.cmake" - COMMENT - "Registering plugins to plugins.xml config file" - VERBATIM) + if(ENABLE_PLUGINS_XML) + add_custom_command(TARGET ${IE_REGISTER_MAIN_TARGET} POST_BUILD + COMMAND + "${CMAKE_COMMAND}" + -D "CMAKE_SHARED_MODULE_PREFIX=${CMAKE_SHARED_MODULE_PREFIX}" + -D "IE_CONFIG_OUTPUT_FILE=${config_output_file}" + -D "IE_CONFIGS_DIR=${CMAKE_BINARY_DIR}/plugins" + -P "${IEDevScripts_DIR}/plugins/register_plugin_cmake.cmake" + COMMENT + "Registering plugins to plugins.xml config file" + VERBATIM) + endif() endmacro() # @@ -282,10 +284,6 @@ endfunction() # ie_generate_plugins_hpp() # function(ie_generate_plugins_hpp) - if(BUILD_SHARED_LIBS) - return() - endif() - set(device_mapping) set(device_configs) set(as_extension) @@ -296,17 +294,23 @@ function(ie_generate_plugins_hpp) message(FATAL_ERROR "Unexpected error, please, contact developer of this script") endif() - # create device mapping: preudo device => actual device + # create device mapping: pseudo device => actual device list(GET name 0 device_name) - if(${device_name}_PSEUDO_PLUGIN_FOR) - list(APPEND device_mapping "${device_name}:${${device_name}_PSEUDO_PLUGIN_FOR}") + if(BUILD_SHARED_LIBS) + list(GET name 1 library_name) + ie_plugin_get_file_name(${library_name} library_name) + list(APPEND device_mapping "${device_name}:${library_name}") else() - list(APPEND device_mapping "${device_name}:${device_name}") - endif() + if(${device_name}_PSEUDO_PLUGIN_FOR) + list(APPEND device_mapping "${device_name}:${${device_name}_PSEUDO_PLUGIN_FOR}") + else() + list(APPEND device_mapping "${device_name}:${device_name}") + endif() - # register plugin as extension - if(${device_name}_AS_EXTENSION) - list(APPEND as_extension -D "${device_name}_AS_EXTENSION=ON") + # register plugin as extension + if(${device_name}_AS_EXTENSION) + list(APPEND as_extension -D "${device_name}_AS_EXTENSION=ON") + endif() endif() # add default plugin config options @@ -330,6 +334,7 @@ function(ie_generate_plugins_hpp) COMMAND 
"${CMAKE_COMMAND}" -D "IE_DEVICE_MAPPING=${device_mapping}" + -D "OV_DYNAMIC=${BUILD_SHARED_LIBS}" -D "IE_PLUGINS_HPP_HEADER_IN=${plugins_hpp_in}" -D "IE_PLUGINS_HPP_HEADER=${ie_plugins_hpp}" ${device_configs} @@ -339,7 +344,7 @@ function(ie_generate_plugins_hpp) "${plugins_hpp_in}" "${IEDevScripts_DIR}/plugins/create_plugins_hpp.cmake" COMMENT - "Generate ie_plugins.hpp for static build" + "Generate ie_plugins.hpp for build" VERBATIM) # for some reason dependency on source files does not work diff --git a/cmake/developer_package/plugins/plugins.hpp.in b/cmake/developer_package/plugins/plugins.hpp.in index fa8119756b82e8..d351bcfb76f3d0 100644 --- a/cmake/developer_package/plugins/plugins.hpp.in +++ b/cmake/developer_package/plugins/plugins.hpp.in @@ -4,6 +4,11 @@ #pragma once +#include +#include + +#ifdef OPENVINO_STATIC_LIBRARY + #include "cpp_interfaces/interface/ie_iplugin_internal.hpp" @IE_PLUGINS_DECLARATIONS@ @@ -14,10 +19,20 @@ struct Value { std::map m_default_config; }; +#else + +struct Value { + std::string m_plugin_path; + std::map m_default_config; +}; + +#endif + using Key = std::string; using PluginsStaticRegistry = std::map; -inline const std::map getStaticPluginsRegistry() { + +inline const std::map getCompiledPluginsRegistry() { @IE_PLUGINS_MAP_DEFINITION@ return plugins_hpp; } diff --git a/cmake/features.cmake b/cmake/features.cmake index 17c5ccc1b3c7e5..24dfaef46e89a7 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -94,6 +94,8 @@ ie_option (ENABLE_HETERO "Enables Hetero Device Plugin" ON) ie_option (ENABLE_TEMPLATE "Enable template plugin" ON) +ie_dependent_option (ENABLE_PLUGINS_XML "Generate plugins.xml configuration file or not" OFF "NOT BUILD_SHARED_LIBS" OFF) + ie_dependent_option (GAPI_TEST_PERF "if GAPI unit tests should examine performance" OFF "ENABLE_TESTS;ENABLE_GAPI_PREPROCESSING" OFF) ie_dependent_option (ENABLE_DATA "fetch models from testdata repo" ON "ENABLE_FUNCTIONAL_TESTS;NOT ANDROID" OFF) diff --git a/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in b/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in index 24238be0604c1b..d530ea36d1d9c8 100644 --- a/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in +++ b/cmake/templates/OpenVINODeveloperPackageConfig.cmake.in @@ -28,6 +28,9 @@ foreach(option IN LISTS ov_options) endforeach() message(" ") +# activate generation of plugins.xml +set(ENABLE_PLUGINS_XML ON) + # for samples in 3rd party projects if(ENABLE_SAMPLES) set_and_check(gflags_DIR "@gflags_BINARY_DIR@") diff --git a/src/cmake/openvino.cmake b/src/cmake/openvino.cmake index 7870e2963e3c59..0a0b9f9d1896a4 100644 --- a/src/cmake/openvino.cmake +++ b/src/cmake/openvino.cmake @@ -131,7 +131,7 @@ ie_cpack_add_component(${OV_CPACK_COMP_CORE_DEV} HIDDEN DEPENDS ${OV_CPACK_COMP_CORE} ${core_dev_components}) -if(BUILD_SHARED_LIBS) +if(ENABLE_PLUGINS_XML) install(FILES $/plugins.xml DESTINATION ${OV_CPACK_PLUGINSDIR} COMPONENT ${OV_CPACK_COMP_CORE}) diff --git a/src/common/util/CMakeLists.txt b/src/common/util/CMakeLists.txt index a589c283390761..160be0259b8b30 100644 --- a/src/common/util/CMakeLists.txt +++ b/src/common/util/CMakeLists.txt @@ -24,6 +24,13 @@ endif() # Create named folders for the sources within the .vcproj # Empty name lists them directly under the .vcproj +set(MIXED_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/src/file_util.cpp") + +set_property(SOURCE ${MIXED_SRC} + APPEND PROPERTY INCLUDE_DIRECTORIES + $) + source_group("src" FILES ${LIBRARY_SRC}) source_group("include" FILES ${PUBLIC_HEADERS}) diff --git 
a/src/common/util/include/openvino/util/file_util.hpp b/src/common/util/include/openvino/util/file_util.hpp index 00d8dbe073cd61..ccf8ed4e46c6e0 100644 --- a/src/common/util/include/openvino/util/file_util.hpp +++ b/src/common/util/include/openvino/util/file_util.hpp @@ -260,6 +260,14 @@ inline std::basic_string make_plugin_library_name(const std::basic_string& */ FilePath get_plugin_path(const std::string& plugin); +/** + * @brief Find the plugins which are located together with OV library + * @param plugin - Path (absolute or relative) or name of a plugin. Depending on platform, `plugin` is wrapped with + * shared library suffix and prefix to identify library full name + * @return absolute path or file name with extension (to be found in ENV) + */ +FilePath get_compiled_plugin_path(const std::string& plugin); + /** * @brief Format plugin path (canonicalize, complete to absolute or complete to file name) for further * dynamic loading by OS diff --git a/src/common/util/src/file_util.cpp b/src/common/util/src/file_util.cpp index bcbd3fe2f906c7..f39f2dd3c677d9 100644 --- a/src/common/util/src/file_util.cpp +++ b/src/common/util/src/file_util.cpp @@ -12,6 +12,7 @@ #include #include +#include "openvino/core/version.hpp" #include "openvino/util/common_util.hpp" #ifdef _WIN32 @@ -504,6 +505,37 @@ ov::util::FilePath ov::util::get_plugin_path(const std::string& plugin) { return ov::util::to_file_path(lib_name); } +ov::util::FilePath ov::util::get_compiled_plugin_path(const std::string& plugin) { + const auto ov_library_path = get_ov_lib_path(); + + // plugin can be found either: + + // 1. in openvino-X.Y.Z folder relative to libopenvino.so + std::ostringstream str; + str << "openvino-" << OPENVINO_VERSION_MAJOR << "." << OPENVINO_VERSION_MINOR << "." << OPENVINO_VERSION_PATCH; + const auto sub_folder = str.str(); + + std::string abs_file_path = ov::util::path_join({ov_library_path, sub_folder, plugin}); + if (ov::util::file_exists(abs_file_path)) + return ov::util::to_file_path(abs_file_path); + + // 2. in the openvino.so location + abs_file_path = ov::util::path_join({ov_library_path, plugin}); + if (ov::util::file_exists(abs_file_path)) + return ov::util::to_file_path(abs_file_path); + + auto lib_name = plugin; + // For 3rd case - convert to 4th case + if (!ov::util::ends_with(plugin, ov::util::FileTraits::library_ext())) + lib_name = ov::util::make_plugin_library_name({}, plugin); + + // For 4th case + auto lib_path = ov::util::to_file_path(ov::util::get_absolute_file_path(lib_name)); + if (ov::util::file_exists(lib_path)) + return lib_path; + return ov::util::to_file_path(lib_name); +} + ov::util::FilePath ov::util::get_plugin_path(const std::string& plugin, const std::string& xml_path, bool as_abs_only) { // Assume `plugin` (from XML "location" record) contains only: // 1. 
/path/to/libexample.so absolute path diff --git a/src/inference/src/core.cpp b/src/inference/src/core.cpp index 0a2fba9072b6ff..fef2652b275d17 100644 --- a/src/inference/src/core.cpp +++ b/src/inference/src/core.cpp @@ -9,13 +9,10 @@ #include "dev/converter_utils.hpp" #include "dev/core_impl.hpp" #include "ie_itt.hpp" +#include "ie_plugins.hpp" #include "openvino/runtime/device_id_parser.hpp" #include "so_extension.hpp" -#ifdef OPENVINO_STATIC_LIBRARY -# include "ie_plugins.hpp" -#endif - namespace { std::string resolve_extension_path(const std::string& path) { std::string retvalue; @@ -32,8 +29,6 @@ std::string resolve_extension_path(const std::string& path) { namespace ov { -#ifndef OPENVINO_STATIC_LIBRARY - std::string findPluginXML(const std::string& xmlFile) { std::string xmlConfigFile_ = xmlFile; if (xmlConfigFile_.empty()) { @@ -56,14 +51,10 @@ std::string findPluginXML(const std::string& xmlFile) { xmlConfigFileDefault = FileUtils::makePath(ielibraryDir, ov::util::to_file_path("plugins.xml")); if (FileUtils::fileExist(xmlConfigFileDefault)) return xmlConfigFile_ = ov::util::from_file_path(xmlConfigFileDefault); - - OPENVINO_THROW("Failed to find plugins.xml file"); } return xmlConfigFile_; } -#endif // OPENVINO_STATIC_LIBRARY - #define OV_CORE_CALL_STATEMENT(...) \ try { \ __VA_ARGS__; \ @@ -81,13 +72,13 @@ class Core::Impl : public CoreImpl { Core::Core(const std::string& xml_config_file) { _impl = std::make_shared(); -#ifdef OPENVINO_STATIC_LIBRARY - OV_CORE_CALL_STATEMENT(_impl->register_plugins_in_registry(::getStaticPluginsRegistry());) -#else - OV_CORE_CALL_STATEMENT( - // If XML is default, load default plugins by absolute paths - _impl->register_plugins_in_registry(findPluginXML(xml_config_file), xml_config_file.empty());) -#endif + std::string xmlConfigFile = ov::findPluginXML(xml_config_file); + if (!xmlConfigFile.empty()) + OV_CORE_CALL_STATEMENT( + // If XML is default, load default plugins by absolute paths + _impl->register_plugins_in_registry(xmlConfigFile, xml_config_file.empty());) + // Load plugins from the pre-compiled list + OV_CORE_CALL_STATEMENT(_impl->register_compile_time_plugins();) } std::map Core::get_versions(const std::string& device_name) const { diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index ed39bc67f1f94e..d97a89f8f79411 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -34,6 +34,7 @@ #include "openvino/runtime/remote_context.hpp" #include "openvino/runtime/threading/executor_manager.hpp" #include "openvino/util/common_util.hpp" +#include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" #include "preprocessing/preprocessing.hpp" #include "xml_parse_utils.h" @@ -311,6 +312,39 @@ ov::CoreImpl::CoreImpl(bool _newAPI) : m_new_api(_newAPI) { } } +void ov::CoreImpl::register_compile_time_plugins() { + std::lock_guard lock(get_mutex()); + + const decltype(::getCompiledPluginsRegistry())& plugins = getCompiledPluginsRegistry(); +#ifdef OPENVINO_STATIC_LIBRARY + for (const auto& plugin : plugins) { + const auto& deviceName = plugin.first; + if (deviceName.find('.') != std::string::npos) { + OPENVINO_THROW("Device name must not contain dot '.' 
symbol"); + } + if (pluginRegistry.find(deviceName) == pluginRegistry.end()) { + const auto& value = plugin.second; + ov::AnyMap config = any_copy(value.m_default_config); + PluginDescriptor desc{value.m_create_plugin_func, config, value.m_create_extension_func}; + pluginRegistry[deviceName] = desc; + add_mutex(deviceName); + } + } +#else + for (const auto& plugin : plugins) { + const auto& deviceName = plugin.first; + const auto& pluginPath = ov::util::get_compiled_plugin_path(plugin.second.m_plugin_path); + + if (pluginRegistry.find(deviceName) == pluginRegistry.end() && ov::util::file_exists(pluginPath)) { + ov::AnyMap config = any_copy(plugin.second.m_default_config); + PluginDescriptor desc{pluginPath, config}; + pluginRegistry[deviceName] = desc; + add_mutex(deviceName); + } + } +#endif +} + void ov::CoreImpl::register_plugins_in_registry(const std::string& xml_config_file, const bool& by_abs_path) { std::lock_guard lock(get_mutex()); diff --git a/src/inference/src/dev/core_impl.hpp b/src/inference/src/dev/core_impl.hpp index 2277d70b9d0acd..8fe7768dc6c91a 100644 --- a/src/inference/src/dev/core_impl.hpp +++ b/src/inference/src/dev/core_impl.hpp @@ -15,6 +15,7 @@ #include "ie_cache_manager.hpp" #include "ie_extension.h" #include "ie_icore.hpp" +#include "ie_plugins.hpp" #include "multi-device/multi_device_config.hpp" #include "openvino/core/any.hpp" #include "openvino/core/extension.hpp" @@ -22,10 +23,7 @@ #include "openvino/runtime/common.hpp" #include "openvino/runtime/icompiled_model.hpp" #include "openvino/runtime/threading/executor_manager.hpp" - -#ifdef OPENVINO_STATIC_LIBRARY -# include "ie_plugins.hpp" -#endif +#include "openvino/util/file_util.hpp" namespace ov { @@ -48,16 +46,13 @@ Parsed parseDeviceNameIntoConfig(const std::string& deviceName, const AnyMap& co * * @param device_name Target device * @param device_name_to_parse Device ID of property - * @return true if ov::device::properties(, ...) is applicable for device identified by 'device_name + * @return true if ov::device::properties(, ...) 
is applicable for device identified by + * 'device_name */ bool is_config_applicable(const std::string& device_name, const std::string& device_name_to_parse); -#ifndef OPENVINO_STATIC_LIBRARY - std::string findPluginXML(const std::string& xmlFile); -#endif - class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_this { private: mutable std::map plugins; @@ -94,8 +89,7 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t // Creating thread-safe copy of config including shared_ptr to ICacheManager // Passing empty or not-existing name will return global cache config - CacheConfig get_cache_config_for_device(const ov::Plugin& plugin, - ov::AnyMap& parsedConfig) const; + CacheConfig get_cache_config_for_device(const ov::Plugin& plugin, ov::AnyMap& parsedConfig) const; private: mutable std::mutex _cacheConfigMutex; @@ -158,16 +152,17 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t const bool m_new_api; ov::SoPtr compile_model_and_cache(const std::shared_ptr& model, - ov::Plugin& plugin, - const ov::AnyMap& parsedConfig, - const ov::RemoteContext& context, - const CacheContent& cacheContent) const; + ov::Plugin& plugin, + const ov::AnyMap& parsedConfig, + const ov::RemoteContext& context, + const CacheContent& cacheContent) const; - static ov::SoPtr load_model_from_cache(const CacheContent& cacheContent, - ov::Plugin& plugin, - const ov::AnyMap& config, - const ov::RemoteContext& context, - std::function()> compile_model_lambda); + static ov::SoPtr load_model_from_cache( + const CacheContent& cacheContent, + ov::Plugin& plugin, + const ov::AnyMap& config, + const ov::RemoteContext& context, + std::function()> compile_model_lambda); bool device_supports_import_export(const ov::Plugin& plugin) const; @@ -177,12 +172,11 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t bool device_supports_cache_dir(const ov::Plugin& plugin) const; ov::SoPtr compile_model_with_preprocess(ov::Plugin& plugin, - const std::shared_ptr& model, - const ov::RemoteContext& context, - const ov::AnyMap& config) const; + const std::shared_ptr& model, + const ov::RemoteContext& context, + const ov::AnyMap& config) const; - ov::AnyMap create_compile_config(const ov::Plugin& plugin, - const ov::AnyMap& origConfig) const; + ov::AnyMap create_compile_config(const ov::Plugin& plugin, const ov::AnyMap& origConfig) const; // Legacy API void AddExtensionUnsafe(const InferenceEngine::IExtensionPtr& extension) const; @@ -218,30 +212,10 @@ class CoreImpl : public InferenceEngine::ICore, public std::enable_shared_from_t std::string& deviceName, ov::AnyMap& config) const; -#ifdef OPENVINO_STATIC_LIBRARY - - /** - * @brief Register plugins for devices using statically defined configuration - * @note The function supports UNICODE path - * @param static_registry a statically defined configuration with device / plugin information + /* + * @brief Register plugins according to the build configuration */ - void register_plugins_in_registry(const decltype(::getStaticPluginsRegistry())& static_registry) { - std::lock_guard lock(get_mutex()); - - for (const auto& plugin : static_registry) { - const auto& deviceName = plugin.first; - if (deviceName.find('.') != std::string::npos) { - IE_THROW() << "Device name must not contain dot '.' 
symbol"; - } - const auto& value = plugin.second; - ov::AnyMap config = any_copy(value.m_default_config); - PluginDescriptor desc{value.m_create_plugin_func, config, value.m_create_extension_func}; - pluginRegistry[deviceName] = desc; - add_mutex(deviceName); - } - } - -#endif + void register_compile_time_plugins(); // // ICore public API diff --git a/src/inference/src/ie_core.cpp b/src/inference/src/ie_core.cpp index cc138a0f13d17a..de604f6fab4f21 100644 --- a/src/inference/src/ie_core.cpp +++ b/src/inference/src/ie_core.cpp @@ -30,6 +30,7 @@ #include "ie_network_reader.hpp" #include "ie_ngraph_utils.hpp" #include "ie_plugin_config.hpp" +#include "ie_plugins.hpp" #include "ie_remote_context.hpp" #include "ngraph/graph_util.hpp" #include "ngraph/ngraph.hpp" @@ -47,10 +48,6 @@ #include "so_extension.hpp" #include "xml_parse_utils.h" -#ifdef OPENVINO_STATIC_LIBRARY -# include "ie_plugins.hpp" -#endif - using namespace InferenceEngine::PluginConfigParams; using namespace InferenceEngine; using namespace std::placeholders; @@ -91,13 +88,12 @@ class Core::Impl : public ov::CoreImpl { Core::Core(const std::string& xmlConfigFile) { _impl = std::make_shared(); -#ifdef OPENVINO_STATIC_LIBRARY - _impl->register_plugins_in_registry(::getStaticPluginsRegistry()); -#else - // If XML is default, load default plugins by absolute paths - auto loadByAbsPath = xmlConfigFile.empty(); - _impl->register_plugins_in_registry(ov::findPluginXML(xmlConfigFile), loadByAbsPath); -#endif + std::string xmlConfigFile_ = ov::findPluginXML(xmlConfigFile); + if (!xmlConfigFile_.empty()) + // If XML is default, load default plugins by absolute paths + _impl->register_plugins_in_registry(xmlConfigFile_, xmlConfigFile.empty()); + // Load plugins from pre-compiled list + _impl->register_compile_time_plugins(); } std::map Core::GetVersions(const std::string& deviceName) const { From 8eb142ca6ead9ac08e15741d554e7bb061339e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Do=C5=82bniak?= Date: Wed, 22 Mar 2023 17:00:53 +0100 Subject: [PATCH 041/296] Interpolate v11 -> v4 downgrade transformation (#16448) --- .../convert_interpolate11_downgrade.hpp | 24 +++ .../common_optimizations.cpp | 2 + .../convert_interpolate11_downgrade.cpp | 75 +++++++++ .../convert_interpolate11_downgrade_test.cpp | 147 ++++++++++++++++++ src/core/src/op/interpolate.cpp | 15 ++ src/core/tests/type_prop/interpolate.cpp | 22 +++ 6 files changed, 285 insertions(+) create mode 100644 src/common/transformations/include/transformations/op_conversions/convert_interpolate11_downgrade.hpp create mode 100644 src/common/transformations/src/transformations/op_conversions/convert_interpolate11_downgrade.cpp create mode 100644 src/common/transformations/tests/op_conversions/convert_interpolate11_downgrade_test.cpp diff --git a/src/common/transformations/include/transformations/op_conversions/convert_interpolate11_downgrade.hpp b/src/common/transformations/include/transformations/op_conversions/convert_interpolate11_downgrade.hpp new file mode 100644 index 00000000000000..b112c5d8abdf45 --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_interpolate11_downgrade.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ov { +namespace pass { +/** + * @ingroup ie_transformation_common_api + * @brief Converts Interpolate version 11 to Interpolate version 4 if the new op uses any of the v4 allowed + * interpolation modes. 
+ */
+class TRANSFORMATIONS_API ConvertInterpolate11ToInterpolate4 : public MatcherPass {
+public:
+    OPENVINO_RTTI("ConvertInterpolate11ToInterpolate4", "0");
+    ConvertInterpolate11ToInterpolate4();
+};
+
+}  // namespace pass
+}  // namespace ov
diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp
index 8b43dcfc8d2b29..6064effe880c4b 100644
--- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp
+++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp
@@ -76,6 +76,7 @@
 #include "transformations/op_conversions/convert_gather_downgrade.hpp"
 #include "transformations/op_conversions/convert_gather_upgrade.hpp"
 #include "transformations/op_conversions/convert_gelu.hpp"
+#include "transformations/op_conversions/convert_interpolate11_downgrade.hpp"
 #include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp"
 #include "transformations/op_conversions/convert_maxpool_downgrade.hpp"
 #include "transformations/op_conversions/convert_maxpool_upgrade.hpp"
@@ -211,6 +212,7 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr<ov::Model>& f) {
+    REGISTER_PASS(manager, ConvertInterpolate11ToInterpolate4)
 
     auto fq_fusions = manager.register_pass<GraphRewrite>();
     ADD_MATCHER(fq_fusions, FakeQuantizeMulFusion)
diff --git a/src/common/transformations/src/transformations/op_conversions/convert_interpolate11_downgrade.cpp b/src/common/transformations/src/transformations/op_conversions/convert_interpolate11_downgrade.cpp
new file mode 100644
index 00000000000000..c9b2e15dd4cfaf
--- /dev/null
+++ b/src/common/transformations/src/transformations/op_conversions/convert_interpolate11_downgrade.cpp
@@ -0,0 +1,75 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/op_conversions/convert_interpolate11_downgrade.hpp"
+
+#include
+#include
+#include
+#include
+#include
+
+#include "itt.hpp"
+
+ov::pass::ConvertInterpolate11ToInterpolate4::ConvertInterpolate11ToInterpolate4() {
+    MATCHER_SCOPE(ConvertInterpolate11ToInterpolate4);
+
+    const auto interpolate_v11_pattern = pattern::wrap_type<ov::opset11::Interpolate>();
+
+    const matcher_pass_callback callback = [=](pattern::Matcher& m) {
+        const auto v4_compatible_interpolation_mode = [](const op::util::InterpolateBase::InterpolateMode mode) {
+            constexpr std::array<op::util::InterpolateBase::InterpolateMode, 4> allowed_modes = {
+                op::util::InterpolateBase::InterpolateMode::NEAREST,
+                op::util::InterpolateBase::InterpolateMode::LINEAR,
+                op::util::InterpolateBase::InterpolateMode::LINEAR_ONNX,
+                op::util::InterpolateBase::InterpolateMode::CUBIC};
+
+            return std::find(std::begin(allowed_modes), std::end(allowed_modes), mode) != std::end(allowed_modes);
+        };
+
+        const auto interpolate_v11 = std::dynamic_pointer_cast<ov::opset11::Interpolate>(m.get_match_root());
+        if (!interpolate_v11 || !v4_compatible_interpolation_mode(interpolate_v11->get_attrs().mode) ||
+            transformation_callback(interpolate_v11)) {
+            return false;
+        }
+
+        // downgrade only if the interpolation mode used to create v11 is supported by v4
+        std::shared_ptr<ov::Node> interpolate_v4;
+        ov::Output<ov::Node> v4_input_output_shape;
+        ov::Output<ov::Node> v4_input_scales;
+
+        if (interpolate_v11->get_attrs().shape_calculation_mode ==
+            ov::op::util::InterpolateBase::ShapeCalcMode::SCALES) {
+            v4_input_scales = interpolate_v11->input_value(1);
+            v4_input_output_shape = opset4::Constant::create(element::i32, Shape{1}, {1});
+            copy_runtime_info(interpolate_v11, v4_input_output_shape.get_node_shared_ptr());
+        } else {
v4_input_output_shape = interpolate_v11->input_value(1); + v4_input_scales = opset4::Constant::create(element::f32, Shape{1}, {1.0f}); + copy_runtime_info(interpolate_v11, v4_input_scales.get_node_shared_ptr()); + } + + if (interpolate_v11->get_input_size() == 3) { // with axes input + interpolate_v4 = std::make_shared(interpolate_v11->input_value(0), + v4_input_output_shape, + v4_input_scales, + interpolate_v11->input_value(2), + interpolate_v11->get_attrs()); + } else { + interpolate_v4 = std::make_shared(interpolate_v11->input_value(0), + v4_input_output_shape, + v4_input_scales, + interpolate_v11->get_attrs()); + } + + interpolate_v4->set_friendly_name(interpolate_v11->get_friendly_name()); + copy_runtime_info(interpolate_v11, interpolate_v4); + replace_node(interpolate_v11, interpolate_v4); + + return true; + }; + + auto m = std::make_shared(interpolate_v11_pattern, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/transformations/tests/op_conversions/convert_interpolate11_downgrade_test.cpp b/src/common/transformations/tests/op_conversions/convert_interpolate11_downgrade_test.cpp new file mode 100644 index 00000000000000..7504cd378ebba6 --- /dev/null +++ b/src/common/transformations/tests/op_conversions/convert_interpolate11_downgrade_test.cpp @@ -0,0 +1,147 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; + +namespace { +constexpr bool WITH_AXES = true; +constexpr bool WITHOUT_AXES = false; + +std::shared_ptr create_v11_model(const bool with_axes, + const ov::opset11::Interpolate::ShapeCalcMode shape_calc_mode) { + auto attributes = ov::opset11::Interpolate::InterpolateAttrs{}; + attributes.shape_calculation_mode = shape_calc_mode; + attributes.pads_begin = {0, 0}; + attributes.pads_end = {0, 0}; + + const auto input = std::make_shared(ov::element::i32, ov::Shape{1, 2, 10, 10}); + std::shared_ptr scales_or_sizes; + std::shared_ptr interpolate; + + const size_t num_scales_or_sizes = with_axes ? 2 : 4; + if (shape_calc_mode == ov::opset11::Interpolate::ShapeCalcMode::SCALES) { + scales_or_sizes = std::make_shared(ov::element::f32, ov::Shape{num_scales_or_sizes}); + } else { + scales_or_sizes = std::make_shared(ov::element::i32, ov::Shape{num_scales_or_sizes}); + } + + ov::ParameterVector model_params; + model_params.push_back(input); + model_params.push_back(scales_or_sizes); + if (with_axes) { + const auto axes = std::make_shared(ov::element::i32, ov::Shape{2}); + model_params.push_back(axes); + interpolate = std::make_shared(input, scales_or_sizes, axes, attributes); + } else { + interpolate = std::make_shared(input, scales_or_sizes, attributes); + } + interpolate->set_friendly_name("interpolate11"); + + return std::make_shared(interpolate->outputs(), model_params); +} + +std::shared_ptr create_v4_model(const bool with_axes, + const ov::opset4::Interpolate::ShapeCalcMode shape_calc_mode) { + auto attributes = ov::opset4::Interpolate::InterpolateAttrs{}; + attributes.shape_calculation_mode = shape_calc_mode; + attributes.pads_begin = {0, 0}; + attributes.pads_end = {0, 0}; + + const auto input = std::make_shared(ov::element::i32, ov::Shape{1, 2, 10, 10}); + std::shared_ptr output_shape; + std::shared_ptr scales; + std::shared_ptr interpolate; + + ov::ParameterVector model_params; + model_params.push_back(input); + + const size_t num_scales_or_sizes = with_axes ? 
2 : 4; + if (shape_calc_mode == ov::opset4::Interpolate::ShapeCalcMode::SCALES) { + scales = std::make_shared(ov::element::f32, ov::Shape{num_scales_or_sizes}); + model_params.push_back(std::dynamic_pointer_cast(scales)); + output_shape = ov::opset4::Constant::create(ov::element::i32, ov::Shape{1}, {1}); + + } else { + output_shape = std::make_shared(ov::element::i32, ov::Shape{num_scales_or_sizes}); + model_params.push_back(std::dynamic_pointer_cast(output_shape)); + scales = ov::opset4::Constant::create(ov::element::f32, ov::Shape{1}, {1.0f}); + } + + if (with_axes) { + const auto axes = std::make_shared(ov::element::i32, ov::Shape{2}); + model_params.push_back(axes); + interpolate = std::make_shared(input, output_shape, scales, axes, attributes); + } else { + interpolate = std::make_shared(input, output_shape, scales, attributes); + } + interpolate->set_friendly_name("interpolate11"); + + return std::make_shared(interpolate->outputs(), model_params); +} + +} // namespace + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_scales) { + manager.register_pass(); + function = create_v11_model(WITH_AXES, ov::opset11::Interpolate::ShapeCalcMode::SCALES); + function_ref = create_v4_model(WITH_AXES, ov::opset4::Interpolate::ShapeCalcMode::SCALES); +} + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_sizes) { + manager.register_pass(); + function = create_v11_model(WITH_AXES, ov::opset11::Interpolate::ShapeCalcMode::SIZES); + function_ref = create_v4_model(WITH_AXES, ov::opset4::Interpolate::ShapeCalcMode::SIZES); +} + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_scales_no_axes) { + manager.register_pass(); + function = create_v11_model(WITHOUT_AXES, ov::opset11::Interpolate::ShapeCalcMode::SCALES); + function_ref = create_v4_model(WITHOUT_AXES, ov::opset4::Interpolate::ShapeCalcMode::SCALES); +} + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_sizes_no_axes) { + manager.register_pass(); + function = create_v11_model(WITHOUT_AXES, ov::opset11::Interpolate::ShapeCalcMode::SIZES); + function_ref = create_v4_model(WITHOUT_AXES, ov::opset4::Interpolate::ShapeCalcMode::SIZES); +} + +namespace { +std::shared_ptr create_non_downgradeable_model(const ov::opset11::Interpolate::InterpolateMode mode) { + auto attributes = ov::opset11::Interpolate::InterpolateAttrs{}; + attributes.mode = mode; + attributes.shape_calculation_mode = ov::opset11::Interpolate::ShapeCalcMode::SCALES; + attributes.pads_begin = {0, 0}; + attributes.pads_end = {0, 0}; + + const auto input = std::make_shared(ov::element::i32, ov::Shape{1, 2, 10, 10}); + const auto scales = std::make_shared(ov::element::f32, ov::Shape{2}); + const auto axes = std::make_shared(ov::element::i32, ov::Shape{2}); + + const auto interpolate = std::make_shared(input, scales, axes, attributes); + interpolate->set_friendly_name("interpolate11"); + + return std::make_shared(interpolate->outputs(), ov::ParameterVector{input, scales, axes}); +} +} // namespace + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_bicubic_pillow) { + function = create_non_downgradeable_model(ov::opset11::Interpolate::InterpolateMode::BICUBIC_PILLOW); + manager.register_pass(); +} + +TEST_F(TransformationTestsF, ConvertInterpolate11ToInterpolate4_bilinear_pillow) { + function = create_non_downgradeable_model(ov::opset11::Interpolate::InterpolateMode::BILINEAR_PILLOW); + manager.register_pass(); +} diff --git a/src/core/src/op/interpolate.cpp b/src/core/src/op/interpolate.cpp index 
6bfd961fc35de8..b34d39bc60ec63 100644 --- a/src/core/src/op/interpolate.cpp +++ b/src/core/src/op/interpolate.cpp @@ -186,6 +186,21 @@ void ov::op::v4::Interpolate::validate_and_infer_types() { input_shapes = {input_shape, target_spatial_shape, scales, axes}; } + const auto interpolation_mode_check = [](const op::util::InterpolateBase::InterpolateMode mode) { + constexpr std::array allowed_modes = { + op::util::InterpolateBase::InterpolateMode::NEAREST, + op::util::InterpolateBase::InterpolateMode::LINEAR, + op::util::InterpolateBase::InterpolateMode::LINEAR_ONNX, + op::util::InterpolateBase::InterpolateMode::CUBIC}; + + return std::find(std::begin(allowed_modes), std::end(allowed_modes), mode) != std::end(allowed_modes); + }; + + NODE_VALIDATION_CHECK(this, + interpolation_mode_check(m_attrs.mode), + "Unsupported interpolation mode used with version 4 of the Interpolate op: ", + as_string(m_attrs.mode)); + util::correct_pads_attr(this, m_attrs.pads_begin, m_attrs.pads_end, input_shapes); shape_infer(this, m_attrs.pads_begin, m_attrs.pads_end, input_shapes, output_shapes, {}); set_output_type(0, get_input_element_type(0), output_shapes[0]); diff --git a/src/core/tests/type_prop/interpolate.cpp b/src/core/tests/type_prop/interpolate.cpp index b220ecd8a8f754..7f0f5ff3a5bb68 100644 --- a/src/core/tests/type_prop/interpolate.cpp +++ b/src/core/tests/type_prop/interpolate.cpp @@ -214,6 +214,28 @@ TEST(type_prop, interpolate_v4_interval_logic) { ASSERT_TRUE(interp->get_output_partial_shape(0).same_scheme(out_shape)); } +TEST(type_prop, interpolate_v4_incorrect_mode) { + const auto image = std::make_shared(element::f32, Shape{1, 3, 30, 60}); + const auto target_shape = std::make_shared(element::i32, Shape{2}); + const auto scales = op::Constant::create(element::f32, Shape{2}, {6.f, 12.f}); + const auto axes = op::Constant::create(element::i64, Shape{2}, {2, 3}); + + ov::op::util::InterpolateBase::InterpolateAttrs attrs; + attrs.shape_calculation_mode = ov::op::util::InterpolateBase::ShapeCalcMode::SCALES; + attrs.mode = ov::op::util::InterpolateBase::InterpolateMode::BICUBIC_PILLOW; + attrs.pads_begin = {0, 0, 0, 0}; + attrs.pads_end = {0, 0, 0, 0}; + + OV_EXPECT_THROW(auto interp = std::make_shared(image, target_shape, scales, axes, attrs), + ov::NodeValidationFailure, + HasSubstr("Unsupported interpolation mode used with version 4 of the Interpolate op")); + + attrs.mode = ov::op::util::InterpolateBase::InterpolateMode::BILINEAR_PILLOW; + OV_EXPECT_THROW(auto interp = std::make_shared(image, target_shape, scales, axes, attrs), + ov::NodeValidationFailure, + HasSubstr("Unsupported interpolation mode used with version 4 of the Interpolate op")); +} + TEST(type_prop, interpolate_v11_scales) { const auto image = std::make_shared(element::f32, Shape{1, 3, 30, 60}); const auto scales = op::Constant::create(element::f32, Shape{2}, {0.2f, 0.2f}); From 6ac5e42b62ef065ecf6b553b2a9d662cea5a5b03 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Wed, 22 Mar 2023 20:07:47 +0400 Subject: [PATCH 042/296] [CONFORMANCE] Fix if impossible to remove log (#16485) * fix_reporting * w/a for remove * Update merge_xmls.py remove extra --- .../functional_test_utils/layer_tests_summary/merge_xmls.py | 3 +-- .../layer_tests_summary/run_parallel.py | 5 ++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py index e32ecf275b846a..5e1e8d01779363 100644 
--- a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py +++ b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/merge_xmls.py @@ -70,7 +70,7 @@ def aggregate_test_results(aggregated_results: SubElement, xml_reports: list, re aggregated_device_results = aggregated_results.find(xml_device_entry.tag) if aggregated_device_results is None: aggregated_results.append(xml_device_entry) - continue + aggregated_device_results = aggregated_results.find(xml_device_entry.tag) # op or api_type for xml_results_entry in xml_device_entry: aggregated_results_entry = aggregated_device_results.find(xml_results_entry.tag) @@ -88,7 +88,6 @@ def aggregate_test_results(aggregated_results: SubElement, xml_reports: list, re aggregated_results_entry.append(xml_real_device_entry) continue update_result_node(xml_real_device_entry, aggregated_real_device_api_report) - a = 1 return aggregated_timestamp diff --git a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/run_parallel.py b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/run_parallel.py index 52cad8139552cc..62e7111372ea3f 100644 --- a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/run_parallel.py +++ b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/run_parallel.py @@ -199,7 +199,10 @@ def __replace_restricted_symbols(input_string:str): def __get_test_list_by_runtime(self): test_list_file_name = os.path.join(self._working_dir, "test_list.lst") if os.path.isfile(test_list_file_name): - os.remove(test_list_file_name) + try: + os.remove(test_list_file_name) + except Exception as err: + logger.warning(f"Imposible to remove {test_list_file_name}. Error: {err}") command_to_get_test_list = self._command + f' --gtest_list_tests >> {test_list_file_name}' logger.info(f"Get test list using command: {command_to_get_test_list}") run_res = run(command_to_get_test_list, check=True, shell=True) From 5290822f8b0b3df91a083649867fbbe45c51cc78 Mon Sep 17 00:00:00 2001 From: Yury Gaydaychuk Date: Wed, 22 Mar 2023 17:36:05 +0100 Subject: [PATCH 043/296] [CPU] Enabled BatchToSpace and SpaceToBatch with nonconstant inputs support (#16344) --- .../intel_cpu/src/nodes/batch_to_space.cpp | 46 +++-- .../intel_cpu/src/nodes/batch_to_space.h | 1 + .../intel_cpu/src/nodes/space_to_batch.cpp | 45 +++-- .../intel_cpu/src/nodes/space_to_batch.h | 1 + .../single_layer_tests/batch_to_space.cpp | 189 +++++++++++++----- .../single_layer_tests/space_to_batch.cpp | 167 +++++++++++++--- 6 files changed, 331 insertions(+), 118 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp b/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp index 9aab08d021fecb..804f79d507d70d 100644 --- a/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp +++ b/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp @@ -24,12 +24,6 @@ bool BatchToSpace::isSupportedOperation(const std::shared_ptr(op->get_input_node_shared_ptr(1)) == nullptr || - std::dynamic_pointer_cast(op->get_input_node_shared_ptr(2)) == nullptr || - std::dynamic_pointer_cast(op->get_input_node_shared_ptr(3)) == nullptr) { - errorMessage = "Only constant 'block_shape', 'crops_begin', 'crops_end' are supported"; - return false; - } } catch (...) 
{ return false; } @@ -54,9 +48,6 @@ BatchToSpace::BatchToSpace(const std::shared_ptr& op, const GraphC IE_THROW() << errorPrefix << " has unsupported 'data' input rank: " << inDims.size(); if (inDims.size() != outDims.size()) IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions"; - - blockShapeIn = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(1))->cast_vector(); - cropsBeginIn = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(2))->cast_vector(); } void BatchToSpace::initSupportedPrimitiveDescriptors() { @@ -70,30 +61,30 @@ void BatchToSpace::initSupportedPrimitiveDescriptors() { IE_THROW() << errorPrefix << " has unsupported precision: " << precision.name(); addSupportedPrimDesc({{LayoutType::nspc, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::nspc, precision}}, impl_desc_type::ref_any); addSupportedPrimDesc({{LayoutType::ncsp, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); if (inDims[1] != Shape::UNDEFINED_DIM && inDims[1] % 8 == 0) { addSupportedPrimDesc({{LayoutType::nCsp8c, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::nCsp8c, precision}}, impl_desc_type::ref_any); } if (inDims[1] != Shape::UNDEFINED_DIM && inDims[1] % 16 == 0) { addSupportedPrimDesc({{LayoutType::nCsp16c, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::nCsp16c, precision}}, impl_desc_type::ref_any); } @@ -112,6 +103,19 @@ static std::vector getShape5D(const SizeVector &shape) { template void BatchToSpace::batchToSpaceKernel() { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *blockShapesPtr = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + size_t dataRank = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetShape().getRank(); + blockShapeIn.clear(); + for (size_t i = 0; i < dataRank; i++) { + blockShapeIn.push_back(*(blockShapesPtr + i)); + } + + const auto *padsBeginPtr = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); + cropsBeginIn.clear(); + for (size_t i = 0; i < dataRank; i++) { + cropsBeginIn.push_back(*(padsBeginPtr + i)); + } + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); const auto &inDims = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims(); diff --git a/src/plugins/intel_cpu/src/nodes/batch_to_space.h b/src/plugins/intel_cpu/src/nodes/batch_to_space.h index 8d3ff90d3a7b20..430893f4689060 100644 --- a/src/plugins/intel_cpu/src/nodes/batch_to_space.h +++ b/src/plugins/intel_cpu/src/nodes/batch_to_space.h @@ -24,6 +24,7 @@ class BatchToSpace : public Node { bool created() const override; bool needPrepareParams() const override { return false; }; + bool needShapeInfer() const override {return true;}; void executeDynamicImpl(dnnl::stream strm) override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; diff --git 
a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp index 3389053df33501..4136724303f40f 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp +++ b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp @@ -26,12 +26,6 @@ bool SpaceToBatch::isSupportedOperation(const std::shared_ptr(op->get_input_node_shared_ptr(1)) == nullptr || - std::dynamic_pointer_cast(op->get_input_node_shared_ptr(2)) == nullptr || - std::dynamic_pointer_cast(op->get_input_node_shared_ptr(3)) == nullptr) { - errorMessage = "Only constant 'block_shape', 'pads_begin', 'pads_end' are supported"; - return false; - } } catch (...) { return false; } @@ -56,8 +50,6 @@ SpaceToBatch::SpaceToBatch(const std::shared_ptr& op, const GraphC IE_THROW() << errorPrefix << " has unsupported 'data' input rank: " << srcRank; if (srcRank != dstRank) IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions"; - blockShapeIn = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(1))->cast_vector(); - padsBeginIn = std::dynamic_pointer_cast(op->get_input_node_shared_ptr(2))->cast_vector(); } void SpaceToBatch::initSupportedPrimitiveDescriptors() { @@ -71,30 +63,30 @@ void SpaceToBatch::initSupportedPrimitiveDescriptors() { IE_THROW() << errorPrefix << " has unsupported precision: " << precision.name(); addSupportedPrimDesc({{LayoutType::nspc, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::nspc, precision}}, impl_desc_type::ref_any); addSupportedPrimDesc({{LayoutType::ncsp, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); if (inDims[1] != Shape::UNDEFINED_DIM && inDims[1] % 8 == 0) { addSupportedPrimDesc({{LayoutType::nCsp8c, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::nCsp8c, precision}}, impl_desc_type::ref_any); } if (inDims[1] != Shape::UNDEFINED_DIM && inDims[1] % 16 == 0) { addSupportedPrimDesc({{LayoutType::nCsp16c, precision}, - {LayoutType::ncsp}, - {LayoutType::ncsp}, - {LayoutType::ncsp}}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32}}, {{LayoutType::nCsp16c, precision}}, impl_desc_type::ref_any); } @@ -112,6 +104,19 @@ static std::vector getShape5D(const SizeVector &shape) { template void SpaceToBatch::SpaceToBatchKernel() { + const auto *blockShapesPtr = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + size_t dataRank = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetShape().getRank(); + blockShapeIn.clear(); + for (size_t i = 0; i < dataRank; i++) { + blockShapeIn.push_back(*(blockShapesPtr + i)); + } + + const auto *padsBeginPtr = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); + padsBeginIn.clear(); + for (size_t i = 0; i < dataRank; i++) { + padsBeginIn.push_back(*(padsBeginPtr + i)); + } + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); diff --git a/src/plugins/intel_cpu/src/nodes/space_to_batch.h 
b/src/plugins/intel_cpu/src/nodes/space_to_batch.h index ab7bb40b7c97a2..ccfa0d853d4be2 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_batch.h +++ b/src/plugins/intel_cpu/src/nodes/space_to_batch.h @@ -24,6 +24,7 @@ class SpaceToBatch : public Node { bool created() const override; bool needPrepareParams() const override { return false; }; + bool needShapeInfer() const override {return true;}; void executeDynamicImpl(dnnl::stream strm) override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp index 8f62cc50b342fb..aa248f85e29447 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/batch_to_space.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include #include "shared_test_classes/base/ov_subgraph.hpp" #include "ngraph_functions/builders.hpp" #include "test_utils/cpu_test_utils.hpp" @@ -13,6 +14,11 @@ using namespace ov::test; namespace CPULayerTestsDefinitions { +namespace { + std::vector blockShape, cropsBegin, cropsEnd; + ngraph::Shape paramShape; +} // namespace + using BatchToSpaceLayerTestCPUParams = std::tuple< std::vector, // Input shapes std::vector, // block shape @@ -26,7 +32,6 @@ class BatchToSpaceCPULayerTest : public testing::WithParamInterface &obj) { std::vector inputShapes; - std::vector blockShape, cropsBegin, cropsEnd; Precision netPrecision; CPUSpecificParams cpuParams; std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, netPrecision, cpuParams) = obj.param; @@ -53,21 +58,51 @@ class BatchToSpaceCPULayerTest : public testing::WithParamInterface& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (int i = 0; i < funcInputs.size(); i++) { + const auto& funcInput = funcInputs[i]; + ov::Tensor tensor; + if (i == 0) { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); + } else if (i == 1) { + tensor = ov::Tensor(funcInput.get_element_type(), paramShape); + auto *dataPtr = tensor.data(); + for (size_t j = 0; j < blockShape.size(); j++) { + dataPtr[j] = blockShape[j]; + } + } else if (i == 2) { + tensor = ov::Tensor(funcInput.get_element_type(), paramShape); + auto *dataPtr = tensor.data(); + for (size_t j = 0; j < cropsBegin.size(); j++) { + dataPtr[j] = cropsBegin[j]; + } + } else if (i == 3) { + tensor = ov::Tensor(funcInput.get_element_type(), paramShape); + auto *dataPtr = tensor.data(); + for (size_t j = 0; j < cropsEnd.size(); j++) { + dataPtr[j] = cropsEnd[j]; + } + } + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + } + protected: void SetUp() override { targetDevice = CommonTestUtils::DEVICE_CPU; std::vector inputShapes; - std::vector blockShape, cropsBegin, cropsEnd; Precision netPrecision; CPUSpecificParams cpuParams; std::tie(inputShapes, blockShape, cropsBegin, cropsEnd, netPrecision, cpuParams) = this->GetParam(); std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; auto ngPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - inType = outType = ngPrec; - init_input_shapes(inputShapes); + const std::vector inputShapesVec{inputShapes}; + init_input_shapes(inputShapesVec); if (strcmp(netPrecision.name(), "U8") == 0) selectedType = std::string("ref_any_") 
+ "I8"; @@ -76,9 +111,21 @@ class BatchToSpaceCPULayerTest : public testing::WithParamInterface(params)); - auto b2s = ngraph::builder::makeBatchToSpace(paramOuts[0], ngPrec, blockShape, cropsBegin, cropsEnd); - b2s->get_rt_info() = getCPUInfo(); - ngraph::ResultVector results{std::make_shared(b2s)}; + paramShape = {paramOuts[0].get_partial_shape().size()}; + + std::shared_ptr in2, in3, in4; + auto blockShapeParam = std::make_shared(ngraph::element::i64, paramShape); + in2 = blockShapeParam; + params.push_back(blockShapeParam); + auto cropsBeginParam = std::make_shared(ngraph::element::i64, paramShape); + params.push_back(cropsBeginParam); + in3 = cropsBeginParam; + auto cropsEndParam = std::make_shared(ngraph::element::i64, paramShape); + params.push_back(cropsEndParam); + in4 = cropsEndParam; + auto btsNode = std::make_shared(paramOuts[0], in2, in3, in4); + btsNode->get_rt_info() = getCPUInfo(); + ngraph::ResultVector results{std::make_shared(btsNode)}; function = std::make_shared(results, params, "BatchToSpace"); } }; @@ -103,20 +150,31 @@ const std::vector> cropsBegin4D1 = {{0, 0, 0, 0}, {0, 0, 0 const std::vector> cropsEnd4D1 = {{0, 0, 0, 0}, {0, 0, 1, 0}, {0, 0, 1, 1}}; std::vector> staticInputShapes4D1 = { - {{8, 16, 10, 10}} + {{8, 16, 10, 10}, {4}, {4}, {4}} }; std::vector> dynamicInputShapes4D1 = { - { - {{{-1, -1, -1, -1}, {{8, 8, 6, 7}, {4, 10, 5, 5}, {12, 9, 7, 5}}}}, - {{{{4, 12}, {8, 16}, 6, -1}, {{8, 8, 6, 7}, {4, 10, 6, 5}, {12, 9, 6, 5}}}} - } + { + {{-1, -1, -1, -1}, {{8, 8, 6, 7}, {4, 10, 5, 5}, {12, 9, 7, 5}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + }, + { + {{{4, 12}, {8, 16}, 6, -1}, {{8, 8, 6, 7}, {4, 10, 6, 5}, {12, 9, 6, 5}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } }; std::vector> dynamicInputShapes4D1Blocked = { - { - {{{-1, 16, -1, -1}, {{4, 16, 5, 8}, {8, 16, 7, 6}, {12, 16, 4, 5}}}} - } + { + {{-1, 16, -1, -1}, {{4, 16, 5, 8}, {8, 16, 7, 6}, {12, 16, 4, 5}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } }; const std::vector> blockShape4D2 = {{1, 2, 3, 4}, {1, 3, 4, 2}}; @@ -124,20 +182,31 @@ const std::vector> cropsBegin4D2 = {{0, 0, 0, 1}, {0, 0, 1 const std::vector> cropsEnd4D2 = {{0, 0, 1, 0}, {0, 0, 3, 1}}; std::vector> staticInputShapes4D2 = { - {{24, 16, 7, 8}} + {{24, 16, 7, 8}, {4}, {4}, {4}} }; std::vector> dynamicInputShapes4D2 = { - { - {{{-1, -1, -1, -1}, {{48, 4, 7, 8}, {24, 8, 6, 7}, {24, 16, 5, 5}}}}, - {{{24, {4, 10}, -1, -1}, {{24, 8, 6, 7}, {24, 6, 7, 5}, {24, 4, 5, 5}}}} - } + { + {{-1, -1, -1, -1}, {{48, 4, 7, 8}, {24, 8, 6, 7}, {24, 16, 5, 5}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + }, + { + {{24, {4, 10}, -1, -1}, {{24, 8, 6, 7}, {24, 6, 7, 5}, {24, 4, 5, 5}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } }; std::vector> dynamicInputShapes4D2Blocked = { - { - {{-1, 16, -1, -1}, {{24, 16, 5, 5}, {24, 16, 6, 7}, {48, 16, 4, 4}}} - } + { + {{-1, 16, -1, -1}, {{24, 16, 5, 5}, {24, 16, 6, 7}, {48, 16, 4, 4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } }; const std::vector cpuParamsWithBlock_4D = { @@ -223,20 +292,32 @@ const std::vector> cropsBegin5D1 = {{0, 0, 0, 0, 0}, {0, 0 const std::vector> cropsEnd5D1 = {{0, 0, 0, 0, 0}, {0, 0, 1, 0, 1}}; std::vector> staticInputShapes5D1 = { - {{8, 16, 4, 10, 10}} + {{8, 16, 4, 10, 10}, {5}, {5}, {5}} }; + std::vector> dynamicInputShapes5D1 = { - { - {{{-1, -1, -1, -1, -1}, 
{{8, 16, 4, 10, 10}, {16, 10, 5, 11, 9}, {24, 6, 6, 8, 8}}}}, - {{{{8, 16}, {8, 16}, {2, 7}, -1, -1}, {{8, 16, 2, 6, 8}, {8, 10, 4, 7, 5}, {16, 8, 7, 5, 10}}}} - } + { + {{-1, -1, -1, -1, -1}, {{8, 16, 4, 10, 10}, {16, 10, 5, 11, 9}, {24, 6, 6, 8, 8}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + }, + { + {{{8, 16}, {8, 16}, {2, 7}, -1, -1}, {{8, 16, 2, 6, 8}, {8, 10, 4, 7, 5}, {16, 8, 7, 5, 10}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + } }; std::vector> dynamicInputShapes5D1Blocked = { - { - {{{-1, 16, -1, -1, -1}, {{24, 16, 3, 6, 7}, {48, 16, 4, 5, 5}, {24, 16, 5, 8, 5}}}} - } + { + {{-1, 16, -1, -1, -1}, {{24, 16, 3, 6, 7}, {48, 16, 4, 5, 5}, {24, 16, 5, 8, 5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + } }; const std::vector> blockShape5D2 = {{1, 2, 4, 3, 1}, {1, 1, 2, 4, 3}}; @@ -244,29 +325,43 @@ const std::vector> cropsBegin5D2 = {{0, 0, 1, 2, 0}, {0, 0 const std::vector> cropsEnd5D2 = {{0, 0, 1, 0, 1}, {0, 0, 1, 1, 1}}; std::vector> staticInputShapes5D2 = { - {{48, 16, 3, 3, 3}} + {{48, 16, 3, 3, 3}, {5}, {5}, {5}} }; std::vector> dynamicInputShapes5D2 = { + { + {{-1, -1, -1, -1, -1}, {{48, 4, 3, 3, 3}, {24, 16, 5, 3, 5}, {24, 8, 7, 5, 5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + }, + { + {{24, {8, 16}, {3, 5}, -1, -1}, {{24, 16, 3, 4, 3}, {24, 12, 5, 3, 5}, {24, 8, 4, 5, 5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + }, + { + // special case + {{{1, 24}, {1, 16}, {1, 10}, {1, 10}, {1, 10}}, { - {{{-1, -1, -1, -1, -1}, {{48, 4, 3, 3, 3}, {24, 16, 5, 3, 5}, {24, 8, 7, 5, 5}}}}, - {{{24, {8, 16}, {3, 5}, -1, -1}, {{24, 16, 3, 4, 3}, {24, 12, 5, 3, 5}, {24, 8, 4, 5, 5}}}}, - // special case - { - {{{1, 24}, {1, 16}, {1, 10}, {1, 10}, {1, 10}}, - { - {24, 16, 5, 3, 5}, - {24, 16, 5, 3, 5}, - {24, 16, 7, 5, 5} - }} - } - } + {24, 16, 5, 3, 5}, + {24, 16, 5, 3, 5}, + {24, 16, 7, 5, 5} + }}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + } }; std::vector> dynamicInputShapes5D2Blocked = { - { - {{{-1, 16, -1, -1, -1}, {{24, 16, 4, 5, 5}, {48, 16, 3, 4, 3}, {24, 16, 5, 3, 5}}}} - } + { + {{-1, 16, -1, -1, -1}, {{24, 16, 4, 5, 5}, {48, 16, 3, 4, 3}, {24, 16, 5, 3, 5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + } }; const std::vector cpuParamsWithBlock_5D = { diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp index 01edafe13aeda3..4cbf4379033a6f 100644 --- a/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/space_to_batch.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include #include "shared_test_classes/base/ov_subgraph.hpp" #include "ngraph_functions/builders.hpp" #include "test_utils/cpu_test_utils.hpp" @@ -13,8 +14,13 @@ using namespace ov::test; namespace CPULayerTestsDefinitions { +namespace { + std::vector blockShape, padsBegin, padsEnd; + ngraph::Shape paramShape; +} // namespace + using SpaceToBatchLayerTestCPUParams = std::tuple< - InputShape, // Input shapes + std::vector, // Input shapes std::vector, // block shape std::vector, // pads begin std::vector, // pads end @@ -25,21 +31,24 @@ class SpaceToBatchCPULayerTest : public testing::WithParamInterface &obj) { - InputShape inputShapes; - std::vector 
blockShape, padsBegin, padsEnd; + std::vector inputShapes; Precision netPrecision; CPUSpecificParams cpuParams; std::tie(inputShapes, blockShape, padsBegin, padsEnd, netPrecision, cpuParams) = obj.param; std::ostringstream result; - if (inputShapes.first.size() != 0) { + if (inputShapes.front().first.size() != 0) { result << "IS=("; - result << CommonTestUtils::partialShape2str(std::vector{inputShapes.first}) << "_"; + for (const auto &shape : inputShapes) { + result << CommonTestUtils::partialShape2str({shape.first}) << "_"; + } result.seekp(-1, result.cur); result << ")_"; } result << "TS="; - for (const auto &item : inputShapes.second) { - result << CommonTestUtils::vec2str(item) << "_"; + for (const auto& shape : inputShapes) { + for (const auto& item : shape.second) { + result << CommonTestUtils::vec2str(item) << "_"; + } } result << "blockShape=" << CommonTestUtils::vec2str(blockShape) << "_"; result << "padsBegin=" << CommonTestUtils::vec2str(padsBegin) << "_"; @@ -49,19 +58,47 @@ class SpaceToBatchCPULayerTest : public testing::WithParamInterface& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (int i = 0; i < funcInputs.size(); i++) { + const auto& funcInput = funcInputs[i]; + ov::Tensor tensor; + if (i == 0) { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); + } else if (i == 1) { + tensor = ov::Tensor(funcInput.get_element_type(), paramShape); + auto *dataPtr = tensor.data(); + for (size_t j = 0; j < blockShape.size(); j++) { + dataPtr[j] = blockShape[j]; + } + } else if (i == 2) { + tensor = ov::Tensor(funcInput.get_element_type(), paramShape); + auto *dataPtr = tensor.data(); + for (size_t j = 0; j < padsBegin.size(); j++) { + dataPtr[j] = padsBegin[j]; + } + } else if (i == 3) { + tensor = ov::Tensor(funcInput.get_element_type(), paramShape); + auto *dataPtr = tensor.data(); + for (size_t j = 0; j < padsEnd.size(); j++) { + dataPtr[j] = padsEnd[j]; + } + } + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + } + protected: void SetUp() override { targetDevice = CommonTestUtils::DEVICE_CPU; - - InputShape inputShapes; - std::vector blockShape, padsBegin, padsEnd; + std::vector inputShapes; Precision netPrecision; CPUSpecificParams cpuParams; std::tie(inputShapes, blockShape, padsBegin, padsEnd, netPrecision, cpuParams) = this->GetParam(); std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; auto ngPrec = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - inType = outType = ngPrec; const std::vector inputShapesVec{inputShapes}; init_input_shapes(inputShapesVec); @@ -72,7 +109,20 @@ class SpaceToBatchCPULayerTest : public testing::WithParamInterface(params)); - auto s2b = ngraph::builder::makeSpaceToBatch(paramOuts[0], ngPrec, blockShape, padsBegin, padsEnd); + paramShape = {paramOuts[0].get_partial_shape().size()}; + + std::shared_ptr in2, in3, in4; + auto blockShapeParam = std::make_shared(ngraph::element::i64, paramShape); + in2 = blockShapeParam; + params.push_back(blockShapeParam); + auto padsBeginParam = std::make_shared(ngraph::element::i64, paramShape); + in3 = padsBeginParam; + params.push_back(padsBeginParam); + auto padsEndParam = std::make_shared(ngraph::element::i64, paramShape); + in4 = padsEndParam; + params.push_back(padsEndParam); + + auto s2b = std::make_shared(paramOuts[0], in2, in3, in4); function = makeNgraphFunction(inType, params, s2b, "SpaceToBatchCPU"); } }; @@ -96,29 +146,67 @@ 
const std::vector> blockShape4D1 = {{1, 2, 1, 2}, {1, 1, 2, const std::vector> padsBegin4D1 = {{0, 0, 0, 1}, {0, 0, 2, 1}, {0, 0, 4, 3}}; const std::vector> padsEnd4D1 = {{0, 0, 0, 1}, {0, 0, 4, 1}, {0, 0, 2, 3}}; -std::vector staticInputShapes4D1 = {{1, 16, 8, 12}, {1, 32, 8, 8}}; +std::vector> staticInputShapes4D1 = { + {{1, 16, 8, 12}, {4}, {4}, {4}}, + {{1, 32, 8, 8}, {4}, {4}, {4}}, +}; -std::vector dynamicInputShapes4D1 = { - {{-1, -1, -1, -1}, {{1, 6, 4, 8}, {2, 4, 8, 10}, {1, 8, 4, 10}}}, - {{{1, 4}, {2, 16}, 6, -1}, {{4, 8, 6, 4}, {1, 6, 6, 8}, {2, 12, 6, 4}}} +std::vector> dynamicInputShapes4D1 = { + { + {{-1, -1, -1, -1}, {{1, 6, 4, 8}, {2, 4, 8, 10}, {1, 8, 4, 10}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + }, + { + {{{1, 4}, {2, 16}, 6, -1}, {{4, 8, 6, 4}, {1, 6, 6, 8}, {2, 12, 6, 4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } }; -std::vector dynamicInputShapes4D1Blocked = { - {{-1, 16, -1, -1}, {{1, 16, 4, 6}, {2, 16, 6, 6}, {4, 16, 4, 8}}} +std::vector> dynamicInputShapes4D1Blocked = { + { + {{-1, 16, -1, -1}, {{1, 16, 4, 6}, {2, 16, 6, 6}, {4, 16, 4, 8}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } }; + const std::vector> blockShape4D2 = { {1, 2, 4, 3}, {1, 4, 4, 1}}; const std::vector> padsBegin4D2 = {{0, 0, 0, 0}, {0, 0, 4, 3}}; const std::vector> padsEnd4D2 = {{0, 0, 4, 0}, {0, 0, 4, 3}}; -std::vector staticInputShapes4D2 = {{1, 16, 12, 12}, {1, 32, 12, 15}}; -std::vector dynamicInputShapes4D2 = { - {{-1, -1, -1, -1}, {{1, 4, 8, 9}, {2, 8, 12, 9}, {6, 12, 4, 12}}}, - {{2, {4, 16}, -1, -1}, {{2, 8, 4, 9}, {2, 4, 8, 6}, {2, 12, 12, 3}}} +std::vector> staticInputShapes4D2 = { + {{1, 16, 12, 12}, {4}, {4}, {4}}, + {{1, 32, 12, 15}, {4}, {4}, {4}}, }; -std::vector dynamicInputShapes4D2Blocked = { - {{-1, 16, -1, -1}, {{2, 16, 4, 15}, {2, 16, 8, 12}, {3, 16, 12, 9}}} +std::vector> dynamicInputShapes4D2 = { + { + {{-1, -1, -1, -1}, {{1, 4, 8, 9}, {2, 8, 12, 9}, {6, 12, 4, 12}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + }, + { + {{2, {4, 16}, -1, -1}, {{2, 8, 4, 9}, {2, 4, 8, 6}, {2, 12, 12, 3}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } +}; + +std::vector> dynamicInputShapes4D2Blocked = { + { + {{-1, 16, -1, -1}, {{2, 16, 4, 15}, {2, 16, 8, 12}, {3, 16, 12, 9}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}}, + {{4}, {{4}, {4}, {4}}} + } }; const std::vector cpuParamsWithBlock_4D = { @@ -203,15 +291,34 @@ const std::vector> blockShape5D = {{1, 1, 2, 2, 1}, {1, 2, const std::vector> padsBegin5D = {{0, 0, 0, 0, 0}, {0, 0, 4, 0, 0}, {0, 0, 0, 2, 3}}; const std::vector> padsEnd5D = {{0, 0, 0, 0, 0}, {0, 0, 0, 4, 3}, {0, 0, 4, 2, 3}}; -std::vector staticInputShapes5D = {{2, 16, 4, 6, 12}, {1, 32, 8, 8, 6}, {1, 16, 4, 12, 12}}; +std::vector> staticInputShapes5D = { + {{2, 16, 4, 6, 12}, {5}, {5}, {5}}, + {{1, 32, 8, 8, 6}, {5}, {5}, {5}}, + {{1, 16, 4, 12, 12}, {5}, {5}, {5}} +}; -std::vector dynamicInputShapes5D = { - {{-1, -1, -1, -1, -1}, {{2, 2, 12, 4, 15}, {4, 4, 8, 6, 9}, {3, 6, 4, 2, 12}}}, - {{{1, 10}, {2, 20}, {4, 50}, -1, -1}, {{3, 12, 8, 6, 9}, {5, 10, 4, 8, 15}, {6, 8, 20, 4, 12}}} +std::vector> dynamicInputShapes5D = { + { + {{-1, -1, -1, -1, -1}, {{2, 2, 12, 4, 15}, {4, 4, 8, 6, 9}, {3, 6, 4, 2, 12}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}}, + {{5}, {{5}, {5}, {5}}} + }, + { + {{{1, 10}, {2, 20}, {4, 50}, -1, -1}, {{3, 12, 8, 6, 9}, {5, 10, 4, 8, 15}, {6, 8, 20, 
4, 12}}},
+        {{5}, {{5}, {5}, {5}}},
+        {{5}, {{5}, {5}, {5}}},
+        {{5}, {{5}, {5}, {5}}}
+    }
 };

-std::vector<InputShape> dynamicInputShapes5DBlocked = {
-    {{-1, 16, -1, -1, -1}, {{2, 16, 4, 6, 9}, {5, 16, 16, 4, 6}, {7, 16, 8, 2, 3}}}
+std::vector<std::vector<InputShape>> dynamicInputShapes5DBlocked = {
+    {
+        {{-1, 16, -1, -1, -1}, {{2, 16, 4, 6, 9}, {5, 16, 16, 4, 6}, {7, 16, 8, 2, 3}}},
+        {{5}, {{5}, {5}, {5}}},
+        {{5}, {{5}, {5}, {5}}},
+        {{5}, {{5}, {5}, {5}}}
+    }
 };

 const std::vector<CPUSpecificParams> cpuParamsWithBlock_5D = {
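
With the constant-input restriction removed, SpaceToBatch and BatchToSpace accept
block_shape and pads/crops as ordinary graph inputs, and the CPU kernels re-read
their values from input memory on every execution. A minimal Python sketch of such
a graph (hypothetical shapes and values; assumes the standard OpenVINO Python API,
as exercised by the layer tests above):

    import numpy as np
    import openvino.runtime.opset10 as ov
    from openvino.runtime import Core, Model

    # All four SpaceToBatch inputs are Parameters, so none of them can be
    # constant-folded; the plugin must fetch the values at execution time.
    data = ov.parameter([2, 16, 4, 6, 12], np.float32, name="data")
    block = ov.parameter([5], np.int64, name="block_shape")
    pads_begin = ov.parameter([5], np.int64, name="pads_begin")
    pads_end = ov.parameter([5], np.int64, name="pads_end")
    s2b = ov.space_to_batch(data, block, pads_begin, pads_end)
    model = Model([s2b], [data, block, pads_begin, pads_end], "SpaceToBatch")

    compiled = Core().compile_model(model, "CPU")
    # Inputs are keyed by index; block/pads values may change per inference.
    out = compiled.infer_new_request({
        0: np.random.rand(2, 16, 4, 6, 12).astype(np.float32),
        1: np.array([1, 1, 2, 2, 1], dtype=np.int64),  # block_shape
        2: np.zeros(5, dtype=np.int64),                # pads_begin
        3: np.zeros(5, dtype=np.int64),                # pads_end
    })

From 951c5fdae9fcd081954517ed76280962f52614d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Do=C5=82bniak?=
Date: Wed, 22 Mar 2023 19:12:16 +0100
Subject: [PATCH 044/296] Interpolate 11 exposed to Python (#16465)

---
 docs/api/ie_python_api/api.rst | 8 +-
 src/bindings/python/setup.py | 1 +
 .../src/compatibility/ngraph/__init__.py | 348 +++++++++---------
 .../compatibility/ngraph/opset11/__init__.py | 177 +++++++++
 .../src/compatibility/ngraph/opset11/ops.py | 77 ++++
 .../ngraph/utils/node_factory.py | 2 +-
 .../compatibility/pyngraph/node_factory.cpp | 2 +-
 .../python/src/openvino/runtime/__init__.py | 33 +-
 .../src/openvino/runtime/opset11/__init__.py | 178 +++++++++
 .../src/openvino/runtime/opset11/ops.py | 77 ++++
 .../openvino/runtime/utils/node_factory.py | 2 +-
 .../src/pyopenvino/graph/node_factory.cpp | 2 +-
 .../python/tests/test_graph/test_create_op.py | 28 +-
 .../test_ngraph/test_create_op.py | 25 +-
 14 files changed, 760 insertions(+), 200 deletions(-)
 create mode 100644 src/bindings/python/src/compatibility/ngraph/opset11/__init__.py
 create mode 100644 src/bindings/python/src/compatibility/ngraph/opset11/ops.py
 create mode 100644 src/bindings/python/src/openvino/runtime/opset11/__init__.py
 create mode 100644 src/bindings/python/src/openvino/runtime/opset11/ops.py

diff --git a/docs/api/ie_python_api/api.rst b/docs/api/ie_python_api/api.rst
index c06cc2b2fb8c80..5faa85f4e05b8e 100644
--- a/docs/api/ie_python_api/api.rst
+++ b/docs/api/ie_python_api/api.rst
@@ -73,6 +73,12 @@ OpenVINO Python API

    openvino.runtime.opset10

+.. autosummary::
+   :toctree: _autosummary
+   :template: custom-module-template.rst
+
+   openvino.runtime.opset11
+
 ..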
autosummary:: :toctree: _autosummary :template: custom-module-template.rst @@ -95,4 +101,4 @@ OpenVINO Python API :maxdepth: 2 :hidden: - compatibility \ No newline at end of file + compatibility diff --git a/src/bindings/python/setup.py b/src/bindings/python/setup.py index 021e37875e32da..6205f70aeb4e04 100644 --- a/src/bindings/python/setup.py +++ b/src/bindings/python/setup.py @@ -55,6 +55,7 @@ "openvino.runtime.opset8", "openvino.runtime.opset9", "openvino.runtime.opset10", + "openvino.runtime.opset11", "openvino.runtime.utils", "openvino.runtime.op", "openvino.runtime.op.util", diff --git a/src/bindings/python/src/compatibility/ngraph/__init__.py b/src/bindings/python/src/compatibility/ngraph/__init__.py index 3109d9e3d1d525..d80f2199dfc5d0 100644 --- a/src/bindings/python/src/compatibility/ngraph/__init__.py +++ b/src/bindings/python/src/compatibility/ngraph/__init__.py @@ -18,180 +18,180 @@ from ngraph.impl import PartialShape from ngraph.helpers import function_from_cnn from ngraph.helpers import function_to_cnn -from ngraph.opset10 import absolute -from ngraph.opset10 import absolute as abs -from ngraph.opset10 import acos -from ngraph.opset10 import acosh -from ngraph.opset10 import adaptive_avg_pool -from ngraph.opset10 import adaptive_max_pool -from ngraph.opset10 import add -from ngraph.opset10 import asin -from ngraph.opset10 import asinh -from ngraph.opset10 import assign -from ngraph.opset10 import atan -from ngraph.opset10 import atanh -from ngraph.opset10 import avg_pool -from ngraph.opset10 import batch_norm_inference -from ngraph.opset10 import batch_to_space -from ngraph.opset10 import binary_convolution -from ngraph.opset10 import broadcast -from ngraph.opset10 import bucketize -from ngraph.opset10 import ceiling -from ngraph.opset10 import ceiling as ceil -from ngraph.opset10 import clamp -from ngraph.opset10 import concat -from ngraph.opset10 import constant -from ngraph.opset10 import convert -from ngraph.opset10 import convert_like -from ngraph.opset10 import convolution -from ngraph.opset10 import convolution_backprop_data -from ngraph.opset10 import cos -from ngraph.opset10 import cosh -from ngraph.opset10 import ctc_greedy_decoder -from ngraph.opset10 import ctc_greedy_decoder_seq_len -from ngraph.opset10 import ctc_loss -from ngraph.opset10 import cum_sum -from ngraph.opset10 import cum_sum as cumsum -from ngraph.opset10 import deformable_convolution -from ngraph.opset10 import deformable_psroi_pooling -from ngraph.opset10 import depth_to_space -from ngraph.opset10 import detection_output -from ngraph.opset10 import dft -from ngraph.opset10 import divide -from ngraph.opset10 import einsum -from ngraph.opset10 import elu -from ngraph.opset10 import embedding_bag_offsets_sum -from ngraph.opset10 import embedding_bag_packed_sum -from ngraph.opset10 import embedding_segments_sum -from ngraph.opset10 import extract_image_patches -from ngraph.opset10 import equal -from ngraph.opset10 import erf -from ngraph.opset10 import exp -from ngraph.opset10 import eye -from ngraph.opset10 import fake_quantize -from ngraph.opset10 import floor -from ngraph.opset10 import floor_mod -from ngraph.opset10 import gather -from ngraph.opset10 import gather_elements -from ngraph.opset10 import gather_nd -from ngraph.opset10 import gather_tree -from ngraph.opset10 import gelu -from ngraph.opset10 import generate_proposals -from ngraph.opset10 import greater -from ngraph.opset10 import greater_equal -from ngraph.opset10 import grid_sample -from ngraph.opset10 import grn -from 
ngraph.opset10 import group_convolution -from ngraph.opset10 import group_convolution_backprop_data -from ngraph.opset10 import gru_cell -from ngraph.opset10 import gru_sequence -from ngraph.opset10 import hard_sigmoid -from ngraph.opset10 import hsigmoid -from ngraph.opset10 import hswish -from ngraph.opset10 import idft -from ngraph.opset10 import if_op -from ngraph.opset10 import interpolate -from ngraph.opset10 import irdft -from ngraph.opset10 import is_finite -from ngraph.opset10 import is_inf -from ngraph.opset10 import is_nan -from ngraph.opset10 import i420_to_bgr -from ngraph.opset10 import i420_to_rgb -from ngraph.opset10 import less -from ngraph.opset10 import less_equal -from ngraph.opset10 import log -from ngraph.opset10 import logical_and -from ngraph.opset10 import logical_not -from ngraph.opset10 import logical_or -from ngraph.opset10 import logical_xor -from ngraph.opset10 import log_softmax -from ngraph.opset10 import loop -from ngraph.opset10 import lrn -from ngraph.opset10 import lstm_cell -from ngraph.opset10 import lstm_sequence -from ngraph.opset10 import matmul -from ngraph.opset10 import matrix_nms -from ngraph.opset10 import max_pool -from ngraph.opset10 import maximum -from ngraph.opset10 import minimum -from ngraph.opset10 import mish -from ngraph.opset10 import mod -from ngraph.opset10 import multiclass_nms -from ngraph.opset10 import multiply -from ngraph.opset10 import mvn -from ngraph.opset10 import negative -from ngraph.opset10 import non_max_suppression -from ngraph.opset10 import non_zero -from ngraph.opset10 import normalize_l2 -from ngraph.opset10 import not_equal -from ngraph.opset10 import nv12_to_bgr -from ngraph.opset10 import nv12_to_rgb -from ngraph.opset10 import one_hot -from ngraph.opset10 import pad -from ngraph.opset10 import parameter -from ngraph.opset10 import power -from ngraph.opset10 import prelu -from ngraph.opset10 import prior_box -from ngraph.opset10 import prior_box_clustered -from ngraph.opset10 import psroi_pooling -from ngraph.opset10 import proposal -from ngraph.opset10 import random_uniform -from ngraph.opset10 import range -from ngraph.opset10 import rdft -from ngraph.opset10 import read_value -from ngraph.opset10 import reduce_l1 -from ngraph.opset10 import reduce_l2 -from ngraph.opset10 import reduce_logical_and -from ngraph.opset10 import reduce_logical_or -from ngraph.opset10 import reduce_max -from ngraph.opset10 import reduce_mean -from ngraph.opset10 import reduce_min -from ngraph.opset10 import reduce_prod -from ngraph.opset10 import reduce_sum -from ngraph.opset10 import region_yolo -from ngraph.opset10 import reorg_yolo -from ngraph.opset10 import relu -from ngraph.opset10 import reshape -from ngraph.opset10 import result -from ngraph.opset10 import reverse_sequence -from ngraph.opset10 import rnn_cell -from ngraph.opset10 import rnn_sequence -from ngraph.opset10 import roi_align -from ngraph.opset10 import roi_pooling -from ngraph.opset10 import roll -from ngraph.opset10 import round -from ngraph.opset10 import scatter_elements_update -from ngraph.opset10 import scatter_update -from ngraph.opset10 import select -from ngraph.opset10 import selu -from ngraph.opset10 import shape_of -from ngraph.opset10 import shuffle_channels -from ngraph.opset10 import sigmoid -from ngraph.opset10 import sign -from ngraph.opset10 import sin -from ngraph.opset10 import sinh -from ngraph.opset10 import slice -from ngraph.opset10 import softmax -from ngraph.opset10 import softplus -from ngraph.opset10 import softsign -from 
ngraph.opset10 import space_to_batch -from ngraph.opset10 import space_to_depth -from ngraph.opset10 import split -from ngraph.opset10 import sqrt -from ngraph.opset10 import squared_difference -from ngraph.opset10 import squeeze -from ngraph.opset10 import strided_slice -from ngraph.opset10 import subtract -from ngraph.opset10 import swish -from ngraph.opset10 import tan -from ngraph.opset10 import tanh -from ngraph.opset10 import tensor_iterator -from ngraph.opset10 import tile -from ngraph.opset10 import topk -from ngraph.opset10 import transpose -from ngraph.opset10 import unique -from ngraph.opset10 import unsqueeze -from ngraph.opset10 import variadic_split +from ngraph.opset11 import absolute +from ngraph.opset11 import absolute as abs +from ngraph.opset11 import acos +from ngraph.opset11 import acosh +from ngraph.opset11 import adaptive_avg_pool +from ngraph.opset11 import adaptive_max_pool +from ngraph.opset11 import add +from ngraph.opset11 import asin +from ngraph.opset11 import asinh +from ngraph.opset11 import assign +from ngraph.opset11 import atan +from ngraph.opset11 import atanh +from ngraph.opset11 import avg_pool +from ngraph.opset11 import batch_norm_inference +from ngraph.opset11 import batch_to_space +from ngraph.opset11 import binary_convolution +from ngraph.opset11 import broadcast +from ngraph.opset11 import bucketize +from ngraph.opset11 import ceiling +from ngraph.opset11 import ceiling as ceil +from ngraph.opset11 import clamp +from ngraph.opset11 import concat +from ngraph.opset11 import constant +from ngraph.opset11 import convert +from ngraph.opset11 import convert_like +from ngraph.opset11 import convolution +from ngraph.opset11 import convolution_backprop_data +from ngraph.opset11 import cos +from ngraph.opset11 import cosh +from ngraph.opset11 import ctc_greedy_decoder +from ngraph.opset11 import ctc_greedy_decoder_seq_len +from ngraph.opset11 import ctc_loss +from ngraph.opset11 import cum_sum +from ngraph.opset11 import cum_sum as cumsum +from ngraph.opset11 import deformable_convolution +from ngraph.opset11 import deformable_psroi_pooling +from ngraph.opset11 import depth_to_space +from ngraph.opset11 import detection_output +from ngraph.opset11 import dft +from ngraph.opset11 import divide +from ngraph.opset11 import einsum +from ngraph.opset11 import elu +from ngraph.opset11 import embedding_bag_offsets_sum +from ngraph.opset11 import embedding_bag_packed_sum +from ngraph.opset11 import embedding_segments_sum +from ngraph.opset11 import extract_image_patches +from ngraph.opset11 import equal +from ngraph.opset11 import erf +from ngraph.opset11 import exp +from ngraph.opset11 import eye +from ngraph.opset11 import fake_quantize +from ngraph.opset11 import floor +from ngraph.opset11 import floor_mod +from ngraph.opset11 import gather +from ngraph.opset11 import gather_elements +from ngraph.opset11 import gather_nd +from ngraph.opset11 import gather_tree +from ngraph.opset11 import gelu +from ngraph.opset11 import generate_proposals +from ngraph.opset11 import greater +from ngraph.opset11 import greater_equal +from ngraph.opset11 import grid_sample +from ngraph.opset11 import grn +from ngraph.opset11 import group_convolution +from ngraph.opset11 import group_convolution_backprop_data +from ngraph.opset11 import gru_cell +from ngraph.opset11 import gru_sequence +from ngraph.opset11 import hard_sigmoid +from ngraph.opset11 import hsigmoid +from ngraph.opset11 import hswish +from ngraph.opset11 import idft +from ngraph.opset11 import if_op +from 
ngraph.opset11 import interpolate +from ngraph.opset11 import irdft +from ngraph.opset11 import is_finite +from ngraph.opset11 import is_inf +from ngraph.opset11 import is_nan +from ngraph.opset11 import i420_to_bgr +from ngraph.opset11 import i420_to_rgb +from ngraph.opset11 import less +from ngraph.opset11 import less_equal +from ngraph.opset11 import log +from ngraph.opset11 import logical_and +from ngraph.opset11 import logical_not +from ngraph.opset11 import logical_or +from ngraph.opset11 import logical_xor +from ngraph.opset11 import log_softmax +from ngraph.opset11 import loop +from ngraph.opset11 import lrn +from ngraph.opset11 import lstm_cell +from ngraph.opset11 import lstm_sequence +from ngraph.opset11 import matmul +from ngraph.opset11 import matrix_nms +from ngraph.opset11 import max_pool +from ngraph.opset11 import maximum +from ngraph.opset11 import minimum +from ngraph.opset11 import mish +from ngraph.opset11 import mod +from ngraph.opset11 import multiclass_nms +from ngraph.opset11 import multiply +from ngraph.opset11 import mvn +from ngraph.opset11 import negative +from ngraph.opset11 import non_max_suppression +from ngraph.opset11 import non_zero +from ngraph.opset11 import normalize_l2 +from ngraph.opset11 import not_equal +from ngraph.opset11 import nv12_to_bgr +from ngraph.opset11 import nv12_to_rgb +from ngraph.opset11 import one_hot +from ngraph.opset11 import pad +from ngraph.opset11 import parameter +from ngraph.opset11 import power +from ngraph.opset11 import prelu +from ngraph.opset11 import prior_box +from ngraph.opset11 import prior_box_clustered +from ngraph.opset11 import psroi_pooling +from ngraph.opset11 import proposal +from ngraph.opset11 import random_uniform +from ngraph.opset11 import range +from ngraph.opset11 import rdft +from ngraph.opset11 import read_value +from ngraph.opset11 import reduce_l1 +from ngraph.opset11 import reduce_l2 +from ngraph.opset11 import reduce_logical_and +from ngraph.opset11 import reduce_logical_or +from ngraph.opset11 import reduce_max +from ngraph.opset11 import reduce_mean +from ngraph.opset11 import reduce_min +from ngraph.opset11 import reduce_prod +from ngraph.opset11 import reduce_sum +from ngraph.opset11 import region_yolo +from ngraph.opset11 import reorg_yolo +from ngraph.opset11 import relu +from ngraph.opset11 import reshape +from ngraph.opset11 import result +from ngraph.opset11 import reverse_sequence +from ngraph.opset11 import rnn_cell +from ngraph.opset11 import rnn_sequence +from ngraph.opset11 import roi_align +from ngraph.opset11 import roi_pooling +from ngraph.opset11 import roll +from ngraph.opset11 import round +from ngraph.opset11 import scatter_elements_update +from ngraph.opset11 import scatter_update +from ngraph.opset11 import select +from ngraph.opset11 import selu +from ngraph.opset11 import shape_of +from ngraph.opset11 import shuffle_channels +from ngraph.opset11 import sigmoid +from ngraph.opset11 import sign +from ngraph.opset11 import sin +from ngraph.opset11 import sinh +from ngraph.opset11 import slice +from ngraph.opset11 import softmax +from ngraph.opset11 import softplus +from ngraph.opset11 import softsign +from ngraph.opset11 import space_to_batch +from ngraph.opset11 import space_to_depth +from ngraph.opset11 import split +from ngraph.opset11 import sqrt +from ngraph.opset11 import squared_difference +from ngraph.opset11 import squeeze +from ngraph.opset11 import strided_slice +from ngraph.opset11 import subtract +from ngraph.opset11 import swish +from ngraph.opset11 import tan 
+from ngraph.opset11 import tanh +from ngraph.opset11 import tensor_iterator +from ngraph.opset11 import tile +from ngraph.opset11 import topk +from ngraph.opset11 import transpose +from ngraph.opset11 import unique +from ngraph.opset11 import unsqueeze +from ngraph.opset11 import variadic_split # Extend Node class to support binary operators diff --git a/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py b/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py new file mode 100644 index 00000000000000..91f84b81f415cd --- /dev/null +++ b/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py @@ -0,0 +1,177 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from ngraph.opset1.ops import absolute +from ngraph.opset1.ops import absolute as abs +from ngraph.opset1.ops import acos +from ngraph.opset4.ops import acosh +from ngraph.opset8.ops import adaptive_avg_pool +from ngraph.opset8.ops import adaptive_max_pool +from ngraph.opset1.ops import add +from ngraph.opset1.ops import asin +from ngraph.opset4.ops import asinh +from ngraph.opset3.ops import assign +from ngraph.opset1.ops import atan +from ngraph.opset4.ops import atanh +from ngraph.opset1.ops import avg_pool +from ngraph.opset5.ops import batch_norm_inference +from ngraph.opset2.ops import batch_to_space +from ngraph.opset1.ops import binary_convolution +from ngraph.opset3.ops import broadcast +from ngraph.opset3.ops import bucketize +from ngraph.opset1.ops import ceiling +from ngraph.opset1.ops import ceiling as ceil +from ngraph.opset1.ops import clamp +from ngraph.opset1.ops import concat +from ngraph.opset1.ops import constant +from ngraph.opset1.ops import convert +from ngraph.opset1.ops import convert_like +from ngraph.opset1.ops import convolution +from ngraph.opset1.ops import convolution_backprop_data +from ngraph.opset1.ops import cos +from ngraph.opset1.ops import cosh +from ngraph.opset1.ops import ctc_greedy_decoder +from ngraph.opset6.ops import ctc_greedy_decoder_seq_len +from ngraph.opset4.ops import ctc_loss +from ngraph.opset3.ops import cum_sum +from ngraph.opset3.ops import cum_sum as cumsum +from ngraph.opset8.ops import deformable_convolution +from ngraph.opset1.ops import deformable_psroi_pooling +from ngraph.opset1.ops import depth_to_space +from ngraph.opset8.ops import detection_output +from ngraph.opset7.ops import dft +from ngraph.opset1.ops import divide +from ngraph.opset7.ops import einsum +from ngraph.opset1.ops import elu +from ngraph.opset3.ops import embedding_bag_offsets_sum +from ngraph.opset3.ops import embedding_bag_packed_sum +from ngraph.opset3.ops import embedding_segments_sum +from ngraph.opset3.ops import extract_image_patches +from ngraph.opset1.ops import equal +from ngraph.opset1.ops import erf +from ngraph.opset1.ops import exp +from ngraph.opset9.ops import eye +from ngraph.opset1.ops import fake_quantize +from ngraph.opset1.ops import floor +from ngraph.opset1.ops import floor_mod +from ngraph.opset8.ops import gather +from ngraph.opset6.ops import gather_elements +from ngraph.opset8.ops import gather_nd +from ngraph.opset1.ops import gather_tree +from ngraph.opset7.ops import gelu +from ngraph.opset9.ops import generate_proposals +from ngraph.opset1.ops import greater +from ngraph.opset1.ops import greater_equal +from ngraph.opset9.ops import grid_sample +from ngraph.opset1.ops import grn +from ngraph.opset1.ops import group_convolution +from ngraph.opset1.ops import group_convolution_backprop_data +from 
ngraph.opset3.ops import gru_cell +from ngraph.opset5.ops import gru_sequence +from ngraph.opset1.ops import hard_sigmoid +from ngraph.opset5.ops import hsigmoid +from ngraph.opset4.ops import hswish +from ngraph.opset7.ops import idft +from ngraph.opset8.ops import if_op +from ngraph.opset11.ops import interpolate +from ngraph.opset9.ops import irdft +from ngraph.opset10.ops import is_finite +from ngraph.opset10.ops import is_inf +from ngraph.opset10.ops import is_nan +from ngraph.opset8.ops import i420_to_bgr +from ngraph.opset8.ops import i420_to_rgb +from ngraph.opset1.ops import less +from ngraph.opset1.ops import less_equal +from ngraph.opset1.ops import log +from ngraph.opset1.ops import logical_and +from ngraph.opset1.ops import logical_not +from ngraph.opset1.ops import logical_or +from ngraph.opset1.ops import logical_xor +from ngraph.opset5.ops import log_softmax +from ngraph.opset5.ops import loop +from ngraph.opset1.ops import lrn +from ngraph.opset4.ops import lstm_cell +from ngraph.opset5.ops import lstm_sequence +from ngraph.opset1.ops import matmul +from ngraph.opset8.ops import matrix_nms +from ngraph.opset8.ops import max_pool +from ngraph.opset1.ops import maximum +from ngraph.opset1.ops import minimum +from ngraph.opset4.ops import mish +from ngraph.opset1.ops import mod +from ngraph.opset9.ops import multiclass_nms +from ngraph.opset1.ops import multiply +from ngraph.opset6.ops import mvn +from ngraph.opset1.ops import negative +from ngraph.opset9.ops import non_max_suppression +from ngraph.opset3.ops import non_zero +from ngraph.opset1.ops import normalize_l2 +from ngraph.opset1.ops import not_equal +from ngraph.opset8.ops import nv12_to_bgr +from ngraph.opset8.ops import nv12_to_rgb +from ngraph.opset1.ops import one_hot +from ngraph.opset1.ops import pad +from ngraph.opset1.ops import parameter +from ngraph.opset1.ops import power +from ngraph.opset1.ops import prelu +from ngraph.opset8.ops import prior_box +from ngraph.opset1.ops import prior_box_clustered +from ngraph.opset1.ops import psroi_pooling +from ngraph.opset4.ops import proposal +from ngraph.opset8.ops import random_uniform +from ngraph.opset1.ops import range +from ngraph.opset9.ops import rdft +from ngraph.opset3.ops import read_value +from ngraph.opset4.ops import reduce_l1 +from ngraph.opset4.ops import reduce_l2 +from ngraph.opset1.ops import reduce_logical_and +from ngraph.opset1.ops import reduce_logical_or +from ngraph.opset1.ops import reduce_max +from ngraph.opset1.ops import reduce_mean +from ngraph.opset1.ops import reduce_min +from ngraph.opset1.ops import reduce_prod +from ngraph.opset1.ops import reduce_sum +from ngraph.opset1.ops import region_yolo +from ngraph.opset2.ops import reorg_yolo +from ngraph.opset1.ops import relu +from ngraph.opset1.ops import reshape +from ngraph.opset1.ops import result +from ngraph.opset1.ops import reverse_sequence +from ngraph.opset3.ops import rnn_cell +from ngraph.opset5.ops import rnn_sequence +from ngraph.opset9.ops import roi_align +from ngraph.opset2.ops import roi_pooling +from ngraph.opset7.ops import roll +from ngraph.opset5.ops import round +from ngraph.opset3.ops import scatter_elements_update +from ngraph.opset3.ops import scatter_update +from ngraph.opset1.ops import select +from ngraph.opset1.ops import selu +from ngraph.opset3.ops import shape_of +from ngraph.opset3.ops import shuffle_channels +from ngraph.opset1.ops import sigmoid +from ngraph.opset1.ops import sign +from ngraph.opset1.ops import sin +from ngraph.opset1.ops import sinh 
+from ngraph.opset8.ops import slice
+from ngraph.opset8.ops import softmax
+from ngraph.opset4.ops import softplus
+from ngraph.opset9.ops import softsign
+from ngraph.opset2.ops import space_to_batch
+from ngraph.opset1.ops import space_to_depth
+from ngraph.opset1.ops import split
+from ngraph.opset1.ops import sqrt
+from ngraph.opset1.ops import squared_difference
+from ngraph.opset1.ops import squeeze
+from ngraph.opset1.ops import strided_slice
+from ngraph.opset1.ops import subtract
+from ngraph.opset4.ops import swish
+from ngraph.opset1.ops import tan
+from ngraph.opset1.ops import tanh
+from ngraph.opset1.ops import tensor_iterator
+from ngraph.opset1.ops import tile
+from ngraph.opset3.ops import topk
+from ngraph.opset1.ops import transpose
+from ngraph.opset10.ops import unique
+from ngraph.opset1.ops import unsqueeze
+from ngraph.opset1.ops import variadic_split
diff --git a/src/bindings/python/src/compatibility/ngraph/opset11/ops.py b/src/bindings/python/src/compatibility/ngraph/opset11/ops.py
new file mode 100644
index 00000000000000..434b778b246cf8
--- /dev/null
+++ b/src/bindings/python/src/compatibility/ngraph/opset11/ops.py
@@ -0,0 +1,77 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2018-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""Factory functions for all openvino ops."""
+from functools import partial
+from typing import List, Optional
+
+from ngraph.impl import Node
+from ngraph.opset_utils import _get_node_factory
+from ngraph.utils.decorators import nameable_op
+from ngraph.utils.types import (
+    NodeInput,
+    as_nodes,
+)
+
+_get_node_factory_opset11 = partial(_get_node_factory, "opset11")
+
+# -------------------------------------------- ops ------------------------------------------------
+
+
+@nameable_op
+def interpolate(
+    image: NodeInput,
+    scales_or_sizes: NodeInput,
+    mode: str,
+    shape_calculation_mode: str,
+    pads_begin: Optional[List[int]] = None,
+    pads_end: Optional[List[int]] = None,
+    coordinate_transformation_mode: str = "half_pixel",
+    nearest_mode: str = "round_prefer_floor",
+    antialias: bool = False,
+    cube_coeff: float = -0.75,
+    axes: Optional[NodeInput] = None,
+    name: Optional[str] = None,
+) -> Node:
+    """Performs the interpolation of the input tensor.
+
+    :param image: The node providing input tensor with data for interpolation.
+    :param scales_or_sizes:
+        1D tensor providing information used to calculate the output shape
+        of the operation. It might contain floats (scales) or integers (sizes).
+    :param mode: Specifies the type of interpolation. Possible values are: nearest, linear,
+                 linear_onnx, cubic, bilinear_pillow, bicubic_pillow.
+    :param shape_calculation_mode:
+        Specifies how the scales_or_sizes input should be interpreted.
+    :param pads_begin: Specifies the number of pixels to add to the beginning of the image
+                       being interpolated. Default is None.
+    :param pads_end: Specifies the number of pixels to add to the end of the image being
+                     interpolated. Default is None.
+    :param coordinate_transformation_mode:
+        Specifies how to transform the coordinate in the resized tensor to the
+        coordinate in the original tensor. Default is "half_pixel".
+    :param nearest_mode: Specifies the rounding mode; it is used only when
+                         mode == nearest. Default is "round_prefer_floor".
+    :param antialias: Specifies whether to perform anti-aliasing. Default is False.
+    :param cube_coeff: Specifies the parameter a for cubic interpolation. Default is -0.75.
+ :param axes: 1D tensor specifying dimension indices where interpolation is applied. + The default is None. + :param name: Optional name for the output node. The default is None. + :return: Node representing the interpolation operation. + """ + attrs = { + "mode": mode, + "shape_calculation_mode": shape_calculation_mode, + "coordinate_transformation_mode": coordinate_transformation_mode, + "nearest_mode": nearest_mode, + "antialias": antialias, + "cube_coeff": cube_coeff, + } + + attrs["pads_begin"] = [] if pads_begin is None else pads_begin + attrs["pads_end"] = [] if pads_end is None else pads_end + + inputs = as_nodes(image, scales_or_sizes) if axes is None else as_nodes(image, scales_or_sizes, axes) + + return _get_node_factory_opset11().create("Interpolate", inputs, attrs) diff --git a/src/bindings/python/src/compatibility/ngraph/utils/node_factory.py b/src/bindings/python/src/compatibility/ngraph/utils/node_factory.py index 6aa951a65b66b7..0e3d2cc09cecc2 100644 --- a/src/bindings/python/src/compatibility/ngraph/utils/node_factory.py +++ b/src/bindings/python/src/compatibility/ngraph/utils/node_factory.py @@ -12,7 +12,7 @@ from ngraph.exceptions import UserInputError -DEFAULT_OPSET = "opset10" +DEFAULT_OPSET = "opset11" class NodeFactory(object): diff --git a/src/bindings/python/src/compatibility/pyngraph/node_factory.cpp b/src/bindings/python/src/compatibility/pyngraph/node_factory.cpp index 281525cca95832..2108a7a057bb3c 100644 --- a/src/bindings/python/src/compatibility/pyngraph/node_factory.cpp +++ b/src/bindings/python/src/compatibility/pyngraph/node_factory.cpp @@ -82,7 +82,7 @@ class NodeFactory { return it->second(); } - const ngraph::OpSet& m_opset = ngraph::get_opset10(); + const ngraph::OpSet& m_opset = ngraph::get_opset11(); std::unordered_map> m_variables; }; } // namespace diff --git a/src/bindings/python/src/openvino/runtime/__init__.py b/src/bindings/python/src/openvino/runtime/__init__.py index 3c2937c214ed70..9241819e87135c 100644 --- a/src/bindings/python/src/openvino/runtime/__init__.py +++ b/src/bindings/python/src/openvino/runtime/__init__.py @@ -56,6 +56,7 @@ from openvino.runtime import opset8 from openvino.runtime import opset9 from openvino.runtime import opset10 +from openvino.runtime import opset11 # Import properties API from openvino._pyopenvino import properties @@ -66,19 +67,19 @@ # Extend Node class to support binary operators -Node.__add__ = opset10.add -Node.__sub__ = opset10.subtract -Node.__mul__ = opset10.multiply -Node.__div__ = opset10.divide -Node.__truediv__ = opset10.divide -Node.__radd__ = lambda left, right: opset10.add(right, left) -Node.__rsub__ = lambda left, right: opset10.subtract(right, left) -Node.__rmul__ = lambda left, right: opset10.multiply(right, left) -Node.__rdiv__ = lambda left, right: opset10.divide(right, left) -Node.__rtruediv__ = lambda left, right: opset10.divide(right, left) -Node.__eq__ = opset10.equal -Node.__ne__ = opset10.not_equal -Node.__lt__ = opset10.less -Node.__le__ = opset10.less_equal -Node.__gt__ = opset10.greater -Node.__ge__ = opset10.greater_equal +Node.__add__ = opset11.add +Node.__sub__ = opset11.subtract +Node.__mul__ = opset11.multiply +Node.__div__ = opset11.divide +Node.__truediv__ = opset11.divide +Node.__radd__ = lambda left, right: opset11.add(right, left) +Node.__rsub__ = lambda left, right: opset11.subtract(right, left) +Node.__rmul__ = lambda left, right: opset11.multiply(right, left) +Node.__rdiv__ = lambda left, right: opset11.divide(right, left) +Node.__rtruediv__ = lambda left, right: 
opset11.divide(right, left) +Node.__eq__ = opset11.equal +Node.__ne__ = opset11.not_equal +Node.__lt__ = opset11.less +Node.__le__ = opset11.less_equal +Node.__gt__ = opset11.greater +Node.__ge__ = opset11.greater_equal diff --git a/src/bindings/python/src/openvino/runtime/opset11/__init__.py b/src/bindings/python/src/openvino/runtime/opset11/__init__.py new file mode 100644 index 00000000000000..79c7068bf83d87 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset11/__init__.py @@ -0,0 +1,178 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.runtime.opset1.ops import absolute +from openvino.runtime.opset1.ops import absolute as abs +from openvino.runtime.opset1.ops import acos +from openvino.runtime.opset4.ops import acosh +from openvino.runtime.opset8.ops import adaptive_avg_pool +from openvino.runtime.opset8.ops import adaptive_max_pool +from openvino.runtime.opset1.ops import add +from openvino.runtime.opset1.ops import asin +from openvino.runtime.opset4.ops import asinh +from openvino.runtime.opset3.ops import assign +from openvino.runtime.opset1.ops import atan +from openvino.runtime.opset4.ops import atanh +from openvino.runtime.opset1.ops import avg_pool +from openvino.runtime.opset5.ops import batch_norm_inference +from openvino.runtime.opset2.ops import batch_to_space +from openvino.runtime.opset1.ops import binary_convolution +from openvino.runtime.opset3.ops import broadcast +from openvino.runtime.opset3.ops import bucketize +from openvino.runtime.opset1.ops import ceiling +from openvino.runtime.opset1.ops import ceiling as ceil +from openvino.runtime.opset1.ops import clamp +from openvino.runtime.opset1.ops import concat +from openvino.runtime.opset1.ops import constant +from openvino.runtime.opset1.ops import convert +from openvino.runtime.opset1.ops import convert_like +from openvino.runtime.opset1.ops import convolution +from openvino.runtime.opset1.ops import convolution_backprop_data +from openvino.runtime.opset1.ops import cos +from openvino.runtime.opset1.ops import cosh +from openvino.runtime.opset1.ops import ctc_greedy_decoder +from openvino.runtime.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.runtime.opset4.ops import ctc_loss +from openvino.runtime.opset3.ops import cum_sum +from openvino.runtime.opset3.ops import cum_sum as cumsum +from openvino.runtime.opset8.ops import deformable_convolution +from openvino.runtime.opset1.ops import deformable_psroi_pooling +from openvino.runtime.opset1.ops import depth_to_space +from openvino.runtime.opset8.ops import detection_output +from openvino.runtime.opset7.ops import dft +from openvino.runtime.opset1.ops import divide +from openvino.runtime.opset7.ops import einsum +from openvino.runtime.opset1.ops import elu +from openvino.runtime.opset3.ops import embedding_bag_offsets_sum +from openvino.runtime.opset3.ops import embedding_bag_packed_sum +from openvino.runtime.opset3.ops import embedding_segments_sum +from openvino.runtime.opset3.ops import extract_image_patches +from openvino.runtime.opset1.ops import equal +from openvino.runtime.opset1.ops import erf +from openvino.runtime.opset1.ops import exp +from openvino.runtime.opset9.ops import eye +from openvino.runtime.opset1.ops import fake_quantize +from openvino.runtime.opset1.ops import floor +from openvino.runtime.opset1.ops import floor_mod +from openvino.runtime.opset8.ops import gather +from openvino.runtime.opset6.ops import gather_elements +from 
openvino.runtime.opset8.ops import gather_nd +from openvino.runtime.opset1.ops import gather_tree +from openvino.runtime.opset7.ops import gelu +from openvino.runtime.opset9.ops import generate_proposals +from openvino.runtime.opset1.ops import greater +from openvino.runtime.opset1.ops import greater_equal +from openvino.runtime.opset9.ops import grid_sample +from openvino.runtime.opset1.ops import grn +from openvino.runtime.opset1.ops import group_convolution +from openvino.runtime.opset1.ops import group_convolution_backprop_data +from openvino.runtime.opset3.ops import gru_cell +from openvino.runtime.opset5.ops import gru_sequence +from openvino.runtime.opset1.ops import hard_sigmoid +from openvino.runtime.opset5.ops import hsigmoid +from openvino.runtime.opset4.ops import hswish +from openvino.runtime.opset7.ops import idft +from openvino.runtime.opset8.ops import if_op +from openvino.runtime.opset11.ops import interpolate +from openvino.runtime.opset9.ops import irdft +from openvino.runtime.opset10.ops import is_finite +from openvino.runtime.opset10.ops import is_inf +from openvino.runtime.opset10.ops import is_nan +from openvino.runtime.opset8.ops import i420_to_bgr +from openvino.runtime.opset8.ops import i420_to_rgb +from openvino.runtime.opset1.ops import less +from openvino.runtime.opset1.ops import less_equal +from openvino.runtime.opset1.ops import log +from openvino.runtime.opset1.ops import logical_and +from openvino.runtime.opset1.ops import logical_not +from openvino.runtime.opset1.ops import logical_or +from openvino.runtime.opset1.ops import logical_xor +from openvino.runtime.opset5.ops import log_softmax +from openvino.runtime.opset5.ops import loop +from openvino.runtime.opset1.ops import lrn +from openvino.runtime.opset4.ops import lstm_cell +from openvino.runtime.opset5.ops import lstm_sequence +from openvino.runtime.opset1.ops import matmul +from openvino.runtime.opset8.ops import matrix_nms +from openvino.runtime.opset8.ops import max_pool +from openvino.runtime.opset1.ops import maximum +from openvino.runtime.opset1.ops import minimum +from openvino.runtime.opset4.ops import mish +from openvino.runtime.opset1.ops import mod +from openvino.runtime.opset9.ops import multiclass_nms +from openvino.runtime.opset1.ops import multiply +from openvino.runtime.opset6.ops import mvn +from openvino.runtime.opset1.ops import negative +from openvino.runtime.opset9.ops import non_max_suppression +from openvino.runtime.opset3.ops import non_zero +from openvino.runtime.opset1.ops import normalize_l2 +from openvino.runtime.opset1.ops import not_equal +from openvino.runtime.opset8.ops import nv12_to_bgr +from openvino.runtime.opset8.ops import nv12_to_rgb +from openvino.runtime.opset1.ops import one_hot +from openvino.runtime.opset1.ops import pad +from openvino.runtime.opset1.ops import parameter +from openvino.runtime.opset1.ops import power +from openvino.runtime.opset1.ops import prelu +from openvino.runtime.opset8.ops import prior_box +from openvino.runtime.opset1.ops import prior_box_clustered +from openvino.runtime.opset1.ops import psroi_pooling +from openvino.runtime.opset4.ops import proposal +from openvino.runtime.opset1.ops import range +from openvino.runtime.opset8.ops import random_uniform +from openvino.runtime.opset9.ops import rdft +from openvino.runtime.opset3.ops import read_value +from openvino.runtime.opset4.ops import reduce_l1 +from openvino.runtime.opset4.ops import reduce_l2 +from openvino.runtime.opset1.ops import reduce_logical_and +from 
openvino.runtime.opset1.ops import reduce_logical_or +from openvino.runtime.opset1.ops import reduce_max +from openvino.runtime.opset1.ops import reduce_mean +from openvino.runtime.opset1.ops import reduce_min +from openvino.runtime.opset1.ops import reduce_prod +from openvino.runtime.opset1.ops import reduce_sum +from openvino.runtime.opset1.ops import region_yolo +from openvino.runtime.opset2.ops import reorg_yolo +from openvino.runtime.opset1.ops import relu +from openvino.runtime.opset1.ops import reshape +from openvino.runtime.opset1.ops import result +from openvino.runtime.opset1.ops import reverse_sequence +from openvino.runtime.opset3.ops import rnn_cell +from openvino.runtime.opset5.ops import rnn_sequence +from openvino.runtime.opset9.ops import roi_align +from openvino.runtime.opset2.ops import roi_pooling +from openvino.runtime.opset7.ops import roll +from openvino.runtime.opset5.ops import round +from openvino.runtime.opset3.ops import scatter_elements_update +from openvino.runtime.opset3.ops import scatter_update +from openvino.runtime.opset1.ops import select +from openvino.runtime.opset1.ops import selu +from openvino.runtime.opset3.ops import shape_of +from openvino.runtime.opset3.ops import shuffle_channels +from openvino.runtime.opset1.ops import sigmoid +from openvino.runtime.opset1.ops import sign +from openvino.runtime.opset1.ops import sin +from openvino.runtime.opset1.ops import sinh +from openvino.runtime.opset8.ops import slice +from openvino.runtime.opset8.ops import softmax +from openvino.runtime.opset4.ops import softplus +from openvino.runtime.opset9.ops import softsign +from openvino.runtime.opset2.ops import space_to_batch +from openvino.runtime.opset1.ops import space_to_depth +from openvino.runtime.opset1.ops import split +from openvino.runtime.opset1.ops import sqrt +from openvino.runtime.opset1.ops import squared_difference +from openvino.runtime.opset1.ops import squeeze +from openvino.runtime.opset1.ops import strided_slice +from openvino.runtime.opset1.ops import subtract +from openvino.runtime.opset4.ops import swish +from openvino.runtime.opset1.ops import tan +from openvino.runtime.opset1.ops import tanh +from openvino.runtime.opset1.ops import tensor_iterator +from openvino.runtime.opset1.ops import tile +from openvino.runtime.opset3.ops import topk +from openvino.runtime.opset1.ops import transpose +from openvino.runtime.opset10.ops import unique +from openvino.runtime.opset1.ops import unsqueeze +from openvino.runtime.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset11/ops.py b/src/bindings/python/src/openvino/runtime/opset11/ops.py new file mode 100644 index 00000000000000..2a54db0069ebd1 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset11/ops.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""Factory functions for all openvino ops.""" +from functools import partial +from typing import List, Optional + +from openvino.runtime import Node +from openvino.runtime.opset_utils import _get_node_factory +from openvino.runtime.utils.decorators import nameable_op +from openvino.runtime.utils.types import ( + NodeInput, + as_nodes, +) + +_get_node_factory_opset11 = partial(_get_node_factory, "opset11") + +# -------------------------------------------- ops ------------------------------------------------ + + +@nameable_op +def interpolate( + image: NodeInput, + scales_or_sizes: NodeInput, + mode: str, + 
shape_calculation_mode: str, + pads_begin: Optional[List[int]] = None, + pads_end: Optional[List[int]] = None, + coordinate_transformation_mode: str = "half_pixel", + nearest_mode: str = "round_prefer_floor", + antialias: bool = False, + cube_coeff: float = -0.75, + axes: Optional[NodeInput] = None, + name: Optional[str] = None, +) -> Node: + """Performs the interpolation of the input tensor. + + :param image: The node providing the input tensor with data for interpolation. + :param scales_or_sizes: + 1D tensor providing information used to calculate the output shape + of the operation. It might contain floats (scales) or integers (sizes). + :param mode: Specifies the type of interpolation. Possible values are: nearest, linear, + linear_onnx, cubic, bilinear_pillow, bicubic_pillow. + :param shape_calculation_mode: + Specifies how the scales_or_sizes input should be interpreted. + :param pads_begin: Specifies the number of pixels to add to the beginning of the image + being interpolated. Default is None. + :param pads_end: Specifies the number of pixels to add to the end of the image being + interpolated. Default is None. + :param coordinate_transformation_mode: + Specifies how to transform the coordinate in the resized tensor to the + coordinate in the original tensor. Default is "half_pixel". + :param nearest_mode: Specifies the rounding mode; used only when + mode == nearest. Default is "round_prefer_floor". + :param antialias: Specifies whether to perform anti-aliasing. Default is False. + :param cube_coeff: Specifies the parameter a for cubic interpolation. Default is -0.75. + :param axes: 1D tensor specifying dimension indices where interpolation is applied. + The default is None. + :param name: Optional name for the output node. The default is None. + :return: Node representing the interpolation operation.
+ """ + attrs = { + "mode": mode, + "shape_calculation_mode": shape_calculation_mode, + "coordinate_transformation_mode": coordinate_transformation_mode, + "nearest_mode": nearest_mode, + "antialias": antialias, + "cube_coeff": cube_coeff, + } + + attrs["pads_begin"] = [] if pads_begin is None else pads_begin + attrs["pads_end"] = [] if pads_end is None else pads_end + + inputs = as_nodes(image, scales_or_sizes) if axes is None else as_nodes(image, scales_or_sizes, axes) + + return _get_node_factory_opset11().create("Interpolate", inputs, attrs) diff --git a/src/bindings/python/src/openvino/runtime/utils/node_factory.py b/src/bindings/python/src/openvino/runtime/utils/node_factory.py index a89c05ab0cffb6..f952bcf90fb4dc 100644 --- a/src/bindings/python/src/openvino/runtime/utils/node_factory.py +++ b/src/bindings/python/src/openvino/runtime/utils/node_factory.py @@ -13,7 +13,7 @@ from openvino.runtime.exceptions import UserInputError -DEFAULT_OPSET = "opset10" +DEFAULT_OPSET = "opset11" class NodeFactory(object): diff --git a/src/bindings/python/src/pyopenvino/graph/node_factory.cpp b/src/bindings/python/src/pyopenvino/graph/node_factory.cpp index bdf7c982b3e8fd..9aed62c2e00a17 100644 --- a/src/bindings/python/src/pyopenvino/graph/node_factory.cpp +++ b/src/bindings/python/src/pyopenvino/graph/node_factory.cpp @@ -79,7 +79,7 @@ class NodeFactory { return it->second(); } - const ov::OpSet& m_opset = ov::get_opset10(); + const ov::OpSet& m_opset = ov::get_opset11(); std::unordered_map> m_variables; }; } // namespace diff --git a/src/bindings/python/tests/test_graph/test_create_op.py b/src/bindings/python/tests/test_graph/test_create_op.py index 120f07562d4457..f76ed01641a6d5 100644 --- a/src/bindings/python/tests/test_graph/test_create_op.py +++ b/src/bindings/python/tests/test_graph/test_create_op.py @@ -11,7 +11,8 @@ import openvino.runtime.opset1 as ov_opset1 import openvino.runtime.opset5 as ov_opset5 -import openvino.runtime.opset10 as ov +import openvino.runtime.opset10 as ov_opset10 +import openvino.runtime.opset11 as ov from openvino.runtime import Type np_types = [np.float32, np.int32] @@ -2145,8 +2146,29 @@ def test_interpolate_opset10(dtype, expected_shape, shape_calculation_mode): axes = [2, 3] mode = "cubic" - node = ov.interpolate(image=image_node, output_shape=output_shape, scales=scales, - axes=axes, mode=mode, + node = ov_opset10.interpolate(image=image_node, output_shape=output_shape, scales=scales, + axes=axes, mode=mode, shape_calculation_mode=shape_calculation_mode) + assert node.get_type_name() == "Interpolate" + assert node.get_output_size() == 1 + assert list(node.get_output_shape(0)) == expected_shape + + +@pytest.mark.parametrize( + ("expected_shape", "shape_calculation_mode", "input_value"), + [ + ([1, 3, 64, 64], "scales", np.array([1 / 16, 1 / 16], dtype=np.float32)), + ([1, 3, 256, 256], "sizes", np.array([256, 256], dtype=np.int32)), + ], +) +@pytest.mark.parametrize("dtype", np_types) +def test_interpolate_opset11(dtype, expected_shape, shape_calculation_mode, input_value): + + image_shape = [1, 3, 1024, 1024] + image_node = ov.parameter(image_shape, dtype, name="Image") + axes = [2, 3] + mode = "bilinear_pillow" + + node = ov.interpolate(image=image_node, scales_or_sizes=input_value, axes=axes, mode=mode, shape_calculation_mode=shape_calculation_mode) assert node.get_type_name() == "Interpolate" assert node.get_output_size() == 1 diff --git a/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py 
b/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py index 7b084bc77a81d0..09fda90564bd01 100644 --- a/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py +++ b/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py @@ -11,6 +11,7 @@ import ngraph.opset1 as ng_opset1 import ngraph.opset5 as ng_opset5 import ngraph.opset10 as ng_opset10 +import ngraph.opset11 as ng_opset11 from ngraph.utils.types import make_constant_node from ngraph.exceptions import UserInputError from ngraph.impl import Type @@ -2259,13 +2260,33 @@ def test_interpolate_opset10(dtype, expected_shape, shape_calculation_mode): mode = "cubic" node = ng_opset10.interpolate(image=image_node, output_shape=output_shape, scales=scales, - axes=axes, - mode=mode, shape_calculation_mode=shape_calculation_mode) + axes=axes,mode=mode, shape_calculation_mode=shape_calculation_mode) assert node.get_type_name() == "Interpolate" assert node.get_output_size() == 1 assert list(node.get_output_shape(0)) == expected_shape +@pytest.mark.parametrize( + ("expected_shape", "shape_calculation_mode", "input_value"), + [ + ([1, 3, 64, 64], "scales", np.array([1 / 16, 1 / 16], dtype=np.float32)), + ([1, 3, 256, 256], "sizes", np.array([256, 256], dtype=np.int32)), + ], +) +@pytest.mark.parametrize("dtype", np_types) +def test_interpolate_opset11(dtype, expected_shape, shape_calculation_mode, input_value): + + image_shape = [1, 3, 1024, 1024] + image_node = ng.parameter(image_shape, dtype, name="Image") + axes = [2, 3] + mode = "bilinear_pillow" + + node = ng_opset11.interpolate(image=image_node, scales_or_sizes=input_value, axes=axes, mode=mode, + shape_calculation_mode=shape_calculation_mode) + assert node.get_type_name() == "Interpolate" + assert node.get_output_size() == 1 + assert list(node.get_output_shape(0)) == expected_shape + def test_is_finite_opset10(): input_shape = [1, 2, 3, 4] input_node = ng.parameter(input_shape, np.float32, name="InputData") From 6bf2fe11aeb891eb66db37932df281a982f90369 Mon Sep 17 00:00:00 2001 From: Kelvin Choi Date: Thu, 23 Mar 2023 05:00:29 +0900 Subject: [PATCH 045/296] [GPU] Need to exclude fused mem_dep from shape_infer_dep (#16300) --- .../src/graph/include/program_node.h | 5 ++ .../intel_gpu/src/graph/primitive_inst.cpp | 4 + .../intel_gpu/src/graph/program_node.cpp | 16 ++++ .../test_cases/deconvolution_gpu_test.cpp | 73 +++++++++++++++++++ 4 files changed, 98 insertions(+) diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index 1f72b49bd8b5d1..4d353c270706ef 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -91,6 +91,9 @@ struct program_node { if (u->get_dependencies().size() <= dep_idx) { continue; } + if (u->is_fused_dep(dep_idx)) { + continue; + } if (u->get_dependency(dep_idx).get_unique_id() == unique_id) { return true; } @@ -99,6 +102,8 @@ struct program_node { return false; } + bool is_fused_dep(size_t dep_idx) const; + std::map get_const_memory_deps() const; virtual std::unique_ptr get_kernel_impl_params() const { diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index aae9e844a4eeb6..4acd2d02c808e6 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -190,6 +190,10 @@ void primitive_inst::update_shape() { } auto& dep = _node->get_dependency(i); auto dep_id = 
dep.id(); + // exclude fused node from memory_deps + if (_node->is_fused_dep(i)) { + break; + } // Events may be not created for in-order queue, so take them for OOO queue only if (_network.has_event(dep.id()) && queue_type == QueueTypes::out_of_order) { dependencies_events.push_back(_network.get_primitive_event(dep_id)); diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 5b66ad11a25149..70cc56d4420f0a 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -378,6 +378,16 @@ bool program_node::has_padded_dependency() const { }); } +bool program_node::is_fused_dep(size_t dep_idx) const { + for (auto fused : get_fused_primitives()) { + if (dep_idx >= fused.dep_start_idx) { + return true; + } + } + + return false; +} + std::map program_node::get_const_memory_deps() const { std::map mem_deps; for (auto& i : get_shape_infer_dependencies()) { @@ -385,6 +395,12 @@ std::map program_node::get_const_memory_deps() const { if (i >= get_dependencies().size()) continue; + // exclude fused dependency + if (is_fused_dep(i)) { + continue; + } + + // constant type only auto& dep = get_dependency(i); if (dep.is_type()) { mem_deps.insert({i, dep.as().get_attached_memory_ptr()}); diff --git a/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp index a218640354ae95..1f47df7623195e 100644 --- a/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/deconvolution_gpu_test.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -258,6 +259,78 @@ TYPED_TEST(deconvolution_basic, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) { } } + +TYPED_TEST(deconvolution_basic, no_bias_basic_wsiz2x2_in2x2x1x1_nopad_exclude_fused_mem_dep) { + // Filter : 2x2 + // Input : 2x2 + // Output : 3x3 + // + // Input: + // 8 0.5 + // 6 9 + // + // Filter + // -2 0.5 + // 3.5 1.5 + // + // no bias + // + // + // Output: + // -16.f, 3.f, 0.25f, + // 16.f, -1.25f, 5.25f, + // 21.f, 40.5f, 13.5f + + auto& engine = get_test_engine(); + + auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::oiyx,{ 1, 1, 2, 2 } }); + auto elt_input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 9, 1, 1, 1 } }); + auto in_layout = layout(ov::PartialShape::dynamic(4), data_types::f32, format::yxfb); + + set_values(input, { 8.f, 0.5f, 6.f, 9.f }); + set_values(weights, { -2.0f, 0.5f, 3.5f, 1.5f }); + set_values(elt_input, { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f }); + + topology topology( + input_layout("input", in_layout), + input_layout("elt_input", elt_input->get_layout()), + reorder("reordered_input", input_info("input"), this->input_layout_format, data_types::f32), + reorder("reordered_elt_input", input_info("elt_input"), format::bfyx, data_types::f32), + data("weights", weights), + deconvolution("deconv", input_info("reordered_input"), { "weights" }), + eltwise("elt_scale", { input_info("deconv"), input_info("reordered_elt_input") }, eltwise_mode::prod), + reorder("plane_output", input_info("elt_scale"), format::bfyx, data_types::f32) + ); + + ExecutionConfig config; + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + + network network(engine, topology, config); + 
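+ // elt_scale is expected to be fused into deconv when optimize_data is enabled, + // so reordered_elt_input becomes a fused dependency of deconv that update_shape() + // must not treat as a shape-infer memory dependency.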
network.set_input_data("input", input); + network.set_input_data("elt_input", elt_input); + + auto outputs = network.execute(); + ASSERT_EQ(outputs.size(), size_t(1)); + ASSERT_EQ(outputs.begin()->first, "plane_output"); + + auto output_prim = outputs.begin()->second.get_memory(); + + cldnn::mem_lock output_ptr (output_prim, get_test_stream()); + + std::vector expected_output_vec = { + -16.f, 3.f, 0.25f, + 16.f, -1.25f, 5.25f, + 21.f, 40.5f, 13.5f + }; + + for (unsigned int i = 0; i < expected_output_vec.size(); i++) + { + ASSERT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]); + } +} + TYPED_TEST(deconvolution_basic, basic_wsiz2x2_in2x2x1x1_nopad_bfyx) { // Filter : 2x2 // Input : 2x2 // Output : 3x3 From a205c675db9d65f21427c73f6b8c38c7a2ec616d Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 23 Mar 2023 08:32:36 +0400 Subject: [PATCH 046/296] Fix leftovers after removing plugins.xml (#16487) * Fixed comments * Rename ie_plugins to ov_plugins * Remove dependency from tests --- .../plugins/create_plugins_hpp.cmake | 34 +++++++++---------- cmake/developer_package/plugins/plugins.cmake | 26 +++++++------- .../developer_package/plugins/plugins.hpp.in | 4 +-- cmake/extra_modules.cmake | 4 +-- scripts/setupvars/setupvars.sh | 12 +++---- src/common/util/CMakeLists.txt | 9 ++--- src/common/util/src/file_util.cpp | 3 +- src/inference/CMakeLists.txt | 2 +- src/inference/src/core.cpp | 1 - src/inference/src/dev/core_impl.cpp | 9 ++--- src/inference/src/dev/core_impl.hpp | 2 -- src/inference/src/ie_core.cpp | 1 - src/inference/tests/unit/CMakeLists.txt | 3 -- 13 files changed, 48 insertions(+), 62 deletions(-) diff --git a/cmake/developer_package/plugins/create_plugins_hpp.cmake b/cmake/developer_package/plugins/create_plugins_hpp.cmake index 10adcac6c28f1f..1fedf858ce58ca 100644 --- a/cmake/developer_package/plugins/create_plugins_hpp.cmake +++ b/cmake/developer_package/plugins/create_plugins_hpp.cmake @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # -foreach(var IE_DEVICE_MAPPING OV_DYNAMIC IE_PLUGINS_HPP_HEADER IE_PLUGINS_HPP_HEADER_IN) +foreach(var OV_DEVICE_MAPPING BUILD_SHARED_LIBS OV_PLUGINS_HPP_HEADER OV_PLUGINS_HPP_HEADER_IN) if(NOT DEFINED ${var}) message(FATAL_ERROR "${var} is required, but not defined") endif() @@ -10,11 +10,11 @@ endforeach() # configure variables -set(IE_PLUGINS_DECLARATIONS "") -set(IE_PLUGINS_MAP_DEFINITION +set(OV_PLUGINS_DECLARATIONS "") +set(OV_PLUGINS_MAP_DEFINITION " static const std::map plugins_hpp = {") -foreach(dev_map IN LISTS IE_DEVICE_MAPPING) +foreach(dev_map IN LISTS OV_DEVICE_MAPPING) string(REPLACE ":" ";" dev_map "${dev_map}") list(GET dev_map 0 mapped_dev_name) list(GET dev_map 1 actual_dev_name) @@ -35,30 +35,30 @@ foreach(dev_map IN LISTS IE_DEVICE_MAPPING) set(dev_config "${dev_config}}") - if(NOT OV_DYNAMIC) + if(NOT BUILD_SHARED_LIBS) # common - set(_IE_CREATE_PLUGIN_FUNC "CreatePluginEngine${actual_dev_name}") - set(_IE_CREATE_EXTENSION_FUNC "CreateExtensionShared${actual_dev_name}") + set(_OV_CREATE_PLUGIN_FUNC "CreatePluginEngine${actual_dev_name}") + set(_OV_CREATE_EXTENSION_FUNC "CreateExtensionShared${actual_dev_name}") # declarations - set(IE_PLUGINS_DECLARATIONS "${IE_PLUGINS_DECLARATIONS} - IE_DEFINE_PLUGIN_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_PLUGIN_FUNC});") + set(OV_PLUGINS_DECLARATIONS "${OV_PLUGINS_DECLARATIONS} + IE_DEFINE_PLUGIN_CREATE_FUNCTION_DECLARATION(${_OV_CREATE_PLUGIN_FUNC});") if(${actual_dev_name}_AS_EXTENSION) - set(IE_PLUGINS_DECLARATIONS "${IE_PLUGINS_DECLARATIONS} - 
IE_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(${_IE_CREATE_EXTENSION_FUNC});") + set(OV_PLUGINS_DECLARATIONS "${OV_PLUGINS_DECLARATIONS} + IE_DEFINE_EXTENSION_CREATE_FUNCTION_DECLARATION(${_OV_CREATE_EXTENSION_FUNC});") else() - set(_IE_CREATE_EXTENSION_FUNC "nullptr") + set(_OV_CREATE_EXTENSION_FUNC "nullptr") endif() - set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} - { \"${mapped_dev_name}\", Value { ${_IE_CREATE_PLUGIN_FUNC}, ${_IE_CREATE_EXTENSION_FUNC}, ${dev_config} } },") + set(OV_PLUGINS_MAP_DEFINITION "${OV_PLUGINS_MAP_DEFINITION} + { \"${mapped_dev_name}\", Value { ${_OV_CREATE_PLUGIN_FUNC}, ${_OV_CREATE_EXTENSION_FUNC}, ${dev_config} } },") else() - set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} + set(OV_PLUGINS_MAP_DEFINITION "${OV_PLUGINS_MAP_DEFINITION} { \"${mapped_dev_name}\", Value { \"${actual_dev_name}\", ${dev_config} } },") endif() endforeach() -set(IE_PLUGINS_MAP_DEFINITION "${IE_PLUGINS_MAP_DEFINITION} +set(OV_PLUGINS_MAP_DEFINITION "${OV_PLUGINS_MAP_DEFINITION} };\n") -configure_file("${IE_PLUGINS_HPP_HEADER_IN}" "${IE_PLUGINS_HPP_HEADER}" @ONLY) +configure_file("${OV_PLUGINS_HPP_HEADER_IN}" "${OV_PLUGINS_HPP_HEADER}" @ONLY) diff --git a/cmake/developer_package/plugins/plugins.cmake b/cmake/developer_package/plugins/plugins.cmake index 7f00cc70269861..0d8db5561e5ada 100644 --- a/cmake/developer_package/plugins/plugins.cmake +++ b/cmake/developer_package/plugins/plugins.cmake @@ -281,9 +281,9 @@ function(ie_target_link_plugins TARGET_NAME) endfunction() # -# ie_generate_plugins_hpp() +# ov_generate_plugins_hpp() # -function(ie_generate_plugins_hpp) +function(ov_generate_plugins_hpp) set(device_mapping) set(device_configs) set(as_extension) @@ -321,22 +321,22 @@ function(ie_generate_plugins_hpp) endif() endforeach() - # add plugins to libraries including ie_plugins.hpp + # add plugins to libraries including ov_plugins.hpp ie_target_link_plugins(openvino) if(TARGET inference_engine_s) ie_target_link_plugins(inference_engine_s) endif() - set(ie_plugins_hpp "${CMAKE_BINARY_DIR}/src/inference/ie_plugins.hpp") + set(ov_plugins_hpp "${CMAKE_BINARY_DIR}/src/inference/ov_plugins.hpp") set(plugins_hpp_in "${IEDevScripts_DIR}/plugins/plugins.hpp.in") - add_custom_command(OUTPUT "${ie_plugins_hpp}" + add_custom_command(OUTPUT "${ov_plugins_hpp}" COMMAND "${CMAKE_COMMAND}" - -D "IE_DEVICE_MAPPING=${device_mapping}" - -D "OV_DYNAMIC=${BUILD_SHARED_LIBS}" - -D "IE_PLUGINS_HPP_HEADER_IN=${plugins_hpp_in}" - -D "IE_PLUGINS_HPP_HEADER=${ie_plugins_hpp}" + -D "BUILD_SHARED_LIBS=${BUILD_SHARED_LIBS}" + -D "OV_DEVICE_MAPPING=${device_mapping}" + -D "OV_PLUGINS_HPP_HEADER_IN=${plugins_hpp_in}" + -D "OV_PLUGINS_HPP_HEADER=${ov_plugins_hpp}" ${device_configs} ${as_extension} -P "${IEDevScripts_DIR}/plugins/create_plugins_hpp.cmake" @@ -344,13 +344,13 @@ function(ie_generate_plugins_hpp) "${plugins_hpp_in}" "${IEDevScripts_DIR}/plugins/create_plugins_hpp.cmake" COMMENT - "Generate ie_plugins.hpp for build" + "Generate ov_plugins.hpp for build" VERBATIM) # for some reason dependency on source files does not work # so, we have to use explicit target and make it dependency for inference_engine - add_custom_target(_ie_plugins_hpp DEPENDS ${ie_plugins_hpp}) - add_dependencies(inference_engine_obj _ie_plugins_hpp) + add_custom_target(_ov_plugins_hpp DEPENDS ${ov_plugins_hpp}) + add_dependencies(inference_engine_obj _ov_plugins_hpp) # add dependency for object files get_target_property(sources inference_engine_obj SOURCES) @@ -367,5 +367,5 @@ 
function(ie_generate_plugins_hpp) endforeach() # add dependency on header file generation for all inference_engine source files - set_source_files_properties(${all_sources} PROPERTIES OBJECT_DEPENDS ${ie_plugins_hpp}) + set_source_files_properties(${all_sources} PROPERTIES OBJECT_DEPENDS ${ov_plugins_hpp}) endfunction() diff --git a/cmake/developer_package/plugins/plugins.hpp.in b/cmake/developer_package/plugins/plugins.hpp.in index d351bcfb76f3d0..224f77c8cb980b 100644 --- a/cmake/developer_package/plugins/plugins.hpp.in +++ b/cmake/developer_package/plugins/plugins.hpp.in @@ -11,7 +11,7 @@ #include "cpp_interfaces/interface/ie_iplugin_internal.hpp" -@IE_PLUGINS_DECLARATIONS@ +@OV_PLUGINS_DECLARATIONS@ struct Value { InferenceEngine::CreatePluginEngineFunc * m_create_plugin_func; @@ -33,6 +33,6 @@ using PluginsStaticRegistry = std::map; inline const std::map getCompiledPluginsRegistry() { -@IE_PLUGINS_MAP_DEFINITION@ +@OV_PLUGINS_MAP_DEFINITION@ return plugins_hpp; } diff --git a/cmake/extra_modules.cmake b/cmake/extra_modules.cmake index 7b843341c11159..a5b1cd22e82ccf 100644 --- a/cmake/extra_modules.cmake +++ b/cmake/extra_modules.cmake @@ -169,9 +169,9 @@ ov_generate_dev_package_config() # with all imported developer targets register_extra_modules() -# for static libraries case we need to generate final ie_plugins.hpp +# for static libraries case we need to generate final ov_plugins.hpp # with all the information about plugins -ie_generate_plugins_hpp() +ov_generate_plugins_hpp() # used for static build ov_generate_frontends_hpp() diff --git a/scripts/setupvars/setupvars.sh b/scripts/setupvars/setupvars.sh index 41789160e69a59..3cdf4987d732b7 100755 --- a/scripts/setupvars/setupvars.sh +++ b/scripts/setupvars/setupvars.sh @@ -36,15 +36,15 @@ if [ -e "$INSTALLDIR/runtime" ]; then export OpenVINO_DIR=$INSTALLDIR/runtime/cmake system_type=$(ls "$INSTALLDIR/runtime/lib/") - IE_PLUGINS_PATH=$INSTALLDIR/runtime/lib/$system_type + OV_PLUGINS_PATH=$INSTALLDIR/runtime/lib/$system_type if [[ "$OSTYPE" == "darwin"* ]]; then - export DYLD_LIBRARY_PATH=${IE_PLUGINS_PATH}/Release:${IE_PLUGINS_PATH}/Debug${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH} - export LD_LIBRARY_PATH=${IE_PLUGINS_PATH}/Release:${IE_PLUGINS_PATH}/Debug${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} - export PKG_CONFIG_PATH=${IE_PLUGINS_PATH}/Release/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH} + export DYLD_LIBRARY_PATH=${OV_PLUGINS_PATH}/Release:${OV_PLUGINS_PATH}/Debug${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH} + export LD_LIBRARY_PATH=${OV_PLUGINS_PATH}/Release:${OV_PLUGINS_PATH}/Debug${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} + export PKG_CONFIG_PATH=${OV_PLUGINS_PATH}/Release/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH} else - export LD_LIBRARY_PATH=${IE_PLUGINS_PATH}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} - export PKG_CONFIG_PATH=$IE_PLUGINS_PATH/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH} + export LD_LIBRARY_PATH=${OV_PLUGINS_PATH}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} + export PKG_CONFIG_PATH=$OV_PLUGINS_PATH/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH} fi if [ -e "$INSTALLDIR/runtime/3rdparty/tbb" ]; then diff --git a/src/common/util/CMakeLists.txt b/src/common/util/CMakeLists.txt index 160be0259b8b30..4b538aab7c7461 100644 --- a/src/common/util/CMakeLists.txt +++ b/src/common/util/CMakeLists.txt @@ -24,12 +24,9 @@ endif() # Create named folders for the sources within the .vcproj # Empty name lists them directly under the .vcproj -set(MIXED_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/src/file_util.cpp") - -set_property(SOURCE ${MIXED_SRC} - 
APPEND PROPERTY INCLUDE_DIRECTORIES - $) +set_source_files_properties( + "${CMAKE_CURRENT_SOURCE_DIR}/src/file_util.cpp" + PROPERTIES COMPILE_DEFINITIONS OpenVINO_VERSION="${OpenVINO_VERSION}") source_group("src" FILES ${LIBRARY_SRC}) source_group("include" FILES ${PUBLIC_HEADERS}) diff --git a/src/common/util/src/file_util.cpp b/src/common/util/src/file_util.cpp index f39f2dd3c677d9..0de2ab70d377b4 100644 --- a/src/common/util/src/file_util.cpp +++ b/src/common/util/src/file_util.cpp @@ -12,7 +12,6 @@ #include #include -#include "openvino/core/version.hpp" #include "openvino/util/common_util.hpp" #ifdef _WIN32 @@ -512,7 +511,7 @@ ov::util::FilePath ov::util::get_compiled_plugin_path(const std::string& plugin) // 1. in openvino-X.Y.Z folder relative to libopenvino.so std::ostringstream str; - str << "openvino-" << OPENVINO_VERSION_MAJOR << "." << OPENVINO_VERSION_MINOR << "." << OPENVINO_VERSION_PATCH; + str << "openvino-" << OpenVINO_VERSION; const auto sub_folder = str.str(); std::string abs_file_path = ov::util::path_join({ov_library_path, sub_folder, plugin}); diff --git a/src/inference/CMakeLists.txt b/src/inference/CMakeLists.txt index ca27952acad912..25d4272b3d3620 100644 --- a/src/inference/CMakeLists.txt +++ b/src/inference/CMakeLists.txt @@ -152,7 +152,7 @@ target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE target_include_directories(${TARGET_NAME}_obj PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src" - # for static ie_plugins.hpp + # for static ov_plugins.hpp "${CMAKE_CURRENT_BINARY_DIR}" # for ie_ir_version.hpp $<$:$> diff --git a/src/inference/src/core.cpp b/src/inference/src/core.cpp index fef2652b275d17..45c9e9665efb17 100644 --- a/src/inference/src/core.cpp +++ b/src/inference/src/core.cpp @@ -9,7 +9,6 @@ #include "dev/converter_utils.hpp" #include "dev/core_impl.hpp" #include "ie_itt.hpp" -#include "ie_plugins.hpp" #include "openvino/runtime/device_id_parser.hpp" #include "so_extension.hpp" diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index d97a89f8f79411..3e696487e9d6a1 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -36,6 +36,7 @@ #include "openvino/util/common_util.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" +#include "ov_plugins.hpp" #include "preprocessing/preprocessing.hpp" #include "xml_parse_utils.h" @@ -316,12 +317,12 @@ void ov::CoreImpl::register_compile_time_plugins() { std::lock_guard lock(get_mutex()); const decltype(::getCompiledPluginsRegistry())& plugins = getCompiledPluginsRegistry(); -#ifdef OPENVINO_STATIC_LIBRARY for (const auto& plugin : plugins) { const auto& deviceName = plugin.first; if (deviceName.find('.') != std::string::npos) { OPENVINO_THROW("Device name must not contain dot '.' 
symbol"); } +#ifdef OPENVINO_STATIC_LIBRARY if (pluginRegistry.find(deviceName) == pluginRegistry.end()) { const auto& value = plugin.second; ov::AnyMap config = any_copy(value.m_default_config); @@ -329,20 +330,16 @@ void ov::CoreImpl::register_compile_time_plugins() { pluginRegistry[deviceName] = desc; add_mutex(deviceName); } - } #else - for (const auto& plugin : plugins) { - const auto& deviceName = plugin.first; const auto& pluginPath = ov::util::get_compiled_plugin_path(plugin.second.m_plugin_path); - if (pluginRegistry.find(deviceName) == pluginRegistry.end() && ov::util::file_exists(pluginPath)) { ov::AnyMap config = any_copy(plugin.second.m_default_config); PluginDescriptor desc{pluginPath, config}; pluginRegistry[deviceName] = desc; add_mutex(deviceName); } - } #endif + } } void ov::CoreImpl::register_plugins_in_registry(const std::string& xml_config_file, const bool& by_abs_path) { diff --git a/src/inference/src/dev/core_impl.hpp b/src/inference/src/dev/core_impl.hpp index 8fe7768dc6c91a..c711e416484050 100644 --- a/src/inference/src/dev/core_impl.hpp +++ b/src/inference/src/dev/core_impl.hpp @@ -15,7 +15,6 @@ #include "ie_cache_manager.hpp" #include "ie_extension.h" #include "ie_icore.hpp" -#include "ie_plugins.hpp" #include "multi-device/multi_device_config.hpp" #include "openvino/core/any.hpp" #include "openvino/core/extension.hpp" @@ -23,7 +22,6 @@ #include "openvino/runtime/common.hpp" #include "openvino/runtime/icompiled_model.hpp" #include "openvino/runtime/threading/executor_manager.hpp" -#include "openvino/util/file_util.hpp" namespace ov { diff --git a/src/inference/src/ie_core.cpp b/src/inference/src/ie_core.cpp index de604f6fab4f21..139c12d763145c 100644 --- a/src/inference/src/ie_core.cpp +++ b/src/inference/src/ie_core.cpp @@ -30,7 +30,6 @@ #include "ie_network_reader.hpp" #include "ie_ngraph_utils.hpp" #include "ie_plugin_config.hpp" -#include "ie_plugins.hpp" #include "ie_remote_context.hpp" #include "ngraph/graph_util.hpp" #include "ngraph/ngraph.hpp" diff --git a/src/inference/tests/unit/CMakeLists.txt b/src/inference/tests/unit/CMakeLists.txt index 36889d8548ede8..02fed6cfd7756a 100644 --- a/src/inference/tests/unit/CMakeLists.txt +++ b/src/inference/tests/unit/CMakeLists.txt @@ -11,9 +11,6 @@ ov_add_test_target( template_extension LINK_LIBRARIES unitTestUtils - INCLUDES - # for static ie_plugins.hpp - "${CMAKE_BINARY_DIR}/src/inference/" ADD_CLANG_FORMAT LABELS OV From a20b3631fb5d253bde84d82087cb11b3dee78c59 Mon Sep 17 00:00:00 2001 From: Jade Cho Date: Thu, 23 Mar 2023 13:55:55 +0900 Subject: [PATCH 047/296] Support float64 data type as input of benchmark_app (#16435) --- samples/cpp/benchmark_app/remote_tensors_filling.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samples/cpp/benchmark_app/remote_tensors_filling.cpp b/samples/cpp/benchmark_app/remote_tensors_filling.cpp index fa139d7485f141..9301a8d113210f 100644 --- a/samples/cpp/benchmark_app/remote_tensors_filling.cpp +++ b/samples/cpp/benchmark_app/remote_tensors_filling.cpp @@ -40,8 +40,10 @@ void fill_buffer_random(void* inputBuffer, void fill_buffer(void* inputBuffer, size_t elementsNum, const ov::element::Type& type) { if (type == ov::element::f32) { fill_buffer_random(inputBuffer, elementsNum); + } else if (type == ov::element::f64) { + fill_buffer_random(inputBuffer, elementsNum); } else if (type == ov::element::f16) { - fill_buffer_random(inputBuffer, elementsNum); + fill_buffer_random(inputBuffer, elementsNum); } else if (type == ov::element::i32) { 
fill_buffer_random(inputBuffer, elementsNum); } else if (type == ov::element::i64) { From 17174a3839f4fb85350b499c620e18facc271474 Mon Sep 17 00:00:00 2001 From: Maciej Smyk Date: Thu, 23 Mar 2023 08:39:46 +0100 Subject: [PATCH 048/296] DOCS shift to rst - Troubleshooting (#16483) * troubleshooting * code-block fix --- .../installing-openvino-yocto.md | 2 +- docs/install_guides/troubleshooting-issues.md | 75 ++++++++++------ docs/install_guides/troubleshooting-steps.md | 89 +++++++++++-------- docs/install_guides/troubleshooting.md | 12 ++- 4 files changed, 104 insertions(+), 74 deletions(-) diff --git a/docs/install_guides/installing-openvino-yocto.md b/docs/install_guides/installing-openvino-yocto.md index 2aa69d56d3db52..479d03f2e87c49 100644 --- a/docs/install_guides/installing-openvino-yocto.md +++ b/docs/install_guides/installing-openvino-yocto.md @@ -116,7 +116,7 @@ If the image build is successful, it will return the list of packages as below: Additional Resources #################### -- :ref:`Troubleshooting Guide ` +- :ref:`Troubleshooting Guide ` - `Yocto Project `__ - official documentation webpage - `BitBake Tool `__ - `Poky `__ diff --git a/docs/install_guides/troubleshooting-issues.md b/docs/install_guides/troubleshooting-issues.md index fd539d6ea64845..a381f0f46c1517 100644 --- a/docs/install_guides/troubleshooting-issues.md +++ b/docs/install_guides/troubleshooting-issues.md @@ -1,52 +1,71 @@ # Issues & Solutions for OpenVINO™ Installation & Configuration {#openvino_docs_get_started_guide_troubleshooting_issues} +@sphinxdirective + This page lists issues that you may encounter during the installation and configuration of OpenVINO™, as well as their possible solutions. -## Errors with Installing via PIP for Users in China +.. _install_for_prc: + +Errors with Installing via PIP for Users in China +################################################# Users in China might encounter errors while downloading sources via PIP during OpenVINO™ installation. To resolve the issues, try one of the following options: * Add the download source using the ``-i`` parameter with the Python ``pip`` command. For example: - ``` sh - pip install openvino-dev -i https://mirrors.aliyun.com/pypi/simple/ - ``` - Use the ``--trusted-host`` parameter if the URL above is ``http`` instead of ``https``. - You can also run the following command to install specific framework. For example: + .. code-block:: sh + + pip install openvino-dev -i https://mirrors.aliyun.com/pypi/simple/ + + Use the ``--trusted-host`` parameter if the URL above is ``http`` instead of ``https``. + You can also run the following command to install a specific framework. For example: + + .. code-block:: sh + + pip install openvino-dev[tensorflow2] -i https://mirrors.aliyun.com/pypi/simple/ - ``` - pip install openvino-dev[tensorflow2] -i https://mirrors.aliyun.com/pypi/simple/ - ``` * For C++ developers, if you have installed OpenVINO Runtime via APT, YUM, or the archive file, and then installed OpenVINO Development Tools via PyPI, you may run into issues. To resolve that, install the components in ``requirements.txt`` by using the following command: - ``` sh - pip install -r /tools/requirements.txt - ``` - For APT and YUM users, replace the `INSTALL_DIR` with `/usr/share/openvino`. + + .. code-block:: sh + + pip install -r <INSTALL_DIR>/tools/requirements.txt + + For APT and YUM users, replace the ``INSTALL_DIR`` with ``/usr/share/openvino``.
-## Issues with Installing OpenVINO on Linux from Docker +Issues with Installing OpenVINO on Linux from Docker +#################################################### + +.. _proxy-issues: -### Proxy Issues +Proxy Issues +++++++++++++ -If you met proxy issues during the installation with Docker, you need set up proxy settings for Docker. See the [Docker guide](https://docs.docker.com/network/proxy/) for more details. +If you encounter proxy issues during the installation with Docker, you need to set up proxy settings for Docker. See the `Docker guide `__ for more details. +.. _yocto_install_issues: -@anchor yocto-install-issues -## Issues with Creating a Yocto Image for OpenVINO +Issues with Creating a Yocto Image for OpenVINO +############################################### -### Error while adding "meta-intel" layer +Error while adding "meta-intel" layer ++++++++++++++++++++++++++++++++++++++ -When using the `bitbake-layers add-layer meta-intel` command, the following error might occur: -```sh -NOTE: Starting bitbake server... -ERROR: The following required tools (as specified by HOSTTOOLS) appear to be unavailable in PATH, please install them in order to proceed: chrpath diffstat pzstd zstd -``` +When using the ``bitbake-layers add-layer meta-intel`` command, the following error might occur: -To resolve the issue, install the `chrpath diffstat zstd` tools: +.. code-block:: sh + + NOTE: Starting bitbake server... + ERROR: The following required tools (as specified by HOSTTOOLS) appear to be unavailable in PATH, please install them in order to proceed: chrpath diffstat pzstd zstd + + +To resolve the issue, install the ``chrpath diffstat zstd`` tools: + +.. code-block:: sh + + sudo apt-get install chrpath diffstat zstd -```sh -sudo apt-get install chrpath diffstat zstd -``` +@endsphinxdirective diff --git a/docs/install_guides/troubleshooting-steps.md b/docs/install_guides/troubleshooting-steps.md index ec4049f445325b..fd5f03c73b21d2 100644 --- a/docs/install_guides/troubleshooting-steps.md +++ b/docs/install_guides/troubleshooting-steps.md @@ -1,56 +1,69 @@ # Troubleshooting Steps for OpenVINO™ Installation and Configurations {#openvino_docs_get_started_guide_troubleshooting_steps} +@sphinxdirective + If you run into issues while installing or configuring OpenVINO™, you can try the following methods to do some quick checks first. -## Check the versions of OpenVINO Runtime and Developement Tools +Check the versions of OpenVINO Runtime and Development Tools +############################################################# * To check the version of OpenVINO Development Tools, use the following command: - ```sh - mo --version - ``` + + .. code-block:: sh + + mo --version + * To check the version of OpenVINO Runtime, use the following code: - ```sh - from openvino.runtime import get_version get_version() - ``` + + .. code-block:: sh + + from openvino.runtime import get_version get_version() + -## Check the versions of Python and PIP +Check the versions of Python and PIP +#################################### -To check your Python version, run `python -VV` or `python --version`. The supported Python versions should be 64-bit and between 3.7 and 3.10. If you are using Python 3.6, you are recommended to upgrade the version to 3.7 or higher. +To check your Python version, run ``python -VV`` or ``python --version``. The supported Python versions should be 64-bit and between 3.7 and 3.10. If you are using Python 3.6, it is recommended to upgrade to 3.7 or higher.
If your Python version does not meet the requirements, update Python: -* For Windows, **do not install Python from a Windows Store** as it can cause issues. You are highly recommended to install Python from . -* For Linux and macOS systems, download and install a proper Python version from . See the [Python Beginners' Guide](https://wiki.python.org/moin/BeginnersGuide/Download) for more information on selecting a version. Note that macOS 10.x comes with python 2.7 installed, which is not supported, so you must install Python from the official website. +* For Windows, **do not install Python from a Windows Store** as it can cause issues. You are highly recommended to install Python from `official website `__ . +* For Linux and macOS systems, download and install a proper Python version from `official website `__ . See the `Python Beginners' Guide `__ for more information on selecting a version. Note that macOS 10.x comes with python 2.7 installed, which is not supported, so you must install Python from the official website. For PIP, make sure that you have installed the latest version. To check and upgrade your PIP version, run the following command: -```sh -python -m pip install --upgrade pip -``` + +.. code-block:: sh + + python -m pip install --upgrade pip - -## Check if required external dependencies are installed (for pre-2022.2 releases) +Check if required external dependencies are installed (for pre-2022.2 releases) +############################################################################### For OpenVINO releases prior to 2022.2: -- If you are using Ubuntu or RHEL 8 systems, and installed OpenVINO Runtime via the archive file, APT, or YUM repository, and then decided to [install OpenVINO Development Tools](installing-model-dev-tools.md), make sure that you **Install External Software Dependencies** first by following the steps in the corresponding installation pages. -- For C++ developers with Windows systems, make sure that Microsoft Visual Studio 2019 with MSBuild and CMake 3.14 or higher (64-bit) are installed. While installing Microsoft Visual Studio 2019, make sure that you have selected **Desktop development with C++** in the **Workloads** tab. If not, launch the installer again to select that option. For more information on modifying the installation options for Microsoft Visual Studio, see its [official support page](https://docs.microsoft.com/en-us/visualstudio/install/modify-visual-studio?view=vs-2019). -## Check if environment variables are set correctly +- If you are using Ubuntu or RHEL 8 systems, and installed OpenVINO Runtime via the archive file, APT, or YUM repository, and then decided to :doc:`install OpenVINO Development Tools `, make sure that you **Install External Software Dependencies** first by following the steps in the corresponding installation pages. +- For C++ developers with Windows systems, make sure that Microsoft Visual Studio 2019 with MSBuild and CMake 3.14 or higher (64-bit) are installed. While installing Microsoft Visual Studio 2019, make sure that you have selected **Desktop development with C++** in the **Workloads** tab. If not, launch the installer again to select that option. For more information on modifying the installation options for Microsoft Visual Studio, see its `official support page `__ . -- For Python developers, if you previously installed OpenVINO using the archive file, and are now installing OpenVINO using PIP, remove all the PATH settings and the lines with `setupvars` from `.bashrc`. 
Note that if you installed OpenVINO with PIP in a virtual environment, you don't need to set any environment variables. -- If you have installed OpenVINO before, you probably have added `setupvars` to your `PATH /.bashrc` or Windows environment variables. After restarting your environment, you should see similar information as below: -```sh -[setupvars.sh] OpenVINO™ environment initialized -``` - - If you don't see the information above, your PATH variables may be configured incorrectly. Check if you have typed the correct or you are trying to activate in the correct directory. - - If you added it to a `.bashrc` file, make sure that the command is correctly written and the file is found in the `~/.bashrc` folder. +- If you have installed OpenVINO before, you probably have added ``setupvars`` to your ``PATH /.bashrc`` or Windows environment variables. After restarting your environment, you should see similar information as below: + .. code-block:: sh + + [setupvars.sh] OpenVINO™ environment initialized + + + - If you don't see the information above, your PATH variables may be configured incorrectly. Check if you have typed the correct ``<INSTALL_DIR>`` or you are trying to activate in the correct directory. + - If you added it to a ``.bashrc`` file, make sure that the command is correctly written and the file is found in the ``~/.bashrc`` folder. + +Verify that OpenVINO is correctly installed +########################################### * For Python developers, to verify if OpenVINO is correctly installed, use the following command: @@ -66,24 +79,24 @@ For OpenVINO releases prior to 2022.2: * If you installed OpenVINO Runtime from YUM, use the ``yum list installed 'openvino*'`` command to list the installed OpenVINO packages. -@endsphinxdirective - -## Check if GPU drvier is installed +Check if GPU driver is installed +################################ -[Additional configurations](configurations-header.md) may be required in order to use OpenVINO with different hardware such as Intel® GPUs. +:doc:`Additional configurations ` may be required in order to use OpenVINO with different hardware such as Intel® GPUs. -To run inference on an Intel® GPU, make sure that you have installed the correct GPU driver. To check that, see [additional configurations for GPU](configurations-for-intel-gpu.md). +To run inference on an Intel® GPU, make sure that you have installed the correct GPU driver. To check that, see :doc:`additional configurations for GPU `. -## Check firewall and network settings +Check firewall and network settings +################################### Make sure that your firewall and network settings are configured correctly. For example, consider configuring system-wide proxy settings and specifying options for using PIP behind the proxy: -@sphinxdirective +.. code-block:: sh + + pip install --proxy http://address:port --trusted-host pypi.org openvino - .. 
code-block:: sh - pip install --proxy http://address:port --trusted-host pypi.org openvino +For specific issues, see :ref:`Errors with Installing via PIP for Users in China ` and :ref:`proxy issues with installing OpenVINO on Linux from Docker `. @endsphinxdirective -For specific issues, see Errors with Installing via PIP for Users in China and proxy issues with installing OpenVINO on Linux from Docker. \ No newline at end of file diff --git a/docs/install_guides/troubleshooting.md b/docs/install_guides/troubleshooting.md index 9963a579978025..99e3fd7ca8ea97 100644 --- a/docs/install_guides/troubleshooting.md +++ b/docs/install_guides/troubleshooting.md @@ -9,16 +9,14 @@ Issues & Solutions Troubleshooting Steps -@endsphinxdirective - -@sphinxdirective .. _troubleshooting guide for install: -@endsphinxdirective - This guide provides general troubleshooting steps and solutions to possible issues that can be encountered while installing and configuring OpenVINO™. -The [Issues & Solutions](./troubleshooting-issues.md) page lists common installation and configuration errors, and their possible solutions. If you encountered a specific error while installing or configuring OpenVINO, check this page to see if there is a solution. +The :doc:`Issues & Solutions ` page lists common installation and configuration errors, and their possible solutions. If you encounter a specific error while installing or configuring OpenVINO, check this page to see if there is a solution. + +The :doc:`Troubleshooting Steps ` page provides a set of instructions for diagnosing and resolving installation and configuration issues. If you had problems during installation and configuration, walk through these steps to try and resolve your issue. + +@endsphinxdirective -The [Troubleshooting Steps](./troubleshooting-steps.md) page provides a set of instructions for diagnosing and resolving installation and configuration issues. If you had problems during installation and configuration, walk through these steps to try and resolve your issue. From aaa4a4c2105f0e9268ecf0dbd16d59e9cf64ca4c Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Thu, 23 Mar 2023 11:49:46 +0400 Subject: [PATCH 049/296] [TF FE] Skip Assert operation and add test (#16484) At the conversion stage we can't resolve the Assert node because its condition is computed only at inference time.
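For example, a graph of the following kind (a minimal sketch in the spirit of the new test below; exact API usage may vary) carries an Assert whose condition depends on runtime input shapes and therefore cannot be constant-folded at conversion time:

```python
import tensorflow as tf

tf.compat.v1.disable_eager_execution()
x = tf.compat.v1.placeholder(dtype=tf.int32, shape=[None], name='x')
y = tf.compat.v1.placeholder(dtype=tf.int32, shape=[None], name='y')
# The condition compares runtime shapes, so its value is unknown until inference.
cond = tf.reduce_all(tf.equal(tf.shape(x), tf.shape(y)))
assert_op = tf.debugging.Assert(cond, ["Shapes of operands are incompatible"])
with tf.control_dependencies([assert_op]):
    z = tf.add(x, y)
```

Skipping the node (instead of demanding a constant condition) lets the rest of the graph convert normally.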
Signed-off-by: Kazantsev, Roman --- .../tensorflow/src/op/lookup_table_insert.cpp | 24 ------------ src/frontends/tensorflow/src/op_table.cpp | 7 ++-- .../tests/convert_tricky_models.cpp | 11 ++++++ .../gen_scripts/generate_model_with_assert.py | 38 +++++++++++++++++++ .../include/common_op_table.hpp | 1 - .../tensorflow_common/src/op/assert.cpp | 36 ------------------ .../tensorflow_common/src/op/no_op.cpp | 2 +- 7 files changed, 53 insertions(+), 66 deletions(-) delete mode 100644 src/frontends/tensorflow/src/op/lookup_table_insert.cpp create mode 100644 src/frontends/tensorflow/tests/test_models/gen_scripts/generate_model_with_assert.py delete mode 100644 src/frontends/tensorflow_common/src/op/assert.cpp diff --git a/src/frontends/tensorflow/src/op/lookup_table_insert.cpp b/src/frontends/tensorflow/src/op/lookup_table_insert.cpp deleted file mode 100644 index 3fb679e170be38..00000000000000 --- a/src/frontends/tensorflow/src/op/lookup_table_insert.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "common_op_table.hpp" -#include "openvino/frontend/tensorflow/node_context.hpp" -#include "utils.hpp" - -namespace ov { -namespace frontend { -namespace tensorflow { -namespace op { - -OutputVector translate_lookup_table_insert_op(const ov::frontend::tensorflow::NodeContext& node) { - // auto-pruning of unsupported sub-graphs that contain - // operations working with dictionaries - default_op_checks(node, 3, {"LookupTableInsert", "LookupTableInsertV2"}); - return {}; -} - -} // namespace op -} // namespace tensorflow -} // namespace frontend -} // namespace ov diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index dddab20848841f..26a2a332345f99 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -26,7 +26,6 @@ TF_OP_CONVERTER(translate_gru_block_cell_op); TF_OP_CONVERTER(translate_hash_table_op); TF_OP_CONVERTER(translate_iterator_get_next_op); TF_OP_CONVERTER(translate_iterator_op); -TF_OP_CONVERTER(translate_lookup_table_insert_op); TF_OP_CONVERTER(translate_partitioned_call_op); TF_OP_CONVERTER(translate_queue_dequeue_op); TF_OP_CONVERTER(translate_queue_dequeue_many_op); @@ -105,7 +104,7 @@ const std::map get_supported_ops() { {"AddN", translate_add_n_op}, {"ArgMax", translate_arg_max_op}, {"ArgMin", translate_arg_min_op}, - {"Assert", translate_assert_op}, + {"Assert", translate_no_op}, {"AvgPool", translate_avg_pool_op}, {"AvgPool3D", translate_avg_pool_op}, {"BatchMatMul", translate_batch_mat_mul_op}, @@ -164,8 +163,8 @@ const std::map get_supported_ops() { {"ListDiff", translate_list_diff_op}, {"LogSoftmax", translate_log_softmax_op}, {"Log1p", translate_log_1p_op}, - {"LookupTableInsert", translate_lookup_table_insert_op}, - {"LookupTableInsertV2", translate_lookup_table_insert_op}, + {"LookupTableInsert", translate_no_op}, + {"LookupTableInsertV2", translate_no_op}, {"LRN", translate_lrn_op}, {"MatMul", translate_mat_mul_op}, {"MatrixDiag", translate_matrix_diag_op}, diff --git a/src/frontends/tensorflow/tests/convert_tricky_models.cpp b/src/frontends/tensorflow/tests/convert_tricky_models.cpp index 6dd2a5a510b325..e1bde1af03784f 100644 --- a/src/frontends/tensorflow/tests/convert_tricky_models.cpp +++ b/src/frontends/tensorflow/tests/convert_tricky_models.cpp @@ -346,6 +346,7 @@ TEST_F(TransformationTestsF, ModelWithIteratorGetNextAndUnsupportedOp) { model_ref = make_shared(OutputVector{add}, 
ParameterVector{x, y}); } } + TEST_F(TransformationTestsF, ModelWithMultioutputBodyGraphNode) { { model = convert_model("partitioned_call2/partitioned_call2.pb"); } { @@ -376,3 +377,13 @@ TEST_F(TransformationTestsF, ModelWithEmptyTensorListAndPushBack) { model_ref = make_shared(OutputVector{recover_item}, ParameterVector{x}); } } + +TEST_F(TransformationTestsF, ModelWithAssertNode) { + { model = convert_model("model_with_assert/model_with_assert.pb"); } + { + auto x = make_shared(i32, PartialShape{Dimension::dynamic()}); + auto y = make_shared(i32, PartialShape{Dimension::dynamic()}); + auto add = make_shared(x, y); + model_ref = make_shared(OutputVector{add}, ParameterVector{x, y}); + } +} diff --git a/src/frontends/tensorflow/tests/test_models/gen_scripts/generate_model_with_assert.py b/src/frontends/tensorflow/tests/test_models/gen_scripts/generate_model_with_assert.py new file mode 100644 index 00000000000000..79c6d84bf2a2c5 --- /dev/null +++ b/src/frontends/tensorflow/tests/test_models/gen_scripts/generate_model_with_assert.py @@ -0,0 +1,38 @@ +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# +# model with Assert node generator +# + +import os +import sys + +import numpy as np +import tensorflow as tf + + +def main(): + tf.compat.v1.reset_default_graph() + + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(dtype=tf.int32, shape=[None], name='x') + y = tf.compat.v1.placeholder(dtype=tf.int32, shape=[None], name='y') + tf.raw_ops.AddV2(x=x, y=y) + shape1 = tf.raw_ops.Shape(input=x) + shape2 = tf.raw_ops.Shape(input=y) + equal = tf.raw_ops.Equal(x=shape1, y=shape2) + axis = tf.constant([0], dtype=tf.int32) + all_equal = tf.raw_ops.All(input=equal, axis=axis) + message = tf.constant("Shapes of operands are incompatible", dtype=tf.string) + tf.raw_ops.Assert(condition=all_equal, data=[message]) + + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + tf.io.write_graph(tf_net, os.path.join(sys.argv[1], "model_with_assert"), "model_with_assert.pb", False) + + +if __name__ == "__main__": + main() diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp index eeec128e8c1de3..20fefeabd811b4 100644 --- a/src/frontends/tensorflow_common/include/common_op_table.hpp +++ b/src/frontends/tensorflow_common/include/common_op_table.hpp @@ -34,7 +34,6 @@ OP_T_CONVERTER(translate_direct_reduce_op); OP_CONVERTER(translate_add_n_op); OP_CONVERTER(translate_arg_max_op); OP_CONVERTER(translate_arg_min_op); -OP_CONVERTER(translate_assert_op); OP_CONVERTER(translate_avg_pool_op); OP_CONVERTER(translate_batch_mat_mul_op); OP_CONVERTER(translate_batch_to_space_nd_op); diff --git a/src/frontends/tensorflow_common/src/op/assert.cpp b/src/frontends/tensorflow_common/src/op/assert.cpp deleted file mode 100644 index 5275e85a8c2edc..00000000000000 --- a/src/frontends/tensorflow_common/src/op/assert.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "common_op_table.hpp" -#include "openvino/core/validation_util.hpp" - -using namespace std; - -namespace ov { -namespace frontend { -namespace tensorflow { -namespace op { - -OutputVector translate_assert_op(const NodeContext& node) { - default_op_checks(node, 1, {"Assert"}); - auto cond = node.get_input(0); - auto cond_const = get_constant_from_source(cond); - TENSORFLOW_OP_VALIDATION(node, - 
cond_const, - "[TensorFlow Frontend] The condition must be constant for further model conversion."); - auto cond_values = cond_const->cast_vector(); - TENSORFLOW_OP_VALIDATION(node, - cond_values.size() == 1, - "[TensorFlow Frontend] Incorrect model - the condition must have one element."); - TENSORFLOW_OP_VALIDATION(node, - cond_values[0], - "[TensorFlow Frontend] The condition must be true for further model conversion."); - return {}; -} -} // namespace op -} // namespace tensorflow -} // namespace frontend -} // namespace ov diff --git a/src/frontends/tensorflow_common/src/op/no_op.cpp b/src/frontends/tensorflow_common/src/op/no_op.cpp index 9d8552b6f71c24..388d5c5c6af573 100644 --- a/src/frontends/tensorflow_common/src/op/no_op.cpp +++ b/src/frontends/tensorflow_common/src/op/no_op.cpp @@ -15,7 +15,7 @@ namespace op { OutputVector translate_no_op(const NodeContext& node) { // the operation does nothing in terms of data generation - default_op_checks(node, 0, {"NoOp", "SaveV2"}); + default_op_checks(node, 0, {"NoOp", "SaveV2", "Assert", "LookupTableInsert", "LookupTableInsertV2"}); return {}; } } // namespace op From 66ae71454aa20592b5114cdd69d43e046f2cc70a Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Thu, 23 Mar 2023 10:09:43 +0100 Subject: [PATCH 050/296] DOCS shift to rst - Install OpenVINO on Windows (#16502) --- .../installing-openvino-docker-windows.md | 264 +++++++++++------- ...nstalling-openvino-from-archive-windows.md | 224 +++++++++------ .../installing-openvino-windows-header.md | 14 +- 3 files changed, 305 insertions(+), 197 deletions(-) diff --git a/docs/install_guides/installing-openvino-docker-windows.md b/docs/install_guides/installing-openvino-docker-windows.md index b42034cb7bc9b2..d10bf08df67a2e 100644 --- a/docs/install_guides/installing-openvino-docker-windows.md +++ b/docs/install_guides/installing-openvino-docker-windows.md @@ -1,97 +1,124 @@ # Install Intel® Distribution of OpenVINO™ toolkit for Windows from Docker Image {#openvino_docs_install_guides_installing_openvino_docker_windows} +@sphinxdirective + This guide provides steps for creating a Docker image with Intel® Distribution of OpenVINO™ toolkit for Windows and using the Docker image on different devices. -## System Requirements +.. _system-requirements-docker-windows: + +System Requirements +#################### + -@sphinxdirective .. tab:: Target Operating System with Python Versions - +------------------------------------+--------------------------+ - | Operating System | Supported Python Version | - +====================================+==========================+ - | Windows Server Core base LTSC 2019 | 3.8 | - +------------------------------------+--------------------------+ - | Windows 10, version 20H2 | 3.8 | - +------------------------------------+--------------------------+ + .. list-table:: + :header-rows: 1 + + * - Operating System + - Supported Python Version + * - Windows Server Core base LTSC 2019 + - 3.8 + * - Windows 10, version 20H2 + - 3.8 .. 
tab:: Host Operating Systems - * Windows 10, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or later) editions - * Windows Server 2016 or higher + * Windows 10, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or later) editions + * Windows Server 2016 or higher -@endsphinxdirective -### Additional Requirements for GPU +Additional Requirements for GPU ++++++++++++++++++++++++++++++++ To use GPU Acceleration in Windows containers, make sure that the following requirements for Windows host, OpenVINO and Docker are met: -- [Windows requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/gpu-acceleration): +- `Windows requirements `__: + - The container host must be running Windows Server 2019 or Windows 10 of version 1809 or higher. - - The container base image must be `mcr.microsoft.com/windows:1809` or higher. Windows Server Core and Nano Server container images are not currently supported. + - The container base image must be ``mcr.microsoft.com/windows:1809`` or higher. Windows Server Core and Nano Server container images are not currently supported. - The container host must be running Docker Engine 19.03 or higher. - The container host must have GPU running display drivers of version WDDM 2.5 or higher. + - GPU requirement for OpenVINO: Intel Graphics Driver for Windows of version 15.65 or higher. -- [Docker isolation mode requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/hyperv-container): +- `Docker isolation mode requirements `__: + - Windows host and container version tags must match. - - [Windows host and container isolation process support](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility). + - `Windows host and container isolation process support `__. -## Installation Flow +Installation Flow +#################### There are two ways to install OpenVINO with Docker. You can choose either of them according to your needs: + * Use a prebuilt image. Do the following steps: - 1. Get a prebuilt image from provided sources. - 2. Run the image on different devices. + + 1. `Get a prebuilt image from provided sources <#getting-a-prebuilt-image-from-provided-sources>`__. + 2. `Run the image on different devices <#running-the-docker-image-on-different-devices>`__. + * If you want to customize your image, you can also build a Docker image manually by using the following steps: - 1. Prepare a Dockerfile. - 2. Configure the Docker image. - 3. Run the image on different devices. -## Getting a Prebuilt Image from Provided Sources + 1. `Prepare a Dockerfile <#preparing-a-dockerfile>`__. + 2. `Configure the Docker image <#configuring-the-docker-image-for-different-devices>`__. + 3. `Run the image on different devices <#running-the-docker-image-on-different-devices>`__. 
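For illustration, the prebuilt-image route usually comes down to pulling the image and starting a container (a minimal sketch; the repository, image name, and tag below are placeholders rather than real published tags — the actual names are listed on the sources in the next section):

.. code-block:: bat

   rem Pull a prebuilt OpenVINO image and start an interactive container.
   docker pull <repository>/<image>:<tag>
   docker run -it --rm <repository>/<image>:<tag>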
+ +Getting a Prebuilt Image from Provided Sources +############################################## You can find prebuilt images on: -- [Docker Hub](https://hub.docker.com/u/openvino) -- [Azure Marketplace](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/intel_corporation.openvino) +- `Docker Hub <https://hub.docker.com/u/openvino>`__ +- `Azure Marketplace <https://azuremarketplace.microsoft.com/en-us/marketplace/apps/intel_corporation.openvino>`__ -## Preparing a Dockerfile +Preparing a Dockerfile +###################### -You can use the [available Dockerfiles on GitHub](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your settings via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) which can generate a Dockerfile, build, test and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. +You can use the `available Dockerfiles on GitHub <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__ or generate a Dockerfile with your settings via the `DockerHub CI Framework <https://github.com/openvinotoolkit/docker_ci>`__, which can generate a Dockerfile and then build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. -## Configuring the Docker Image for Different Devices +Configuring the Docker Image for Different Devices +################################################## -### Installing Additional Dependencies for CPU +Installing Additional Dependencies for CPU +++++++++++++++++++++++++++++++++++++++++++ -#### Installing CMake +Installing CMake +---------------- + +To add CMake to the image, add the following commands to the Dockerfile: + +.. code-block:: bat - To add CMake to the image, add the following commands to the Dockerfile: - ```bat RUN powershell.exe -Command ` Invoke-WebRequest -URI https://cmake.org/files/v3.14/cmake-3.14.7-win64-x64.msi -OutFile %TMP%\\cmake-3.14.7-win64-x64.msi ; ` Start-Process %TMP%\\cmake-3.14.7-win64-x64.msi -ArgumentList '/quiet /norestart' -Wait ; ` Remove-Item %TMP%\\cmake-3.14.7-win64-x64.msi -Force RUN SETX /M PATH "C:\Program Files\CMake\Bin;%PATH%" - ``` - In case of proxy issues, please add the `ARG HTTPS_PROXY` and `-Proxy %%HTTPS_PROXY%` settings to the `powershell.exe` command to the Dockerfile. Then build a Docker image: - ```bat + +In case of proxy issues, please add the ``ARG HTTPS_PROXY`` and ``-Proxy %%HTTPS_PROXY%`` settings to the ``powershell.exe`` command in the Dockerfile. Then build a Docker image: + +.. code-block:: bat + docker build . -t ` --build-arg HTTPS_PROXY= - ``` - -#### Installing Microsoft Visual Studio Build Tools - You can add Microsoft Visual Studio Build Tools to a Windows OS Docker image using the [offline](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019) or [online](https://docs.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2019) installers for Build Tools. - - Microsoft Visual Studio Build Tools are licensed as a supplement your existing Microsoft Visual Studio license. - - Any images built with these tools should be for your personal use or for use in your organization in accordance with your existing Visual Studio and Windows licenses. - To add MSBuild 2019 to the image, add the following commands to the Dockerfile: - ```bat - RUN powershell.exe -Command Invoke-WebRequest -URI https://aka.ms/vs/16/release/vs_buildtools.exe -OutFile %TMP%\\vs_buildtools.exe +Installing Microsoft Visual Studio Build Tools +---------------------------------------------- + +You can add Microsoft Visual Studio Build Tools to a Windows OS Docker image using the `offline <https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019>`__ or `online <https://docs.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2019>`__ installers for Build Tools. 
+ +Microsoft Visual Studio Build Tools are licensed as a supplement to your existing Microsoft Visual Studio license. + +Any images built with these tools should be for your personal use or for use in your organization in accordance with your existing Visual Studio and Windows licenses. + +To add MSBuild 2019 to the image, add the following commands to the Dockerfile: + +.. code-block:: bat + + RUN powershell.exe -Command Invoke-WebRequest -URI https://aka.ms/vs/16/release/vs_buildtools.exe -OutFile %TMP%\\vs_buildtools.exe + RUN %TMP%\\vs_buildtools.exe --quiet --norestart --wait --nocache ` --installPath "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools" ` --add Microsoft.VisualStudio.Workload.MSBuildTools ` @@ -101,75 +128,110 @@ You can use the [available Dockerfiles on GitHub](https://github.com/openvinotoo --remove Microsoft.VisualStudio.Component.Windows10SDK.10586 ` --remove Microsoft.VisualStudio.Component.Windows10SDK.14393 ` --remove Microsoft.VisualStudio.Component.Windows81SDK || IF "%ERRORLEVEL%"=="3010" EXIT 0 && powershell set-executionpolicy remotesigned - ``` - In case of proxy issues, please use the [offline installer for Build Tools](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019). -### Configuring the Image for GPU +In case of proxy issues, please use the `offline installer for Build Tools <https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019>`__. + +Configuring the Image for GPU ++++++++++++++++++++++++++++++ + +.. note:: + + Since GPU is not supported in `prebuilt images <#getting-a-prebuilt-image-from-provided-sources>`__ or `default Dockerfiles <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__, you must make sure the Additional Requirements for GPU in `System Requirements <#system-requirements>`__ are met, and do the following steps to build the image manually. + +1. Reuse one of `available Dockerfiles <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__. You can also use your own Dockerfile. +2. Check your `Windows host and container isolation process compatibility <https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility>`__. +3. Find the appropriate Windows container base image on `DockerHub <https://hub.docker.com/_/microsoft-windows>`__ and set up your host/container version in the ``FROM`` Dockerfile instruction. + + For example, in the ``openvino_c_dev_.dockerfile``, change: + + .. code-block:: bat -> **NOTE**: Since GPU is not supported in prebuilt images or [default Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles), you must make sure the Additional Requirements for GPU in System Requirements are met, and do the following steps to build the image manually. + FROM mcr.microsoft.com/windows/servercore:ltsc2019 AS ov_base -1. Reuse one of [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles). You can also use your own Dockerfile. -2. Check your [Windows host and container isolation process compatibility](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility). -3. Find the appropriate Windows container base image on [DockerHub](https://hub.docker.com/_/microsoft-windows) and set up your host/container version in the `FROM` Dockerfile instruction. - For example, in the `openvino_c_dev_.dockerfile`, change: - ```bat - FROM mcr.microsoft.com/windows/servercore:ltsc2019 AS ov_base - ``` to: - ```bat - FROM mcr.microsoft.com/windows:20H2 - ``` + + .. code-block:: bat + + FROM mcr.microsoft.com/windows:20H2 + + 4. Build the Docker image by running the following command: - ```bat - docker build --build-arg package_url= -f -t . - ``` -5. 
Copy `OpenCL.dll` from your `C:\Windows\System32` host folder to any `temp` directory: - ```bat - mkdir C:\tmp - copy C:\Windows\System32\OpenCL.dll C:\tmp - ``` -## Running the Docker Image on Different Devices + .. code-block:: bat + + docker build --build-arg package_url= -f -t . + + +5. Copy ``OpenCL.dll`` from your ``C:\Windows\System32`` host folder to any ``temp`` directory: + + .. code-block:: bat + + mkdir C:\tmp + copy C:\Windows\System32\OpenCL.dll C:\tmp -### Running the Image on CPU + +Running the Docker Image on Different Devices +############################################# + +Running the Image on CPU +++++++++++++++++++++++++ To start the interactive session, run the following command: -```bat -docker run -it --rm -``` + +.. code-block:: bat + + docker run -it --rm + If you want to try some samples, run the image with the following command: -```bat -docker run -it --rm -cmd /S /C "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp CPU" -``` -### Running the Image on GPU +.. code-block:: bat + + docker run -it --rm + cmd /S /C "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp CPU" + + +Running the Image on GPU +++++++++++++++++++++++++ + +.. note:: -> **NOTE**: Since GPU is not supported in prebuilt images or [default Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles), you must make sure the Additional Requirements for GPU in System Requirements are met, and configure and build the image manually before you can run inferences on a GPU. + Since GPU is not supported in `prebuilt images <#getting-a-prebuilt-image-from-provided-sources>`__ or `default Dockerfiles <https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles>`__, you must make sure the Additional Requirements for GPU in `System Requirements <#system-requirements>`__ are met, and `configure and build the image manually <#configuring-the-image-for-gpu>`__ before you can run inferences on a GPU. 1. To try inference on a GPU, run the image with the following command: - ```bat - docker run -it --rm -u ContainerAdministrator --isolation process --device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599 -v C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409:C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409 -v C:\tmp:C:\tmp - ``` + + .. code-block:: bat + + docker run -it --rm -u ContainerAdministrator --isolation process --device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599 -v C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409:C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409 -v C:\tmp:C:\tmp + + where - - `--device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599` is a reserved interface class GUID for a GPU device. - - `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409` is the path to OpenCL driver home directory. To find it on your PC, run the `C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_*` regular expression. 
- - `C:\tmp` is the folder with the copy of `OpenCL.dll` from your `C:\Windows\System32` host folder. -2. Copy `OpenCL.dll` to the `C:\Windows\System32` folder inside the container and set appropriate registry entry. Now you can run inference on a GPU device: - ```bat - copy C:\tmp\OpenCL.dll C:\Windows\System32\ && reg add "HKLM\SOFTWARE\Khronos\OpenCL\Vendors" /v "C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409\ocl\bin\x64\intelocl64.dll" /t REG_DWORD /d 0 - ``` - For example, run the `Hello Classification Python` sample with the following command: - ```bat - omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp GPU - ``` - - -## Additional Resources - -- [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs. -- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit) -- [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) \ No newline at end of file + + - ``--device class/5B45201D-F2F2-4F3B-85BB-30FF1F953599`` is a reserved interface class GUID for a GPU device. + - ``C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409`` is the path to the OpenCL driver home directory. To find it on your PC, run the ``C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_*`` regular expression. + - ``C:\tmp`` is the folder with the copy of ``OpenCL.dll`` from your ``C:\Windows\System32`` host folder. + +2. Copy ``OpenCL.dll`` to the ``C:\Windows\System32`` folder inside the container and set the appropriate registry entry. Now you can run inference on a GPU device: + + .. code-block:: bat + + copy C:\tmp\OpenCL.dll C:\Windows\System32\ && reg add "HKLM\SOFTWARE\Khronos\OpenCL\Vendors" /v "C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409\ocl\bin\x64\intelocl64.dll" /t REG_DWORD /d 0 + + + For example, run the ``Hello Classification Python`` sample with the following command: + + .. code-block:: bat + + omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp GPU + + +Additional Resources +#################### + +- `DockerHub CI Framework <https://github.com/openvinotoolkit/docker_ci>`__ for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs. 
+- Intel® Distribution of OpenVINO™ toolkit home page: `https://software.intel.com/en-us/openvino-toolkit <https://software.intel.com/en-us/openvino-toolkit>`__ +- `OpenVINO Installation Selector Tool <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>`__ + +@endsphinxdirective diff --git a/docs/install_guides/installing-openvino-from-archive-windows.md b/docs/install_guides/installing-openvino-from-archive-windows.md index 2300a1dceb8423..a0bc2b7649c94e 100644 --- a/docs/install_guides/installing-openvino-from-archive-windows.md +++ b/docs/install_guides/installing-openvino-from-archive-windows.md @@ -1,112 +1,158 @@ # Install OpenVINO™ Runtime on Windows from an Archive File {#openvino_docs_install_guides_installing_openvino_from_archive_windows} -With the OpenVINO™ 2022.3 release, you can download and use archive files to install OpenVINO Runtime. The archive files contain pre-built binaries and library files needed for OpenVINO Runtime, as well as code samples. +@sphinxdirective -Installing OpenVINO Runtime from archive files is recommended for C++ developers. If you are working with Python, the PyPI package has everything needed for Python development and deployment on CPU and GPUs. See the [Install OpenVINO from PyPI](installing-openvino-pip.md) page for instructions on how to install OpenVINO Runtime for Python using PyPI. +With the OpenVINO™ 2022.3 release, you can download and use archive files to install OpenVINO Runtime. The archive files contain pre-built binaries and library files needed for OpenVINO Runtime, as well as code samples. -> **NOTE**: Since the OpenVINO™ 2022.1 release, the following development tools: Model Optimizer, Post-Training Optimization Tool, Model Downloader and other Open Model Zoo tools, Accuracy Checker, and Annotation Converter can be installed via [pypi.org](https://pypi.org/project/openvino-dev/) only. +Installing OpenVINO Runtime from archive files is recommended for C++ developers. If you are working with Python, the PyPI package has everything needed for Python development and deployment on CPU and GPUs. See the :doc:`Install OpenVINO from PyPI ` page for instructions on how to install OpenVINO Runtime for Python using PyPI. -See the [Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes) for more information on updates in the latest release. +.. note:: + + Since the OpenVINO™ 2022.1 release, the following development tools: Model Optimizer, Post-Training Optimization Tool, Model Downloader and other Open Model Zoo tools, Accuracy Checker, and Annotation Converter can be installed via `pypi.org <https://pypi.org/project/openvino-dev/>`__ only. + + +See the `Release Notes <https://software.intel.com/en-us/articles/OpenVINO-RelNotes>`__ for more information on updates in the latest release. + +System Requirements +#################### -@sphinxdirective .. tab:: System Requirements | Full requirement listing is available in: | `System Requirements Page `_ - + .. tab:: Processor Notes Processor graphics are not included in all processors. See `Product Specifications`_ for information about your processor. - - .. _Product Specifications: https://ark.intel.com/ + Processor graphics are not included in all processors. + See `Product Specifications`_ for information about your processor. + + .. _Product Specifications: https://ark.intel.com/ .. 
tab:: Software - * `Microsoft Visual Studio 2019 with MSBuild `_ or `Microsoft Visual Studio 2022 `_ - * `CMake 3.14 or higher, 64-bit `_ (optional, only required for building sample applications) - * `Python 3.7 - 3.10, 64-bit `_ + * `Microsoft Visual Studio 2019 with MSBuild `_ or `Microsoft Visual Studio 2022 `_ + * `CMake 3.14 or higher, 64-bit `_ (optional, only required for building sample applications) + * `Python 3.7 - 3.10, 64-bit `_ - .. note:: - To install Microsoft Visual Studio 2019, follow the `Microsoft Visual Studio installation guide `_. You can choose to download the Community version. During installation in the **Workloads** tab, choose **Desktop development with C++**. + .. note:: - .. note:: - You can either use `cmake.msi` which is the installation wizard or `cmake.zip` where you have to go into the `bin` folder and then manually add the path to environmental variables. - - .. important:: - When installing Python, make sure you click the option **Add Python 3.x to PATH** to `add Python `_ to your `PATH` environment variable. + To install Microsoft Visual Studio 2019, follow the `Microsoft Visual Studio installation guide `_. You can choose to download the Community version. During installation in the **Workloads** tab, choose **Desktop development with C++**. -@endsphinxdirective + .. note:: -## Installing OpenVINO Runtime + You can either use ``cmake.msi``, which is the installation wizard, or ``cmake.zip``, in which case you have to go into the ``bin`` folder and then manually add the path to the environment variables. -### Step 1: Download and Install OpenVINO Core Components + .. important:: + + When installing Python, make sure you click the option **Add Python 3.x to PATH** to `add Python `_ to your `PATH` environment variable. + + + +Installing OpenVINO Runtime +########################### + +.. _install-openvino-archive-windows: + +Step 1: Download and Install OpenVINO Core Components ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +1. Create an ``Intel`` folder in the ``C:\Program Files (x86)\`` directory. Skip this step if the folder already exists. -1. Create an `Intel` folder in the `C:\Program Files (x86)\` directory. Skip this step if the folder already exists. - You can also do this via command-lines. Open a new command prompt window as administrator by right-clicking **Command Prompt** from the Start menu and select **Run as administrator**, and then run the following command: - ```sh - mkdir "C:\Program Files (x86)\Intel" - ``` - > **NOTE**: `C:\Program Files (x86)\Intel` is the recommended folder. You may also use a different path if desired or if you don't have administrator privileges on your computer. -2. Download the [OpenVINO Runtime archive file for Windows](https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/windows/) to your local `Downloads` folder. - + .. code-block:: sh + + mkdir "C:\Program Files (x86)\Intel" + + + .. note:: + + ``C:\Program Files (x86)\Intel`` is the recommended folder. You may also use a different path if desired or if you don't have administrator privileges on your computer. + + +2. Download the `OpenVINO Runtime archive file for Windows <https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/windows/>`__ to your local ``Downloads`` folder. 
+ + If you prefer using command-lines, run the following commands in the command prompt window you opened: - ```sh - cd /Downloads - curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/windows/w_openvino_toolkit_windows_2022.3.0.9052.9752fafe8eb_x86_64.zip --output openvino_2022.3.0.zip - ``` - > **NOTE**: A `.sha256` file is provided together with the archive file to validate your download process. To do that, download the `.sha256` file from the same repository and run `CertUtil -hashfile openvino_2022.3.0.zip SHA256`. Compare the returned value in the output with what's in the `.sha256` file: if the values are the same, you have downloaded the correct file successfully; if not, create a Support ticket [here](https://www.intel.com/content/www/us/en/support/contact-intel.html). + + .. code-block:: sh + + cd /Downloads + curl -L https://storage.openvinotoolkit.org/repositories/openvino/packages/2022.3/windows/w_openvino_toolkit_windows_2022.3.0.9052.9752fafe8eb_x86_64.zip --output openvino_2022.3.0.zip -3. Use your favorite tool to extract the archive file, rename the extracted folder, and move it to the `C:\Program Files (x86)\Intel` directory. - + .. note:: + + A ``.sha256`` file is provided together with the archive file to validate your download process. To do that, download the ``.sha256`` file from the same repository and run ``CertUtil -hashfile openvino_2022.3.0.zip SHA256``. Compare the returned value in the output with what's in the ``.sha256`` file: if the values are the same, you have downloaded the correct file successfully; if not, create a Support ticket `here <https://www.intel.com/content/www/us/en/support/contact-intel.html>`__. + + +3. Use your favorite tool to extract the archive file, rename the extracted folder, and move it to the ``C:\Program Files (x86)\Intel`` directory. + To do this step using command-lines, run the following commands in the command prompt window you opened: - ```sh - tar -xf openvino_2022.3.0.zip - ren w_openvino_toolkit_windows_2022.3.0.9052.9752fafe8eb_x86_64 openvino_2022.3.0 - move openvino_2022.3.0 "C:\Program Files (x86)\Intel" - ``` + + .. code-block:: sh + + tar -xf openvino_2022.3.0.zip + ren w_openvino_toolkit_windows_2022.3.0.9052.9752fafe8eb_x86_64 openvino_2022.3.0 + move openvino_2022.3.0 "C:\Program Files (x86)\Intel" + 4. For simplicity, it is useful to create a symbolic link. Open a command prompt window as administrator (see Step 1 for how to do this) and run the following commands: - ```sh - cd C:\Program Files (x86)\Intel - mklink /D openvino_2022 openvino_2022.3.0 - ``` - > **NOTE**: If you have already installed a previous release of OpenVINO 2022, a symbolic link to the `openvino_2022` folder may already exist. If you want to override it, nagivate to the `C:\Program Files (x86)\Intel` folder and delete the existing linked folder before running the `mklink` command. + .. code-block:: sh + + cd C:\Program Files (x86)\Intel + mklink /D openvino_2022 openvino_2022.3.0 -Congratulations, you finished the installation! The `C:\Program Files (x86)\Intel\openvino_2022` folder now contains the core components for OpenVINO. If you used a different path in Step 1, you will find the `openvino_2022` folder there. The path to the `openvino_2022` directory is also referred as `` throughout the OpenVINO documentation. -### Step 2: Configure the Environment + .. note:: -You must update several environment variables before you can compile and run OpenVINO™ applications. Open the Command Prompt, and run the `setupvars.bat` batch file to temporarily set your environment variables. 
 If your is not `C:\Program Files (x86)\Intel\openvino_2022`, use the correct directory instead. + If you have already installed a previous release of OpenVINO 2022, a symbolic link to the ``openvino_2022`` folder may already exist. If you want to override it, navigate to the ``C:\Program Files (x86)\Intel`` folder and delete the existing linked folder before running the ``mklink`` command. -```sh -"C:\Program Files (x86)\Intel\openvino_2022\setupvars.bat" -``` -> **Important**: The above command must be re-run every time a new Command Prompt window is opened. +Congratulations, you finished the installation! The ``C:\Program Files (x86)\Intel\openvino_2022`` folder now contains the core components for OpenVINO. If you used a different path in Step 1, you will find the ``openvino_2022`` folder there. The path to the ``openvino_2022`` directory is also referred to as ``<INSTALL_DIR>`` throughout the OpenVINO documentation. + +.. _set-the-environment-variables-windows: + +Step 2: Configure the Environment ++++++++++++++++++++++++++++++++++ + +You must update several environment variables before you can compile and run OpenVINO™ applications. Open the Command Prompt, and run the ``setupvars.bat`` batch file to temporarily set your environment variables. If your ``<INSTALL_DIR>`` is not ``C:\Program Files (x86)\Intel\openvino_2022``, use the correct directory instead. + +.. code-block:: sh + + "C:\Program Files (x86)\Intel\openvino_2022\setupvars.bat" + + +.. important:: + + The above command must be re-run every time a new Command Prompt window is opened. + + +.. note:: + + If you see an error indicating Python is not installed, Python may not be added to the PATH environment variable (as described `here <https://docs.python.org/3/using/windows.html#finding-the-python-executable>`__). Check your system environment variables, and add Python if necessary. -> **NOTE**: If you see an error indicating Python is not installed, Python may not be added to the PATH environment variable (as described [here](https://docs.python.org/3/using/windows.html#finding-the-python-executable)). Check your system environment variables, and add Python if necessary. The environment variables are set. Continue to the next section if you want to download any additional components. -### Step 3 (Optional): Install Additional Components +.. _model-optimizer-windows: + +Step 3 (Optional): Install Additional Components +++++++++++++++++++++++++++++++++++++++++++++++++ OpenVINO Development Tools is a set of utilities for working with OpenVINO and OpenVINO models. It provides tools like Model Optimizer, Benchmark Tool, Post-Training Optimization Tool, and Open Model Zoo Downloader. If you install OpenVINO Runtime using archive files, OpenVINO Development Tools must be installed separately. -See the [Install OpenVINO Development Tools](installing-model-dev-tools.md) page for step-by-step installation instructions. +See the :doc:`Install OpenVINO Development Tools ` page for step-by-step installation instructions. -OpenCV is necessary to run demos from Open Model Zoo (OMZ). Some OpenVINO samples can also extend their capabilities when compiled with OpenCV as a dependency. To install OpenCV for OpenVINO, see the [instructions on GitHub](https://github.com/opencv/opencv/wiki/BuildOpenCV4OpenVINO). +OpenCV is necessary to run demos from Open Model Zoo (OMZ). Some OpenVINO samples can also extend their capabilities when compiled with OpenCV as a dependency. To install OpenCV for OpenVINO, see the `instructions on GitHub <https://github.com/opencv/opencv/wiki/BuildOpenCV4OpenVINO>`__. +.. 
_optional-steps-windows: + +Step 4 (Optional): Configure Inference on non-CPU Devices ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -### Step 4 (Optional): Configure Inference on non-CPU Devices OpenVINO Runtime has a plugin architecture that enables you to run inference on multiple devices without rewriting your code. Supported devices include integrated GPUs, discrete GPUs and GNAs. See the instructions below to set up OpenVINO on these devices. -@sphinxdirective .. tab:: GPU To enable the toolkit components to use processor graphics (GPU) on your system, follow the steps in :ref:`GPU Setup Guide `. @@ -114,69 +160,67 @@ OpenVINO Runtime has a plugin architecture that enables you to run inference on .. tab:: GNA To enable the toolkit components to use Intel® Gaussian & Neural Accelerator (GNA) on your system, follow the steps in :ref:`GNA Setup Guide `. - -@endsphinxdirective -## What's Next? + +.. _get-started-windows: + +What's Next? +#################### + Now that you've installed OpenVINO Runtime, you're ready to run your own machine learning applications! Learn more about how to integrate a model in OpenVINO applications by trying out the following tutorials. -@sphinxdirective + .. tab:: Get started with Python - Try the `Python Quick Start Example `_ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser. - + Try the `Python Quick Start Example `__ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser. + .. image:: https://user-images.githubusercontent.com/15709723/127752390-f6aa371f-31b5-4846-84b9-18dd4f662406.gif :width: 400 Visit the :ref:`Tutorials ` page for more Jupyter Notebooks to get you started with OpenVINO, such as: - - * `OpenVINO Python API Tutorial `_ - * `Basic image classification program with Hello Image Classification `_ - * `Convert a PyTorch model and use it for image background removal `_ + + * `OpenVINO Python API Tutorial `__ + * `Basic image classification program with Hello Image Classification `__ + * `Convert a PyTorch model and use it for image background removal `__ .. tab:: Get started with C++ Try the `C++ Quick Start Example `_ for step-by-step instructions on building and running a basic image classification C++ application. - + .. image:: https://user-images.githubusercontent.com/36741649/127170593-86976dc3-e5e4-40be-b0a6-206379cd7df5.jpg :width: 400 Visit the :ref:`Samples ` page for other C++ example applications to get you started with OpenVINO, such as: - + * `Basic object detection with the Hello Reshape SSD C++ sample `_ * `Automatic speech recognition C++ sample `_ -@endsphinxdirective - -## Uninstalling OpenVINO Runtime -To uninstall OpenVINO, follow the steps on the [Uninstalling page](uninstalling-openvino.md). +.. 
_uninstall-from-windows: + +Uninstalling OpenVINO Runtime +############################# + +To uninstall OpenVINO, follow the steps on the :doc:`Uninstalling page `. -Additional Resources -#################### +Additional Resources +#################### -@sphinxdirective +* `OpenVINO Installation Selector Tool <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>`__ * :ref:`Troubleshooting Guide for OpenVINO Installation & Configuration ` * Converting models for use with OpenVINO™: :ref:`Model Optimizer Developer Guide ` * Writing your own OpenVINO™ applications: :ref:`OpenVINO™ Runtime User Guide ` * Sample applications: :ref:`OpenVINO™ Toolkit Samples Overview ` * Pre-trained deep learning models: :ref:`Overview of OpenVINO™ Toolkit Pre-Trained Models ` -* IoT libraries and code samples in the GitHUB repository: `Intel® IoT Developer Kit`_ - - -.. _Intel® IoT Developer Kit: https://github.com/intel-iot-devkit +---> @endsphinxdirective - -## Additional Resources - -- [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) \ No newline at end of file diff --git a/docs/install_guides/installing-openvino-windows-header.md b/docs/install_guides/installing-openvino-windows-header.md index 2b0489fd384379..f5480a6a350f3a 100644 --- a/docs/install_guides/installing-openvino-windows-header.md +++ b/docs/install_guides/installing-openvino-windows-header.md @@ -10,12 +10,14 @@ From PyPI Using Docker -@endsphinxdirective -If you want to install OpenVINO™ Runtime on Windows, you have the following options: +If you want to install OpenVINO™ Runtime on Windows, you have the following options: + +* :doc:`Install OpenVINO Runtime from an Archive File ` +* :doc:`Install OpenVINO from PyPI ` +* :doc:`Install OpenVINO with Docker ` -* [Install OpenVINO Runtime from an Archive File](installing-openvino-from-archive-windows.md) -* [Install OpenVINO from PyPI](installing-openvino-pip.md) -* [Install OpenVINO with Docker](installing-openvino-docker-windows.md) +For a full selection of distribution channels, +see the `OpenVINO Installation Selector Tool <https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html>`__ -For a full selection of distribution channels, see the [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) +@endsphinxdirective From 5fa95ff19d815af8cf6f2dbea7ace34865485cf6 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Thu, 23 Mar 2023 10:12:13 +0100 Subject: [PATCH 051/296] DOCS shift to rst - Protecting Deep Learning Model (#16474) --- .../deployment_guide_introduction.md | 8 +- .../deployment/deployment_intro.md | 76 +++++++++------ .../images}/deployment_simplified.svg | 0 .../range_supervision}/img_combined_2.png | 0 .../images/range_supervision}/scheme3.svg | 0 .../quantization/range_supervision/README.md | 95 +++++++++++-------- 6 files changed, 106 insertions(+), 73 deletions(-) rename docs/{img => _static/images}/deployment_simplified.svg (100%) rename {tools/pot/docs/range_supervision/images => docs/_static/images/range_supervision}/img_combined_2.png (100%) rename {tools/pot/docs/range_supervision/images => docs/_static/images/range_supervision}/scheme3.svg (100%) diff --git a/docs/Documentation/deployment_guide_introduction.md b/docs/Documentation/deployment_guide_introduction.md index 4e3162d6de5383..6496a3cf494250 100644 --- a/docs/Documentation/deployment_guide_introduction.md +++ b/docs/Documentation/deployment_guide_introduction.md @@ -9,12 +9,9 @@ Run and Deploy Locally Deploy via Model Serving -@endsphinxdirective - Once you have a 
model that meets both OpenVINO™ and your requirements, you can choose how to deploy it with your application. -@sphinxdirective .. panels:: :doc:`Deploy via OpenVINO Runtime ` ^^^^^^^^^^^^ Local deployment uses OpenVINO Runtime that is called from, and linked to, the application directly. It utilizes resources available to the system and provides the quickest way of launching inference. :doc:`Deploy via Model Serving ` ^^^^^^^^^^^^ Deployment via OpenVINO Model Server allows the application to connect to the inference server set up remotely. This way inference can use external resources instead of those available to the application itself. -@endsphinxdirective - +Apart from the default deployment options, you may also :doc:`deploy your application for the TensorFlow framework with OpenVINO Integration `. -Apart from the default deployment options, you may also [deploy your application for the TensorFlow framework with OpenVINO Integration](./openvino_ecosystem_ovtf.md). +@endsphinxdirective \ No newline at end of file diff --git a/docs/OV_Runtime_UG/deployment/deployment_intro.md b/docs/OV_Runtime_UG/deployment/deployment_intro.md index df629a51e97574..fc9f4581c3792d 100644 --- a/docs/OV_Runtime_UG/deployment/deployment_intro.md +++ b/docs/OV_Runtime_UG/deployment/deployment_intro.md @@ -11,47 +11,69 @@ Deploy Application with Deployment Manager Local Distribution Libraries -@endsphinxdirective -> **NOTE**: Note that [running inference in OpenVINO Runtime](../openvino_intro.md) is the most basic form of deployment. Before moving forward, make sure you know how to create a proper Inference configuration and [develop your application properly](../integrate_with_your_application.md) +.. note:: + Note that :doc:`running inference in OpenVINO Runtime ` is the most basic form of deployment. Before moving forward, make sure you know how to create a proper Inference configuration and :doc:`develop your application properly `. -## Local Deployment Options +Local Deployment Options +######################## - Set a dependency on the existing prebuilt packages, also called "centralized distribution": - - using Debian / RPM packages - a recommended way for Linux operating systems; - - using PIP package manager on PyPI - the default approach for Python-based applications; - - using Docker images - if the application should be deployed as a Docker image, use a pre-built OpenVINO™ Runtime Docker image as a base image in the Dockerfile for the application container image. For more information about OpenVINO Docker images, refer to [Installing OpenVINO on Linux from Docker](../../install_guides/installing-openvino-docker-linux.md) and [Installing OpenVINO on Windows from Docker](../../install_guides/installing-openvino-docker-windows.md). -Furthermore, to customize your OpenVINO Docker image, use the [Docker CI Framework](https://github.com/openvinotoolkit/docker_ci) to generate a Dockerfile and built the image. + + - using Debian / RPM packages - a recommended way for Linux operating systems; + - using PIP package manager on PyPI - the default approach for Python-based applications; + - using Docker images - if the application should be deployed as a Docker image, use a pre-built OpenVINO™ Runtime Docker image as a base image in the Dockerfile for the application container image. For more information about OpenVINO Docker images, refer to :doc:`Installing OpenVINO on Linux from Docker ` and :doc:`Installing OpenVINO on Windows from Docker `. + +Furthermore, to customize your OpenVINO Docker image, use the `Docker CI Framework <https://github.com/openvinotoolkit/docker_ci>`__ to generate a Dockerfile and build the image. 
+ + - Grab the necessary functionality of OpenVINO together with your application, also called "local distribution": - - using [OpenVINO Deployment Manager](deployment-manager-tool.md) - providing a convenient way for creating a distribution package; - - using the advanced [local distribution](local-distribution.md) approach; - - using [a static version of OpenVINO Runtime linked to the final app](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/static_libaries.md). + + - using :doc:`OpenVINO Deployment Manager ` - providing a convenient way for creating a distribution package; + - using the advanced :doc:`local distribution ` approach; + - using `a static version of OpenVINO Runtime linked to the final app <https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/static_libaries.md>`__. The table below shows which distribution type can be used for what target operating system: -| Distribution type | Operating systems | -|------- ---------- | ----------------- | -| Debian packages | Ubuntu 18.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit | -| RMP packages | Red Hat Enterprise Linux 8, 64-bit | -| Docker images | Ubuntu 18.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit; Red Hat Enterprise Linux 8, 64-bit; Windows Server Core base LTSC 2019, 64-bit; Windows 10, version 20H2, 64-bit | -| PyPI (PIP package manager) | See [https://pypi.org/project/openvino/](https://pypi.org/project/openvino/) | -| [OpenVINO Deployment Manager](deployment-manager-tool.md) | All operating systems | -| [Local distribution](local-distribution.md) | All operating systems | -| [Build OpenVINO statically and link to the final app](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/static_libaries.md) | All operating systems | +.. list-table:: + :header-rows: 1 + + * - Distribution type + - Operating systems + * - Debian packages + - Ubuntu 18.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit + * - RPM packages + - Red Hat Enterprise Linux 8, 64-bit + * - Docker images + - Ubuntu 18.04 long-term support (LTS), 64-bit; Ubuntu 20.04 long-term support (LTS), 64-bit; Red Hat Enterprise Linux 8, 64-bit; Windows Server Core base LTSC 2019, 64-bit; Windows 10, version 20H2, 64-bit + * - PyPI (PIP package manager) + - See https://pypi.org/project/openvino/ + * - :doc:`OpenVINO Deployment Manager ` + - All operating systems + * - :doc:`Libraries for Local Distribution ` + - All operating systems + * - `Build OpenVINO statically and link to the final app <https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/static_libaries.md>`__ + - All operating systems -## Granularity of Major Distribution Types -The granularity of OpenVINO packages may vary for different distribution types. For example, the PyPI distribution of OpenVINO has a [single 'openvino' package](https://pypi.org/project/openvino/) that contains all the runtime libraries and plugins, while a [local distribution](local-distribution.md) is a more configurable type providing higher granularity. Below are important details of the set of libraries included in the OpenVINO Runtime package: +Granularity of Major Distribution Types +####################################### + +The granularity of OpenVINO packages may vary for different distribution types. For example, the PyPI distribution of OpenVINO has a `single 'openvino' package <https://pypi.org/project/openvino/>`__ that contains all the runtime libraries and plugins, while a :doc:`local distribution ` is a more configurable type providing higher granularity. 
 Below are important details of the set of libraries included in the OpenVINO Runtime package: -![](../../img/deployment_simplified.svg) +.. image:: _static/images/deployment_simplified.svg + + +- The main library ``openvino`` is used by users' C++ applications to link against. The library provides all OpenVINO Runtime public APIs, including both API 2.0 and the previous Inference Engine and nGraph APIs. For C language applications, ``openvino_c`` is additionally required for distribution. +- The "optional" plugin libraries like ``openvino_intel_cpu_plugin`` (matching the ``openvino_.+_plugin`` pattern) are used to provide inference capabilities on specific devices or additional capabilities like :doc:`Hetero Execution ` and :doc:`Multi-Device Execution `. +- The "optional" plugin libraries like ``openvino_ir_frontend`` (matching ``openvino_.+_frontend``) are used to provide capabilities to read models of different file formats such as OpenVINO IR, TensorFlow, ONNX, and PaddlePaddle. Here the term "optional" means that if the application does not use the capability enabled by the plugin, the plugin library or a package with the plugin is not needed in the final distribution. Building a local distribution will require more detailed information, and you will find it in the dedicated :doc:`Libraries for Local Distribution ` article. +.. note:: + + Depending on your target OpenVINO devices, the following configurations might be needed for deployed machines: :doc:`Configurations for GPU `, :doc:`Configurations for GNA `. -> **NOTE**: Depending on your target OpenVINO devices, the following configurations might be needed for deployed machines: [Configurations for GPU](../../install_guides/configurations-for-intel-gpu.md), [Configurations for GNA](../../install_guides/configurations-for-intel-gna.md). 
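
As an illustration of this granularity, the following minimal C++ sketch (an added example for illustration only, not part of the original guide; the model path is a placeholder) links only against the main ``openvino`` library, while reading an OpenVINO IR model uses ``openvino_ir_frontend`` and compiling for "CPU" uses ``openvino_intel_cpu_plugin`` at runtime:

.. code-block:: cpp

    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        // Reading an IR file pulls in the openvino_ir_frontend library at runtime.
        auto model = core.read_model("model.xml");
        // Targeting "CPU" pulls in the openvino_intel_cpu_plugin library at runtime.
        auto compiled_model = core.compile_model(model, "CPU");
        return 0;
    }
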
+ +@endsphinxdirective \ No newline at end of file diff --git a/docs/img/deployment_simplified.svg b/docs/_static/images/deployment_simplified.svg similarity index 100% rename from docs/img/deployment_simplified.svg rename to docs/_static/images/deployment_simplified.svg diff --git a/tools/pot/docs/range_supervision/images/img_combined_2.png b/docs/_static/images/range_supervision/img_combined_2.png similarity index 100% rename from tools/pot/docs/range_supervision/images/img_combined_2.png rename to docs/_static/images/range_supervision/img_combined_2.png diff --git a/tools/pot/docs/range_supervision/images/scheme3.svg b/docs/_static/images/range_supervision/scheme3.svg similarity index 100% rename from tools/pot/docs/range_supervision/images/scheme3.svg rename to docs/_static/images/range_supervision/scheme3.svg diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/range_supervision/README.md b/tools/pot/openvino/tools/pot/algorithms/quantization/range_supervision/README.md index 0385c31e4aba25..de7b16950544f8 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/range_supervision/README.md +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/range_supervision/README.md @@ -1,70 +1,85 @@ # Experimental: Protecting Deep Learning Model through Range Supervision ("RangeSupervision") {#pot_ranger_README} -## Introduction +@sphinxdirective + +Introduction +#################### Deep neural networks find applications in many scenarios where the prediction is a critical component for safety-relevant decisions. Such workloads can benefit from additional protection against underlying errors. For example, memory bit flips (**"soft errors"** originating, e.g., from external radiation or internal electrical disturbances within the circuitry) in the platform hosting the network inference can corrupt the learned network parameters and lead to incorrect predictions. Typically, errors resulting in very large parameter values have a more drastic impact on the network behavior. **The range supervision algorithm ("RangeSupervision") described here establishes and inserts additional protection layers after already present activation layers**. Those layers truncate values that are found to be out of an expected activation range in order to mitigate the traces of potential platform errors. They do so during inference by applying a *clamp* operation to any activation *x* in the input to the RangeSupervision layer, - \f[ - x = clamp(x ; T_{low}, T_{up}) = min(max(x, T_{low}), T_{high}) - \f] - where \f$T_{low}\f$ and \f$T_{up}\f$ are the lower and upper bounds for the particular protection layer, respectively. -The process flow follows the diagram [Fig 1](#schematic-supervision). Starting from the internal representation (IR) of an OpenVINO model, the POT RangeSupervision algorithm is called to **add protection layers into the model graph**. This step requires **appropriate threshold values that are automatically extracted from a specified test dataset**. The result is an IR representation of the model with additional "RangeSupervision" layers after each supported activation layer. The original and the modified model can be called in the same way through the OpenVINO inference engine to evaluate the impact on accuracy, performance, and dependability in the presence of potential soft errors (for example using the *benchmark_app* and *accuracy_checker* functions). 
**The algorithm is designed to provide efficient protection at negligible performance overhead or accuracy impact in the absence of faults.** Bound extraction is a one-time effort and the protected IR model returned by the RangeSupervision algorithm can be used independently from there on. No changes in the learned parameters of the network are needed. + +.. math:: + + x = clamp(x ; T_{low}, T_{up}) = min(max(x, T_{low}), T_{up}) + +where :math:`T_{low}` and :math:`T_{up}` are the lower and upper bounds for the particular protection layer, respectively. +The process flow follows the diagram :ref:`Fig 1 <schematic-supervision>`. Starting from the internal representation (IR) of an OpenVINO model, the POT RangeSupervision algorithm is called to **add protection layers into the model graph**. This step requires **appropriate threshold values that are automatically extracted from a specified test dataset**. The result is an IR representation of the model with additional "RangeSupervision" layers after each supported activation layer. The original and the modified model can be called in the same way through the OpenVINO inference engine to evaluate the impact on accuracy, performance, and dependability in the presence of potential soft errors (for example using the *benchmark_app* and *accuracy_checker* functions). **The algorithm is designed to provide efficient protection at negligible performance overhead or accuracy impact in the absence of faults.** Bound extraction is a one-time effort and the protected IR model returned by the RangeSupervision algorithm can be used independently from there on. No changes in the learned parameters of the network are needed.

.. _schematic-supervision:

.. image:: _static/images/range_supervision/scheme3.svg
 :alt: Schematic

*Fig 1: Schematic of RangeSupervision process flow.*

Supported activation layers
+++++++++++++++++++++++++++

The following activation layers are currently supported for range supervision:

- ``ReLU``
- ``Swish``
- ``PReLU``
- ``Elu``
- ``Gelu``
- ``Sigmoid``
- ``Tanh``

This means that any activation layer of one of the above types that the model under consideration contains will be protected with an appropriate subsequent RangeSupervision layer.

Usage
####################

RangeSupervision protection can be used the same way as the :doc:`DefaultQuantization <pot_default_quantization_usage>` method.

Algorithm configuration
+++++++++++++++++++++++

The algorithm has a minimal configuration. Below is an example of such a configuration:

.. code-block:: json

   {
   "name": "RangeSupervision",
   "params": {
   "stat_subset_size": 300,
   "stat_batch_size": 1
   }
   }

The protected model will be saved in IR format in a new folder ``./results/\_RangeSupervision/...``. 
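
For reference, the protection each inserted layer applies is the elementwise clamp from the introduction. The following minimal C++ sketch (an illustration only, not part of POT itself; ``t_low`` and ``t_up`` stand for the bounds extracted from the calibration subset) shows the operation; the mandatory parameters of the configuration above follow.

.. code-block:: cpp

    #include <algorithm>

    // Clamp a single activation value into the expected range [t_low, t_up].
    float range_supervise(float x, float t_low, float t_up) {
        return std::min(std::max(x, t_low), t_up);
    }
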
Mandatory parameters: + +- ``"stat_subset_size"``: This parameter defines *how many images* of the specified dataset in "engine: config" are used to extract the bounds (images are randomly chosen if a subset is chosen). This value is set to **300** by default. The more images are selected for the bound generation, the more accurate the estimation of an out-of-bound event will be, at the cost of increasing extraction time. + +Example of RangeSupervision results +################################### + +The following example shows a traffic camera image and predicted objects using a YOLOv3 model pre-trained on the COCO dataset. A single weight fault was injected in a randomly chosen convolution layer of YOLO, flipping the most significant bit of the selected network parameter. If range supervision is applied, the original network performance is recovered despite the presence of the fault. + +.. image:: _static/images/range_supervision/img_combined_2.png + +*Fig 2: Example of fault mitigation via range supervision.* + +Additional Resources +#################### + +- Z. Chen, G. Li, and K. Pattabiraman, "A Low-cost Fault Corrector for Deep Neural Networks through Range Restriction", 2020. https://arxiv.org/abs/2003.13874 +- F. Geissler, Q. Syed, S. Roychowdhury, A. Asgari, Y. Peng, A. Dhamasia, R. Graefe, K. Pattabiraman, and M. Paulitsch, "Towards a Safety Case for Hardware Fault Tolerance in Convolutional Neural Networks Using Activation Range Supervision", 2021. 
https://arxiv.org/abs/2108.07019 From 087b10ff00f5f75b75acf842cc2a2e376212a6de Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Thu, 23 Mar 2023 09:16:04 +0000 Subject: [PATCH 052/296] Snippets: precision propagation (#14996) --- src/bindings/python/tests/__init__.py | 1 - .../python/tests/test_onnx/test_backend.py | 5 - .../python/tests_compatibility/__init__.py | 1 - .../test_onnx/test_backend.py | 5 - .../snippets/include/snippets/generator.hpp | 15 +- .../snippets/include/snippets/op/subgraph.hpp | 16 +- .../snippets/pass/align_element_type.hpp | 46 --- .../snippets/pass/fq_decomposition.hpp | 5 +- .../snippets/pass/propagate_precision.hpp | 48 +++ src/common/snippets/src/op/subgraph.cpp | 80 +++-- .../snippets/src/pass/align_element_type.cpp | 99 ------ .../snippets/src/pass/collapse_subgraph.cpp | 6 +- .../snippets/src/pass/fq_decomposition.cpp | 12 - .../snippets/src/pass/propagate_precision.cpp | 293 +++++++++++++++++ .../snippets/tests/include/lowering_utils.hpp | 6 +- .../include/pass/precision_propagation.hpp | 54 ++++ .../snippets/tests/src/lowering_utils.cpp | 14 +- .../tests/src/pass/precision_propagation.cpp | 294 ++++++++++++++++++ .../precision_propagation_convert_test.cpp | 153 +++++++++ .../precision_propagation_get_precisions.cpp | 45 +++ src/core/src/pass/visualize_tree.cpp | 4 +- .../intel_cpu/src/emitters/cpu_generator.cpp | 10 +- .../src/emitters/jit_dnnl_emitters.cpp | 4 + .../src/emitters/jit_dnnl_emitters.hpp | 2 + .../src/emitters/jit_eltwise_emitters.cpp | 204 +++++++++--- .../src/emitters/jit_eltwise_emitters.hpp | 66 ++-- .../intel_cpu/src/emitters/jit_emitter.cpp | 6 +- .../intel_cpu/src/emitters/jit_emitter.hpp | 8 +- .../src/emitters/jit_snippets_emitters.cpp | 15 +- .../src/emitters/jit_snippets_emitters.hpp | 9 + src/plugins/intel_cpu/src/nodes/eltwise.cpp | 61 ++-- src/plugins/intel_cpu/src/nodes/subgraph.cpp | 29 +- .../remove_converts.cpp | 38 +++ .../remove_converts.hpp | 27 ++ .../snippets/check_broadcast.cpp | 81 +++++ .../precision_propagation_convertion.cpp | 37 +++ .../ngraph_transformations/mul_add_to_fma.cpp | 2 +- .../include/snippets/check_broadcast.hpp | 38 +++ .../precision_propagation_convertion.hpp | 33 ++ .../fuse_fake_quantize_transformation.cpp | 2 +- .../shared/src/snippets/check_broadcast.cpp | 89 ++++++ .../plugin/shared/src/snippets/convert.cpp | 4 +- .../precision_propagation_convertion.cpp | 48 +++ ...cision_propagation_convertion_function.hpp | 49 +++ .../precision_propagation_function.hpp | 131 ++++++++ .../include/snippets_helpers.hpp | 1 + ...cision_propagation_convertion_function.cpp | 92 ++++++ .../src/precision_propagation_function.cpp | 105 +++++++ 48 files changed, 2066 insertions(+), 327 deletions(-) delete mode 100644 src/common/snippets/include/snippets/pass/align_element_type.hpp create mode 100644 src/common/snippets/include/snippets/pass/propagate_precision.hpp delete mode 100644 src/common/snippets/src/pass/align_element_type.cpp create mode 100644 src/common/snippets/src/pass/propagate_precision.cpp create mode 100644 src/common/snippets/tests/include/pass/precision_propagation.hpp create mode 100644 src/common/snippets/tests/src/pass/precision_propagation.cpp create mode 100644 src/common/snippets/tests/src/pass/precision_propagation_convert_test.cpp create mode 100644 src/common/snippets/tests/src/pass/precision_propagation_get_precisions.cpp create mode 100644 src/plugins/intel_cpu/src/snippets_transformations/remove_converts.cpp create mode 100644 
src/plugins/intel_cpu/src/snippets_transformations/remove_converts.hpp create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/check_broadcast.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/precision_propagation_convertion.cpp create mode 100644 src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp create mode 100644 src/tests/functional/plugin/shared/include/snippets/precision_propagation_convertion.hpp create mode 100644 src/tests/functional/plugin/shared/src/snippets/check_broadcast.cpp create mode 100644 src/tests/functional/plugin/shared/src/snippets/precision_propagation_convertion.cpp create mode 100644 src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_convertion_function.hpp create mode 100644 src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_function.hpp create mode 100644 src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_convertion_function.cpp create mode 100644 src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_function.cpp diff --git a/src/bindings/python/tests/__init__.py b/src/bindings/python/tests/__init__.py index 06d8dfb043480f..a426ce8424ec71 100644 --- a/src/bindings/python/tests/__init__.py +++ b/src/bindings/python/tests/__init__.py @@ -117,7 +117,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_63033 = xfail_test(reason="BatchNormalization: Training mode is not supported") xfail_issue_63036 = xfail_test(reason="Changes in ConvTranspose padding") -xfail_issue_63039 = xfail_test(reason="Result mismatches with UINT8 operations") xfail_issue_63043 = xfail_test(reason="Recurrent node expects constants as W, R, B inputs.") skip_rng_tests = pytest.mark.skip(reason="Tests use random number generator with no seed.") diff --git a/src/bindings/python/tests/test_onnx/test_backend.py b/src/bindings/python/tests/test_onnx/test_backend.py index c681f376348142..dc30a9bda3806b 100644 --- a/src/bindings/python/tests/test_onnx/test_backend.py +++ b/src/bindings/python/tests/test_onnx/test_backend.py @@ -37,7 +37,6 @@ xfail_issue_58033, xfail_issue_63033, xfail_issue_63036, - xfail_issue_63039, xfail_issue_63043, xfail_issue_63137, xfail_issue_63138, @@ -278,10 +277,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_batchnorm_example_training_mode_cpu", ), (xfail_issue_63036, "OnnxBackendNodeModelTest.test_convtranspose_autopad_same_cpu"), - ( - xfail_issue_63039, - "OnnxBackendNodeModelTest.test_div_uint8_cpu", - ), ( xfail_issue_63043, "OnnxBackendNodeModelTest.test_gru_batchwise_cpu", diff --git a/src/bindings/python/tests_compatibility/__init__.py b/src/bindings/python/tests_compatibility/__init__.py index 7b5d7217cd8ed1..24d2050a3a9d77 100644 --- a/src/bindings/python/tests_compatibility/__init__.py +++ b/src/bindings/python/tests_compatibility/__init__.py @@ -122,7 +122,6 @@ def xfail_test(reason="Mark the test as expected to fail", strict=True): xfail_issue_63033 = xfail_test(reason="BatchNormalization: Training mode is not supported") xfail_issue_63036 = xfail_test(reason="Changes in ConvTranspose padding") -xfail_issue_63039 = xfail_test(reason="Result mismatches with UINT8 operations") xfail_issue_63043 = xfail_test(reason="Recurrent node expects constants as W, R, B inputs.") skip_rng_tests = pytest.mark.skip(reason="Tests use random number generator with no seed.") diff --git 
a/src/bindings/python/tests_compatibility/test_onnx/test_backend.py b/src/bindings/python/tests_compatibility/test_onnx/test_backend.py index 89b7afcb47e4af..53ec35731cbc5f 100644 --- a/src/bindings/python/tests_compatibility/test_onnx/test_backend.py +++ b/src/bindings/python/tests_compatibility/test_onnx/test_backend.py @@ -37,7 +37,6 @@ xfail_issue_58033, xfail_issue_63033, xfail_issue_63036, - xfail_issue_63039, xfail_issue_63043, xfail_issue_63137, xfail_issue_63138, @@ -282,10 +281,6 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None "OnnxBackendNodeModelTest.test_batchnorm_example_training_mode_cpu", ), (xfail_issue_63036, "OnnxBackendNodeModelTest.test_convtranspose_autopad_same_cpu"), - ( - xfail_issue_63039, - "OnnxBackendNodeModelTest.test_div_uint8_cpu", - ), ( xfail_issue_63043, "OnnxBackendNodeModelTest.test_gru_batchwise_cpu", diff --git a/src/common/snippets/include/snippets/generator.hpp b/src/common/snippets/include/snippets/generator.hpp index ab3156a108e3e1..939b4f4d43c33d 100644 --- a/src/common/snippets/include/snippets/generator.hpp +++ b/src/common/snippets/include/snippets/generator.hpp @@ -16,6 +16,8 @@ namespace snippets { auto getRegisters(std::shared_ptr& n) -> ngraph::snippets::RegInfo; +typedef std::pair(const std::shared_ptr&)>, + std::function>(const std::shared_ptr&)>> jitters_value; /** * @interface TargetMachine * @brief Base class Target machine representation. Target derives from this class to provide generator information about supported emitters @@ -51,7 +53,16 @@ class TargetMachine { if (jitter == jitters.end()) { throw ngraph_error(std::string("Target code emitter is not available for ") + type.name + " operation."); } - return jitter->second; + return jitter->second.first; + } + + std::function>(const std::shared_ptr&)> + get_supported_precisions(const ngraph::DiscreteTypeInfo type) const { + auto jitter = jitters.find(type); + if (jitter == jitters.end()) { + throw ngraph_error(std::string("Target code emitter is not available for ") + type.name + " operation."); + } + return jitter->second.second; } /** @@ -64,7 +75,7 @@ class TargetMachine { virtual ~TargetMachine() = default; protected: - std::map(std::shared_ptr)>> jitters; + std::map jitters; }; /** diff --git a/src/common/snippets/include/snippets/op/subgraph.hpp b/src/common/snippets/include/snippets/op/subgraph.hpp index ec55f076301c64..46e6633f61b8aa 100644 --- a/src/common/snippets/include/snippets/op/subgraph.hpp +++ b/src/common/snippets/include/snippets/op/subgraph.hpp @@ -101,11 +101,17 @@ class Subgraph : public ov::op::util::SubGraphOp { bool is_quantized() const { return config.m_is_quantized; } bool has_type_relaxed_ops() const { return config.m_has_type_relaxed_ops; } bool has_domain_sensitive_ops() const { return config.m_has_domain_sensitive_ops; } - - snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, ngraph::pass::Manager& opt, + snippets::Schedule generate(const BlockedShapeVector& output_shapes, + const BlockedShapeVector& input_shapes, + ngraph::pass::Manager& pre_dialect, + ngraph::pass::Manager& post_dialect, + ngraph::pass::Manager& post_precision, const void* compile_params = nullptr); snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, const void* compile_params = nullptr); - snippets::Schedule generate(ngraph::pass::Manager &opt, const void* compile_params = nullptr); + snippets::Schedule generate(ngraph::pass::Manager& pre_dialect, + 
ngraph::pass::Manager& post_dialect, + ngraph::pass::Manager& post_precision, + const void* compile_params = nullptr); snippets::Schedule generate(const void* compile_params = nullptr); ov::PartialShape canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes); std::vector reshape_body(const std::vector& input_shapes); @@ -132,6 +138,8 @@ class Subgraph : public ov::op::util::SubGraphOp { // This check returns True if Constant op which is input of this op should be inside Subgraph body static auto constant_input_should_be_inside_body(const std::shared_ptr& node) -> bool; + static bool check_broadcast(const std::shared_ptr& node) noexcept; + private: void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes); void convert_to_snippet_dialect(); @@ -164,8 +172,6 @@ class Subgraph : public ov::op::util::SubGraphOp { public: // True if Subgraph contains FakeQuantize -> FQ decomposition should be called bool m_is_quantized = false; - // True if we should align element types indise body - bool m_is_needed_to_align_precision = false; // True if Subgraph contains TypeRelaxed nodes -> for several streams in tp mode we should copy body using mutexes // because TypeRelaxed::copy_with_new_inputs() isn't save-thread method bool m_has_type_relaxed_ops = false; diff --git a/src/common/snippets/include/snippets/pass/align_element_type.hpp b/src/common/snippets/include/snippets/pass/align_element_type.hpp deleted file mode 100644 index 0b1f831091c4cc..00000000000000 --- a/src/common/snippets/include/snippets/pass/align_element_type.hpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -namespace ngraph { -namespace snippets { -namespace pass { - -/** - * @interface AlignElementType - * @brief Wrap sequence of operations which doesn't support execution on original element type by ConvertSaturation - * and reset element type for type relaxed nodes inside body to align element type between nodes. - * Example 1: - * - After FQ decomposition there may be Convert[U8/I8]. If after the Convert there are other operations - * that don't support U8/I8, new ConvertSaturation[exec_type] will be inserted after the FQ decomposition - * to execute these operations on supported element type - * Example 2: - * - Input[I8] -> Unsupported I8 op -> Movement op -> Output[I8]. There will be inserted two ConvertSaturation: - * * ConvertSatiration[exec_type] before op which is unsupported I8 - * * ConvertSaturation[I8] before Movement op to return original low precision. 
- * Note: We cannot just remove original Convert[I8/U8] in Example 1 because we should cover two things: - * * allow execution of operations on supported element type for them - * * keep computations mathematically equivalent to the original function - * Thus, for these cases we should have the following pipeline: FP32 -> Convert[I8/U8] -> Convert[FP32] -> FP32 - * Note: We shouldn't call validate_and_infer_type() after Convert insertions to avoid element type conflicts on inputs of ops - * @ingroup snippets - */ -class AlignElementType: public ngraph::pass::FunctionPass { -public: - OPENVINO_RTTI("AlignElementType", "0"); - AlignElementType(const ov::element::Type exec_type = ov::element::f32); - bool run_on_model(const std::shared_ptr& m) override; - - static bool opNeedsAlignElementType(const std::shared_ptr& n, const ov::element::Type exec_type = ov::element::f32); -private: - ov::element::Type exec_type; -}; - -} // namespace pass -} // namespace snippets -} // namespace ngraph diff --git a/src/common/snippets/include/snippets/pass/fq_decomposition.hpp b/src/common/snippets/include/snippets/pass/fq_decomposition.hpp index 284640d8c18122..cfb9ff41955867 100644 --- a/src/common/snippets/include/snippets/pass/fq_decomposition.hpp +++ b/src/common/snippets/include/snippets/pass/fq_decomposition.hpp @@ -29,7 +29,7 @@ namespace pass { * * Expand brackets: * round(x * (levels-1) / (ih - il) - il * (levels-1) / (ih - il)) * (oh - ol) / (levels-1) + ol - * + * * Marking: * - isc := (levels-1) / (ih - il) * - ish := -il * isc @@ -37,7 +37,7 @@ namespace pass { * - osh := ol * Final expression: * round(x * isc + ish) * osc + osh - * + * * Some optimizations (example for scalars): * 1. If output element type of FQ is U8 and il = 0, ish = 0, osc = 1, osh = 0, there is enough expression: x * isc * 2. If output element type of FQ is I8 and ish ~= 128, osc = 1, osh ~= -128, il * isc ~= -128, ih * isc ~= 127 there is enough expression: x * isc @@ -54,7 +54,6 @@ class FakeQuantizeDecomposition : public ngraph::pass::MatcherPass { public: FakeQuantizeDecomposition(); - static bool isAllScalarConstant(const std::shared_ptr& node); static bool getScalesAndShifts(const std::shared_ptr& fq_node, std::vector& cl, std::vector& ch, diff --git a/src/common/snippets/include/snippets/pass/propagate_precision.hpp b/src/common/snippets/include/snippets/pass/propagate_precision.hpp new file mode 100644 index 00000000000000..d0920766f632fd --- /dev/null +++ b/src/common/snippets/include/snippets/pass/propagate_precision.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "snippets/generator.hpp" + +namespace ngraph { +namespace snippets { +namespace pass { + +/** + * @class PropagatePrecision + * @ingroup snippets + * @brief PropagatePrecision transformation propagates precision from parameters to results.
+ */ +class PropagatePrecision: public ngraph::pass::FunctionPass { +public: + OPENVINO_RTTI("PropagatePrecision", "0"); + PropagatePrecision(const std::shared_ptr& target_machine); + bool run_on_model(const std::shared_ptr& m) override; + + static std::vector get_precisions( + const std::vector& input_precisions, + const std::set>& supported_precisions) noexcept; + + // if can_be_removed returns true then the actual conversion (actual_before => actual_after) + // can be replaced with the required one (actual_before => required_after) + static bool can_be_removed( + const element::Type& actual_before, + const element::Type& actual_after, + const element::Type& required_after) noexcept; + + // if can_be_fused returns true then the actual conversion can be replaced with the required one + static bool can_be_fused( + const element::Type& actual, + const element::Type& required) noexcept; + +private: + const std::shared_ptr target_machine; +}; + +} // namespace pass +} // namespace snippets +} // namespace ngraph diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index 07f13ae8defb57..20b6edb17b9d14 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -11,6 +11,7 @@ #include "snippets/pass/insert_movebroadcast.hpp" #include "snippets/pass/broadcast_to_movebroadcast.hpp" #include "snippets/pass/load_movebroadcast_to_broadcastload.hpp" +#include "snippets/pass/propagate_precision.hpp" #include "snippets/pass/assign_registers.hpp" #include "snippets/pass/convert_constants.hpp" #include "snippets/pass/convert_power_to_powerstatic.hpp" @@ -18,7 +19,6 @@ #include "snippets/pass/insert_loops.hpp" #include "snippets/pass/transpose_decomposition.hpp" #include "snippets/pass/transform_convert.hpp" -#include "snippets/pass/align_element_type.hpp" #include "snippets/pass/matmul_to_brgemm.hpp" #include "snippets/pass/fuse_transpose_brgemm.hpp" #include "snippets/pass/softmax_decomposition.hpp" @@ -62,10 +62,6 @@ void snippets::op::Subgraph::init_config() { ov::is_type(op); config.m_has_type_relaxed_ops = config.m_has_type_relaxed_ops || std::dynamic_pointer_cast(op); - config.m_is_needed_to_align_precision = config.m_is_needed_to_align_precision || - is_quantized() || - has_type_relaxed_ops() || - snippets::pass::AlignElementType::opNeedsAlignElementType(op, execution_element_type); config.m_has_domain_sensitive_ops = config.m_has_domain_sensitive_ops || ov::is_type(op) || ov::is_type(op) || @@ -359,6 +355,14 @@ ov::PartialShape snippets::op::Subgraph::canonicalize(const BlockedShapeVector& return master_shape; } +bool snippets::op::Subgraph::check_broadcast(const std::shared_ptr& node) noexcept { + const auto elementwise = std::dynamic_pointer_cast(node); + return + (elementwise == nullptr) || + (elementwise->get_input_partial_shape(0).size() == elementwise->get_input_partial_shape(1).size()) || + (elementwise->get_autob().m_type != ov::op::AutoBroadcastType::PDPD); +} + void snippets::op::Subgraph::align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes) { // We should insert Convert before Results to set original output element type if needed @@ -369,35 +373,34 @@ void snippets::op::Subgraph::align_element_types(const BlockedShapeVector& outpu const auto convert = std::make_shared( body_results[i]->get_input_node_shared_ptr(0), needed_out_type); body_results[i]->set_argument(0, convert); + body_results[i]->validate_and_infer_types(); } } // We should change existing element type to original for
Parameters if needed - const auto& body_parameters = body_ptr()->get_parameters(); + const auto& parameters = body_ptr()->get_parameters(); for (size_t i = 0; i < inputShapes.size(); ++i) { const auto needed_in_type = std::get<2>(inputShapes[i]); - if (body_parameters[i]->get_element_type() != needed_in_type) { - body_parameters[i]->set_element_type(needed_in_type); - config.m_is_needed_to_align_precision = true; - } - } + const auto& parameter = parameters[i]; + if (parameter->get_element_type() != needed_in_type) { + const auto parameter_output = parameter->output(0); + const auto convert = std::make_shared( + parameter_output, + parameter_output.get_element_type()); + ngraph::copy_runtime_info(parameter, convert); + + for (const auto input : parameter_output.get_target_inputs()) { + const auto& input_node = input.get_node(); + if (input_node == convert.get()) { + continue; + } + input_node->set_argument(input.get_index(), convert->output(0)); + } - // We should align element type inside body using the corresponding pass: - // - Insert Convert before operations that doesn't support original element type for execution - // - Insert reverse Convert before operations that support original element type - // but have inputs that doesn't support it (because before them will be inserted Convert with exec_type - first point) - // - Then we should use ConstantFolding pass to convert element type of Scalars before inference. - // - Eliminate redundant Converts which can be inserted in AlignElementType() pass - ngraph::pass::Manager manager; - if (config.m_is_needed_to_align_precision) { - manager.register_pass(execution_element_type); - manager.register_pass(); - // TODO [100041] : In some cases AlignElementType pass can insert extra Convert because - // the pass doesn't know real precisions in real time. 
- // We call EliminateConverts pass to remove them - manager.register_pass(); + parameter->set_element_type(needed_in_type); + parameter->validate_and_infer_types(); + } } - manager.run_passes(body_ptr()); } void snippets::op::Subgraph::initialize_buffer_scratchpad_size() { @@ -602,24 +605,39 @@ snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& ou snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, - ngraph::pass::Manager& opt, + ngraph::pass::Manager& pre_dialect, + ngraph::pass::Manager& post_dialect, + ngraph::pass::Manager& post_precision, const void* compile_params) { canonicalize(output_shapes, input_shapes); - return generate(opt, compile_params); + return generate(pre_dialect, post_dialect, post_precision, compile_params); } snippets::Schedule snippets::op::Subgraph::generate(const void* compile_params) { auto mngr = ngraph::pass::Manager(); - return generate(mngr, compile_params); + return generate(mngr, mngr, mngr, compile_params); } -snippets::Schedule snippets::op::Subgraph::generate(ngraph::pass::Manager& opt, const void* compile_params) { +snippets::Schedule snippets::op::Subgraph::generate( + ngraph::pass::Manager& pre_dialect, + ngraph::pass::Manager& post_dialect, + ngraph::pass::Manager& post_precision, + const void* compile_params) { INTERNAL_OP_SCOPE(Subgraph); OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::generate") NGRAPH_CHECK(m_generator != nullptr, "generate is called while generator is not set"); + pre_dialect.run_passes(body_ptr()); convert_to_snippet_dialect(); - opt.run_passes(body_ptr()); + post_dialect.run_passes(body_ptr()); + + ngraph::pass::Manager precision_manager; + precision_manager.register_pass(m_generator->get_target_machine()); + precision_manager.register_pass(); + precision_manager.register_pass(); + precision_manager.run_passes(body_ptr()); + + post_precision.run_passes(body_ptr()); // After all passes, when all optimizations are completed and all MemoryAccess ops are inserted, // we can calculate common buffer scratchpad size and propagate offset from Buffer to the corresponding MemoryAccess ops diff --git a/src/common/snippets/src/pass/align_element_type.cpp b/src/common/snippets/src/pass/align_element_type.cpp deleted file mode 100644 index abd50a9e44605c..00000000000000 --- a/src/common/snippets/src/pass/align_element_type.cpp +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "snippets/snippets_isa.hpp" -#include "snippets/op/convert_saturation.hpp" -#include "snippets/pass/align_element_type.hpp" -#include "snippets/utils.hpp" -#include "ov_ops/type_relaxed.hpp" -#include "ngraph/op/util/op_types.hpp" - -#include - -namespace { - -inline auto is_in_op(const std::shared_ptr& n) -> bool { - return ov::is_type(n) - || ov::is_type(n); -} - -// At the moment Subgraph supports only Eltwise, Select, Convert, Broadcast and FQ (which is decomposed into Eltwises and Convert) with -// Softmax (which is decomposed into Eltwises as well) -// And only Eltwise and Select ops supports execution only in "exec_type". 
So we can check op type from the opposite -// NOTE: This check is only for executable which isn't Parameter/Constant/Result -inline auto op_supports_only_exec_type(const std::shared_ptr& n) -> bool { - return !is_in_op(n) && - !ov::is_type(n) && - !ov::is_type(n) && - !ov::is_type(n) && - !ov::is_type(n) && - !ov::is_type(n); -} - -} // namespace - -ngraph::snippets::pass::AlignElementType::AlignElementType(const ov::element::Type exec_type) : exec_type(exec_type) { } - -bool ngraph::snippets::pass::AlignElementType::run_on_model(const std::shared_ptr &m) { - RUN_ON_FUNCTION_SCOPE(AlignElementType); - - auto insertConvert = [](const std::shared_ptr& op, const size_t idx, const ov::element::Type& element_type) -> void { - auto convert = std::make_shared(op->input(idx).get_source_output(), element_type); - ngraph::copy_runtime_info(op->get_input_node_shared_ptr(idx), convert); - op->set_argument(idx, convert); - }; - - // NOTE: We don't call validate_and_infer_types() to avoid precision conflicts on inputs - bool rewritten = false; - auto ops = m->get_ordered_ops(); - for (auto& op : ops) { - if (is_in_op(op)) { - continue; - } - - if (op_supports_only_exec_type(op)) { - for (size_t i = 0; i < op->inputs().size(); i++) { - auto shared_input = op->get_input_node_shared_ptr(i); - auto existing_convert = ov::as_type_ptr(shared_input); - // We should insert Convert before Ops, which supports only exec element type, only when: - // - Input is Convert with unsupported destination type - // - Input is Op which support any element type - // We couldn't unite these conditions and just check that element type isn't supported exec type - // because we don't call validate_and_infer_types() so we don't know new precisions after setting of original - // input and output element types - if ((existing_convert && existing_convert->get_destination_type() != exec_type) || - (!op_supports_only_exec_type(shared_input))) { - insertConvert(op, i, exec_type); - rewritten |= true; - } - } - if (auto tr_node = std::dynamic_pointer_cast(op)) { - tr_node->set_overridden_output_type(exec_type, 0); - rewritten |= true; - } - } else { // branch for Movement ops, MatMul ops in the future and for the Convert, Result - for (size_t i = 0; i < op->inputs().size(); i++) { - auto shared_input = op->get_input_node_shared_ptr(i); - // it's original element type because we don't use validate_and_infer_type() anywhere - const auto original_eltype = op->input(i).get_element_type(); - // If before op there is another op that doesn't support execution on original element type, we know that - // before this op will be inserted reverse Convert to support execution on supported element type (first branch of condition). 
- // So we should return original element type for operations that can support low precision - if (op_supports_only_exec_type(shared_input) && original_eltype != exec_type) { - insertConvert(op, i, original_eltype); - rewritten |= true; - } - } - } - } - - return rewritten; -} - -bool ngraph::snippets::pass::AlignElementType::opNeedsAlignElementType(const std::shared_ptr& op, const ov::element::Type exec_type) { - // At the moment Snippets support only Eltwise/Convert/FQ/Select/Softmax/Broadcast which one output so we can just call get_element_type() - return op_supports_only_exec_type(op) && op->get_element_type() != exec_type; -} diff --git a/src/common/snippets/src/pass/collapse_subgraph.cpp b/src/common/snippets/src/pass/collapse_subgraph.cpp index cd3eb887481031..3325881834fd88 100644 --- a/src/common/snippets/src/pass/collapse_subgraph.cpp +++ b/src/common/snippets/src/pass/collapse_subgraph.cpp @@ -212,7 +212,11 @@ const std::set ngraph::snippets::pass::TokenizeSnippets:: { ngraph::element::f32, ngraph::element::bf16, ngraph::element::i8, ngraph::element::u8 }; bool TokenizeSnippets::AppropriateForSubgraph(const std::shared_ptr &node) { - return is_supported_op(node) && has_supported_in_out(node) && node->get_control_dependencies().empty(); + return + is_supported_op(node) && + has_supported_in_out(node) && + node->get_control_dependencies().empty() && + snippets::op::Subgraph::check_broadcast(node); } TokenizeSnippets::TokenizeSnippets() { diff --git a/src/common/snippets/src/pass/fq_decomposition.cpp b/src/common/snippets/src/pass/fq_decomposition.cpp index 5c2cfd6b0f82c3..9688e0a0e22940 100644 --- a/src/common/snippets/src/pass/fq_decomposition.cpp +++ b/src/common/snippets/src/pass/fq_decomposition.cpp @@ -36,11 +36,6 @@ bool isValidRangesInputs(const std::shared_ptr& fq }); } -bool is_scalar_constant(const std::shared_ptr& source_output_node) { - return ngraph::is_type(source_output_node) && - ngraph::shape_size(source_output_node->get_shape()) == 1; -} - } // namespace ngraph::snippets::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { @@ -182,13 +177,6 @@ ngraph::snippets::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { register_matcher(m, callback); } -bool ngraph::snippets::pass::FakeQuantizeDecomposition::isAllScalarConstant(const std::shared_ptr& node) { - return is_scalar_constant(node->get_input_node_shared_ptr(1)) && - is_scalar_constant(node->get_input_node_shared_ptr(2)) && - is_scalar_constant(node->get_input_node_shared_ptr(3)) && - is_scalar_constant(node->get_input_node_shared_ptr(4)); -} - bool ngraph::snippets::pass::FakeQuantizeDecomposition::getScalesAndShifts( const std::shared_ptr& fq_node, std::vector& cl, diff --git a/src/common/snippets/src/pass/propagate_precision.cpp b/src/common/snippets/src/pass/propagate_precision.cpp new file mode 100644 index 00000000000000..19be34b4e97648 --- /dev/null +++ b/src/common/snippets/src/pass/propagate_precision.cpp @@ -0,0 +1,293 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/pass/propagate_precision.hpp" + +#include +#include +#include "ov_ops/type_relaxed.hpp" +#include "snippets/itt.hpp" +#include "ngraph/rt_info.hpp" + +using namespace ngraph; + +ngraph::snippets::pass::PropagatePrecision::PropagatePrecision( + const std::shared_ptr& target_machine) : target_machine(target_machine) { +} + +bool ngraph::snippets::pass::PropagatePrecision::run_on_model(const std::shared_ptr& f) { + RUN_ON_MODEL_SCOPE(PropagatePrecision); + 
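+    // Flow of the pass, matching the code below:
+    //   1. Remember the element type each Result expects so it can be restored at the end.
+    //   2. For every op in topological order, query the target machine for the input
+    //      precision combinations its emitter supports.
+    //   3. Drop ConvertSaturation ops on inputs that became redundant, then reuse, fuse
+    //      or insert ConvertSaturation ops until the op receives a supported combination.
+    //   4. Re-validate the op; if its output precision changed, convert back to the
+    //      original precision for the op's consumers.
+    //   5. Restore the Result element types remembered in step 1.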
OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::PropagatePrecision") + + std::unordered_map, element::Type> result_types; + auto results = f->get_results(); + for (auto& result : results) { + result_types.emplace(result, result->get_input_element_type(0)); + } + + bool was_updated = true; + for (const auto& op : f->get_ordered_ops()) { + auto type_info = op->get_type_info(); + OPENVINO_ASSERT( + target_machine->has(type_info), + "operation '" + std::string(type_info.version_id) + "::" + std::string(type_info.name) + "' was not found in target machine"); + + auto exec = target_machine->get_supported_precisions(type_info); + const auto supported_precisions = exec(op); + if (supported_precisions.empty()) { + continue; + } + + // There are two operation types that break precision propagation: + // 1) Existing conversion operations. Solution: remove the conversion + // operation before the general algorithm runs + // 2) TypeRelaxed-based operations. Will be resolved by the snippets opset. + + auto input_precisions_were_changed = false; + + for (const auto& input : op->inputs()) { + const auto convert = ngraph::as_type(input.get_source_output().get_node()); + if (convert == nullptr) { + continue; + } + + const auto precision_before = convert->get_input_element_type(0); + const auto precision_after = convert->get_output_element_type(0); + if (can_be_removed(precision_before, precision_after, precision_before)) { + op->set_argument(input.get_index(), convert->input(0).get_source_output()); + input_precisions_were_changed = true; + } + } + + std::vector input_precisions; + for (const auto& input : op->inputs()) { + const auto input_precision = input.get_source_output().get_element_type(); + input_precisions.push_back(input_precision); + } + + assert(std::all_of( + supported_precisions.begin(), + supported_precisions.end(), + [&input_precisions](const std::vector& precisions) { + return precisions.size() == input_precisions.size(); + }) && "each supported precisions tuple must have as many elements as the operation has inputs"); + + // update input precisions: + // if possible, convert precisions to a supported combination + if (!supported_precisions.empty() && + std::all_of( + supported_precisions.begin(), + supported_precisions.end(), + [&input_precisions](const std::vector& precisions) { + return precisions != input_precisions; + })) { + auto precisions = get_precisions(input_precisions, + supported_precisions); + OPENVINO_ASSERT( + !precisions.empty(), + "there are no supported precisions for operation '" + std::string(type_info.version_id) + "::" + std::string(type_info.name) + "'"); + + auto find_convert = []( + const ngraph::Output parent_output, + const ngraph::element::Type convert_type) -> snippets::op::ConvertSaturation* { + for (const auto& input : parent_output.get_target_inputs()) { + const auto child = ngraph::as_type(input.get_node()); + if ((child != nullptr) && (child->get_output_element_type(0) == convert_type)) { + return child; + } + } + return nullptr; + }; + + for (size_t i = 0; i < op->get_input_size(); ++i) { + const auto& op_input = op->input(i); + const auto& required_after = precisions[i]; + auto parent_output = op_input.get_source_output(); + const auto actual_before = parent_output.get_element_type(); + if (actual_before != required_after) { + was_updated = true; + input_precisions_were_changed = true; + auto existing_convert = ngraph::as_type( + parent_output.get_node()); + + if (existing_convert == nullptr) { + existing_convert = find_convert(parent_output, required_after); + if
(existing_convert != nullptr) { + // reuse existing convert + op->set_argument(op_input.get_index(), existing_convert->shared_from_this()); + continue; + } + } + + if (existing_convert == nullptr) { + // create new Convert + auto convert = std::make_shared( + parent_output, + required_after); + ngraph::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); + op->set_argument(op_input.get_index(), convert); + continue; + } + + const auto actual_before = existing_convert->get_input_element_type(0); + const auto actual_after = existing_convert->get_output_element_type(0); + + if (can_be_removed(actual_before, actual_after, required_after)) { + // remove existing convert + existing_convert->output(0).replace(parent_output); + continue; + } + + if (can_be_fused(actual_after, required_after)) { + // fuse existing convert + auto convert = std::make_shared( + existing_convert->get_input_node_shared_ptr(0), + required_after); + ngraph::copy_runtime_info(parent_output.get_node_shared_ptr(), convert); + op->set_argument(op_input.get_index(), convert); + continue; + } + + // create new convert + auto convert = std::make_shared( + existing_convert->output(0), + required_after); + ngraph::copy_runtime_info(existing_convert->output(0).get_node()->shared_from_this(), convert); + op->set_argument(op_input.get_index(), convert); + } + } + } + + auto type_relaxed_node = std::dynamic_pointer_cast(op); + if (input_precisions_were_changed || (type_relaxed_node != nullptr)) { + // update output precision + std::vector op_output_types; + for (auto& output : op->outputs()) { + op_output_types.push_back(output.get_element_type()); + } + + if (type_relaxed_node != nullptr) { + // TODO: user story 104284 + // to keep the previous functionality, only + // unary and binary element-wise operations are supported; + // this will be replaced by the snippets opset later + const auto op_element_type = op->get_input_element_type(0); + if (type_relaxed_node->get_overridden_output_type(0) != op_element_type) { + was_updated = true; + OPENVINO_ASSERT(op->get_output_size() == 1ull, "operations with several outputs are not supported"); + + type_relaxed_node->set_overridden_output_type(op_element_type, 0); + op->validate_and_infer_types(); + } + } else { + op->validate_and_infer_types(); + } + + for (size_t i = 0; i < op->get_output_size(); ++i) { + auto output = op->output(i); + + if (output.get_element_type() != op_output_types[i]) { + was_updated = true; + auto convert = std::make_shared( + output, + op_output_types[i]); + ngraph::copy_runtime_info(output.get_node_shared_ptr(), convert); + + for (auto& input : output.get_target_inputs()) { + auto child = input.get_node(); + if (child == convert.get()) { + continue; + } + + input.replace_source_output(convert->output(0)); + + + if (ngraph::is_type(input.get_node())) { + input.get_tensor_ptr()->add_names(output.get_tensor_ptr()->get_names()); + + const std::string original_name = op->get_friendly_name(); + op->set_friendly_name(original_name + "_original"); + convert->set_friendly_name(original_name); + } + } + output.get_tensor_ptr()->set_names({}); + } + } + } + } + + for (auto it = result_types.begin(); it != result_types.end(); ++it) { + const auto result = it->first; + const auto actual_type = result->get_input_element_type(0); + const auto expected_type = it->second; + if (actual_type != it->second) { + was_updated = true; + auto convert = std::make_shared( + result->get_input_node_shared_ptr(0), + expected_type); + ngraph::copy_runtime_info(result->get_input_node_shared_ptr(0),
convert); + result->set_argument(0, convert); + } + } + + return was_updated; +} + +bool ngraph::snippets::pass::PropagatePrecision::can_be_removed( + const element::Type& actual_before, + const element::Type& actual_after, + const element::Type& required_after) noexcept { + if (actual_before != required_after) { + return false; + } + + return can_be_fused(actual_after, actual_before); +} + +bool ngraph::snippets::pass::PropagatePrecision::can_be_fused( + const element::Type& actual, + const element::Type& required) noexcept { + // custom conditions: between int & float precisions + if (((actual == element::bf16) || (actual == element::f16) || (actual == element::f32)) && + ((required == element::u8) || (required == element::i8))) { + return true; + } + + if ((actual == element::f32) && ((required == element::u16) || (required == element::i16))) { + return true; + } + + // general conditions: must hold for any newly added precision + return + (actual.is_real() == required.is_real()) && + (actual.bitwidth() >= required.bitwidth()); +} + +std::vector ngraph::snippets::pass::PropagatePrecision::get_precisions( + const std::vector& input_precisions, + const std::set>& supported_precisions_pack) noexcept { + bool was_found = false; + for (const auto& supported_precisions : supported_precisions_pack) { + for (size_t i = 0; i < supported_precisions.size(); ++i) { + const auto& supported_precision = supported_precisions[i]; + const auto& input_precision = input_precisions[i]; + if ((supported_precision.is_real() != input_precision.is_real()) || + (input_precision.bitwidth() > supported_precision.bitwidth())) { + was_found = false; + break; + } + + was_found = true; + } + if (was_found) { + return supported_precisions; + } + } + + if (!supported_precisions_pack.empty()) { + return *supported_precisions_pack.begin(); + } + + return {}; +} diff --git a/src/common/snippets/tests/include/lowering_utils.hpp b/src/common/snippets/tests/include/lowering_utils.hpp index be2e0f2e756044..b0b1bafb245308 100644 --- a/src/common/snippets/tests/include/lowering_utils.hpp +++ b/src/common/snippets/tests/include/lowering_utils.hpp @@ -16,7 +16,7 @@ using BlockedShapeVector = ngraph::snippets::op::Subgraph::BlockedShapeVector; class DummyEmitter : public ngraph::snippets::Emitter { public: // Here I pass Add to Emitter, but could be any other op, since it's ignored anyway.
- DummyEmitter() : ngraph::snippets::Emitter(std::make_shared()) {} + DummyEmitter(const std::vector& custom_opset = {}) : ngraph::snippets::Emitter(std::make_shared()) {} void emit_code(const std::vector&, const std::vector&, const std::vector&, @@ -49,7 +49,9 @@ class LoweringTests : public TransformationTestsF { static std::shared_ptr getSubgraph(const std::shared_ptr& f); static std::shared_ptr getLoweredSubgraph(const std::shared_ptr& f, const ov::PartialShape& master_shape, - ov::pass::Manager target_optimizations = {}, + ov::pass::Manager pre_dialect = {}, + ov::pass::Manager post_dialect = {}, + ov::pass::Manager post_precision = {}, const std::shared_ptr generator = nullptr); static std::shared_ptr getTokenizedSubgraph(const std::shared_ptr& f); ov::PartialShape master_shape{}; diff --git a/src/common/snippets/tests/include/pass/precision_propagation.hpp b/src/common/snippets/tests/include/pass/precision_propagation.hpp new file mode 100644 index 00000000000000..a60b9161ab4fc4 --- /dev/null +++ b/src/common/snippets/tests/include/pass/precision_propagation.hpp @@ -0,0 +1,54 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "lowering_utils.hpp" +#include "snippets_helpers.hpp" + +namespace ov { +namespace test { +namespace snippets { + +class PrecisionPropagationParamsValues { +public: + class Actual { + public: + std::pair convertion_before_op1; + element::Type convertion_before_op2_1; + std::pair convertion_before_op2_2; + std::set> op1_supported_precisions; + std::set> op2_supported_precisions; + }; + + class Expected { + public: + std::pair convertion_before_op1; + element::Type convertion_before_op2_1; + std::pair convertion_before_op2_2; + element::Type convertion_after_op2; + }; + + std::vector input_types; + Actual actual; + Expected expected; +}; + +typedef std::tuple< + std::pair, // input shapes + PrecisionPropagationParamsValues +> PrecisionPropagationParams; + +class PrecisionPropagationTest : public TransformationTestsF, + public testing::WithParamInterface { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + std::shared_ptr snippets_function; +}; + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/common/snippets/tests/src/lowering_utils.cpp b/src/common/snippets/tests/src/lowering_utils.cpp index a536a0317eae12..55480e95dae510 100644 --- a/src/common/snippets/tests/src/lowering_utils.cpp +++ b/src/common/snippets/tests/src/lowering_utils.cpp @@ -11,10 +11,12 @@ namespace ov { namespace test { namespace snippets { -DummyTargetMachine::DummyTargetMachine(const std::vector& custom_opset) { - auto dummy_functor = [](const std::shared_ptr& n) { - return std::make_shared(); +DummyTargetMachine::DummyTargetMachine(const std::vector&custom_opset) { + auto dummy_functor = ngraph::snippets::jitters_value { + [](const std::shared_ptr& n) { return std::make_shared(); }, + [](const std::shared_ptr& n) { return std::set>{};} }; + jitters[op::v0::Parameter::get_type_info_static()] = dummy_functor; jitters[op::v0::Constant::get_type_info_static()] = dummy_functor; jitters[op::v0::Result::get_type_info_static()] = dummy_functor; @@ -97,7 +99,9 @@ std::shared_ptr LoweringTests::getSubgraph(const std::shared_ptr LoweringTests::getLoweredSubgraph(const std::shared_ptr &f, const ov::PartialShape& master_shape, - ov::pass::Manager target_optimizations, + ov::pass::Manager pre_dialect, + ov::pass::Manager post_dialect, + ov::pass::Manager 
post_precision, const std::shared_ptr generator) { auto subgraph = getTokenizedSubgraph(f); subgraph->set_generator(generator == nullptr ? std::make_shared() : generator); @@ -119,7 +123,7 @@ std::shared_ptr LoweringTests::getLoweredSubgrap } body_rt_info["PluginShapesOverride"] = new_shapes; subgraph->set_tile_rank(2); - subgraph->generate(target_optimizations); + subgraph->generate(pre_dialect, post_dialect, post_precision); return subgraph; } diff --git a/src/common/snippets/tests/src/pass/precision_propagation.cpp b/src/common/snippets/tests/src/pass/precision_propagation.cpp new file mode 100644 index 00000000000000..3c7da4d06aa165 --- /dev/null +++ b/src/common/snippets/tests/src/pass/precision_propagation.cpp @@ -0,0 +1,294 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "pass/precision_propagation.hpp" + +#include +#include "ngraph/pass/validate.hpp" +#include "snippets/pass/propagate_precision.hpp" +#include "snippets/op/convert_saturation.hpp" +#include "common_test_utils/common_utils.hpp" +#include "precision_propagation_function.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +class DummyPrecisionPropagationTargetMachine : public DummyTargetMachine { +public: + DummyPrecisionPropagationTargetMachine( + const std::set>& op1_supported_precisions, + const std::set>& op2_supported_precisions) + : DummyTargetMachine() { + jitters[DummyAdd::get_type_info_static()] = ngraph::snippets::jitters_value { + [](const std::shared_ptr& n) { return std::make_shared(); }, + [op1_supported_precisions](const std::shared_ptr& n) { return op1_supported_precisions; }}; + jitters[op::v1::Maximum::get_type_info_static()] = ngraph::snippets::jitters_value{ + [](const std::shared_ptr& n) { return std::make_shared(); }, + [op2_supported_precisions](const std::shared_ptr&n) { return op2_supported_precisions; }}; + + auto default_jitter = ngraph::snippets::jitters_value{ + [](const std::shared_ptr& n) { return std::make_shared(); }, + [](const std::shared_ptr& n) { return std::set>{};} }; + jitters[ngraph::snippets::op::ConvertSaturation::get_type_info_static()] = default_jitter; + } +}; + +} // namespace + +std::string PrecisionPropagationTest::getTestCaseName(testing::TestParamInfo obj) { + std::pair shapes; + PrecisionPropagationParamsValues test_values; + std::tie(shapes, test_values) = obj.param; + + auto to_string = [](const std::set>& precisions_pack) noexcept { + std::ostringstream result; + result << "{"; + for (const auto& precisions : precisions_pack) { + result << CommonTestUtils::vec2str(precisions) << "_"; + } + result << "}"; + return result.str(); + }; + + std::ostringstream result; + result << "IN0_" << shapes.first << "_" << test_values.input_types[0] << "_" + << "IN1_" << shapes.second << "_" << test_values.input_types[1] << "_" + << "IN2_" << test_values.input_types[2] + << to_string(test_values.actual.op1_supported_precisions) << "_" + << to_string(test_values.actual.op2_supported_precisions) << "_" + << test_values.expected.convertion_before_op1.first << "_" << test_values.expected.convertion_before_op1.second << "_" + << test_values.expected.convertion_before_op2_1 << "_" + << test_values.expected.convertion_before_op2_2.first << "_" << test_values.expected.convertion_before_op2_2.second << "_" + << test_values.expected.convertion_after_op2 << "_"; + return result.str(); +} + +TEST_P(PrecisionPropagationTest, CompareFunctions) { + disable_rt_info_check(); + + const auto param = GetParam(); + const
auto shapes = std::get<0>(param); + const auto test_values = std::get<1>(param); + + const auto input_shapes = std::vector({ shapes.first, shapes.second }); + PrecisionPropagationAddFunction function_stub( + input_shapes, + test_values.input_types[0], + test_values.input_types[1], + test_values.input_types[2], + { + test_values.actual.convertion_before_op1, + test_values.actual.convertion_before_op2_1, + test_values.actual.convertion_before_op2_2 + }, + { + test_values.expected.convertion_before_op1, + test_values.expected.convertion_before_op2_1, + test_values.expected.convertion_before_op2_2, + test_values.expected.convertion_after_op2 + }); + function = function_stub.getOriginal(); + + const auto target_machine = std::make_shared( + test_values.actual.op1_supported_precisions, + test_values.actual.op2_supported_precisions); + + manager.register_pass(target_machine); + + function_ref = function_stub.getReference(); +} + +namespace PrecisionPropagationTestInstantiation { +// clang-format off + +std::vector> shapes { + {{1, 3, 16, 16}, {1, 3, 16, 16}} +}; + +std::vector test_cases { + { + {element::f32, element::f32, element::f32}, + { + {}, + {}, + {}, + {{element::f32, element::f32}}, + {{element::f32, element::f32}} + }, + {} + }, + // in: Parameter I8 => Op1 I32 => Convert I8 => Op1 I8 => Result + // out: Parameter I8 => Add I32 => Convert I8 => Convert FP32 => Op1 FP32 => Result + { + {element::i8, element::i8, element::i8}, + { + {}, + {}, + {}, + {{element::i8, element::i8}}, + {{element::f32, element::f32}} + }, + { + {}, + element::i8, + {element::f32, element::f32}, + {element::i8} + } + }, + { + {element::i8, element::i8, element::i8}, + { + {}, + {}, + {}, + {{element::i8, element::i8}}, + {{element::i8, element::i8}} + }, + { + {}, + {}, + {element::i8, element::undefined}, + {} + } + }, + { + {element::i8, element::i8, element::i8}, + { + {}, + {}, + {}, + {{element::i8, element::i8}}, + {{element::i32, element::i32}} + }, + { + {}, + {element::i8}, + {element::i32, element::i32}, + {element::i8} + } + }, + { + {element::bf16, element::bf16, element::f32}, + { + {element::f32, element::f32}, + {}, + {}, + { + {element::f32, element::f32}, + {element::i8, element::i8} + }, + { + {element::f32, element::f32}, + {element::i32, element::i32} + } + }, + { + {element::f32, element::f32}, + {}, + {}, + {} + } + }, + // propagate precision via operation #1 + { + {element::bf16, element::bf16, element::f32}, + { + {element::f32, element::f32}, + {}, + {}, + { + {element::f32, element::f32}, + {element::bf16, element::bf16} + }, + { + {element::f32, element::f32} + } + }, + { + {}, + {}, + {element::f32, element::undefined}, + {} + } + }, + // propagate precision via operation #1 + { + {element::bf16, element::bf16, element::bf16}, + { + {element::f32, element::f32}, + {}, + {element::undefined, element::f32}, + { + {element::f32, element::f32}, + {element::bf16, element::bf16} + }, + { + {element::f32, element::f32} + } + }, + { + {}, + {}, + {element::f32, element::f32}, + {} + } + }, + // propagate precision via both operations + { + {element::bf16, element::bf16, element::bf16}, + { + {element::f32, element::f32}, + {}, + {element::undefined, element::f32}, + { + {element::f32, element::f32}, + {element::bf16, element::bf16} + }, + { + {element::f32, element::f32}, + {element::bf16, element::bf16} + } + }, + { + {}, + {}, + {}, + {element::f32} + } + }, + { + {element::bf16, element::bf16, element::bf16}, + { + {}, + {}, + {}, + {{element::f32, element::f32}}, + {{element::f32, 
element::f32}} + }, + { + {{element::f32}, {element::f32}}, + {element::bf16}, + {{element::f32}, {element::f32}}, + {element::bf16} + } + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_PrecisionPropagationTest, + PrecisionPropagationTest, + ::testing::Combine( + ::testing::ValuesIn(shapes), + ::testing::ValuesIn(test_cases)), + PrecisionPropagationTest::getTestCaseName); + +// clang-format on +} // namespace PrecisionPropagationTestInstantiation + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/common/snippets/tests/src/pass/precision_propagation_convert_test.cpp b/src/common/snippets/tests/src/pass/precision_propagation_convert_test.cpp new file mode 100644 index 00000000000000..cc6c113cc3f671 --- /dev/null +++ b/src/common/snippets/tests/src/pass/precision_propagation_convert_test.cpp @@ -0,0 +1,153 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "snippets/pass/propagate_precision.hpp" + +namespace ov { +namespace test { +namespace snippets { + +class PrecisionPropagationConvertTest : public testing::Test {}; + +TEST_F(PrecisionPropagationConvertTest, smoke_Snippets_PrecisionPropagation_can_be_fused) { + const std::set> precisions_set = { + {element::u64, element::u64}, + {element::u64, element::u32}, + {element::u64, element::u16}, + {element::u64, element::u8}, + {element::u32, element::u32}, + {element::u32, element::u16}, + {element::u32, element::u8}, + {element::u16, element::u16}, + {element::u16, element::u8}, + {element::u8, element::u8}, + + {element::i64, element::i64}, + {element::i64, element::i32}, + {element::i64, element::i16}, + {element::i64, element::i8}, + {element::i32, element::i32}, + {element::i32, element::i16}, + {element::i32, element::i8}, + {element::i16, element::i16}, + {element::i16, element::i8}, + {element::i8, element::i8}, + + {element::f64, element::f64}, + {element::f64, element::f32}, + {element::f64, element::f16}, + {element::f32, element::f32}, + {element::f32, element::f16}, + {element::f16, element::f16}, + + {element::f32, element::bf16}, + {element::bf16, element::bf16}, + {element::f32, element::i8}, + {element::f16, element::i8}, + {element::bf16, element::i8}, + {element::f32, element::u8}, + {element::f16, element::u8}, + {element::bf16, element::u8} + }; + + for (const auto& precisions : precisions_set) { + ASSERT_TRUE(ngraph::snippets::pass::PropagatePrecision::can_be_fused( + precisions.first, + precisions.second)) << precisions.second << " can replace " << precisions.first; + + if (precisions.first == precisions.second) { + continue; + } + + ASSERT_FALSE(ngraph::snippets::pass::PropagatePrecision::can_be_fused( + precisions.second, + precisions.first)) << precisions.second << " can not replace " << precisions.first; + } +} + +TEST_F(PrecisionPropagationConvertTest, smoke_Snippets_PrecisionPropagation_can_not_be_fused) { + const std::set> precisions_set = { + {element::i64, element::f32}, + {element::i64, element::f16}, + {element::i64, element::bf16}, + + {element::i32, element::f32}, + {element::i32, element::f16}, + {element::i32, element::bf16}, + + {element::i16, element::f16}, + {element::i16, element::bf16}, + + {element::u64, element::f32}, + {element::u64, element::f16}, + {element::u64, element::bf16}, + + {element::u32, element::f32}, + {element::u32, element::f16}, + {element::u32, element::bf16}, + + {element::u16, element::f16}, + {element::u16, element::bf16} + }; + + for (const auto& precisions : precisions_set) { + 
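+        // These integer -> floating-point pairs must be rejected: can_be_fused accepts
+        // only same-kind conversions that do not widen the precision, plus the explicit
+        // float -> int cases listed in its implementation.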
ASSERT_FALSE(ngraph::snippets::pass::PropagatePrecision::can_be_fused( + precisions.first, + precisions.second)) << precisions.second << " can not replace " << precisions.first; + } +} + +TEST_F(PrecisionPropagationConvertTest, smoke_Snippets_PrecisionPropagation_can_be_removed) { + const std::set> precisions_set = { + {element::u64, element::u64, element::u64}, + {element::u32, element::u64, element::u32}, + {element::u16, element::u64, element::u16}, + {element::u8, element::u64, element::u8}, + {element::u32, element::u32, element::u32}, + {element::u16, element::u32, element::u16}, + {element::u8, element::u32, element::u8}, + {element::u16, element::u16, element::u16}, + {element::u8, element::u16, element::u8}, + {element::u8, element::u8, element::u8}, + + {element::i64, element::i64, element::i64}, + {element::i32, element::i64, element::i32}, + {element::i16, element::i64, element::i16}, + {element::i8, element::i64, element::i8}, + {element::i32, element::i32, element::i32}, + {element::i16, element::i32, element::i16}, + {element::i8, element::i32, element::i8}, + {element::i16, element::i16, element::i16}, + {element::i8, element::i16, element::i8}, + {element::i8, element::i8, element::i8}, + + {element::f64, element::f64, element::f64}, + {element::f32, element::f64, element::f32}, + {element::f16, element::f64, element::f16}, + {element::f32, element::f32, element::f32}, + {element::f16, element::f16, element::f16}, + + {element::bf16, element::f32, element::bf16}, + {element::bf16, element::bf16, element::bf16}, + }; + + for (const auto& precisions : precisions_set) { + const auto actual_before = std::get<0>(precisions); + const auto actual_after = std::get<1>(precisions); + const auto required_after = std::get<2>(precisions); + ASSERT_TRUE(ngraph::snippets::pass::PropagatePrecision::can_be_removed( + actual_before, + actual_after, + required_after)) << "can_be_removed: " << actual_before << " => " << actual_after << " => " << required_after; + + if ((actual_before == actual_after) && (actual_before == required_after)) { + continue; + } + } +} + +} // namespace snippets +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/common/snippets/tests/src/pass/precision_propagation_get_precisions.cpp b/src/common/snippets/tests/src/pass/precision_propagation_get_precisions.cpp new file mode 100644 index 00000000000000..9e97fcc8ad4aa1 --- /dev/null +++ b/src/common/snippets/tests/src/pass/precision_propagation_get_precisions.cpp @@ -0,0 +1,45 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include "snippets/pass/propagate_precision.hpp" + +namespace ov { +namespace test { +namespace snippets { + + +class PrecisionPropagationGetPrecisionsTest : public testing::Test {}; + +TEST_F(PrecisionPropagationGetPrecisionsTest, empty) { + ASSERT_EQ(std::vector{}, ngraph::snippets::pass::PropagatePrecision::get_precisions({}, {})); +} + +TEST_F(PrecisionPropagationGetPrecisionsTest, selected) { + ASSERT_EQ( + std::vector({element::f32, element::f32}), + ngraph::snippets::pass::PropagatePrecision::get_precisions( + { element::f32, element::f32 }, + { + {element::bf16, element::bf16}, + {element::f32, element::f32}, + {element::i8, element::i8}, + })); +} + +TEST_F(PrecisionPropagationGetPrecisionsTest, first) { + ASSERT_EQ( + std::vector({ element::bf16, element::bf16 }), + ngraph::snippets::pass::PropagatePrecision::get_precisions( + { element::i32, element::i32 }, + { + {element::bf16, element::bf16}, + 
{element::f32, element::f32}, + {element::i8, element::i8}, + })); +} + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/core/src/pass/visualize_tree.cpp b/src/core/src/pass/visualize_tree.cpp index 70ee298b547e5e..c89decb3f42121 100644 --- a/src/core/src/pass/visualize_tree.cpp +++ b/src/core/src/pass/visualize_tree.cpp @@ -503,7 +503,9 @@ string pass::VisualizeTree::get_node_name(shared_ptr node) { if (node->get_friendly_name() != node->get_name()) { rc += "\\n" + (nvtmn ? string("name: ") : "") + node->get_name(); } - rc += "\\n" + (nvtmn ? string("type_name: ") : "") + std::string(node->get_type_name()); + const auto type_info = node->get_type_info(); + rc += "\\n" + (nvtmn ? string("type_name: ") : "") + std::string(type_info.version_id) + + "::" + std::string(type_info.name); static const bool nvttn = getenv_bool("OV_VISUALIZE_TREE_TENSORS_NAME"); if (nvttn) { diff --git a/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp index 8423a9bec9d611..8c2e666d6b6438 100644 --- a/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp @@ -26,8 +26,14 @@ using namespace std; using namespace ngraph::snippets; -#define CREATE_EMITTER(e_type) [this](const std::shared_ptr& n) \ - -> std::shared_ptr {return std::make_shared(h.get(), isa, n);}; +#define CREATE_EMITTER(e_type) { \ + [this](const std::shared_ptr& n) -> std::shared_ptr { \ + return std::make_shared(h.get(), isa, n); \ + }, \ + [](const std::shared_ptr& n) -> std::set> { \ + return e_type::get_supported_precisions(n); \ + } \ +}; class jit_snippet : public dnnl::impl::cpu::x64::jit_generator { public: diff --git a/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.cpp b/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.cpp index 501cd934753b10..416218b92a3bb6 100644 --- a/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.cpp @@ -13,6 +13,10 @@ using namespace Xbyak; namespace ov { namespace intel_cpu { +std::set> jit_dnnl_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + jit_dnnl_emitter::jit_dnnl_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr& node, InferenceEngine::Precision exec_prc) : jit_emitter(host, host_isa, node, exec_prc) { diff --git a/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.hpp b/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.hpp index b9ea5ffd2339da..0b7165d2484580 100644 --- a/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/jit_dnnl_emitters.hpp @@ -20,6 +20,8 @@ class jit_dnnl_emitter : public jit_emitter { void emit_impl(const std::vector &in_idxs, const std::vector &out_idxs) const override {}; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); + protected: jit_dnnl_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, dnnl_alg_kind_t algKind, float inpAlpha, float inpBeta, diff --git a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp index d222f8345511dc..150d524ac04ce7 100644 --- a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp @@ -3,6 +3,7 @@ // #include "jit_eltwise_emitters.hpp" +#include "ie_ngraph_utils.hpp" using namespace InferenceEngine; 
using namespace dnnl::impl::utils; @@ -16,9 +17,26 @@ using namespace Xbyak; namespace ov { namespace intel_cpu { +namespace { +InferenceEngine::Precision get_arithmetic_binary_exec_precision(const std::shared_ptr& n) { + std::vector input_precisions; + for (const auto& input : n->inputs()) { + input_precisions.push_back( + InferenceEngine::details::convertPrecision(input.get_source_output().get_element_type())); + } + + assert(std::all_of( + input_precisions.begin(), + input_precisions.end(), + [&input_precisions](const InferenceEngine::Precision& precision) {return precision == input_precisions[0]; })); + + return input_precisions[0]; +} +} // namespace + /// ADD /// -jit_add_emitter::jit_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, Precision exec_prc) -: jit_emitter(host, host_isa, node, exec_prc) {} +jit_add_emitter::jit_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node) +: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {} jit_add_emitter::jit_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} @@ -59,13 +77,13 @@ void jit_add_emitter::emit_isa(const std::vector &in_vec_idxs, const std } } -std::set jit_add_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_add_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}, {element::i32, element::i32}}; } /// MUL_ADD /// -jit_mul_add_emitter::jit_mul_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, Precision exec_prc) -: jit_emitter(host, host_isa, node, exec_prc) {} +jit_mul_add_emitter::jit_mul_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node) +: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {} jit_mul_add_emitter::jit_mul_add_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} @@ -150,13 +168,13 @@ size_t jit_mul_add_emitter::aux_vecs_count() const { return 1; } -std::set jit_mul_add_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_mul_add_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32, element::f32}, {element::i32, element::i32, element::i32}}; } /// SUB /// -jit_subtract_emitter::jit_subtract_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, Precision exec_prc) -: jit_emitter(host, host_isa, node, exec_prc) {} +jit_subtract_emitter::jit_subtract_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node) +: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {} jit_subtract_emitter::jit_subtract_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} @@ -197,13 +215,13 @@ void jit_subtract_emitter::emit_isa(const std::vector &in_vec_idxs, cons } } -std::set jit_subtract_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_subtract_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}, {element::i32, element::i32}}; } /// MULTIPLY /// -jit_multiply_emitter::jit_multiply_emitter(x64::jit_generator *host, x64::cpu_isa_t 
host_isa, const std::shared_ptr& node, Precision exec_prc) -: jit_emitter(host, host_isa, node, exec_prc) {} +jit_multiply_emitter::jit_multiply_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node) +: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {} jit_multiply_emitter::jit_multiply_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} @@ -244,13 +262,13 @@ void jit_multiply_emitter::emit_isa(const std::vector &in_vec_idxs, cons } } -std::set jit_multiply_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_multiply_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}, {element::i32, element::i32}}; } /// DIVIDE /// jit_divide_emitter::jit_divide_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, Precision exec_prc) -: jit_emitter(host, host_isa, node, exec_prc) {} +: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {} jit_divide_emitter::jit_divide_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} @@ -305,8 +323,8 @@ void jit_divide_emitter::emit_isa(const std::vector &in_vec_idxs, const } } -std::set jit_divide_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_divide_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}, {element::i32, element::i32}}; } size_t jit_divide_emitter::aux_vecs_count() const { @@ -321,7 +339,11 @@ jit_floor_emitter::jit_floor_emitter(x64::jit_generator *host, x64::cpu_isa_t ho size_t jit_floor_emitter::get_inputs_num() const { return 1; } -void jit_floor_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_floor_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + +void jit_floor_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -344,11 +366,15 @@ void jit_floor_emitter::emit_isa(const std::vector &in_vec_idxs, const s /// CEILING /// jit_ceiling_emitter::jit_ceiling_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, Precision exec_prc) : jit_emitter(host, host_isa, node, exec_prc) {} -jit_ceiling_emitter::jit_ceiling_emitter(x64::jit_generator* host, x64::cpu_isa_t host_isa, Precision exec_prc) +jit_ceiling_emitter::jit_ceiling_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} size_t jit_ceiling_emitter::get_inputs_num() const { return 1; } +std::set> jit_ceiling_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + void jit_ceiling_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { @@ -378,7 +404,11 @@ jit_floor_mod_emitter::jit_floor_mod_emitter(x64::jit_generator *host, x64::cpu_ size_t jit_floor_mod_emitter::get_inputs_num() const { return 2; } -void jit_floor_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_floor_mod_emitter::get_supported_precisions(const std::shared_ptr& 
node) { + return {{element::f32, element::f32}}; +} + +void jit_floor_mod_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -428,7 +458,11 @@ jit_mod_emitter::jit_mod_emitter(x64::jit_generator *host, x64::cpu_isa_t host_i size_t jit_mod_emitter::get_inputs_num() const { return 2; } -void jit_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_mod_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_mod_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -471,8 +505,8 @@ size_t jit_mod_emitter::aux_vecs_count() const { } /// MAXIMUM /// -jit_maximum_emitter::jit_maximum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, Precision exec_prc) -: jit_emitter(host, host_isa, node, exec_prc) {} +jit_maximum_emitter::jit_maximum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node) +: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {} jit_maximum_emitter::jit_maximum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} @@ -514,13 +548,13 @@ void jit_maximum_emitter::emit_isa(const std::vector &in_vec_idxs, const } } -std::set jit_maximum_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_maximum_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}, {element::i32, element::i32}}; } /// MINIMUM /// -jit_minimum_emitter::jit_minimum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node, Precision exec_prc) -: jit_emitter(host, host_isa, node, exec_prc) {} +jit_minimum_emitter::jit_minimum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, const std::shared_ptr& node) +: jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {} jit_minimum_emitter::jit_minimum_emitter(x64::jit_generator *host, x64::cpu_isa_t host_isa, Precision exec_prc) : jit_emitter(host, host_isa, exec_prc) {} @@ -562,8 +596,8 @@ void jit_minimum_emitter::emit_isa(const std::vector &in_vec_idxs, const } } -std::set jit_minimum_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_minimum_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}, {element::i32, element::i32}}; } /// SQUARED_DIFFERENCE /// @@ -617,8 +651,8 @@ void jit_squared_difference_emitter::emit_isa(const std::vector &in_vec_ } } -std::set jit_squared_difference_emitter::get_supported_precisions() { - return {Precision::FP32, Precision::I32}; +std::set> jit_squared_difference_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}, {element::i32, element::i32}}; } /// POWER_DYNAMIC /// @@ -630,7 +664,11 @@ jit_power_dynamic_emitter::jit_power_dynamic_emitter(x64::jit_generator *host, x size_t jit_power_dynamic_emitter::get_inputs_num() const { return 2; } -void jit_power_dynamic_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> 
jit_power_dynamic_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_power_dynamic_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -741,7 +779,11 @@ jit_equal_emitter::jit_equal_emitter(x64::jit_generator *host, x64::cpu_isa_t ho size_t jit_equal_emitter::get_inputs_num() const { return 2; } -void jit_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_equal_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_equal_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -800,7 +842,11 @@ jit_not_equal_emitter::jit_not_equal_emitter(x64::jit_generator *host, x64::cpu_ size_t jit_not_equal_emitter::get_inputs_num() const { return 2; } -void jit_not_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_not_equal_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_not_equal_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -859,7 +905,11 @@ jit_greater_emitter::jit_greater_emitter(x64::jit_generator *host, x64::cpu_isa_ size_t jit_greater_emitter::get_inputs_num() const { return 2; } -void jit_greater_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_greater_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_greater_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -919,7 +969,11 @@ jit_greater_equal_emitter::jit_greater_equal_emitter(x64::jit_generator *host, x size_t jit_greater_equal_emitter::get_inputs_num() const { return 2; } -void jit_greater_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_greater_equal_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_greater_equal_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -978,7 +1032,11 @@ jit_less_emitter::jit_less_emitter(x64::jit_generator *host, x64::cpu_isa_t host size_t jit_less_emitter::get_inputs_num() const { return 2; } -void jit_less_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_less_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_less_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1037,7 +1095,11 @@ 
jit_less_equal_emitter::jit_less_equal_emitter(x64::jit_generator *host, x64::cp size_t jit_less_equal_emitter::get_inputs_num() const { return 2; } -void jit_less_equal_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_less_equal_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_less_equal_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1097,7 +1159,11 @@ jit_logical_and_emitter::jit_logical_and_emitter(x64::jit_generator *host, x64:: size_t jit_logical_and_emitter::get_inputs_num() const { return 2; } -void jit_logical_and_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_logical_and_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_logical_and_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1177,7 +1243,11 @@ jit_logical_or_emitter::jit_logical_or_emitter(x64::jit_generator *host, x64::cp size_t jit_logical_or_emitter::get_inputs_num() const { return 2; } -void jit_logical_or_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_logical_or_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_logical_or_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1256,7 +1326,11 @@ jit_logical_xor_emitter::jit_logical_xor_emitter(x64::jit_generator *host, x64:: size_t jit_logical_xor_emitter::get_inputs_num() const { return 2; } -void jit_logical_xor_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_logical_xor_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_logical_xor_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1335,7 +1409,11 @@ jit_logical_not_emitter::jit_logical_not_emitter(x64::jit_generator *host, x64:: size_t jit_logical_not_emitter::get_inputs_num() const { return 1; } -void jit_logical_not_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_logical_not_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + +void jit_logical_not_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1405,7 +1483,11 @@ jit_power_static_emitter::jit_power_static_emitter(x64::jit_generator *host, x64 size_t jit_power_static_emitter::get_inputs_num() const { return 1; } -void jit_power_static_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_power_static_emitter::get_supported_precisions(const std::shared_ptr& node) 
{ + return {{element::f32}}; +} + +void jit_power_static_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1579,7 +1661,11 @@ jit_prelu_emitter::jit_prelu_emitter(x64::jit_generator *host, x64::cpu_isa_t ho } size_t jit_prelu_emitter::get_inputs_num() const { return 2; } -void jit_prelu_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_prelu_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + +void jit_prelu_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1634,7 +1720,11 @@ jit_sqrt_emitter::jit_sqrt_emitter(x64::jit_generator *host, x64::cpu_isa_t host size_t jit_sqrt_emitter::get_inputs_num() const { return 1; } -void jit_sqrt_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_sqrt_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + +void jit_sqrt_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1661,7 +1751,11 @@ jit_negative_emitter::jit_negative_emitter(x64::jit_generator *host, x64::cpu_is size_t jit_negative_emitter::get_inputs_num() const { return 1; } -void jit_negative_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_negative_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + +void jit_negative_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -1695,6 +1789,10 @@ jit_erf_emitter::jit_erf_emitter(x64::jit_generator *host, x64::cpu_isa_t host_i size_t jit_erf_emitter::get_inputs_num() const { return 1; } +std::set> jit_erf_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + void jit_erf_emitter::emit_impl( const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { @@ -1875,7 +1973,11 @@ jit_soft_sign_emitter::jit_soft_sign_emitter(x64::jit_generator *host, x64::cpu_ size_t jit_soft_sign_emitter::get_inputs_num() const { return 1; } -void jit_soft_sign_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { +std::set> jit_soft_sign_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + +void jit_soft_sign_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { if (host_isa_ == x64::sse41) { emit_isa(in_vec_idxs, out_vec_idxs); } else if (host_isa_ == x64::avx2) { @@ -2086,6 +2188,10 @@ jit_select_emitter::jit_select_emitter(x64::jit_generator *host, x64::cpu_isa_t size_t jit_select_emitter::get_inputs_num() const { return 3; } +std::set> jit_select_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32, element::f32}}; +} + size_t jit_select_emitter::aux_vecs_count() const { if (host_isa_ == x64::avx512_core) return 0; diff --git 
a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp index 138ba513eda71a..5c00e4584b4274 100644 --- a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp @@ -13,11 +13,10 @@ class jit_add_emitter : public jit_emitter { public: jit_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); - jit_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, - InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); + jit_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n); size_t get_inputs_num() const override; - static std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -30,11 +29,10 @@ class jit_mul_add_emitter : public jit_emitter { public: jit_mul_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); - jit_mul_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, - InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); + jit_mul_add_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n); size_t get_inputs_num() const override; - static std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -50,11 +48,10 @@ class jit_subtract_emitter : public jit_emitter { public: jit_subtract_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); - jit_subtract_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, - InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); + jit_subtract_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n); size_t get_inputs_num() const override; - static std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -68,11 +65,10 @@ class jit_multiply_emitter : public jit_emitter { public: jit_multiply_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); - jit_multiply_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, - InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); + jit_multiply_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n); size_t get_inputs_num() const override; - static 
std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -90,7 +86,7 @@ class jit_divide_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; - static std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -108,6 +104,7 @@ class jit_floor_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -124,6 +121,7 @@ class jit_ceiling_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -140,6 +138,7 @@ class jit_floor_mod_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -158,6 +157,7 @@ class jit_mod_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -172,11 +172,10 @@ class jit_maximum_emitter : public jit_emitter { public: jit_maximum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); - jit_maximum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, - InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); + jit_maximum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n); size_t get_inputs_num() const override; - static std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -190,11 +189,10 @@ class jit_minimum_emitter : public jit_emitter { public: jit_minimum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); - jit_minimum_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, - InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); + jit_minimum_emitter(dnnl::impl::cpu::x64::jit_generator *host, 
dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n); size_t get_inputs_num() const override; - static std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -213,7 +211,7 @@ class jit_squared_difference_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; - static std::set get_supported_precisions(); + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -231,6 +229,7 @@ class jit_power_dynamic_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -248,6 +247,7 @@ class jit_equal_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -268,6 +268,7 @@ class jit_not_equal_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -288,6 +289,7 @@ class jit_greater_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -308,6 +310,7 @@ class jit_greater_equal_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -328,6 +331,7 @@ class jit_less_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -349,6 +353,7 @@ class jit_less_equal_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -369,6 +374,7 @@ class jit_logical_and_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() 
const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -389,6 +395,7 @@ class jit_logical_or_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -409,6 +416,7 @@ class jit_logical_xor_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -428,6 +436,7 @@ class jit_logical_not_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -448,6 +457,8 @@ class jit_power_static_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); + private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -471,6 +482,7 @@ class jit_prelu_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -489,6 +501,7 @@ class jit_sqrt_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -503,6 +516,7 @@ class jit_negative_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector& in, const std::vector& out) const override; @@ -520,6 +534,7 @@ class jit_erf_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl( @@ -541,6 +556,7 @@ class jit_soft_sign_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; @@ -563,6 +579,9 @@ class jit_is_finite_emitter : public jit_emitter { } size_t get_inputs_num() 
const override { return 1; };
+ static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr) {
+ return {{element::f32}};
+ }
 protected: size_t aux_gprs_count() const override { return (entry_map_.empty() ? 0 : 1) + 1; }
@@ -588,6 +607,9 @@ class jit_is_inf_emitter : public jit_emitter { } size_t get_inputs_num() const override { return 1; };
+ static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr) {
+ return {{element::f32}};
+ }
 protected: size_t aux_gprs_count() const override { return (entry_map_.empty() ? 0 : 1) + 1; }
@@ -615,6 +637,9 @@ class jit_is_nan_emitter : public jit_emitter { } size_t get_inputs_num() const override { return 1; }
+ static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr) {
+ return {{element::f32}};
+ }
 protected: size_t aux_gprs_count() const override { return (entry_map_.empty() ? 0 : 1) + 1; }
@@ -635,6 +660,7 @@ class jit_select_emitter : public jit_emitter { InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); size_t get_inputs_num() const override;
+ static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr);
 size_t aux_vecs_count() const override; private:
diff --git a/src/plugins/intel_cpu/src/emitters/jit_emitter.cpp b/src/plugins/intel_cpu/src/emitters/jit_emitter.cpp index 3bbd03935563f0..7d9ab0d0994315 100644
--- a/src/plugins/intel_cpu/src/emitters/jit_emitter.cpp
+++ b/src/plugins/intel_cpu/src/emitters/jit_emitter.cpp
@@ -3,8 +3,8 @@ // #include "jit_emitter.hpp"
-#include "utils/general_utils.h"
 #include
+#include "utils/general_utils.h"
 using namespace dnnl::impl::cpu; using namespace dnnl::impl;
@@ -55,8 +55,8 @@ size_t jit_emitter::aux_gprs_count() const { return entry_map_.empty() ? 0 : 1; }
-std::set jit_emitter::get_supported_precisions() {
- return {InferenceEngine::Precision::FP32};
+std::set> jit_emitter::get_supported_precisions(const std::shared_ptr& node) {
+ return {};
 }
 void jit_emitter::emitter_preamble(const std::vector &in_idxs, const std::vector &out_idxs,
diff --git a/src/plugins/intel_cpu/src/emitters/jit_emitter.hpp b/src/plugins/intel_cpu/src/emitters/jit_emitter.hpp index be548c614e0aa2..eb3309de32d8c5 100644
--- a/src/plugins/intel_cpu/src/emitters/jit_emitter.hpp
+++ b/src/plugins/intel_cpu/src/emitters/jit_emitter.hpp
@@ -49,7 +49,13 @@ class jit_emitter : public ngraph::snippets::Emitter { virtual size_t get_inputs_num() const = 0; virtual size_t aux_vecs_count() const; emitter_in_out_map get_in_out_type() const;
- static std::set get_supported_precisions();
+
+ /**
+ * @brief Returns the supported precisions.
+ * The precisions are ordered: the first precision of the same type with a larger bitness is selected.
+ * Empty collection means the emitter supports any input precisions.
+ */ + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); protected: virtual size_t aux_gprs_count() const; diff --git a/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.cpp b/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.cpp index af583e804b157f..4f63dd641f6295 100644 --- a/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.cpp @@ -479,7 +479,20 @@ void BroadcastMoveEmitter::emit_isa(const std::vector &in, const std::ve ScalarEmitter::ScalarEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, const std::shared_ptr& n) : jit_emitter(h, isa, n) { - value = dnnl::impl::cpu::x64::float2int(ov::as_type_ptr(n)->cast_vector()[0]); + const auto precision = n->get_output_element_type(0); + switch (precision) { + case element::i32: { + value = ov::as_type_ptr(n)->cast_vector()[0]; + break; + } + case element::f32: { + value = dnnl::impl::cpu::x64::float2int(ov::as_type_ptr(n)->cast_vector()[0]); + break; + } + default: { + IE_THROW() << "Scalar emitter doesn't support " << precision; + } + } push_arg_entry_of("scalar", value, true); prepare_table(); } diff --git a/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp b/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp index caeab227ad4b44..cae08b3fe43ac8 100644 --- a/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.hpp @@ -322,6 +322,9 @@ class BrgemmEmitter : public jit_emitter { BrgemmEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, const std::shared_ptr& n); size_t get_inputs_num() const override {return 2;} + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr) { + return {{element::f32, element::f32}}; + } private: void emit_impl(const std::vector& in, @@ -369,6 +372,9 @@ class HorizonMaxEmitter : public jit_emitter { HorizonMaxEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, const std::shared_ptr& n); size_t get_inputs_num() const override {return 1;} + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr) { + return {{element::f32}}; + } protected: size_t aux_gprs_count() const override {return 1;} @@ -387,6 +393,9 @@ class HorizonSumEmitter : public jit_emitter { HorizonSumEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::impl::cpu::x64::cpu_isa_t isa, const std::shared_ptr& n); size_t get_inputs_num() const override {return 1;} + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr) { + return {{element::f32}}; + } protected: size_t aux_gprs_count() const override {return 1;} diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index 4ef400ae601a2f..5bc46c00b40b7e 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -9,6 +9,7 @@ #include "cpu_types.h" #include "utils/bfloat16.hpp" +#include "ie_ngraph_utils.hpp" #include #include @@ -58,7 +59,7 @@ namespace { template struct SupportedPrecisions { - void operator()(std::set &precisions) { + void operator()(std::set> &precisions) { precisions = T::get_supported_precisions(); } }; @@ -105,7 +106,7 @@ struct EltwiseEmitter { /** * Implements Eltwise shape inference algorithm. The algorithm is based on broadcasting all the input shapes * according to the NUMPY broadcast rule. 
This implementation is more lightweight than the ngraph one. - * + */
 class EltwiseShapeInfer : public ShapeInferEmptyPads { public:
@@ -176,10 +177,31 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener void generate() override { Precision exec_prc = Precision::UNSPECIFIED;
- std::set supported_precision_intersection = get_supported_precisions(eltwise_data_.front().algo);
+ std::set> supported_precision_intersection = get_supported_precisions(eltwise_data_.front().algo);
+
+ // for element-wise operations all inputs must have the same precision
+ assert(std::all_of(
+ supported_precision_intersection.begin(),
+ supported_precision_intersection.end(),
+ [&supported_precision_intersection](const std::vector& precisions) {
+ return std::all_of(
+ precisions.begin(),
+ precisions.end(),
+ [&precisions](const element::Type precision) { return precision == precisions[0]; });
+ }));
+
 for (size_t i = 1; i < eltwise_data_.size(); ++i) {
- std::set prcs = get_supported_precisions(eltwise_data_[i].algo);
- std::set prcs_intersect = {};
+ std::set> prcs = get_supported_precisions(eltwise_data_[i].algo);
+ std::set> prcs_intersect = {};
+
+ // to support previous functionality
+ if (!std::all_of(
+ prcs.begin(),
+ prcs.end(),
+ [&supported_precision_intersection](const std::vector& types) {
+ return types.size() == supported_precision_intersection.size(); })) {
+ continue;
+ }
 std::set_intersection(supported_precision_intersection.begin(), supported_precision_intersection.end(), prcs.begin(), prcs.end(), std::inserter(prcs_intersect, prcs_intersect.begin()));
@@ -187,19 +209,22 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener supported_precision_intersection = prcs_intersect; }
- static const Precision exec_precisions_priority[] = {
- Precision::U8,
- Precision::I8,
- Precision::U16,
- Precision::I16,
- Precision::BF16,
- Precision::I32,
- Precision::FP32
+ static const element::Type exec_precisions_priority[] = {
+ element::u8,
+ element::i8,
+ element::u16,
+ element::i16,
+ element::bf16,
+ element::i32,
+ element::f32
 };
- for (auto prc : exec_precisions_priority) {
- if (std::find(supported_precision_intersection.begin(), supported_precision_intersection.end(), prc) != supported_precision_intersection.end()) {
- exec_prc = prc;
+ for (const auto prc : exec_precisions_priority) {
+ if (std::any_of(
+ supported_precision_intersection.begin(),
+ supported_precision_intersection.end(),
+ [&prc](const std::vector& precisions) { return std::find(precisions.begin(), precisions.end(), prc) != precisions.end(); })) {
+ exec_prc = InferenceEngine::details::convertPrecision(prc);
 break; } }
@@ -482,8 +507,8 @@ struct jit_uni_eltwise_generic : public jit_uni_eltwise_kernel, public jit_gener const std::vector& ops_list_; const dnnl::post_ops& post_ops_;
- std::set get_supported_precisions(Algorithm algo) {
- std::set precisions;
+ std::set> get_supported_precisions(Algorithm algo) {
+ std::set> precisions;
 OV_SWITCH(intel_cpu, SupportedPrecisions, precisions, algo, OV_CASE(Algorithm::EltwiseRelu, jit_dnnl_aux_emitter),
diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index d11fc50d33edfe..8eb425e7ec4921 100644
--- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp
+++ b/src/plugins/intel_cpu/src/nodes/subgraph.cpp
@@ -25,6 +25,7 @@ #include "utils/cpu_utils.hpp" #include "snippets_transformations/fuse_load_store_and_convert.hpp" #include "snippets_transformations/mul_add_to_fma.hpp"
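// Illustrative sketch, not part of the patch: generate() above intersects the
// precision tuples supported by every fused emitter and then walks
// exec_precisions_priority, taking the first priority precision that occurs in
// any surviving tuple. A stand-alone model of that selection (hypothetical names;
// the real code keeps Precision::UNSPECIFIED when nothing matches):
#include <algorithm>
#include <set>
#include <vector>

namespace exec_prc_sketch {
enum class prec { u8, i8, u16, i16, bf16, i32, f32, unspecified };

inline prec select_exec_precision(const std::set<std::vector<prec>>& intersection) {
    static const prec priority[] = {prec::u8, prec::i8, prec::u16, prec::i16,
                                    prec::bf16, prec::i32, prec::f32};
    for (const auto p : priority) {
        const bool supported = std::any_of(
            intersection.begin(), intersection.end(),
            [&p](const std::vector<prec>& tuple) {
                return std::find(tuple.begin(), tuple.end(), p) != tuple.end();
            });
        if (supported)
            return p;  // the lowest-bitness supported precision wins
    }
    return prec::unspecified;
}
}  // namespace exec_prc_sketch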
+#include "snippets_transformations/remove_converts.hpp" #include "ngraph_transformations/convert_to_swish_cpu.hpp" using namespace InferenceEngine; @@ -39,7 +40,7 @@ namespace node { namespace { /* This class implementation is a temporal WA - TODO: revise the implementation to remove the node reference*/ + TODO: revise the implementation to remove the node reference*/ class SnippetShapeInfer : public ShapeInferEmptyPads { public: SnippetShapeInfer(Snippet* node) : m_node(node) {} @@ -531,28 +532,36 @@ bool Snippet::created() const { } void Snippet::generate(const jit_snippets_compile_args* jcp) { - ov::pass::Manager optManager; - optManager.register_pass(); - optManager.register_pass(); - optManager.register_pass(); - optManager.register_pass(); + ov::pass::Manager pre_dialect; + pre_dialect.register_pass(); + ov::pass::Manager post_dialect; + + ov::pass::Manager post_precision; + post_precision.register_pass(); + post_precision.register_pass(); + post_precision.register_pass(); // LoadConvert uses Load emitter that support conversion from any type to only f32 - optManager.get_pass_config()->set_callback( + post_precision.get_pass_config()->set_callback( [](const std::shared_ptr& n) -> bool { if (const auto& convert = std::dynamic_pointer_cast(n)) return convert->get_destination_type() != ov::element::f32; return true; }); - // StoreConvert uses Store emitter that support conversion from only f32 to any types - optManager.get_pass_config()->set_callback( + post_precision.get_pass_config()->set_callback( [](const std::shared_ptr& n) -> bool { if (const auto& convert = std::dynamic_pointer_cast(n)) return convert->get_input_element_type(0) != ov::element::f32; return true; }); - schedule = snippet->generate(optManager, reinterpret_cast(jcp)); + post_precision.register_pass(); + + schedule = snippet->generate( + pre_dialect, + post_dialect, + post_precision, + reinterpret_cast(jcp)); } void Snippet::update_ptrs(jit_snippets_call_args& call_args) { diff --git a/src/plugins/intel_cpu/src/snippets_transformations/remove_converts.cpp b/src/plugins/intel_cpu/src/snippets_transformations/remove_converts.cpp new file mode 100644 index 00000000000000..238fadaa47e897 --- /dev/null +++ b/src/plugins/intel_cpu/src/snippets_transformations/remove_converts.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "remove_converts.hpp" + +#include "snippets/itt.hpp" +#include "ngraph/opsets/opset1.hpp" +#include "ngraph/rt_info.hpp" +#include "ngraph/pattern/op/wrap_type.hpp" + +#include "snippets/op/convert_saturation.hpp" + +ov::intel_cpu::pass::RemoveConverts::RemoveConverts() { + MATCHER_SCOPE(RemoveConverts); + auto parent_convert_wrap = ngraph::pattern::wrap_type(); + auto child_convert_wrap = ngraph::pattern::wrap_type({ parent_convert_wrap }); + + auto callback = [=](ngraph::pattern::Matcher& m) { + OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "ov::intel_cpu::pass::RemoveConverts") + const auto& pm = m.get_pattern_value_map(); + const auto parent_convert = pm.at(parent_convert_wrap).get_node_shared_ptr(); + const auto child_convert = pm.at(child_convert_wrap).get_node_shared_ptr(); + if ( + (parent_convert->get_input_element_type(0) != element::f32) || + (parent_convert->get_output_target_inputs(0).size() != 1ull) || + (parent_convert->get_output_element_type(0) != element::bf16) || + (child_convert->get_output_element_type(0) != element::f32)) { + return false; + } + + 
replace_output_update_name(child_convert->output(0), parent_convert->get_input_source_output(0));
+ return true;
+ };
+
+ auto m = std::make_shared(child_convert_wrap, matcher_name);
+ register_matcher(m, callback);
+}
diff --git a/src/plugins/intel_cpu/src/snippets_transformations/remove_converts.hpp b/src/plugins/intel_cpu/src/snippets_transformations/remove_converts.hpp new file mode 100644 index 00000000000000..b1fc6d4503d606
--- /dev/null
+++ b/src/plugins/intel_cpu/src/snippets_transformations/remove_converts.hpp
@@ -0,0 +1,27 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/pass/graph_rewrite.hpp"
+#include "ngraph/pattern/matcher.hpp"
+
+namespace ov {
+namespace intel_cpu {
+namespace pass {
+
+/**
+ * @interface RemoveConverts
+ * @brief Removes a sequence of two ConvertSaturation operations for the specific precisions: FP32 => BF16 => FP32
+ * @ingroup snippets
+ */
+class RemoveConverts : public ngraph::pass::MatcherPass {
+public:
+ OPENVINO_RTTI("RemoveConverts", "0");
+ RemoveConverts();
+};
+
+} // namespace pass
+} // namespace intel_cpu
+} // namespace ov
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/check_broadcast.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/check_broadcast.cpp new file mode 100644 index 00000000000000..9469bc9607141a
--- /dev/null
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/check_broadcast.cpp
@@ -0,0 +1,81 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "snippets/check_broadcast.hpp"
+#include "common_test_utils/test_constants.hpp"
+
+namespace ov {
+namespace test {
+namespace snippets {
+
+
+namespace {
+
+const std::vector input_types = {
+ // TODO: 105804
+ //ov::element::i32,
+ ov::element::f32
+};
+
+const std::vector test_cases = {
+ // broadcast is necessary
+ {
+ {{1, 3, 4, 4}, {4, 4}},
+ ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::PDPD, -1),
+ 1,
+ 0
+ },
+ {
+ {{1, 3, 4, 4}, {4, 4}},
+ ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::PDPD, 2),
+ 1,
+ 0
+ },
+
+ // broadcast is not necessary
+ {
+ {{1, 3, 4, 4}, {1, 3, 4, 4}},
+ ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::PDPD, -1),
+ 1,
+ 1
+ },
+ {
+ {{1, 3, 4, 4}, {1, 3, 4, 4}},
+ ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::PDPD, 0),
+ 1,
+ 1
+ },
+
+ // any other broadcast type (not PDPD)
+ {
+ {{1, 3, 4, 4}, {4, 4}},
+ ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::NUMPY, -1),
+ 1,
+ 1
+ },
+ {
+ {{1, 3, 4, 4}, {4, 4}},
+ ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::NUMPY, 0),
+ 1,
+ 1
+ },
+ {
+ {{1, 3, 4, 4}, {4, 4}},
+ ov::op::AutoBroadcastSpec(ov::op::AutoBroadcastType::NUMPY, 2),
+ 1,
+ 1
+ },
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_Snippets_CheckBroadcast, CheckBroadcast,
+ ::testing::Combine(
+ ::testing::ValuesIn(input_types),
+ ::testing::ValuesIn(test_cases),
+ ::testing::Values(CommonTestUtils::DEVICE_CPU)),
+ CheckBroadcast::getTestCaseName);
+
+} // namespace
+} // namespace snippets
+} // namespace test
+} // namespace ov
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/precision_propagation_convertion.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/precision_propagation_convertion.cpp new file mode 100644 index 00000000000000..5c93badbd3c9e9
--- /dev/null
+++
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/precision_propagation_convertion.cpp @@ -0,0 +1,37 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/precision_propagation_convertion.hpp" +#include +#include + +namespace ov { +namespace test { +namespace snippets { + + +namespace { + +const std::vector> input_shapes = { + {{ 1, 3, 16, 16 }, { 1, 1, 1, 16 }}, +}; + +const std::vector> fake_quantize_intervals = { + {0.f, 2.55f, 0.f, 2.55f}, + {-1.28f, 1.27f, -1.28f, 1.27f} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_PrecisionPropagation_Convertion, PrecisionPropagationConvertion, + ::testing::Combine( + ::testing::ValuesIn(input_shapes), + ::testing::ValuesIn(fake_quantize_intervals), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + PrecisionPropagationConvertion::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/unit/ngraph_transformations/mul_add_to_fma.cpp b/src/plugins/intel_cpu/tests/unit/ngraph_transformations/mul_add_to_fma.cpp index 0fcaaceadd70ab..5431cbb2626a55 100644 --- a/src/plugins/intel_cpu/tests/unit/ngraph_transformations/mul_add_to_fma.cpp +++ b/src/plugins/intel_cpu/tests/unit/ngraph_transformations/mul_add_to_fma.cpp @@ -155,7 +155,7 @@ class MulAddToFMATests : public LoweringTests, public testing::WithParamInterfac }; TEST_P(MulAddToFMATests, MulAddToFMATests) { - auto subgraph = getLoweredSubgraph(snippets_function->getOriginal(), master_shape, cpu_manager, generator); + auto subgraph = getLoweredSubgraph(snippets_function->getOriginal(), master_shape, {}, {}, cpu_manager, generator); model = subgraph->body_ptr(); model_ref = snippets_function->getLowered(); } diff --git a/src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp b/src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp new file mode 100644 index 00000000000000..1c33792cd328ec --- /dev/null +++ b/src/tests/functional/plugin/shared/include/snippets/check_broadcast.hpp @@ -0,0 +1,38 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/snippets_test_utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +class CheckBroadcastTestCaseParams { +public: + std::pair input_shapes; + ov::op::AutoBroadcastSpec broadcast; + size_t num_nodes; + size_t num_subgraphs; +}; + +typedef std::tuple < + ov::element::Type, // input types + CheckBroadcastTestCaseParams, // test case details + std::string // target device +> CheckBroadcastParams; + +class CheckBroadcast : public testing::WithParamInterface, + virtual public ov::test::SnippetsTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/snippets/precision_propagation_convertion.hpp b/src/tests/functional/plugin/shared/include/snippets/precision_propagation_convertion.hpp new file mode 100644 index 00000000000000..3ab24d7cf299f3 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/snippets/precision_propagation_convertion.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/snippets_test_utils.hpp" 
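// Illustrative sketch, not part of the patch: the CheckBroadcast cases above rely
// on PDPD broadcast semantics, where the second input is aligned to the first
// starting at `axis` (axis == -1 means rank(a) - rank(b)) and the output keeps the
// first input's shape. A stand-alone model of that rule (the helper is hypothetical):
#include <cassert>
#include <cstdint>
#include <vector>

namespace pdpd_sketch {
inline std::vector<int64_t> pdpd_broadcast_shape(const std::vector<int64_t>& a,
                                                 const std::vector<int64_t>& b,
                                                 int64_t axis) {
    if (axis == -1)
        axis = static_cast<int64_t>(a.size()) - static_cast<int64_t>(b.size());
    assert(axis >= 0 && axis + static_cast<int64_t>(b.size()) <= static_cast<int64_t>(a.size()));
    for (size_t i = 0; i < b.size(); ++i) {
        // each dimension of b must match the aligned dimension of a or be 1
        assert(b[i] == a[static_cast<size_t>(axis) + i] || b[i] == 1);
    }
    return a;  // the PDPD output shape is the first input's shape
}
}  // namespace pdpd_sketch
// Example: pdpd_broadcast_shape({1, 3, 4, 4}, {4, 4}, 2) and axis -1 both yield
// {1, 3, 4, 4}, matching the "broadcast is necessary" cases above, where the
// lower-rank input has to be expanded before the element-wise operation.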
+ +namespace ov { +namespace test { +namespace snippets { + +typedef std::tuple< + std::vector, // Input shapes + std::vector, // FakeQuantize intervals + size_t, // Expected num nodes + size_t, // Expected num subgraphs + std::string // Target Device +> PrecisionPropagationParams; + +class PrecisionPropagationConvertion : + public testing::WithParamInterface, + virtual public ov::test::SnippetsTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; +}; + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/src/low_precision_transformations/fuse_fake_quantize_transformation.cpp b/src/tests/functional/plugin/shared/src/low_precision_transformations/fuse_fake_quantize_transformation.cpp index 0dc3d899f7988a..8c4109c439365d 100644 --- a/src/tests/functional/plugin/shared/src/low_precision_transformations/fuse_fake_quantize_transformation.cpp +++ b/src/tests/functional/plugin/shared/src/low_precision_transformations/fuse_fake_quantize_transformation.cpp @@ -20,7 +20,7 @@ std::string FuseFakeQuantizeTransformation::getTestCaseName(const testing::TestP std::tie(targetDevice, testValues) = obj.param; std::ostringstream result; - result << targetDevice << "_" << + result << "targetDevice=" << targetDevice << "_" << testValues.actual.precisionBeforeAdd << "_" << testValues.actual.add.values.size() << "_" << testValues.actual.add.outPrecision << "_" << diff --git a/src/tests/functional/plugin/shared/src/snippets/check_broadcast.cpp b/src/tests/functional/plugin/shared/src/snippets/check_broadcast.cpp new file mode 100644 index 00000000000000..3730771a1a44d5 --- /dev/null +++ b/src/tests/functional/plugin/shared/src/snippets/check_broadcast.cpp @@ -0,0 +1,89 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/check_broadcast.hpp" + +#include "common_test_utils/common_utils.hpp" +#include "subgraph_converts.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +class CheckBroadcastFunction { +public: + static std::shared_ptr get( + const PartialShape& input_shape1, + const PartialShape& input_shape2, + const ov::element::Type input_type, + const ov::op::AutoBroadcastSpec broadcast) { + const auto parameter1 = std::make_shared(input_type, input_shape1); + parameter1->set_friendly_name("parameter1"); + + const auto parameter2 = std::make_shared(input_type, input_shape2); + parameter2->set_friendly_name("parameter2"); + + std::shared_ptr parent = std::make_shared( + parameter1, + parameter2, + broadcast); + parent->set_friendly_name("multiply"); + + const auto result = std::make_shared(parent); + result->set_friendly_name("result"); + + return std::make_shared( + ngraph::ResultVector{ result }, + ngraph::ParameterVector{ parameter1, parameter2 }, + "CheckBroadcastFunction"); + } +}; + +std::string CheckBroadcast::getTestCaseName(testing::TestParamInfo obj) { + ov::element::Type input_type; + CheckBroadcastTestCaseParams test_case_params; + std::string target_device; + + std::tie(input_type, test_case_params, target_device) = obj.param; + + std::ostringstream result; + result << "IS=" << test_case_params.input_shapes.first.get_shape() << "_" << + test_case_params.input_shapes.second.get_shape() << "_"; + result << "IT=" << input_type << "_"; + result << "BCT=" << test_case_params.broadcast.m_type << "_"; + result << "BCA=" << test_case_params.broadcast.m_axis 
<< "_"; + result << "#N=" << test_case_params.num_nodes << "_"; + result << "#S=" << test_case_params.num_subgraphs << "_"; + result << "targetDevice=" << target_device; + return result.str(); +} + +void CheckBroadcast::SetUp() { + ov::element::Type input_type; + CheckBroadcastTestCaseParams test_case_params; + + std::tie(input_type, test_case_params, targetDevice) = this->GetParam(); + ref_num_nodes = test_case_params.num_nodes; + ref_num_subgraphs = test_case_params.num_subgraphs; + + init_input_shapes(static_partial_shapes_to_test_representation({ + test_case_params.input_shapes.first, + test_case_params.input_shapes.second})); + + function = CheckBroadcastFunction::get( + test_case_params.input_shapes.first, + test_case_params.input_shapes.second, + input_type, + test_case_params.broadcast); +} + +TEST_P(CheckBroadcast, CompareWithRefImpl) { + run(); + validateNumSubgraphs(); +} + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/src/snippets/convert.cpp b/src/tests/functional/plugin/shared/src/snippets/convert.cpp index 60419d28b2f96f..95749f32da1272 100644 --- a/src/tests/functional/plugin/shared/src/snippets/convert.cpp +++ b/src/tests/functional/plugin/shared/src/snippets/convert.cpp @@ -106,8 +106,8 @@ parameters ConvertInput::generate_params_random() const { break; case ov::element::i32: case ov::element::i8: - startFrom = -10; - range = 20; + startFrom = -32; + range = 64; break; case ov::element::u8: startFrom = 10; diff --git a/src/tests/functional/plugin/shared/src/snippets/precision_propagation_convertion.cpp b/src/tests/functional/plugin/shared/src/snippets/precision_propagation_convertion.cpp new file mode 100644 index 00000000000000..570fa4b44dac70 --- /dev/null +++ b/src/tests/functional/plugin/shared/src/snippets/precision_propagation_convertion.cpp @@ -0,0 +1,48 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/precision_propagation_convertion.hpp" + +#include "common_test_utils/common_utils.hpp" +#include "precision_propagation_convertion_function.hpp" + +namespace ov { +namespace test { +namespace snippets { + +std::string PrecisionPropagationConvertion::getTestCaseName(testing::TestParamInfo obj) { + std::vector input_shapes; + std::vector fake_quantize_intervals; + std::string targetDevice; + size_t num_nodes, num_subgraphs; + std::tie(input_shapes, fake_quantize_intervals, num_nodes, num_subgraphs, targetDevice) = obj.param; + + std::ostringstream result; + for (size_t i = 0; i < input_shapes.size(); ++i) + result << "IS[" << i << "]=" << input_shapes[i] << "_"; + for (size_t i = 0; i < fake_quantize_intervals.size(); ++i) + result << "FQ[" << i << "]=" << fake_quantize_intervals[i] << "_"; + result << "#N=" << num_nodes << "_"; + result << "#S=" << num_subgraphs << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); +} + +void PrecisionPropagationConvertion::SetUp() { + std::vector input_shapes; + std::vector fake_quantize_intervals; + std::tie(input_shapes, fake_quantize_intervals, ref_num_nodes, ref_num_subgraphs, targetDevice) = this->GetParam(); + init_input_shapes(static_partial_shapes_to_test_representation(input_shapes)); + + function = PrecisionPropagationConvertionFunction(input_shapes, ov::element::f32, fake_quantize_intervals).getOriginal(); +} + +TEST_P(PrecisionPropagationConvertion, CompareWithRefImpl) { + run(); + validateNumSubgraphs(); +} + +} // namespace snippets +} // namespace test +} // namespace ov 
diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_convertion_function.hpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_convertion_function.hpp new file mode 100644 index 00000000000000..554d7b08fc5134 --- /dev/null +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_convertion_function.hpp @@ -0,0 +1,49 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include "openvino/core/model.hpp" +#include "snippets_helpers.hpp" + +namespace ov { +namespace test { +namespace snippets { + +/** + * @class PrecisionPropagationConvertionFunction + * @brief A PrecisionPropagationConvertionFunction instance returns the reference and original functions. + * + * Input arguments are used to create the function in the getOriginal method only. + * Don't use the getReference and getLowered methods: they are not implemented and throw std::runtime_error. + * Note that the ov::element::Type_t precision base type input argument is not used. + */ +class PrecisionPropagationConvertionFunction : public SnippetsFunctionBase { +public: + PrecisionPropagationConvertionFunction( + const std::vector<ov::PartialShape>& input_shapes, + const element::Type input_type, + const std::vector<float>& fake_quantize_intervals); + + /* + * Don't call this method explicitly. Create an instance of PrecisionPropagationConvertionFunction instead; + * the method is then called implicitly in getOriginal. + * Note that the getReference and getLowered methods are not implemented and throw an exception. + */ + static std::shared_ptr<ov::Model> get( + const std::vector<ov::PartialShape>& input_shapes, + const element::Type input_type, + const std::vector<float>& fake_quantize_intervals); + +protected: + std::shared_ptr<ov::Model> initOriginal() const override; + +private: + const std::vector<float> fake_quantize_intervals; +}; + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_function.hpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_function.hpp new file mode 100644 index 00000000000000..b32099cf3020de --- /dev/null +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/precision_propagation_function.hpp @@ -0,0 +1,131 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include "ngraph/opsets/opset1.hpp" +#include "snippets/op/convert_saturation.hpp" +#include "snippets_helpers.hpp" + +namespace ov { +namespace test { +namespace snippets { + +/** + * @class DummyAdd + * @brief DummyAdd operation has a custom validate_and_infer_types method implementation.
+ */ +class DummyAdd : public ngraph::opset1::Add { +public: + OPENVINO_OP("DummyAdd", "test::snippets"); + + DummyAdd(const Output<Node>& arg0, + const Output<Node>& arg1, + const ngraph::op::AutoBroadcastSpec& auto_broadcast = + ngraph::op::AutoBroadcastSpec(ngraph::op::AutoBroadcastType::NUMPY)) + : ngraph::opset1::Add(arg0, arg1, auto_broadcast) { + constructor_validate_and_infer_types(); + } + + DummyAdd(const ngraph::opset1::Add& add) + : Add(add.get_input_source_output(0), add.get_input_source_output(1), add.get_autob()) { + constructor_validate_and_infer_types(); + } + + DummyAdd() = default; + + void validate_and_infer_types() override { + const auto input_type1 = get_input_element_type(0); + const auto input_type2 = get_input_element_type(1); + + const element::Type output_type = (input_type1 == element::i8) || (input_type2 == element::i8) ? + element::i32 : + get_input_element_type(0); + + set_output_type(0, output_type, get_input_partial_shape(0)); + } + + std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override { + return std::make_shared<DummyAdd>(new_args.at(0), new_args.at(1), this->get_autob()); + } +}; + +class PrecisionPropagationAddFunctionParams { +public: + class Actual { + public: + std::pair<element::Type, element::Type> convertion_before_op1; + element::Type convertion_before_op2_1; + std::pair<element::Type, element::Type> convertion_before_op2_2; + }; + + class Expected { + public: + std::pair<element::Type, element::Type> convertion_before_op1; + element::Type convertion_before_op2_1; + std::pair<element::Type, element::Type> convertion_before_op2_2; + element::Type convertion_after_op2; + }; +}; + +/** + * @class PrecisionPropagationAddFunction + * @brief A PrecisionPropagationAddFunction instance returns the reference and original functions. + * + * Input arguments are used to create the function in the getOriginal or getReference methods only. + * Don't use the getLowered method: it is not implemented and throws std::runtime_error. + * Note that the ov::element::Type_t precision base type input argument is not used. + */ +class PrecisionPropagationAddFunction : public SnippetsFunctionBase { +public: + explicit PrecisionPropagationAddFunction( + const std::vector<ov::PartialShape> input_shapes, + const ngraph::element::Type precision1, + const ngraph::element::Type precision2, + const ngraph::element::Type constant_precision, + PrecisionPropagationAddFunctionParams::Actual actual, + PrecisionPropagationAddFunctionParams::Expected expected) : + SnippetsFunctionBase(input_shapes), + precision1(precision1), + precision2(precision2), + constant_precision(constant_precision), + actual(actual), + expected(expected) { + OPENVINO_ASSERT(input_shapes.size() == 2ull, "input_shapes size has to be equal to 2"); + } + + /* + * Don't call this method explicitly. Create an instance of PrecisionPropagationAddFunction instead; + * the method is then called implicitly in the getOriginal or getReference methods. + * Note that the getLowered method is not implemented and throws an exception.
+ */ + static std::shared_ptr get( + const ngraph::element::Type precision1, + const ngraph::PartialShape& inputShape1, + const ngraph::element::Type precision2, + const ngraph::PartialShape& inputShape2, + const ngraph::element::Type constant_precision, + const std::pair& convertion_before_op1 = std::pair(), + const element::Type convertion_before_op2_1 = element::undefined, + const std::pair& convertion_before_op2_2 = std::pair(), + const element::Type convertion_after_op2 = {}); + +protected: + std::shared_ptr initOriginal() const override; + std::shared_ptr initReference() const override; + + const ngraph::element::Type precision1; + const ngraph::element::Type precision2; + const ngraph::element::Type constant_precision; + const PrecisionPropagationAddFunctionParams::Actual actual; + const PrecisionPropagationAddFunctionParams::Expected expected; +}; + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/include/snippets_helpers.hpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/snippets_helpers.hpp index b4073b2d065ae0..9d3edad4b55339 100644 --- a/src/tests/ngraph_helpers/snippets_ngraph_functions/include/snippets_helpers.hpp +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/snippets_helpers.hpp @@ -17,6 +17,7 @@ using ov::Model; class SnippetsFunctionBase { public: SnippetsFunctionBase() = delete; + virtual ~SnippetsFunctionBase() = default; explicit SnippetsFunctionBase(const std::vector& inputShapes, ov::element::Type_t precision = element::f32) : precision{precision}, input_shapes{inputShapes} {} diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_convertion_function.cpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_convertion_function.cpp new file mode 100644 index 00000000000000..20f517b16dfceb --- /dev/null +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_convertion_function.cpp @@ -0,0 +1,92 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "precision_propagation_convertion_function.hpp" +#include +#include + +namespace ov { +namespace test { +namespace snippets { + +namespace { +std::shared_ptr make_fake_quantize( + const Output& parent, + const ngraph::PartialShape& inputShape, + const element::Type inputType, + const std::vector& fake_quantize_intervals) { + auto generate = [](const ov::element::Type precision, + const ngraph::Shape& shape, + const float initialValue, + const std::string& name) { + const auto size = ngraph::shape_size(shape); + std::vector values(size); + for (auto i = 0; i < size; ++i) { + values[i] = static_cast(initialValue + i); + } + auto constant = std::make_shared(precision, shape, values); + constant->set_friendly_name(name); + return constant; + }; + + const auto fakeQuantize = std::make_shared( + parent, + generate(inputType, {}, fake_quantize_intervals[0], "inputLow"), + generate(inputType, {}, fake_quantize_intervals[1], "inputHigh"), + generate(inputType, {}, fake_quantize_intervals[2], "outputLow"), + generate(inputType, {}, fake_quantize_intervals[3], "outputHigh"), + 256ul); + fakeQuantize->set_friendly_name("fakeQuantize"); + + return fakeQuantize; +} +} // namespace + +PrecisionPropagationConvertionFunction::PrecisionPropagationConvertionFunction( + const std::vector& input_shapes, + const element::Type input_type, + const std::vector& fake_quantize_intervals) : + 
SnippetsFunctionBase(input_shapes, input_type), + fake_quantize_intervals(fake_quantize_intervals) { +} + +std::shared_ptr PrecisionPropagationConvertionFunction::get( + const std::vector& input_shapes, + const element::Type input_type, + const std::vector& fake_quantize_intervals) { + assert(2ull == input_shapes.size()); + assert(4ull == fake_quantize_intervals.size()); + const auto parameter1 = std::make_shared(input_type, input_shapes[0]); + parameter1->set_friendly_name("parameter1"); + + const auto parameter2 = std::make_shared(input_type, input_shapes[1]); + parameter2->set_friendly_name("parameter2"); + + std::shared_ptr parent = make_fake_quantize( + parameter1, + input_shapes[0], + input_type, + fake_quantize_intervals); + parent->set_friendly_name("fakeQuantize"); + + parent = std::make_shared(parent, parameter2); + parent->set_friendly_name("add"); + + const auto result = std::make_shared(parent); + result->set_friendly_name("result"); + + auto function = std::make_shared( + ngraph::ResultVector{ result }, + ParameterVector{ parameter1, parameter2 }, + "PrecisionPropagationConvertionFunction"); + return function; +} + +std::shared_ptr PrecisionPropagationConvertionFunction::initOriginal() const { + return get(input_shapes, precision, fake_quantize_intervals); +} + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_function.cpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_function.cpp new file mode 100644 index 00000000000000..6a9ef600409e84 --- /dev/null +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/precision_propagation_function.cpp @@ -0,0 +1,105 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "precision_propagation_function.hpp" +#include +#include + +namespace ov { +namespace test { +namespace snippets { + +std::shared_ptr PrecisionPropagationAddFunction::get( + const ngraph::element::Type precision1, + const ngraph::PartialShape& inputShape1, + const ngraph::element::Type precision2, + const ngraph::PartialShape& inputShape2, + const ngraph::element::Type constant_precision, + const std::pair& convertion_before_op1, + const element::Type convertion_before_op2_1, + const std::pair& convertion_before_op2_2, + const element::Type convertion_after_op2) { + const auto create_convert = [](std::shared_ptr parent, const element::Type convertion_type) -> std::shared_ptr { + return convertion_type == element::undefined + ? 
std::dynamic_pointer_cast(parent) + : std::make_shared(parent, convertion_type); + }; + + const auto make_branch = [&create_convert]( + const ngraph::element::Type precision, + const ngraph::PartialShape& inputShape, + const size_t index, + const element::Type convertion_type) -> std::pair, std::shared_ptr> { + const auto parameter = std::make_shared(precision, inputShape); + parameter->set_friendly_name("parameter" + std::to_string(index)); + + std::shared_ptr parent = create_convert(parameter, convertion_type); + + return { parameter, parent }; + }; + + const auto branch1 = make_branch(precision1, inputShape1, 1, convertion_before_op1.first); + const auto branch2 = make_branch(precision2, inputShape2, 2, convertion_before_op1.second); + + std::shared_ptr parent = std::make_shared(branch1.second, branch2.second); + parent->set_friendly_name("add"); + + parent = create_convert(parent, convertion_before_op2_1); + + const auto maximum_in2_type = convertion_before_op2_2.second == element::undefined ? + constant_precision : + convertion_before_op2_2.second; + if ((convertion_before_op2_2.first == element::undefined) && + (parent->get_output_element_type(0) != maximum_in2_type)) { + parent = std::make_shared(parent, maximum_in2_type); + } + + parent = std::make_shared( + create_convert(parent, convertion_before_op2_2.first), + create_convert( + std::make_shared(constant_precision, Shape{}, std::vector{0.f}), + convertion_before_op2_2.second)); + parent->set_friendly_name("maximum"); + + parent = create_convert(parent, convertion_after_op2); + + const auto result = std::make_shared(parent); + auto& result_out_tensor = result->get_output_tensor(0); + result_out_tensor.set_names({ "result_tensor" }); + result->set_friendly_name("result"); + + const ngraph::ResultVector results{ result }; + const ngraph::ParameterVector parameters{ branch1.first, branch2.first }; + const auto model = std::make_shared(results, parameters, "SnippetsPrecisionPropagation"); + return model; +} + +std::shared_ptr PrecisionPropagationAddFunction::initOriginal() const { + return get( + precision1, + input_shapes[0], + precision2, + input_shapes[1], + constant_precision, + actual.convertion_before_op1, + actual.convertion_before_op2_1, + actual.convertion_before_op2_2); +} + +std::shared_ptr PrecisionPropagationAddFunction::initReference() const { + return get( + precision1, + input_shapes[0], + precision2, + input_shapes[1], + constant_precision, + expected.convertion_before_op1, + expected.convertion_before_op2_1, + expected.convertion_before_op2_2, + expected.convertion_after_op2); +} + +} // namespace snippets +} // namespace test +} // namespace ov From 982e1c1192855192c8c262d479fe9c9ac94435d5 Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Thu, 23 Mar 2023 10:29:32 +0100 Subject: [PATCH 053/296] [PyOV] Fix issues with RTMap (#15636) * [PyOV] Fix issues with RTMap * update year * some clean-up and items fix * tests and small fixes * Update src/bindings/python/src/pyopenvino/utils/utils.cpp * undo changes * fix serialization on python side * rt_info as rt_map * undo several changes in tests * fix mo test * sadd docstrings * add tests * fix codestyle * try to fix win * fix master * apply comments --- .../pyopenvino/frontend/frontend_module.cmake | 2 +- .../python/src/pyopenvino/graph/any.cpp | 77 ++++++++++++++++++- .../python/src/pyopenvino/graph/model.cpp | 26 +++---- .../python/src/pyopenvino/graph/rt_map.cpp | 58 ++++++++++++-- .../python/src/pyopenvino/utils/utils.cpp | 59 +++++++++++++- 
.../python/src/pyopenvino/utils/utils.hpp | 8 ++ .../python/tests/test_graph/test_any.py | 25 +++++- .../python/tests/test_runtime/test_model.py | 51 ++++++------ .../tests/test_utils/test_data_dispatch.py | 2 +- src/core/src/model.cpp | 2 +- .../unit_tests/mo/convert/meta_data_test.py | 6 +- .../mo/convert/meta_data_test_actual.py | 6 +- 12 files changed, 256 insertions(+), 66 deletions(-) diff --git a/src/bindings/python/src/pyopenvino/frontend/frontend_module.cmake b/src/bindings/python/src/pyopenvino/frontend/frontend_module.cmake index 33aaa10a6b3b55..d056bbfc1e93b0 100644 --- a/src/bindings/python/src/pyopenvino/frontend/frontend_module.cmake +++ b/src/bindings/python/src/pyopenvino/frontend/frontend_module.cmake @@ -25,7 +25,7 @@ function(frontend_module TARGET FRAMEWORK INSTALL_COMPONENT) target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${OpenVINOPython_SOURCE_DIR}/src/pyopenvino/utils/") - target_link_libraries(${TARGET_NAME} PRIVATE openvino::runtime openvino::frontend::${FRAMEWORK}) + target_link_libraries(${TARGET_NAME} PRIVATE openvino::runtime openvino::runtime::dev openvino::frontend::${FRAMEWORK}) set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) diff --git a/src/bindings/python/src/pyopenvino/graph/any.cpp b/src/bindings/python/src/pyopenvino/graph/any.cpp index 19c9e3c52cb111..a53fffac102119 100644 --- a/src/bindings/python/src/pyopenvino/graph/any.cpp +++ b/src/bindings/python/src/pyopenvino/graph/any.cpp @@ -5,12 +5,19 @@ #include "openvino/core/any.hpp" #include +#include #include "pyopenvino/graph/any.hpp" #include "pyopenvino/utils/utils.hpp" namespace py = pybind11; +namespace { +bool check_key(py::object key, py::object obj) { + return key.is(py::type::of(obj)); +} +}; // namespace + void regclass_graph_Any(py::module m) { py::class_<ov::Any, std::shared_ptr<ov::Any>> ov_any(m, "OVAny"); @@ -23,9 +30,7 @@ void regclass_graph_Any(py::module m) { })); ov_any.def("__repr__", [](const ov::Any& self) { - std::stringstream ret; - self.print(ret); - return ret.str(); + return ""; }); ov_any.def("__hash__", [](ov::Any& self) { @@ -62,6 +67,72 @@ void regclass_graph_Any(py::module m) { ov_any.def("__eq__", [](const ov::Any& a, py::object& b) -> bool { return a == ov::Any(Common::utils::py_object_to_any(b)); }); + ov_any.def( + "astype", + [](ov::Any& self, py::object dtype) { + if (check_key(dtype, py::bool_())) { + return py::cast(self.as<bool>()); + } else if (check_key(dtype, py::str())) { + return py::cast(self.as<std::string>()); + } else if (check_key(dtype, py::int_())) { + return py::cast(self.as<int64_t>()); + } else if (check_key(dtype, py::float_())) { + return py::cast(self.as<double>()); + } else if (check_key(dtype, py::dict())) { + return Common::utils::from_ov_any_map_no_leaves(self); + } + std::stringstream str; + str << "Unsupported data type : '" << dtype << "' is passed as an argument."; + OPENVINO_THROW(str.str()); + }, + R"( + Returns the runtime attribute cast to the defined data type. + + :param dtype: Data type to which the runtime attribute will be cast. + :type dtype: Union[bool, int, str, float, dict] + + :return: A runtime attribute.
+ :rtype: Any + )"); + ov_any.def( + "aslist", + [](ov::Any& self, py::object dtype) { + // before serialization + if (self.is<Common::utils::EmptyList>() || dtype.is_none()) { + return py::cast(py::list()); + } else if (self.is<std::vector<std::string>>()) { + return py::cast(self.as<std::vector<std::string>>()); + } else if (self.is<std::vector<int64_t>>()) { + return py::cast(self.as<std::vector<int64_t>>()); + } else if (self.is<std::vector<double>>()) { + return py::cast(self.as<std::vector<double>>()); + } else if (self.is<std::vector<bool>>()) { + return py::cast(self.as<std::vector<bool>>()); + } + // after serialization + if (check_key(dtype, py::str())) { + return py::cast(self.as<std::vector<std::string>>()); + } else if (check_key(dtype, py::int_())) { + return py::cast(self.as<std::vector<int64_t>>()); + } else if (check_key(dtype, py::float_())) { + return py::cast(self.as<std::vector<double>>()); + } else if (check_key(dtype, py::bool_())) { + return py::cast(self.as<std::vector<bool>>()); + } + std::stringstream str; + str << "Unsupported data type : '" << dtype << "' is passed as an argument."; + OPENVINO_THROW(str.str()); + }, + py::arg("dtype") = py::none(), + R"( + Returns the runtime attribute as a list with the specified data type. + + :param dtype: Data type of the list to which the runtime attribute will be cast. + :type dtype: Union[bool, int, str, float] + + :return: A runtime attribute as a list. + :rtype: Union[List[float], List[int], List[str], List[bool]] + )"); ov_any.def( "get", [](const ov::Any& self) -> py::object { diff --git a/src/bindings/python/src/pyopenvino/graph/model.cpp b/src/bindings/python/src/pyopenvino/graph/model.cpp index f95801bdb11258..acaa8792f0538a 100644 --- a/src/bindings/python/src/pyopenvino/graph/model.cpp +++ b/src/bindings/python/src/pyopenvino/graph/model.cpp @@ -774,47 +774,41 @@ void regclass_graph_Model(py::module m) { for (size_t i = 0; i < path.size(); i++) { cpp_args[i] = path[i].cast<std::string>(); } - return Common::utils::from_ov_any(self.get_rt_info(cpp_args)); + return py::cast(self.get_rt_info(cpp_args)); }, py::arg("path"), R"( - Returns runtime attribute. + Returns runtime attribute as an OVAny object. :param path: List of strings which defines a path to runtime info. :type path: List[str] :return: A runtime attribute. - :rtype: Any + :rtype: openvino.runtime.OVAny )"); model.def( "get_rt_info", [](const ov::Model& self, const py::str& path) -> py::object { - return Common::utils::from_ov_any(self.get_rt_info(path.cast<std::string>())); + return py::cast(self.get_rt_info(path.cast<std::string>())); }, py::arg("path"), R"( - Returns runtime attribute. + Returns runtime attribute as an OVAny object. :param path: A string which defines a path to runtime info. :type path: str :return: A runtime attribute.
- :rtype: Any + :rtype: openvino.runtime.OVAny )"); model.def( "has_rt_info", [](const ov::Model& self, const py::list& path) -> bool { - // FIXME: understand why has_rt_info causes Python crash - try { - std::vector<std::string> cpp_args(path.size()); - for (size_t i = 0; i < path.size(); i++) { - cpp_args[i] = path[i].cast<std::string>(); - } - self.get_rt_info(cpp_args); - return true; - } catch (ov::Exception&) { - return false; + std::vector<std::string> cpp_args(path.size()); + for (size_t i = 0; i < path.size(); i++) { + cpp_args[i] = path[i].cast<std::string>(); } + return self.has_rt_info(cpp_args); }, py::arg("path"), R"( diff --git a/src/bindings/python/src/pyopenvino/graph/rt_map.cpp b/src/bindings/python/src/pyopenvino/graph/rt_map.cpp index 5985c87f06136f..e666b3972e605c 100644 --- a/src/bindings/python/src/pyopenvino/graph/rt_map.cpp +++ b/src/bindings/python/src/pyopenvino/graph/rt_map.cpp @@ -10,6 +10,7 @@ #include #include "dict_attribute_visitor.hpp" +#include "meta_data.hpp" #include "openvino/core/node.hpp" #include "openvino/core/runtime_attribute.hpp" #include "openvino/op/add.hpp" @@ -27,11 +28,48 @@ using PyRTMap = ov::RTMap; PYBIND11_MAKE_OPAQUE(PyRTMap); +// A custom iterator that returns Python objects rather than OVAny itself. +class PyRTMapIterator { +public: + PyRTMapIterator(const PyRTMap& py_rt_map, py::object ref, bool is_value) + : py_rt_map(py_rt_map), + is_value(is_value), + ref(ref), + it(py_rt_map.cbegin()) {} + + py::object next() { + if (it == py_rt_map.end()) { + throw py::stop_iteration(); + } + const auto result = *it; + it++; + if (is_value) { + return Common::utils::from_ov_any_no_leaves(result.second); + } else { + std::pair<std::string, py::object> res = {result.first, + Common::utils::from_ov_any_no_leaves(result.second)}; + return py::cast(res); + } + } + + const PyRTMap& py_rt_map; + bool is_value = false; + py::object ref; // keep a reference + std::map<std::string, ov::Any>::const_iterator it; +}; + void regclass_graph_PyRTMap(py::module m) { auto py_map = py::class_<PyRTMap>(m, "RTMap"); py_map.doc() = "openvino.runtime.RTMap makes bindings for std::map<std::string, ov::Any>, which can later be used as ov::Node::RTMap"; + py::class_<PyRTMapIterator>(m, "Iterator") + .def("__iter__", + [](PyRTMapIterator& it) -> PyRTMapIterator& { + return it; + }) + .def("__next__", &PyRTMapIterator::next); + py_map.def("__setitem__", [](PyRTMap& m, const std::string& k, const std::string v) { m[k] = v; }); @@ -39,7 +77,7 @@ m[k] = v; }); py_map.def("__getitem__", [](PyRTMap& m, const std::string& k) -> py::object { - return Common::utils::from_ov_any(m[k]); + return Common::utils::from_ov_any_no_leaves(m[k]); }); py_map.def( "__bool__", @@ -50,20 +88,28 @@ py_map.def( "__iter__", - [](PyRTMap& m) { - return py::make_key_iterator(m.begin(), m.end()); + [](PyRTMap& rt_map) { + return py::make_key_iterator(rt_map.begin(), rt_map.end()); }, py::keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */ ); py_map.def( - "items", - [](PyRTMap& m) { - return py::make_iterator(m.begin(), m.end()); + "keys", + [](PyRTMap& rt_map) { + return py::make_key_iterator(rt_map.begin(), rt_map.end()); }, py::keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */ ); + py_map.def("items", [](py::object rt_map) { + return PyRTMapIterator(rt_map.cast<PyRTMap&>(), rt_map, false); + }); + + py_map.def("values", [](py::object rt_map) { + return PyRTMapIterator(rt_map.cast<PyRTMap&>(), rt_map, true); + }); + py_map.def("__contains__", [](PyRTMap& m, const std::string& k) -> bool { auto it = m.find(k); if (it == m.end()) diff
--git a/src/bindings/python/src/pyopenvino/utils/utils.cpp b/src/bindings/python/src/pyopenvino/utils/utils.cpp index f029323e35ab09..12f08410a67271 100644 --- a/src/bindings/python/src/pyopenvino/utils/utils.cpp +++ b/src/bindings/python/src/pyopenvino/utils/utils.cpp @@ -12,6 +12,7 @@ #include #include "Python.h" +#include "meta_data.hpp" #include "openvino/core/except.hpp" #include "openvino/frontend/decoder.hpp" @@ -20,12 +21,57 @@ using Version = ov::pass::Serialize::Version; namespace Common { namespace utils { +// For a complex structure, if an element isn't a map, just cast it to OVAny. +py::object from_ov_any_no_leaves(const ov::Any& any) { + if (any.is<std::shared_ptr<ov::Meta>>() || any.is<ov::AnyMap>()) { + return Common::utils::from_ov_any_map_no_leaves(any); + } else { + return py::cast(any); + } +} + +// Recursively go through dict to unwrap nested dicts and keep leaves as OVAny. +py::object from_ov_any_map_no_leaves(const ov::Any& any) { + const auto traverse_map = [](const ov::AnyMap& map) { + const auto unwrap_only_maps = [](const ov::Any& any) { + if (any.is<std::shared_ptr<ov::Meta>>()) { + const ov::AnyMap& as_map = *any.as<std::shared_ptr<ov::Meta>>(); + return from_ov_any_map_no_leaves(as_map); + } else if (any.is<ov::AnyMap>()) { + return from_ov_any_map_no_leaves(any.as<ov::AnyMap>()); + } + return py::cast(any); + }; + + std::map<std::string, py::object> result; + for (const auto& entry : map) { + result[entry.first] = unwrap_only_maps(entry.second); + } + return py::cast(result); + }; + + if (any.is<std::shared_ptr<ov::Meta>>()) { + const ov::AnyMap& as_map = *any.as<std::shared_ptr<ov::Meta>>(); + return traverse_map(as_map); + } else if (any.is<ov::AnyMap>()) { + return traverse_map(any.as<ov::AnyMap>()); + } + OPENVINO_THROW("Only ov::AnyMap or ov::Meta are expected here."); +} + +py::object from_ov_any_map(const ov::AnyMap& map) { + std::map<std::string, py::object> result; + for (const auto& entry : map) { + result[entry.first] = from_ov_any(entry.second); + } + return py::cast(result); +} + py::object from_ov_any(const ov::Any& any) { // Check for py::object if (any.is<py::object>()) { return any.as<py::object>(); - } - // Check for std::string + } // Check for std::string else if (any.is<std::string>()) { return py::cast(any.as<std::string>().c_str()); } @@ -98,6 +144,9 @@ py::object from_ov_any(const ov::Any& any) { // Check for std::map else if (any.is>()) { return py::cast(any.as>()); + } // Check for ov::AnyMap (std::map<std::string, ov::Any>) + else if (any.is<ov::AnyMap>()) { + return from_ov_any_map(any.as<ov::AnyMap>()); } // Check for std::map { else if (any.is>()) { @@ -113,6 +162,9 @@ py::object from_ov_any(const ov::Any& any) { PyDict_SetItemString(dict, property_name.c_str(), PyUnicode_FromString(mutability.c_str())); } return py::cast(dict); + } else if (any.is<std::shared_ptr<ov::Meta>>()) { + const ov::AnyMap& as_map = *any.as<std::shared_ptr<ov::Meta>>(); + return from_ov_any_map(as_map); } else if (any.is()) { return py::cast(any.as()); } else if (any.is()) { @@ -258,9 +310,8 @@ ov::Any py_object_to_any(const py::object& py_obj) { } } - // In case of empty vector works like with vector of strings if (_list.empty()) - return _list.cast<std::vector<std::string>>(); + return ov::Any(EmptyList()); switch (detected_type) { case PY_TYPE::STR: diff --git a/src/bindings/python/src/pyopenvino/utils/utils.hpp b/src/bindings/python/src/pyopenvino/utils/utils.hpp index 5b0d00165b968a..328f06820033f8 100644 --- a/src/bindings/python/src/pyopenvino/utils/utils.hpp +++ b/src/bindings/python/src/pyopenvino/utils/utils.hpp @@ -15,6 +15,14 @@ namespace py = pybind11; namespace Common { namespace utils { + struct EmptyList {}; + + py::object from_ov_any_no_leaves(const ov::Any& any); + + py::object from_ov_any_map_no_leaves(const ov::Any& almost_map); + + py::object from_ov_any_map(const ov::AnyMap& map); + py::object from_ov_any(const ov::Any& any); std::map
properties_to_any_map(const std::map& properties); diff --git a/src/bindings/python/tests/test_graph/test_any.py b/src/bindings/python/tests/test_graph/test_any.py index e4817e52f7e296..4a8643a7586189 100644 --- a/src/bindings/python/tests/test_graph/test_any.py +++ b/src/bindings/python/tests/test_graph/test_any.py @@ -34,7 +34,7 @@ def test_any_list(values, data_type): @pytest.mark.parametrize(("value_dict", "value_type", "data_type"), [ - ({"key": "value"}, OVAny, str), + ({"key": "value"}, str, str), ({21: 37}, int, int), ({21.0: 37.0}, float, float), ]) @@ -65,3 +65,26 @@ def __init__(self): value = OVAny(TestClass()) assert isinstance(value.value, TestClass) assert value.value.text == "test" + + +@pytest.mark.parametrize(("value", "dtype"), [ + ("some_value", str), + (31.23456, float), + (True, bool), + (42, int), +]) +def test_astype(value, dtype): + ovany = OVAny(value) + assert ovany.astype(dtype) == value + + +@pytest.mark.parametrize(("value", "dtype"), [ + (["some_value", "another value"], str), + ([31.23456, -31.3453], float), + ([True, False], bool), + ([42, 21], int), + ([], None), +]) +def test_aslist(value, dtype): + ovany = OVAny(value) + assert ovany.aslist(dtype) == value diff --git a/src/bindings/python/tests/test_runtime/test_model.py b/src/bindings/python/tests/test_runtime/test_model.py index 20f58201ee84e3..e0af9cbda469be 100644 --- a/src/bindings/python/tests/test_runtime/test_model.py +++ b/src/bindings/python/tests/test_runtime/test_model.py @@ -5,6 +5,7 @@ import os import numpy as np import pytest +import math import openvino.runtime.opset8 as ops from openvino.runtime import ( @@ -549,22 +550,6 @@ def check_rt_info(model): # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request def test_serialize_complex_rt_info(request, tmp_path): def check_rt_info(model, serialized): - if serialized: - threshold = "13.23" - min_val = "-3.24543" - max_val = "3.23422" - directed = "YES" - empty = "" - ids = "sasd fdfdfsdf" - mean = "22.3 33.11 44" - else: - threshold = 13.23 - min_val = -3.24543 - max_val = 3.234223 - directed = True - empty = [] - ids = ["sasd", "fdfdfsdf"] - mean = [22.3, 33.11, 44.0] assert model.has_rt_info(["config", "type_of_model"]) is True assert model.has_rt_info(["config", "converter_type"]) is True assert model.has_rt_info(["config", "model_parameters", "threshold"]) is True @@ -577,17 +562,29 @@ def check_rt_info(model, serialized): assert model.has_rt_info(["config", "model_parameters", "labels", "label_groups", "ids"]) is True assert model.has_rt_info(["config", "model_parameters", "mean_values"]) is True - assert model.get_rt_info(["config", "type_of_model"]) == "classification" - assert model.get_rt_info(["config", "converter_type"]) == "classification" - assert model.get_rt_info(["config", "model_parameters", "threshold"]) == threshold - assert model.get_rt_info(["config", "model_parameters", "min"]) == min_val - assert model.get_rt_info(["config", "model_parameters", "max"]) == max_val - assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "type"]) == "tree" - assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "directed"]) == directed - assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "float_empty"]) == empty - assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "nodes"]) == empty - assert model.get_rt_info(["config", "model_parameters", "labels", "label_groups", "ids"]) == ids - assert 
model.get_rt_info(["config", "model_parameters", "mean_values"]) == mean + assert model.get_rt_info(["config", "type_of_model"]).astype(str) == "classification" + assert model.get_rt_info(["config", "converter_type"]).astype(str) == "classification" + assert math.isclose(model.get_rt_info(["config", "model_parameters", "threshold"]).astype(float), 13.23, rel_tol=0.0001) + assert math.isclose(model.get_rt_info(["config", "model_parameters", "min"]).astype(float), -3.24543, rel_tol=0.0001) + assert math.isclose(model.get_rt_info(["config", "model_parameters", "max"]).astype(float), 3.234223, rel_tol=0.0001) + assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "type"]).astype(str) == "tree" + assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "directed"]).astype(bool) is True + + assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "float_empty"]).aslist() == [] + assert model.get_rt_info(["config", "model_parameters", "labels", "label_tree", "nodes"]).aslist() == [] + assert model.get_rt_info(["config", "model_parameters", "labels", "label_groups", "ids"]).aslist(str) == ["sasd", "fdfdfsdf"] + assert model.get_rt_info(["config", "model_parameters", "mean_values"]).aslist(float) == [22.3, 33.11, 44.0] + + rt_info = model.get_rt_info() + assert isinstance(rt_info["config"], dict) + + for key, value in rt_info.items(): + if key == "config": + for config_value in value: + assert config_value in ["type_of_model", "converter_type", "model_parameters"] + + for rt_info_val in model.get_rt_info(["config", "model_parameters", "labels", "label_tree"]).astype(dict): + assert rt_info_val in ["float_empty", "nodes", "type", "directed"] core = Core() xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path) diff --git a/src/bindings/python/tests/test_utils/test_data_dispatch.py b/src/bindings/python/tests/test_utils/test_data_dispatch.py index fad863f61a52e8..e2ce00f10e7482 100644 --- a/src/bindings/python/tests/test_utils/test_data_dispatch.py +++ b/src/bindings/python/tests/test_utils/test_data_dispatch.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2018-2022 Intel Corporation +# Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import os diff --git a/src/core/src/model.cpp b/src/core/src/model.cpp index df540d81f5554b..142514be45384b 100644 --- a/src/core/src/model.cpp +++ b/src/core/src/model.cpp @@ -955,7 +955,7 @@ bool ov::Model::has_rt_info(const std::vector& args) const { return false; if (i == args.size() - 1) break; - const ov::Any& rt_attr = get_rt_arg(info, args[i]); + const ov::Any rt_attr = get_rt_arg(info, args[i]); info = get_map_from_attr(rt_attr); } return true; diff --git a/tools/mo/unit_tests/mo/convert/meta_data_test.py b/tools/mo/unit_tests/mo/convert/meta_data_test.py index c5d2d05cc111ce..b5e78a15b0f67d 100644 --- a/tools/mo/unit_tests/mo/convert/meta_data_test.py +++ b/tools/mo/unit_tests/mo/convert/meta_data_test.py @@ -76,16 +76,16 @@ def check_meta_data(ov_model): for key, value in ref_meta.items(): if key == 'conversion_parameters': for param_name, param_value in value.items(): - val = ov_model.get_rt_info([key, param_name]) + val = ov_model.get_rt_info([key, param_name]).astype(str) if param_name in ['extensions', 'caffe_parser_path', 'input_model', 'k', 'output_dir']: val = Path(val) assert val == param_value, \ "Runtime info attribute with name {} does not match. 
Expected: {}, " \ "got {}".format(param_name, param_value, val) continue - assert str(ov_model.get_rt_info(key)) == value, \ + assert ov_model.get_rt_info(key).astype(str) == value, \ "Runtime info attribute with name {} does not match. Expected: {}, " \ - "got {}".format(key, value, ov_model.get_rt_info(key)) + "got {}".format(key, value, ov_model.get_rt_info(key).astype(str)) with tempfile.TemporaryDirectory(dir=self.test_directory) as tmpdir: diff --git a/tools/mo/unit_tests/mo/convert/meta_data_test_actual.py b/tools/mo/unit_tests/mo/convert/meta_data_test_actual.py index 7839f56bd28610..643510a5a631f0 100644 --- a/tools/mo/unit_tests/mo/convert/meta_data_test_actual.py +++ b/tools/mo/unit_tests/mo/convert/meta_data_test_actual.py @@ -22,16 +22,16 @@ def check_meta_data(ov_model, ref_meta): for key, value in ref_meta.items(): if key == 'conversion_parameters': for param_name, param_value in value.items(): - val = ov_model.get_rt_info([key, param_name]) + val = ov_model.get_rt_info([key, param_name]).astype(str) if param_name in ['extensions', 'caffe_parser_path', 'input_model', 'k', 'output_dir']: val = Path(val) assert val == param_value, \ "Runtime info attribute with name {} does not match. Expected: {}, " \ "got {}".format(param_name, param_value, val) continue - assert str(ov_model.get_rt_info(key)) == value, \ + assert ov_model.get_rt_info(key).astype(str) == value, \ "Runtime info attribute with name {} does not match. Expected: {}, " \ - "got {}".format(key, value, ov_model.get_rt_info(key)) + "got {}".format(key, value, ov_model.get_rt_info(key).astype(str)) for key, value in ov_model.get_rt_info().items(): if key in ignore_attrs: From a3958d6ddfa375a612666bf6e10cb278e99bad3c Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 23 Mar 2023 13:52:03 +0400 Subject: [PATCH 054/296] Use evaluation context for the inference (#16492) --- src/plugins/template/backend/executable.hpp | 8 ++++ .../template/backend/int_executable.cpp | 46 +++++++++++-------- .../template/backend/int_executable.hpp | 3 ++ .../template/src/sync_infer_request.cpp | 2 +- 4 files changed, 40 insertions(+), 19 deletions(-) diff --git a/src/plugins/template/backend/executable.hpp b/src/plugins/template/backend/executable.hpp index 2375d6e0a096af..0794488c334c3b 100644 --- a/src/plugins/template/backend/executable.hpp +++ b/src/plugins/template/backend/executable.hpp @@ -24,6 +24,14 @@ class Executable { /// \returns true if iteration is successful, false otherwise virtual bool call(std::vector& outputs, const std::vector& inputs) = 0; + /// \param outputs vector of runtime::Tensor used as outputs + /// \param inputs vector of runtime::Tensor used as inputs + /// \param context Evaluation context + /// \returns true if iteration is successful, false otherwise + virtual bool call(std::vector& outputs, + const std::vector& inputs, + const ov::EvaluationContext& context) = 0; + /// \brief Executes a single iteration of a Function. 
/// \param outputs vector of runtime::Tensor used as outputs /// \param inputs vector of runtime::Tensor used as inputs diff --git a/src/plugins/template/backend/int_executable.cpp b/src/plugins/template/backend/int_executable.cpp index fc4057955fecfa..49253aec58f379 100644 --- a/src/plugins/template/backend/int_executable.cpp +++ b/src/plugins/template/backend/int_executable.cpp @@ -105,7 +105,31 @@ ov::runtime::interpreter::INTExecutable::INTExecutable(const std::shared_ptr& outputs, const std::vector& inputs) { - // map function params -> HostTensor + EvaluationContext eval_context; + ov::op::util::VariableContext variable_context; + eval_context.emplace("VariableContext", variable_context); + + // for each ordered op in the graph + for (const auto& op : m_nodes) { + if (auto var_extension = std::dynamic_pointer_cast(op)) { + auto variable = var_extension->get_variable(); + if (!variable_context.get_variable_value(variable)) { + auto h_tensor = ov::Tensor(op->get_input_element_type(0), op->get_input_shape(0)); + // h_tensor->write(h_tensor->get_data_ptr(), h_tensor->get_size_in_bytes()); + const auto tensor_input = make_tmp_host_tensor(h_tensor); + variable_context.set_variable_value(variable, + std::make_shared(tensor_input)); + } + } + } + + return call(outputs, inputs, eval_context); +} + +bool ov::runtime::interpreter::INTExecutable::call(std::vector& outputs, + const std::vector& inputs, + const ov::EvaluationContext& context) { + // map function params -> ov::Tensor std::unordered_map, ov::Tensor> tensor_map; size_t input_count = 0; for (const auto& param : get_parameters()) { @@ -116,17 +140,13 @@ bool ov::runtime::interpreter::INTExecutable::call(std::vector& outp } std::unordered_map, size_t> results_map; - // map function outputs -> HostTensor + // map function outputs -> ov::Tensor for (size_t output_count = 0; output_count < get_results().size(); ++output_count) { auto output = get_results()[output_count]->output(0).get_tensor_ptr(); if (!results_map.count(output)) results_map.emplace(output, output_count); } - EvaluationContext eval_context; - ov::op::util::VariableContext variable_context; - eval_context.emplace("VariableContext", variable_context); - // for each ordered op in the graph for (const auto& op : m_nodes) { if (std::dynamic_pointer_cast(op)) { @@ -165,19 +185,9 @@ bool ov::runtime::interpreter::INTExecutable::call(std::vector& outp op_outputs.push_back(host_tensor); } - if (auto var_extension = std::dynamic_pointer_cast(cloned_node)) { - auto variable = var_extension->get_variable(); - if (!variable_context.get_variable_value(variable)) { - auto h_tensor = ov::Tensor(cloned_node->get_input_element_type(0), cloned_node->get_input_shape(0)); - // h_tensor->write(h_tensor->get_data_ptr(), h_tensor->get_size_in_bytes()); - const auto tensor_input = make_tmp_host_tensor(h_tensor); - variable_context.set_variable_value(variable, - std::make_shared(tensor_input)); - } - } - // Call evaluate for cloned_node with static shapes - if (!cloned_node->evaluate(op_outputs, op_inputs, eval_context)) { + if (!cloned_node->evaluate(op_outputs, op_inputs, context)) { + // TODO: extend evaluate map for the context evaluate_node(cloned_node, op_outputs, op_inputs); } // Update tensors in tensor map diff --git a/src/plugins/template/backend/int_executable.hpp b/src/plugins/template/backend/int_executable.hpp index 1ca49ff4253a79..2610a82ee23e4e 100644 --- a/src/plugins/template/backend/int_executable.hpp +++ b/src/plugins/template/backend/int_executable.hpp @@ -29,6 +29,9 @@ class 
INTExecutable : public Executable { INTExecutable(const std::shared_ptr& model); bool call(std::vector& outputs, const std::vector& inputs) override; + bool call(std::vector& outputs, + const std::vector& inputs, + const ov::EvaluationContext& context) override; ov::Tensor create_input_tensor(size_t input_index) override; diff --git a/src/plugins/template/src/sync_infer_request.cpp b/src/plugins/template/src/sync_infer_request.cpp index f560e876dee47e..6fa96c02a23d68 100644 --- a/src/plugins/template/src/sync_infer_request.cpp +++ b/src/plugins/template/src/sync_infer_request.cpp @@ -207,7 +207,7 @@ void ov::template_plugin::InferRequest::infer_preprocess() { void ov::template_plugin::InferRequest::start_pipeline() { OV_ITT_SCOPED_TASK(itt::domains::TemplatePlugin, m_profiling_task[StartPipeline]) auto start = Time::now(); - m_executable->call(m_backend_output_tensors, m_backend_input_tensors); + m_executable->call(m_backend_output_tensors, m_backend_input_tensors, m_eval_context); m_durations[StartPipeline] = Time::now() - start; } // ! [infer_request:start_pipeline] From a00460177466c8ab16f0ad025a134e82339a1437 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Thu, 23 Mar 2023 10:59:00 +0100 Subject: [PATCH 055/296] [ONNX FE] Fix Windows warnings (#16141) --- src/frontends/onnx/frontend/CMakeLists.txt | 6 - .../onnx/frontend/src/core/graph.cpp | 9 +- src/frontends/onnx/frontend/src/editor.cpp | 5 +- .../onnx/frontend/src/op/roi_align.cpp | 6 +- src/frontends/onnx/frontend/src/place.cpp | 4 +- src/frontends/onnx/tests/CMakeLists.txt | 7 - src/frontends/onnx/tests/onnx_import.in.cpp | 949 ++++++++------- .../tests/onnx_import_com_microsoft.in.cpp | 1053 +++++++++-------- .../tests/onnx_import_const_folding.in.cpp | 2 +- .../onnx/tests/onnx_import_controlflow.in.cpp | 25 +- .../onnx/tests/onnx_import_dyn_shapes.in.cpp | 85 +- .../tests/onnx_import_org_openvino.in.cpp | 292 ++--- .../onnx/tests/onnx_import_org_pytorch.in.cpp | 38 +- .../onnx/tests/onnx_import_quant.in.cpp | 4 +- .../onnx/tests/onnx_import_reshape.in.cpp | 46 +- .../onnx/tests/onnx_import_rnn.in.cpp | 442 +++---- .../onnx/tests/onnx_transformations.cpp | 2 +- 17 files changed, 1518 insertions(+), 1457 deletions(-) diff --git a/src/frontends/onnx/frontend/CMakeLists.txt b/src/frontends/onnx/frontend/CMakeLists.txt index 2edeaae86ec594..db84dae67ddb36 100644 --- a/src/frontends/onnx/frontend/CMakeLists.txt +++ b/src/frontends/onnx/frontend/CMakeLists.txt @@ -2,12 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 # -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - ie_add_compiler_flags(/wd4267) - ie_add_compiler_flags(/wd4018) - ie_add_compiler_flags(/wd4244) -endif() - ov_add_frontend(NAME onnx LINKABLE_FRONTEND PROTOBUF_LITE diff --git a/src/frontends/onnx/frontend/src/core/graph.cpp b/src/frontends/onnx/frontend/src/core/graph.cpp index e940c172e87eec..5d46da8805efe1 100644 --- a/src/frontends/onnx/frontend/src/core/graph.cpp +++ b/src/frontends/onnx/frontend/src/core/graph.cpp @@ -328,7 +328,7 @@ std::shared_ptr Graph::create_function() { const auto& onnx_outputs = m_model->get_graph().output(); for (std::size_t i{0}; i < function->get_output_size(); ++i) { const auto& result_node = function->get_output_op(i); - const std::string onnx_output_name = onnx_outputs.Get(i).name(); + const std::string onnx_output_name = onnx_outputs.Get(static_cast(i)).name(); result_node->set_friendly_name(onnx_output_name + "/sink_port_0"); const auto& previous_operation = result_node->get_input_node_shared_ptr(0); 
previous_operation->set_friendly_name(onnx_output_name); @@ -386,7 +386,7 @@ OutputVector Graph::make_ng_nodes(const Node& onnx_node) { const size_t outputs_size = std::accumulate(std::begin(ng_subgraph_outputs), std::end(ng_subgraph_outputs), - 0, + static_cast(0), [](const size_t lhs, const Output& rhs) { return lhs + rhs.get_node()->get_output_size(); }); @@ -420,10 +420,11 @@ void Graph::set_friendly_names(const Node& onnx_node, const OutputVector& ng_sub const auto common_node = detail::common_node_for_all_outputs(ng_subgraph_outputs); - for (size_t i = 0; i < ng_subgraph_outputs.size(); ++i) { + const auto ng_subgraph_output_size = static_cast(ng_subgraph_outputs.size()); + for (int i = 0; i < ng_subgraph_output_size; ++i) { // Trailing optional outputs may not be specified in the ONNX model. // Other optional outputs should have name set to an empty string. - if (i >= onnx_node.get_outputs_size()) { + if (i >= static_cast(onnx_node.get_outputs_size())) { break; } diff --git a/src/frontends/onnx/frontend/src/editor.cpp b/src/frontends/onnx/frontend/src/editor.cpp index 56ba992a5c882b..af968ac16572d5 100644 --- a/src/frontends/onnx/frontend/src/editor.cpp +++ b/src/frontends/onnx/frontend/src/editor.cpp @@ -213,9 +213,10 @@ void graph_topological_sort(GraphProto* graph) { std::multimap output_name_to_node; GraphProto result; - for (int i = 0; i < graph->node().size(); ++i) { + const auto nodes_number = static_cast(graph->node().size()); + for (int i = 0; i < nodes_number; ++i) { for (const auto& output_name : graph->node(i).output()) { - output_name_to_node.emplace(output_name, graph->mutable_node(static_cast(i))); + output_name_to_node.emplace(output_name, graph->mutable_node(i)); } } auto get_node_by_out_name = [&output_name_to_node](const std::string& out_name) -> const NodeProto* { diff --git a/src/frontends/onnx/frontend/src/op/roi_align.cpp b/src/frontends/onnx/frontend/src/op/roi_align.cpp index 9b6959e126c38a..6773806fe993a7 100644 --- a/src/frontends/onnx/frontend/src/op/roi_align.cpp +++ b/src/frontends/onnx/frontend/src/op/roi_align.cpp @@ -68,9 +68,9 @@ OutputVector roi_align(const Node& node) { return {std::make_shared(data, rois, num_rois, - pooled_h, - pooled_w, - sampling_ratio, + static_cast(pooled_h), + static_cast(pooled_w), + static_cast(sampling_ratio), spatial_scale, pooling_mode, aligned_mode)}; diff --git a/src/frontends/onnx/frontend/src/place.cpp b/src/frontends/onnx/frontend/src/place.cpp index 6b67cf7efcfd03..3430bf4e6b56cb 100644 --- a/src/frontends/onnx/frontend/src/place.cpp +++ b/src/frontends/onnx/frontend/src/place.cpp @@ -317,8 +317,8 @@ ov::frontend::Place::Ptr PlaceOp::get_input_port(const std::string& input_name) std::vector PlaceOp::get_consuming_ports() const { std::vector consuming_ports; - const auto out_ports_number = m_editor->get_output_ports(m_node).size(); - for (size_t out_idx = 0; out_idx < out_ports_number; ++out_idx) { + const auto out_ports_number = static_cast(m_editor->get_output_ports(m_node).size()); + for (int out_idx = 0; out_idx < out_ports_number; ++out_idx) { auto consuming_ops_out = get_output_port(out_idx)->get_consuming_ports(); consuming_ports.insert(consuming_ports.end(), consuming_ops_out.begin(), consuming_ops_out.end()); } diff --git a/src/frontends/onnx/tests/CMakeLists.txt b/src/frontends/onnx/tests/CMakeLists.txt index ac41687b9b47f2..b6fed5f851ae43 100644 --- a/src/frontends/onnx/tests/CMakeLists.txt +++ b/src/frontends/onnx/tests/CMakeLists.txt @@ -6,13 +6,6 @@ set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE OFF) 
ov_try_use_gold_linker() -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - ie_add_compiler_flags(/wd4244) - ie_add_compiler_flags(/wd4267) - ie_add_compiler_flags(/wd4305) - ie_add_compiler_flags(/wd4756) -endif() - message(STATUS "ONNX frontend test enabled") add_compile_definitions( diff --git a/src/frontends/onnx/tests/onnx_import.in.cpp b/src/frontends/onnx/tests/onnx_import.in.cpp index 91ce448c02c06f..b49861e1806fa7 100644 --- a/src/frontends/onnx/tests/onnx_import.in.cpp +++ b/src/frontends/onnx/tests/onnx_import.in.cpp @@ -469,7 +469,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_function_greater_or_equal_inside_if) { // expected value == x * y std::vector x(40, 2); std::vector y(40); - std::iota(y.begin(), y.end(), -20); + std::iota(y.begin(), y.end(), -20.f); std::vector expected; std::transform(x.begin(), x.end(), y.begin(), std::back_inserter(expected), [](float i, float j) -> float { return i * j; @@ -489,21 +489,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_context_dependent_function) { auto test_case = test::TestCase(function, s_device); test_case.add_input(Shape{3, 5}, - {0.54881352186203, - 0.7151893377304077, - 0.6027633547782898, - 0.5448831915855408, - 0.42365479469299316, - 0.6458941102027893, - 0.4375872015953064, - 0.891772985458374, - 0.9636627435684204, - 0.3834415078163147, - 0.7917250394821167, - 0.5288949012756348, - 0.5680445432662964, - 0.9255966544151306, - 0.07103605568408966}); + {0.54881352186203f, + 0.7151893377304077f, + 0.6027633547782898f, + 0.5448831915855408f, + 0.42365479469299316f, + 0.6458941102027893f, + 0.4375872015953064f, + 0.891772985458374f, + 0.9636627435684204f, + 0.3834415078163147f, + 0.7917250394821167f, + 0.5288949012756348f, + 0.5680445432662964f, + 0.9255966544151306f, + 0.07103605568408966f}); test_case.add_input(Shape{3}, {1, 4, 3}); test_case.add_expected_output(Shape{}, {1}); test_case.run(); @@ -797,27 +797,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_1D) { auto test_case = test::TestCase(function, s_device); test_case.add_input({-1.0, 0.0, 1.0}); - test_case.add_expected_output({0.09003058, 0.24472848, 0.66524094}); + test_case.add_expected_output({0.09003058f, 0.24472848f, 0.66524094f}); test_case.run(); } namespace { // common input for all Softmax 3D test cases (Shape = {3,4,5}) // clang-format off const std::vector SOFTMAX_INPUT = { - 2.75793882, -0.50841322, 0.82013929, -0.62409912, -0.96136118, - 0.21004745, 1.38337255, 1.19030397, 2.0940445, -0.03551657, - -0.78686039, 1.992782, 0.04300319, -0.29230777, -0.56797112, - -1.26732165, -0.61935399, 0.57670432, 0.92844898, 2.82469233, - - 0.98721677, -0.05100663, -1.21178917, -0.17530157, 1.40051805, - -0.13259761, -1.14313018, 0.2673723, -0.87996154, 1.29053106, - 1.55, 0.8396538, 1.20729817, 0.23727845, -0.89113606, - -1.70909842, 0.26460363, -0.70566808, 2.383518, 1.07024615, - - -1.21722605, 0.82919357, 0.55765697, 0.12657686, 0.63432172, - 0.75425957, -2.43721014, -1.24478184, 2.65316853, 1.19509542, - -0.95523998, 0.5149006, -0.01151649, 0.68327026, -0.4589638, - -0.46554745, 0.21055324, 0.39266729, 2.05098086, 1.83207919}; + 2.75793882f, -0.50841322f, 0.82013929f, -0.62409912f, -0.96136118f, + 0.21004745f, 1.38337255f, 1.19030397f, 2.0940445f, -0.03551657f, + -0.78686039f, 1.992782f, 0.04300319f, -0.29230777f, -0.56797112f, + -1.26732165f, -0.61935399f, 0.57670432f, 0.92844898f, 2.82469233f, + + 0.98721677f, -0.05100663f, -1.21178917f, -0.17530157f, 1.40051805f, + -0.13259761f, -1.14313018f, 0.2673723f, -0.87996154f, 1.29053106f, + 1.55f, 0.8396538f, 1.20729817f, 
0.23727845f, -0.89113606f, + -1.70909842f, 0.26460363f, -0.70566808f, 2.383518f, 1.07024615f, + + -1.21722605f, 0.82919357f, 0.55765697f, 0.12657686f, 0.63432172f, + 0.75425957f, -2.43721014f, -1.24478184f, 2.65316853f, 1.19509542f, + -0.95523998f, 0.5149006f, -0.01151649f, 0.68327026f, -0.4589638f, + -0.46554745f, 0.21055324f, 0.39266729f, 2.05098086f, 1.83207919f}; } // namespace // clang-format on @@ -831,20 +831,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_0) { // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.09683057, 0.00369363, 0.01394559, 0.00329012, 0.00234823, - 0.00757665, 0.02449322, 0.02019284, 0.04985249, 0.00592694, - 0.00279593, 0.04505148, 0.00641108, 0.00458466, 0.00348007, - 0.00172928, 0.00330577, 0.01093237, 0.01554086, 0.10351497, - - 0.01648154, 0.00583583, 0.00182802, 0.00515374, 0.02491679, - 0.00537859, 0.00195794, 0.00802367, 0.00254737, 0.0223216, - 0.02893419, 0.0142204, 0.02053893, 0.00778581, 0.00251907, - 0.00111174, 0.00800149, 0.0030324, 0.06658917, 0.0179084, - - 0.00181811, 0.01407243, 0.01072611, 0.0069699, 0.01158077, - 0.01305647, 0.00053677, 0.0017687, 0.08719896, 0.02028982, - 0.00236265, 0.01027717, 0.0060709, 0.01216173, 0.00388087, - 0.00385541, 0.00758048, 0.00909469, 0.04775123, 0.03836337}); + {0.09683057f, 0.00369363f, 0.01394559f, 0.00329012f, 0.00234823f, + 0.00757665f, 0.02449322f, 0.02019284f, 0.04985249f, 0.00592694f, + 0.00279593f, 0.04505148f, 0.00641108f, 0.00458466f, 0.00348007f, + 0.00172928f, 0.00330577f, 0.01093237f, 0.01554086f, 0.10351497f, + + 0.01648154f, 0.00583583f, 0.00182802f, 0.00515374f, 0.02491679f, + 0.00537859f, 0.00195794f, 0.00802367f, 0.00254737f, 0.0223216f, + 0.02893419f, 0.0142204f, 0.02053893f, 0.00778581f, 0.00251907f, + 0.00111174f, 0.00800149f, 0.0030324f, 0.06658917f, 0.0179084f, + + 0.00181811f, 0.01407243f, 0.01072611f, 0.0069699f, 0.01158077f, + 0.01305647f, 0.00053677f, 0.0017687f, 0.08719896f, 0.02028982f, + 0.00236265f, 0.01027717f, 0.0060709f, 0.01216173f, 0.00388087f, + 0.00385541f, 0.00758048f, 0.00909469f, 0.04775123f, 0.03836337f}); // clang-format on test_case.run(6); @@ -860,20 +860,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_1) { // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.22757064, 0.00868076, 0.03277484, 0.00773243, 0.0055188, - 0.0178066, 0.05756383, 0.04745709, 0.11716303, 0.01392945, - 0.00657097, 0.10587974, 0.01506727, 0.01077484, 0.00817884, - 0.00406413, 0.00776921, 0.0256932, 0.03652405, 0.24328028, - - 0.06217413, 0.02201481, 0.00689594, 0.01944171, 0.09399488, - 0.02028993, 0.00738604, 0.03026811, 0.00960958, 0.08420492, - 0.10914991, 0.05364435, 0.07748005, 0.02937079, 0.0095028, - 0.00419387, 0.03018442, 0.01143929, 0.2511977, 0.06755678, - - 0.00587593, 0.04548053, 0.0346656, 0.02252594, 0.03742775, - 0.04219705, 0.00173478, 0.00571623, 0.2818174, 0.06557446, - 0.00763582, 0.03321466, 0.01962049, 0.03930537, 0.01254255, - 0.01246025, 0.02449929, 0.02939305, 0.15432668, 0.12398617}); + {0.22757064f, 0.00868076f, 0.03277484f, 0.00773243f, 0.0055188f, + 0.0178066f, 0.05756383f, 0.04745709f, 0.11716303f, 0.01392945f, + 0.00657097f, 0.10587974f, 0.01506727f, 0.01077484f, 0.00817884f, + 0.00406413f, 0.00776921f, 0.0256932f, 0.03652405f, 0.24328028f, + + 0.06217413f, 0.02201481f, 0.00689594f, 0.01944171f, 0.09399488f, + 0.02028993f, 0.00738604f, 0.03026811f, 0.00960958f, 0.08420492f, + 0.10914991f, 0.05364435f, 0.07748005f, 0.02937079f, 0.0095028f, + 0.00419387f, 0.03018442f, 0.01143929f, 0.2511977f, 
0.06755678f, + + 0.00587593f, 0.04548053f, 0.0346656f, 0.02252594f, 0.03742775f, + 0.04219705f, 0.00173478f, 0.00571623f, 0.2818174f, 0.06557446f, + 0.00763582f, 0.03321466f, 0.01962049f, 0.03930537f, 0.01254255f, + 0.01246025f, 0.02449929f, 0.02939305f, 0.15432668f, 0.12398617f}); // clang-format on test_case.run(4); @@ -890,20 +890,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_1_opset11) { // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.88890495, 0.04825497, 0.27088348, 0.04490523, 0.02037154, - 0.06955369, 0.31998834, 0.39223197, 0.68041159, 0.05141776, - 0.02566661, 0.5885689, 0.12453075, 0.06257374, 0.03019055, - 0.01587475, 0.0431878, 0.21235381, 0.21210944, 0.89802015, - - 0.31752626, 0.19442629, 0.0546935, 0.06279221, 0.36823282, - 0.10362164, 0.06523066, 0.24006419, 0.03103672, 0.32987983, - 0.55743381, 0.473766, 0.61451431, 0.09486084, 0.03722801, - 0.02141829, 0.26657706, 0.090728, 0.81131024, 0.26465935, - - 0.08619648, 0.43343993, 0.3877785, 0.04523505, 0.15625437, - 0.61900597, 0.01653285, 0.06394322, 0.56592636, 0.27376196, - 0.11201305, 0.31654337, 0.21947994, 0.07893034, 0.05236297, - 0.18278451, 0.23348385, 0.32879834, 0.30990825, 0.5176207}); + {0.88890495f, 0.04825497f, 0.27088348f, 0.04490523f, 0.02037154f, + 0.06955369f, 0.31998834f, 0.39223197f, 0.68041159f, 0.05141776f, + 0.02566661f, 0.5885689f, 0.12453075f, 0.06257374f, 0.03019055f, + 0.01587475f, 0.0431878f, 0.21235381f, 0.21210944f, 0.89802015f, + + 0.31752626f, 0.19442629f, 0.0546935f, 0.06279221f, 0.36823282f, + 0.10362164f, 0.06523066f, 0.24006419f, 0.03103672f, 0.32987983f, + 0.55743381f, 0.473766f, 0.61451431f, 0.09486084f, 0.03722801f, + 0.02141829f, 0.26657706f, 0.090728f, 0.81131024f, 0.26465935f, + + 0.08619648f, 0.43343993f, 0.3877785f, 0.04523505f, 0.15625437f, + 0.61900597f, 0.01653285f, 0.06394322f, 0.56592636f, 0.27376196f, + 0.11201305f, 0.31654337f, 0.21947994f, 0.07893034f, 0.05236297f, + 0.18278451f, 0.23348385f, 0.32879834f, 0.30990825f, 0.5176207f}); // clang-format on test_case.run(4); @@ -920,20 +920,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_negative_1_opset11) { // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.80619484, 0.03075256, 0.1161086, 0.027393, 0.01955098, - 0.07012683, 0.22670066, 0.18689778, 0.4614171, 0.05485764, - 0.04486171, 0.7228683, 0.10286818, 0.07356264, 0.05583908, - 0.01280724, 0.02448298, 0.08096659, 0.11509769, 0.76664555, - - 0.30399805, 0.10764059, 0.03371745, 0.09505949, 0.4595844, - 0.13369875, 0.04866969, 0.19944906, 0.0633215, 0.554861, - 0.39101103, 0.19217177, 0.27755913, 0.10521588, 0.03404216, - 0.01150354, 0.08279411, 0.03137731, 0.6890207, 0.18530433, - - 0.0402528, 0.31156224, 0.23747502, 0.15431291, 0.25639707, - 0.10627912, 0.00436928, 0.01439711, 0.7097961, 0.16515835, - 0.06798343, 0.29571748, 0.17468554, 0.34994435, 0.11166911, - 0.03615172, 0.07108136, 0.08527993, 0.4477579, 0.35972902}); + {0.80619484f, 0.03075256f, 0.1161086f, 0.027393f, 0.01955098f, + 0.07012683f, 0.22670066f, 0.18689778f, 0.4614171f, 0.05485764f, + 0.04486171f, 0.7228683f, 0.10286818f, 0.07356264f, 0.05583908f, + 0.01280724f, 0.02448298f, 0.08096659f, 0.11509769f, 0.76664555f, + + 0.30399805f, 0.10764059f, 0.03371745f, 0.09505949f, 0.4595844f, + 0.13369875f, 0.04866969f, 0.19944906f, 0.0633215f, 0.554861f, + 0.39101103f, 0.19217177f, 0.27755913f, 0.10521588f, 0.03404216f, + 0.01150354f, 0.08279411f, 0.03137731f, 0.6890207f, 0.18530433f, + + 0.0402528f, 0.31156224f, 0.23747502f, 0.15431291f, 0.25639707f, 
+ 0.10627912f, 0.00436928f, 0.01439711f, 0.7097961f, 0.16515835f, + 0.06798343f, 0.29571748f, 0.17468554f, 0.34994435f, 0.11166911f, + 0.03615172f, 0.07108136f, 0.08527993f, 0.4477579f, 0.35972902f}); // clang-format on test_case.run(6); @@ -950,20 +950,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_negative_1_opset13) { // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.80619484, 0.03075256, 0.1161086, 0.027393, 0.01955098, - 0.07012683, 0.22670066, 0.18689778, 0.4614171, 0.05485764, - 0.04486171, 0.7228683, 0.10286818, 0.07356264, 0.05583908, - 0.01280724, 0.02448298, 0.08096659, 0.11509769, 0.76664555, - - 0.30399805, 0.10764059, 0.03371745, 0.09505949, 0.4595844, - 0.13369875, 0.04866969, 0.19944906, 0.0633215, 0.554861, - 0.39101103, 0.19217177, 0.27755913, 0.10521588, 0.03404216, - 0.01150354, 0.08279411, 0.03137731, 0.6890207, 0.18530433, - - 0.0402528, 0.31156224, 0.23747502, 0.15431291, 0.25639707, - 0.10627912, 0.00436928, 0.01439711, 0.7097961, 0.16515835, - 0.06798343, 0.29571748, 0.17468554, 0.34994435, 0.11166911, - 0.03615172, 0.07108136, 0.08527993, 0.4477579, 0.35972902}); + {0.80619484f, 0.03075256f, 0.1161086f, 0.027393f, 0.01955098f, + 0.07012683f, 0.22670066f, 0.18689778f, 0.4614171f, 0.05485764f, + 0.04486171f, 0.7228683f, 0.10286818f, 0.07356264f, 0.05583908f, + 0.01280724f, 0.02448298f, 0.08096659f, 0.11509769f, 0.76664555f, + + 0.30399805f, 0.10764059f, 0.03371745f, 0.09505949f, 0.4595844f, + 0.13369875f, 0.04866969f, 0.19944906f, 0.0633215f, 0.554861f, + 0.39101103f, 0.19217177f, 0.27755913f, 0.10521588f, 0.03404216f, + 0.01150354f, 0.08279411f, 0.03137731f, 0.6890207f, 0.18530433f, + + 0.0402528f, 0.31156224f, 0.23747502f, 0.15431291f, 0.25639707f, + 0.10627912f, 0.00436928f, 0.01439711f, 0.7097961f, 0.16515835f, + 0.06798343f, 0.29571748f, 0.17468554f, 0.34994435f, 0.11166911f, + 0.03615172f, 0.07108136f, 0.08527993f, 0.4477579f, 0.35972902f}); // clang-format on test_case.run(6); @@ -1786,7 +1786,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_resize11_down_sizes_cubic_half_pixel) { test_case.add_input(input_data); test_case.add_expected_output( expected_output_shape, - {1.6307871, 3.0046299, 4.3784733, 7.1261587, 8.5, 9.873844, 12.621532, 13.995373, 15.369216}); + {1.6307871f, 3.0046299f, 4.3784733f, 7.1261587f, 8.5f, 9.873844f, 12.621532f, 13.995373f, 15.369216f}); test_case.run_with_tolerance_as_fp(2.0e-5f); } @@ -1848,18 +1848,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_resize11_up_sizes_cubic_half_pixel) { test_case.add_input(input_data); test_case.add_expected_output( expected_output_shape, - {0.45507922f, 0.64057922f, 0.97157922f, 1.42257922f, 1.90732922, 2.22332922f, 2.70807922f, 3.15907922f, - 3.49007922f, 3.67557922, 1.39437963f, 1.57987963f, 1.91087963f, 2.36187963f, 2.84662963, 3.16262963f, - 3.64737963f, 4.09837963f, 4.42937963f, 4.61487963, 2.95130693f, 3.13680693f, 3.46780693f, 3.91880693f, - 4.40355693, 4.71955693f, 5.20430693f, 5.65530693f, 5.98630693f, 6.17180693, 5.20525069f, 5.39075069f, - 5.72175069f, 6.17275069f, 6.65750069, 6.97350069f, 7.45825069f, 7.90925069f, 8.24025069f, 8.42575069, - 6.88975f, 7.07525f, 7.40625f, 7.85725f, 8.342, 8.658f, 9.14275f, 9.59375f, - 9.92475f, 10.11025f, 8.57424931f, 8.75974931f, 9.09074931f, 9.54174931f, 10.02649931, 10.34249931f, - 10.82724931f, 11.27824931f, 11.60924931f, 11.79474931, 10.82819307f, 11.01369307f, 11.34469307f, 11.79569307f, - 12.28044307, 12.59644307f, 13.08119307f, 13.53219307f, 13.86319307f, 14.04869307, 12.38512037f, 12.57062037f, - 12.90162037f, 13.35262037f, 
13.83737037, 14.15337037f, 14.63812037f, 15.08912037f, 15.42012037f, 15.60562037, - 13.32442078f, 13.50992078f, 13.84092078f, 14.29192078f, 14.77667078, 15.09267078f, 15.57742078f, 16.02842078f, - 16.35942078f, 16.54492078}); + {0.45507922f, 0.64057922f, 0.97157922f, 1.42257922f, 1.90732922f, 2.22332922f, 2.70807922f, 3.15907922f, + 3.49007922f, 3.67557922f, 1.39437963f, 1.57987963f, 1.91087963f, 2.36187963f, 2.84662963f, 3.16262963f, + 3.64737963f, 4.09837963f, 4.42937963f, 4.61487963f, 2.95130693f, 3.13680693f, 3.46780693f, 3.91880693f, + 4.40355693f, 4.71955693f, 5.20430693f, 5.65530693f, 5.98630693f, 6.17180693f, 5.20525069f, 5.39075069f, + 5.72175069f, 6.17275069f, 6.65750069f, 6.97350069f, 7.45825069f, 7.90925069f, 8.24025069f, 8.42575069f, + 6.88975f, 7.07525f, 7.40625f, 7.85725f, 8.342f, 8.658f, 9.14275f, 9.59375f, + 9.92475f, 10.11025f, 8.57424931f, 8.75974931f, 9.09074931f, 9.54174931f, 10.02649931f, 10.34249931f, + 10.82724931f, 11.27824931f, 11.60924931f, 11.79474931f, 10.82819307f, 11.01369307f, 11.34469307f, 11.79569307f, + 12.28044307f, 12.59644307f, 13.08119307f, 13.53219307f, 13.86319307f, 14.04869307f, 12.38512037f, 12.57062037f, + 12.90162037f, 13.35262037f, 13.83737037f, 14.15337037f, 14.63812037f, 15.08912037f, 15.42012037f, 15.60562037f, + 13.32442078f, 13.50992078f, 13.84092078f, 14.29192078f, 14.77667078f, 15.09267078f, 15.57742078f, 16.02842078f, + 16.35942078f, 16.54492078f}); test_case.run_with_tolerance_as_fp(2.0e-5f); } @@ -1891,18 +1891,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_resize11_up_sizes_cubic_half_pixel_dynamic_siz test_case.add_input(std::vector{1, 1, 9, 10}); // sizes test_case.add_expected_output( expected_output_shape, - {0.45507922f, 0.64057922f, 0.97157922f, 1.42257922f, 1.90732922, 2.22332922f, 2.70807922f, 3.15907922f, - 3.49007922f, 3.67557922, 1.39437963f, 1.57987963f, 1.91087963f, 2.36187963f, 2.84662963, 3.16262963f, - 3.64737963f, 4.09837963f, 4.42937963f, 4.61487963, 2.95130693f, 3.13680693f, 3.46780693f, 3.91880693f, - 4.40355693, 4.71955693f, 5.20430693f, 5.65530693f, 5.98630693f, 6.17180693, 5.20525069f, 5.39075069f, - 5.72175069f, 6.17275069f, 6.65750069, 6.97350069f, 7.45825069f, 7.90925069f, 8.24025069f, 8.42575069, - 6.88975f, 7.07525f, 7.40625f, 7.85725f, 8.342, 8.658f, 9.14275f, 9.59375f, - 9.92475f, 10.11025f, 8.57424931f, 8.75974931f, 9.09074931f, 9.54174931f, 10.02649931, 10.34249931f, - 10.82724931f, 11.27824931f, 11.60924931f, 11.79474931, 10.82819307f, 11.01369307f, 11.34469307f, 11.79569307f, - 12.28044307, 12.59644307f, 13.08119307f, 13.53219307f, 13.86319307f, 14.04869307, 12.38512037f, 12.57062037f, - 12.90162037f, 13.35262037f, 13.83737037, 14.15337037f, 14.63812037f, 15.08912037f, 15.42012037f, 15.60562037, - 13.32442078f, 13.50992078f, 13.84092078f, 14.29192078f, 14.77667078, 15.09267078f, 15.57742078f, 16.02842078f, - 16.35942078f, 16.54492078}); + {0.45507922f, 0.64057922f, 0.97157922f, 1.42257922f, 1.90732922f, 2.22332922f, 2.70807922f, 3.15907922f, + 3.49007922f, 3.67557922f, 1.39437963f, 1.57987963f, 1.91087963f, 2.36187963f, 2.84662963f, 3.16262963f, + 3.64737963f, 4.09837963f, 4.42937963f, 4.61487963f, 2.95130693f, 3.13680693f, 3.46780693f, 3.91880693f, + 4.40355693f, 4.71955693f, 5.20430693f, 5.65530693f, 5.98630693f, 6.17180693f, 5.20525069f, 5.39075069f, + 5.72175069f, 6.17275069f, 6.65750069f, 6.97350069f, 7.45825069f, 7.90925069f, 8.24025069f, 8.42575069f, + 6.88975f, 7.07525f, 7.40625f, 7.85725f, 8.342f, 8.658f, 9.14275f, 9.59375f, + 9.92475f, 10.11025f, 8.57424931f, 8.75974931f, 9.09074931f, 9.54174931f, 
10.02649931f, 10.34249931f, + 10.82724931f, 11.27824931f, 11.60924931f, 11.79474931f, 10.82819307f, 11.01369307f, 11.34469307f, 11.79569307f, + 12.28044307f, 12.59644307f, 13.08119307f, 13.53219307f, 13.86319307f, 14.04869307f, 12.38512037f, 12.57062037f, + 12.90162037f, 13.35262037f, 13.83737037f, 14.15337037f, 14.63812037f, 15.08912037f, 15.42012037f, 15.60562037f, + 13.32442078f, 13.50992078f, 13.84092078f, 14.29192078f, 14.77667078f, 15.09267078f, 15.57742078f, 16.02842078f, + 16.35942078f, 16.54492078f}); test_case.run_with_tolerance_as_fp(2.0e-5f); } @@ -2177,7 +2177,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_prelu_batch_nd_elementwise) { // Shape{2, 3, 4, 5} std::vector slope(shape_size(Shape{2, 3, 4, 5})); - std::iota(std::begin(slope), std::end(slope), 0); + std::iota(std::begin(slope), std::end(slope), 0.f); inputs.emplace_back(slope); // Shape{2, 3, 4, 5} @@ -2426,19 +2426,19 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softplus) { -FLT_MAX}}; const auto inf = std::numeric_limits::infinity(); - std::vector output{0.3132616579532623291, - 0.6931471824645996094, - 1.313261628150939941, - 10.0000457763671875, - 100.0, - 0.0, - 1000.0, - 0.0, - 0.6931471824645996094, - 0.6931471824645996094, - 0.6931471824645996094, + std::vector output{0.3132616579532623291f, + 0.6931471824645996094f, + 1.313261628150939941f, + 10.0000457763671875f, + 100.0f, + 0.0f, + 1000.0f, + 0.0f, + 0.6931471824645996094f, + 0.6931471824645996094f, + 0.6931471824645996094f, inf, - 0.0}; + 0.0f}; auto test_case = test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -2507,7 +2507,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_argmax_float) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/argmax_float.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({4, 0.1, 2, 3, -3, 1, -0.9, 0, 1, 2, 3, 0}); + test_case.add_input({4.f, 0.1f, 2.f, 3.f, -3.f, 1.f, -0.9f, 0.f, 1.f, 2.f, 3.f, 0.f}); test_case.add_expected_output({0, 3, 0}); test_case.run(); } @@ -2517,7 +2517,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_argmin_float) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/argmin_float.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({4, 0.1, 2, 3, -3, 1, -0.9, 0, 1, 2, 3, 0}); + test_case.add_input({4.f, 0.1f, 2.f, 3.f, -3.f, 1.f, -0.9f, 0.f, 1.f, 2.f, 3.f, 0.f}); test_case.add_expected_output({1, 1, 0, 2}); test_case.run(); } @@ -2528,7 +2528,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_argmax_select_last_index) { "onnx/argmax_select_last_index.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{4, 3}, {1, 1, 1, 0.5, 3, 4, 0.5, 1, 1.1, 0, 3, 0}); + test_case.add_input(Shape{4, 3}, {1.f, 1.f, 1.f, 0.5f, 3.f, 4.f, 0.5f, 1.f, 1.1f, 0.f, 3.f, 0.f}); test_case.add_expected_output(Shape{1, 3}, {0, 3, 1}); test_case.run(); } @@ -2539,7 +2539,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_argmin_select_last_index) { "onnx/argmin_select_last_index.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{4, 3}, {1, 1, 1, 2, 3, 4, 2, 1, 1.1, 3, 3, 8}); + test_case.add_input(Shape{4, 3}, {1.f, 1.f, 1.f, 2.f, 3.f, 4.f, 2.f, 1.f, 1.1f, 3.f, 3.f, 8.f}); test_case.add_expected_output(Shape{4}, {2, 0, 1, 1}); test_case.run(); } @@ -2736,10 +2736,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_one_hot_with_axis) { auto function = onnx_import::import_onnx_model( 
file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/one_hot_axis.onnx")); - Inputs inputs{{1.0, 9.0, 2.0, 4.0}, {1.0, 3.0}}; - std::vector expected_output{{1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 1.0, 1.0, - 1.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}}; + Inputs inputs{{1.0f, 9.0f, 2.0f, 4.0f}, {1.0f, 3.0f}}; + std::vector expected_output{{1.0f, 1.0f, 3.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 3.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 3.0f, 1.0f, 1.0f, 1.0f, 1.0f, 3.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}}; auto test_case = test::TestCase(function, s_device); test_case.add_multiple_inputs(inputs); @@ -2851,7 +2852,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lp_norm_p1) { Shape data_shape{2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(data); @@ -2870,7 +2871,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lp_norm_p2) { Shape data_shape{2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(data); @@ -2889,7 +2890,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lp_norm_default) { Shape data_shape{2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(data); @@ -2909,7 +2910,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lp_norm_default_dynamic) { Shape data_shape{2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(data_shape, data); @@ -2928,7 +2929,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_instance_normalization) { Shape data_shape{1, 2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); @@ -2953,7 +2954,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_instance_normalization_dynamic) { std::vector input_data{1.f, 2.f, 3.f}; test_case.add_input(Shape{1, 3, 1, 1}, input_data); test_case.add_expected_output(Shape{1, 3, 1, 1}, - {0.3341970741748809814, 0.3321160078048706055, 0.3407136797904968262}); + {0.3341970741748809814f, 0.3321160078048706055f, 0.3407136797904968262f}); test_case.run(); } @@ -3121,9 +3122,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mod_sign_fmod_f32) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/mod_sign_fmod_f32.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({-4.3, 7.2, 5.0, 4.3, -7.2, 8.0}); - test_case.add_input({2.1, -3.4, 8.0, -2.1, 3.4, 5.0}); - test_case.add_expected_output(Shape{6}, {-0.10000038, 0.39999962, 5., 0.10000038, -0.39999962, 3.}); + test_case.add_input({-4.3f, 7.2f, 5.0f, 4.3f, -7.2f, 8.0f}); + test_case.add_input({2.1f, -3.4f, 8.0f, -2.1f, 3.4f, 5.0f}); + test_case.add_expected_output(Shape{6}, {-0.10000038f, 0.39999962f, 5.f, 
0.10000038f, -0.39999962f, 3.f}); test_case.run(); } @@ -3280,22 +3281,22 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_gather_float_2D_neg_indices) { // clang-format off test_case.add_input(Shape{3, 3}, - { 0.0, 0.1, 0.2, - 1.0, 1.1, 1.2, - 2.0, 2.1, 2.2 }); + { 0.0f, 0.1f, 0.2f, + 1.0f, 1.1f, 1.2f, + 2.0f, 2.1f, 2.2f }); test_case.add_input(Shape{2, 2}, { -1, -2, -3, -2 }); test_case.add_expected_output(Shape{3, 2, 2}, { - 0.2, 0.1, - 0.0, 0.1, + 0.2f, 0.1f, + 0.0f, 0.1f, - 1.2, 1.1, - 1.0, 1.1, + 1.2f, 1.1f, + 1.0f, 1.1f, - 2.2, 2.1, - 2.0, 2.1 }); + 2.2f, 2.1f, + 2.0f, 2.1f }); // clang-format on test_case.run(); @@ -3633,10 +3634,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_upsample8_nearest_infer) { const Shape expected_output_shape{1, 1, 4, 6}; auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); test_case.add_expected_output(expected_output_shape, - {1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, - 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0}); + {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, + 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f}); test_case.run(); } @@ -3650,10 +3651,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_upsample8_linear_infer) { const Shape expected_output_shape{1, 1, 4, 4}; auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); test_case.add_expected_output( expected_output_shape, - {1.0, 1.5, 2.0, 2.0, 2.0, 2.5, 3.0, 3.0, 3.0, 3.5, 4.0, 4.0, 3.0, 3.5, 4.0, 4.0}); + {1.0f, 1.5f, 2.0f, 2.0f, 2.0f, 2.5f, 3.0f, 3.0f, 3.0f, 3.5f, 4.0f, 4.0f, 3.0f, 3.5f, 4.0f, 4.0f}); test_case.run(); } @@ -3669,10 +3670,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_upsample9_scales_const_nearest_infer) { const Shape expected_output_shape{1, 1, 4, 6}; auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); test_case.add_expected_output(expected_output_shape, - {1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, - 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0}); + {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, + 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f}); test_case.run(); } @@ -3688,10 +3689,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_upsample9_scales_const_linear_infer) { const Shape expected_output_shape{1, 1, 4, 4}; auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); test_case.add_expected_output( expected_output_shape, - {1.0, 1.5, 2.0, 2.0, 2.0, 2.5, 3.0, 3.0, 3.0, 3.5, 4.0, 4.0, 3.0, 3.5, 4.0, 4.0}); + {1.0f, 1.5f, 2.0f, 2.0f, 2.0f, 2.5f, 3.0f, 3.0f, 3.0f, 3.5f, 4.0f, 4.0f, 3.0f, 3.5f, 4.0f, 4.0f}); test_case.run(); } @@ -3700,8 +3701,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_image_scaler) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/image_scaler.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0, 10.0, 20.0, 30.0, 40.0}); - test_case.add_expected_output(Shape{1, 2, 2, 2}, {12.0, 14.0, 16.0, 18.0, 21.0, 41.0, 61.0, 81.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 20.0f, 30.0f, 40.0f}); + test_case.add_expected_output(Shape{1, 2, 2, 2}, {12.0f, 14.0f, 16.0f, 18.0f, 21.0f, 41.0f, 
61.0f, 81.0f}); test_case.run(); } @@ -3710,7 +3711,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_size_op_single) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/size_op_single.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{2, 3}, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}); + test_case.add_input(Shape{2, 3}, {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}); test_case.add_expected_output(Shape{}, {6}); test_case.run(); } @@ -3720,7 +3721,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_size_op_graph_end) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/size_op_graph_end.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); test_case.add_expected_output(Shape{}, {4}); test_case.run(); } @@ -3731,8 +3732,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_size_op_graph_middle) { "onnx/size_op_graph_middle.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({1.0, 2.0, 3.0, 4.0}); - test_case.add_expected_output(Shape{}, {4.0}); + test_case.add_input({1.0f, 2.0f, 3.0f, 4.0f}); + test_case.add_expected_output(Shape{}, {4.0f}); test_case.run(); } @@ -3743,11 +3744,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_size_op_on_input_graph_middle) { "onnx/size_op_on_input_graph_middle.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 2, 4, 1, 3}, {0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.}); + test_case.add_input(Shape{1, 2, 4, 1, 3}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}); test_case.add_expected_output(Shape{1, 2, 4, 1, 3}, - {24., 24., 24., 24., 24., 24., 24., 24., 24., 24., 24., 24., - 24., 24., 24., 24., 24., 24., 24., 24., 24., 24., 24., 24.}); + {24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, + 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f, 24.f}); test_case.run(); } @@ -3818,36 +3819,39 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_roialign16_avg_out_half_pixel) { auto test_case = test::TestCase(function, s_device); test_case.add_input( - {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 11., 12.1, 13.2, 14.3, 15.4, 16.5, 17.6, - 18.7, 19.8, 20.9, 22., 23.1, 24.2, 25.3, 26.4, 27.5, 28.6, 29.7, 30.8, 31.9, 33., 34.1, 35.2, - 36.3, 37.4, 38.5, 39.6, 40.7, 41.8, 42.9, 44., 45.1, 46.2, 47.3, 48.4, 49.5, 50.6, 51.7, 52.8, - 53.9, 55., 56.1, 57.2, 58.3, 59.4, 60.5, 61.6, 62.7, 63.8, 64.9, 66., 67.1, 68.2, 69.3, 70.4, - 71.5, 72.6, 73.7, 74.8, 75.9, 77., 78.1, 79.2, 80.3, 81.4, 82.5, 83.6, 84.7, 85.8, 86.9, 88., - 89.1, 90.2, 91.3, 92.4, 93.5, 94.6, 95.7, 96.8, 97.9, 99., 100.1, 101.2, 102.3, 103.4, 104.5, 105.6, - 106.7, 107.8, 108.9, 110., 111.1, 112.2, 113.3, 114.4, 115.5, 116.6, 117.7, 118.8, 119.9, 121., 122.1, 123.2, - 124.3, 125.4, 126.5, 127.6, 128.7, 129.8, 130.9, 132., 133.1, 134.2, 135.3, 136.4, 137.5, 138.6, 139.7, 140.8, - 141.9, 143., 144.1, 145.2, 146.3, 147.4, 148.5, 149.6, 150.7, 151.8, 152.9, 154., 155.1, 156.2, 157.3, 158.4, - 159.5, 160.6, 161.7, 162.8, 163.9, 165., 166.1, 167.2, 168.3, 169.4, 170.5, 171.6, 172.7, 173.8, 174.9, 176., - 177.1, 178.2, 179.3, 180.4, 181.5, 182.6, 183.7, 184.8, 185.9, 187., 188.1, 189.2, 190.3, 191.4, 192.5, 193.6, - 194.7, 195.8, 196.9, 198., 199.1, 200.2, 201.3, 202.4, 203.5, 204.6, 205.7, 206.8, 207.9, 209., 210.1, 211.2, - 212.3, 213.4, 214.5, 
215.6, 216.7, 217.8, 218.9, 220., 221.1, 222.2, 223.3, 224.4, 225.5, 226.6, 227.7, 228.8, - 229.9, 231., 232.1, 233.2, 234.3, 235.4, 236.5, 237.6}); - - test_case.add_input({0, 0, 0.75, 2.2, 1.2, 0.5, 2.8, 1.9, 0, 3, 0, 3}); + {1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f, 11.f, 12.1f, 13.2f, 14.3f, 15.4f, + 16.5f, 17.6f, 18.7f, 19.8f, 20.9f, 22.f, 23.1f, 24.2f, 25.3f, 26.4f, 27.5f, 28.6f, 29.7f, 30.8f, + 31.9f, 33.f, 34.1f, 35.2f, 36.3f, 37.4f, 38.5f, 39.6f, 40.7f, 41.8f, 42.9f, 44.f, 45.1f, 46.2f, + 47.3f, 48.4f, 49.5f, 50.6f, 51.7f, 52.8f, 53.9f, 55.f, 56.1f, 57.2f, 58.3f, 59.4f, 60.5f, 61.6f, + 62.7f, 63.8f, 64.9f, 66.f, 67.1f, 68.2f, 69.3f, 70.4f, 71.5f, 72.6f, 73.7f, 74.8f, 75.9f, 77.f, + 78.1f, 79.2f, 80.3f, 81.4f, 82.5f, 83.6f, 84.7f, 85.8f, 86.9f, 88.f, 89.1f, 90.2f, 91.3f, 92.4f, + 93.5f, 94.6f, 95.7f, 96.8f, 97.9f, 99.f, 100.1f, 101.2f, 102.3f, 103.4f, 104.5f, 105.6f, 106.7f, 107.8f, + 108.9f, 110.f, 111.1f, 112.2f, 113.3f, 114.4f, 115.5f, 116.6f, 117.7f, 118.8f, 119.9f, 121.f, 122.1f, 123.2f, + 124.3f, 125.4f, 126.5f, 127.6f, 128.7f, 129.8f, 130.9f, 132.f, 133.1f, 134.2f, 135.3f, 136.4f, 137.5f, 138.6f, + 139.7f, 140.8f, 141.9f, 143.f, 144.1f, 145.2f, 146.3f, 147.4f, 148.5f, 149.6f, 150.7f, 151.8f, 152.9f, 154.f, + 155.1f, 156.2f, 157.3f, 158.4f, 159.5f, 160.6f, 161.7f, 162.8f, 163.9f, 165.f, 166.1f, 167.2f, 168.3f, 169.4f, + 170.5f, 171.6f, 172.7f, 173.8f, 174.9f, 176.f, 177.1f, 178.2f, 179.3f, 180.4f, 181.5f, 182.6f, 183.7f, 184.8f, + 185.9f, 187.f, 188.1f, 189.2f, 190.3f, 191.4f, 192.5f, 193.6f, 194.7f, 195.8f, 196.9f, 198.f, 199.1f, 200.2f, + 201.3f, 202.4f, 203.5f, 204.6f, 205.7f, 206.8f, 207.9f, 209.f, 210.1f, 211.2f, 212.3f, 213.4f, 214.5f, 215.6f, + 216.7f, 217.8f, 218.9f, 220.f, 221.1f, 222.2f, 223.3f, 224.4f, 225.5f, 226.6f, 227.7f, 228.8f, 229.9f, 231.f, + 232.1f, 233.2f, 234.3f, 235.4f, 236.5f, 237.6f}); + + test_case.add_input({0.f, 0.f, 0.75f, 2.2f, 1.2f, 0.5f, 2.8f, 1.9f, 0.f, 3.f, 0.f, 3.f}); test_case.add_input({0, 2, 1}); test_case.add_expected_output( Shape{3, 2, 4, 4}, - {2.145, 2.42, 2.6950002, 2.9700003, 3.96, 4.235, 4.51, 4.7850003, 5.775, 6.05, - 6.325, 6.6000004, 7.59, 7.8650007, 8.14, 8.415001, 41.745003, 42.019997, 42.295, 42.57, - 43.56, 43.835, 44.11, 44.385002, 45.375, 45.65, 45.925003, 46.200005, 47.190002, 47.465004, - 47.74, 48.015, 162.77249, 163.0475, 163.32251, 163.5975, 164.42252, 164.69751, 164.9725, 165.2475, - 166.07251, 166.3475, 166.6225, 166.8975, 167.72249, 167.9975, 168.27249, 168.5475, 202.3725, 202.6475, - 202.9225, 203.19751, 204.02252, 204.2975, 204.57251, 204.8475, 205.6725, 205.94751, 206.2225, 206.4975, - 207.32251, 207.5975, 207.8725, 208.1475, 91.162506, 91.4375, 91.7125, 91.9875, 92.8125, 93.0875, - 93.3625, 93.6375, 94.4625, 94.7375, 95.0125, 95.28749, 96.1125, 96.3875, 96.6625, 96.9375, - 130.76251, 131.0375, 131.3125, 131.5875, 132.4125, 132.6875, 132.9625, 133.2375, 134.0625, 134.33751, - 134.6125, 134.88751, 135.7125, 135.9875, 136.26251, 136.53749}); + {2.145f, 2.42f, 2.6950002f, 2.9700003f, 3.96f, 4.235f, 4.51f, 4.7850003f, 5.775f, + 6.05f, 6.325f, 6.6000004f, 7.59f, 7.8650007f, 8.14f, 8.415001f, 41.745003f, 42.019997f, + 42.295f, 42.57f, 43.56f, 43.835f, 44.11f, 44.385002f, 45.375f, 45.65f, 45.925003f, + 46.200005f, 47.190002f, 47.465004f, 47.74f, 48.015f, 162.77249f, 163.0475f, 163.32251f, 163.5975f, + 164.42252f, 164.69751f, 164.9725f, 165.2475f, 166.07251f, 166.3475f, 166.6225f, 166.8975f, 167.72249f, + 167.9975f, 168.27249f, 168.5475f, 202.3725f, 202.6475f, 202.9225f, 203.19751f, 204.02252f, 204.2975f, + 
204.57251f, 204.8475f, 205.6725f, 205.94751f, 206.2225f, 206.4975f, 207.32251f, 207.5975f, 207.8725f,
+ 208.1475f, 91.162506f, 91.4375f, 91.7125f, 91.9875f, 92.8125f, 93.0875f, 93.3625f, 93.6375f,
+ 94.4625f, 94.7375f, 95.0125f, 95.28749f, 96.1125f, 96.3875f, 96.6625f, 96.9375f, 130.76251f,
+ 131.0375f, 131.3125f, 131.5875f, 132.4125f, 132.6875f, 132.9625f, 133.2375f, 134.0625f, 134.33751f,
+ 134.6125f, 134.88751f, 135.7125f, 135.9875f, 136.26251f, 136.53749f});

 test_case.run();
 }

@@ -3858,36 +3862,40 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_roialign16_avg_half_pixel) {

 auto test_case = test::TestCase(function, s_device);
 test_case.add_input(
- {1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 11., 12.1, 13.2, 14.3, 15.4, 16.5, 17.6,
- 18.7, 19.8, 20.9, 22., 23.1, 24.2, 25.3, 26.4, 27.5, 28.6, 29.7, 30.8, 31.9, 33., 34.1, 35.2,
- 36.3, 37.4, 38.5, 39.6, 40.7, 41.8, 42.9, 44., 45.1, 46.2, 47.3, 48.4, 49.5, 50.6, 51.7, 52.8,
- 53.9, 55., 56.1, 57.2, 58.3, 59.4, 60.5, 61.6, 62.7, 63.8, 64.9, 66., 67.1, 68.2, 69.3, 70.4,
- 71.5, 72.6, 73.7, 74.8, 75.9, 77., 78.1, 79.2, 80.3, 81.4, 82.5, 83.6, 84.7, 85.8, 86.9, 88.,
- 89.1, 90.2, 91.3, 92.4, 93.5, 94.6, 95.7, 96.8, 97.9, 99., 100.1, 101.2, 102.3, 103.4, 104.5, 105.6,
- 106.7, 107.8, 108.9, 110., 111.1, 112.2, 113.3, 114.4, 115.5, 116.6, 117.7, 118.8, 119.9, 121., 122.1, 123.2,
- 124.3, 125.4, 126.5, 127.6, 128.7, 129.8, 130.9, 132., 133.1, 134.2, 135.3, 136.4, 137.5, 138.6, 139.7, 140.8,
- 141.9, 143., 144.1, 145.2, 146.3, 147.4, 148.5, 149.6, 150.7, 151.8, 152.9, 154., 155.1, 156.2, 157.3, 158.4,
- 159.5, 160.6, 161.7, 162.8, 163.9, 165., 166.1, 167.2, 168.3, 169.4, 170.5, 171.6, 172.7, 173.8, 174.9, 176.,
- 177.1, 178.2, 179.3, 180.4, 181.5, 182.6, 183.7, 184.8, 185.9, 187., 188.1, 189.2, 190.3, 191.4, 192.5, 193.6,
- 194.7, 195.8, 196.9, 198., 199.1, 200.2, 201.3, 202.4, 203.5, 204.6, 205.7, 206.8, 207.9, 209., 210.1, 211.2,
- 212.3, 213.4, 214.5, 215.6, 216.7, 217.8, 218.9, 220., 221.1, 222.2, 223.3, 224.4, 225.5, 226.6, 227.7, 228.8,
- 229.9, 231., 232.1, 233.2, 234.3, 235.4, 236.5, 237.6});
-
- test_case.add_input({0, 0, 0.75, 2.2, 1.2, 0.5, 2.8, 1.9, 0, 3, 0, 3});
+ {1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f, 11.f, 12.1f, 13.2f, 14.3f,
+ 15.4f, 16.5f, 17.6f, 18.7f, 19.8f, 20.9f, 22.f, 23.1f, 24.2f, 25.3f, 26.4f, 27.5f, 28.6f,
+ 29.7f, 30.8f, 31.9f, 33.f, 34.1f, 35.2f, 36.3f, 37.4f, 38.5f, 39.6f, 40.7f, 41.8f, 42.9f,
+ 44.f, 45.1f, 46.2f, 47.3f, 48.4f, 49.5f, 50.6f, 51.7f, 52.8f, 53.9f, 55.f, 56.1f, 57.2f,
+ 58.3f, 59.4f, 60.5f, 61.6f, 62.7f, 63.8f, 64.9f, 66.f, 67.1f, 68.2f, 69.3f, 70.4f, 71.5f,
+ 72.6f, 73.7f, 74.8f, 75.9f, 77.f, 78.1f, 79.2f, 80.3f, 81.4f, 82.5f, 83.6f, 84.7f, 85.8f,
+ 86.9f, 88.f, 89.1f, 90.2f, 91.3f, 92.4f, 93.5f, 94.6f, 95.7f, 96.8f, 97.9f, 99.f, 100.1f,
+ 101.2f, 102.3f, 103.4f, 104.5f, 105.6f, 106.7f, 107.8f, 108.9f, 110.f, 111.1f, 112.2f, 113.3f, 114.4f,
+ 115.5f, 116.6f, 117.7f, 118.8f, 119.9f, 121.f, 122.1f, 123.2f, 124.3f, 125.4f, 126.5f, 127.6f, 128.7f,
+ 129.8f, 130.9f, 132.f, 133.1f, 134.2f, 135.3f, 136.4f, 137.5f, 138.6f, 139.7f, 140.8f, 141.9f, 143.f,
+ 144.1f, 145.2f, 146.3f, 147.4f, 148.5f, 149.6f, 150.7f, 151.8f, 152.9f, 154.f, 155.1f, 156.2f, 157.3f,
+ 158.4f, 159.5f, 160.6f, 161.7f, 162.8f, 163.9f, 165.f, 166.1f, 167.2f, 168.3f, 169.4f, 170.5f, 171.6f,
+ 172.7f, 173.8f, 174.9f, 176.f, 177.1f, 178.2f, 179.3f, 180.4f, 181.5f, 182.6f, 183.7f, 184.8f, 185.9f,
+ 187.f, 188.1f, 189.2f, 190.3f, 191.4f, 192.5f, 193.6f, 194.7f, 195.8f, 196.9f, 198.f, 199.1f, 200.2f,
+ 201.3f, 202.4f, 203.5f, 204.6f,
205.7f, 206.8f, 207.9f, 209.f, 210.1f, 211.2f, 212.3f, 213.4f, 214.5f, + 215.6f, 216.7f, 217.8f, 218.9f, 220.f, 221.1f, 222.2f, 223.3f, 224.4f, 225.5f, 226.6f, 227.7f, 228.8f, + 229.9f, 231.f, 232.1f, 233.2f, 234.3f, 235.4f, 236.5f, 237.6f}); + + test_case.add_input({0.f, 0.f, 0.75f, 2.2f, 1.2f, 0.5f, 2.8f, 1.9f, 0.f, 3.f, 0.f, 3.f}); test_case.add_input({0, 2, 1}); test_case.add_expected_output( Shape{3, 2, 4, 4}, - {1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 2.3375, 2.3375, - 2.3375, 2.3375, 4.1525, 4.1525, 4.1525, 4.1525, 40.7, 40.7, 40.7, 40.7, - 40.7, 40.7, 40.7, 40.7, 41.9375, 41.9375, 41.9375, 41.9375, 43.7525, 43.7525, - 43.7525, 43.7525, 159.72, 159.94, 160.16, 160.38, 159.90562, 160.12563, 160.34563, 160.56563, - 160.9575, 161.1775, 161.3975, 161.61751, 162.1125, 162.3325, 162.55249, 162.77249, 199.32, 199.54001, - 199.76001, 199.97998, 199.50562, 199.72563, 199.94562, 200.16562, 200.5575, 200.7775, 200.9975, 201.2175, - 201.7125, 201.93251, 202.1525, 202.37251, 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, - 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, 86.9, - 126.5, 126.5, 126.5, 126.5, 126.5, 126.5, 126.5, 126.5, 126.5, 126.5, - 126.5, 126.5, 126.5, 126.5, 126.5, 126.5}); + {1.1f, 1.1f, 1.1f, 1.1f, 1.1f, 1.1f, 1.1f, 1.1f, 2.3375f, + 2.3375f, 2.3375f, 2.3375f, 4.1525f, 4.1525f, 4.1525f, 4.1525f, 40.7f, 40.7f, + 40.7f, 40.7f, 40.7f, 40.7f, 40.7f, 40.7f, 41.9375f, 41.9375f, 41.9375f, + 41.9375f, 43.7525f, 43.7525f, 43.7525f, 43.7525f, 159.72f, 159.94f, 160.16f, 160.38f, + 159.90562f, 160.12563f, 160.34563f, 160.56563f, 160.9575f, 161.1775f, 161.3975f, 161.61751f, 162.1125f, + 162.3325f, 162.55249f, 162.77249f, 199.32f, 199.54001f, 199.76001f, 199.97998f, 199.50562f, 199.72563f, + 199.94562f, 200.16562f, 200.5575f, 200.7775f, 200.9975f, 201.2175f, 201.7125f, 201.93251f, 202.1525f, + 202.37251f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, + 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 86.9f, 126.5f, + 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, + 126.5f, 126.5f, 126.5f, 126.5f, 126.5f, 126.5f}); test_case.run_with_tolerance_as_fp(0.01f); } @@ -3898,9 +3906,9 @@ NGRAPH_TEST(${BACKEND_NAME}, quant_dequant_pattern) { auto test_case = test::TestCase(function, s_device); // scale == 3.0 // zero point == 10 - test_case.add_input({9.0, 10.0, 15.0, 20.0, 30.0}); - test_case.add_input({1}); - test_case.add_expected_output(Shape{5}, {9.0, 9.0, 15.0, 21.0, 30.0}); + test_case.add_input({9.0f, 10.0f, 15.0f, 20.0f, 30.0f}); + test_case.add_input({1.f}); + test_case.add_expected_output(Shape{5}, {9.0f, 9.0f, 15.0f, 21.0f, 30.0f}); test_case.run(); } @@ -3912,9 +3920,9 @@ NGRAPH_TEST(${BACKEND_NAME}, quant_dequant_pattern_axis) { // axis = 1 // scale == {2.0, 3.0, 4.0} // zero point == {10, 20, 30} - test_case.add_input({1.0, 2.0, 3.0, 10.0, 20.0, 30.0, 40.0, 50.0, 100.0}); - test_case.add_expected_output(Shape{3, 3}, {0, 3, 4, 10, 21, 32, 40, 51, 100}); - test_case.add_input({1}); + test_case.add_input({1.0f, 2.0f, 3.0f, 10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 100.0f}); + test_case.add_expected_output(Shape{3, 3}, {0.f, 3.f, 4.f, 10.f, 21.f, 32.f, 40.f, 51.f, 100.f}); + test_case.add_input({1.f}); test_case.run(); } @@ -3923,8 +3931,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_logsoftmax_0D) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/softmax_0D.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({3.141592}); - test_case.add_expected_output({0.0}); + test_case.add_input({3.141592f}); + 
test_case.add_expected_output({0.0f}); test_case.run(); } @@ -3934,7 +3942,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_logsoftmax_1D) { auto test_case = test::TestCase(function, s_device); test_case.add_input({-1.0f, 0.0f, 1.0f}); - test_case.add_expected_output(Shape{3}, {-2.4076061, -1.407606, -0.407606}); + test_case.add_expected_output(Shape{3}, {-2.4076061f, -1.407606f, -0.407606f}); test_case.run(); } @@ -3944,7 +3952,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_logsoftmax13_1D) { auto test_case = test::TestCase(function, s_device); test_case.add_input({-1.0f, 0.0f, 1.0f}); - test_case.add_expected_output(Shape{3}, {-2.4076061, -1.407606, -0.407606}); + test_case.add_expected_output(Shape{3}, {-2.4076061f, -1.407606f, -0.407606f}); test_case.run(); } @@ -3953,10 +3961,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_logsoftmax13_2D) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/logsoftmax13_2D.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.0f, 1.0f, 2.0f, 3.0f, 10000, 10001, 10002, 10003}); + test_case.add_input({0.0f, 1.0f, 2.0f, 3.0f, 10000.f, 10001.f, 10002.f, 10003.f}); test_case.add_expected_output( Shape{2, 4}, - {-3.4401896, -2.4401896, -1.4401896, -0.44018966, -3.4401896, -2.4401896, -1.4401896, -0.44018966}); + {-3.4401896f, -2.4401896f, -1.4401896f, -0.44018966f, -3.4401896f, -2.4401896f, -1.4401896f, -0.44018966f}); test_case.run_with_tolerance_as_fp(); } @@ -4004,7 +4012,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mul_v6_broadcast_axis_1) { Shape shape{1, 3, 2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output( @@ -4036,7 +4044,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mul_v6_broadcast_no_axis) { Shape shape{2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f}); test_case.add_expected_output(shape, {3.0f, 6.0f, 9.0f, 12.0f}); @@ -4061,7 +4069,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_mul_v7_broadcast) { Shape shape{1, 2, 3}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output(shape, {3.0f, 8.0f, 15.0f, 12.0f, 20.0f, 30.0f}); @@ -4076,7 +4084,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_add_v6_broadcast_axis_1) { Shape shape{1, 3, 2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output( @@ -4107,7 +4115,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_add_v6_broadcast_no_axis) { Shape shape{2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f}); test_case.add_expected_output(shape, {4.0f, 5.0f, 6.0f, 7.0f}); @@ -4133,7 +4141,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_sub_v6_broadcast_axis_1) { Shape shape{1, 3, 2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output(shape, @@ -4164,7 +4172,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_sub_v6_broadcast_no_axis) { 
Shape shape{2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f}); test_case.add_expected_output(shape, {-2.0f, -1.0f, 0.0f, 1.0f}); @@ -4189,7 +4197,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_sub_v7_broadcast) { Shape shape{1, 2, 3}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output(shape, {-2.0f, -2.0f, -2.0f, 1.0f, 1.0f, 1.0f}); @@ -4204,7 +4212,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_div_v6_broadcast_axis_1) { Shape shape{1, 3, 2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output( @@ -4237,7 +4245,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_div_v6_broadcast_no_axis) { Shape shape{2, 2}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({2.0f}); test_case.add_expected_output(shape, {0.5f, 1.0f, 1.5f, 2.0f}); @@ -4262,7 +4270,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_div_v7_broadcast) { Shape shape{1, 2, 3}; std::vector A(shape_size(shape)); - std::iota(A.begin(), A.end(), 1); + std::iota(A.begin(), A.end(), 1.f); test_case.add_input(A); test_case.add_input({3.0f, 4.0f, 5.0f}); test_case.add_expected_output(shape, {0.3333333f, 0.5f, 0.6f, 1.3333333f, 1.25f, 1.2f}); @@ -4299,7 +4307,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_no_min_no_max) { "onnx/clip_no_min_no_max.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; test_case.add_input(data); @@ -4315,12 +4323,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_no_min_no_max_inf) { auto test_case = test::TestCase(function, s_device); const std::vector data{std::numeric_limits::infinity(), -std::numeric_limits::infinity(), - static_cast(std::numeric_limits::max()), + static_cast(std::numeric_limits::max()), std::numeric_limits::min(), std::numeric_limits::max(), std::numeric_limits::lowest(), - 0, - -1}; + 0.f, + -1.f}; const std::vector expected_output{std::numeric_limits::max(), std::numeric_limits::lowest(), @@ -4328,13 +4336,13 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_no_min_no_max_inf) { std::numeric_limits::min(), std::numeric_limits::max(), std::numeric_limits::lowest(), - 0, - -1}; + 0.f, + -1.f}; test_case.add_input(data); test_case.add_expected_output(Shape{2, 4}, expected_output); - test_case.run_with_tolerance_as_fp(0); + test_case.run_with_tolerance_as_fp(0.f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_no_min_set_max) { @@ -4343,9 +4351,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_no_min_set_max) { "onnx/clip_no_min_set_max.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; - const std::vector max_val{2.01}; - const std::vector output{-1.6, -0.1, 2.01, 0., -10., 1.99, 2.01, 2.01}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; + const std::vector max_val{2.01f}; + const std::vector output{-1.6f, -0.1f, 2.01f, 0.f, -10.f, 1.99f, 2.01f, 2.01f}; test_case.add_input(data); test_case.add_input(max_val); @@ -4360,9 +4368,9 @@ 
NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_set_min_no_max) { "onnx/clip_set_min_no_max.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; - const std::vector min_val{-1.59}; - const std::vector output{-1.59, -0.1, 10., 0., -1.59, 1.99, 2.015, 3.}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; + const std::vector min_val{-1.59f}; + const std::vector output{-1.59f, -0.1f, 10.f, 0.f, -1.59f, 1.99f, 2.015f, 3.f}; test_case.add_input(data); test_case.add_input(min_val); @@ -4408,8 +4416,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_set_min_no_max_initializers) { "onnx/clip_set_min_no_max_initializers.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; - const std::vector output{-1.59, -0.1, 10., 0., -1.59, 1.99, 2.015, 3.}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; + const std::vector output{-1.59f, -0.1f, 10.f, 0.f, -1.59f, 1.99f, 2.015f, 3.f}; test_case.add_input(data); @@ -4423,10 +4431,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_set_min_set_max) { "onnx/clip_set_min_set_max.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; - const std::vector min_val{-1.59}; - const std::vector max_val{2.01}; - const std::vector output{-1.59, -0.1, 2.01, 0., -1.59, 1.99, 2.01, 2.01}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; + const std::vector min_val{-1.59f}; + const std::vector max_val{2.01f}; + const std::vector output{-1.59f, -0.1f, 2.01f, 0.f, -1.59f, 1.99f, 2.01f, 2.01f}; test_case.add_input(data); test_case.add_input(min_val); @@ -4442,8 +4450,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_clip_set_min_set_max_initializers) { "onnx/clip_set_min_set_max_initializers.onnx")); auto test_case = test::TestCase(function, s_device); - const std::vector data{-1.6, -0.1, 10., 0., -10., 1.99, 2.015, 3.}; - const std::vector output{-1.59, -0.1, 2.01, 0., -1.59, 1.99, 2.01, 2.01}; + const std::vector data{-1.6f, -0.1f, 10.f, 0.f, -10.f, 1.99f, 2.015f, 3.f}; + const std::vector output{-1.59f, -0.1f, 2.01f, 0.f, -1.59f, 1.99f, 2.01f, 2.01f}; test_case.add_input(data); @@ -4456,16 +4464,16 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_mvn_v6) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/mvn_v6.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.8439683, 0.5665144, 0.05836735, 0.02916367, 0.12964272, 0.5060197, 0.79538304, - 0.9411346, 0.9546573, 0.17730942, 0.46192095, 0.26480448, 0.6746842, 0.01665257, - 0.62473077, 0.9240844, 0.9722341, 0.11965699, 0.41356155, 0.9129373, 0.59330076, - 0.81929934, 0.7862604, 0.11799799, 0.69248444, 0.54119414, 0.07513223}); + test_case.add_input({0.8439683f, 0.5665144f, 0.05836735f, 0.02916367f, 0.12964272f, 0.5060197f, 0.79538304f, + 0.9411346f, 0.9546573f, 0.17730942f, 0.46192095f, 0.26480448f, 0.6746842f, 0.01665257f, + 0.62473077f, 0.9240844f, 0.9722341f, 0.11965699f, 0.41356155f, 0.9129373f, 0.59330076f, + 0.81929934f, 0.7862604f, 0.11799799f, 0.69248444f, 0.54119414f, 0.07513223f}); test_case.add_expected_output( Shape{3, 3, 3, 1}, - {1.3546423, 0.33053496, -1.5450814, -1.2106764, -0.8925952, 0.29888135, 0.38083088, - 0.81808794, 0.85865635, -1.1060555, -0.05552877, -0.78310335, 0.83281356, -1.250282, - 0.67467856, 0.7669372, 0.9113869, -1.6463585, -0.23402764, 1.6092131, 
0.42940593, - 1.2906139, 1.1860244, -0.92945826, 0.0721334, -0.38174, -1.7799333}); + {1.3546423f, 0.33053496f, -1.5450814f, -1.2106764f, -0.8925952f, 0.29888135f, 0.38083088f, + 0.81808794f, 0.85865635f, -1.1060555f, -0.05552877f, -0.78310335f, 0.83281356f, -1.250282f, + 0.67467856f, 0.7669372f, 0.9113869f, -1.6463585f, -0.23402764f, 1.6092131f, 0.42940593f, + 1.2906139f, 1.1860244f, -0.92945826f, 0.0721334f, -0.38174f, -1.7799333f}); test_case.run(); } @@ -4578,17 +4586,17 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dropout12_not_const_training_mode) { NGRAPH_TEST(${BACKEND_NAME}, onnx_multiple_slices_last_layer) { std::vector data(1 * 30 * 320 * 320); - std::fill(data.begin(), data.end(), 1); + std::fill(data.begin(), data.end(), 1.f); const auto function = onnx_import::import_onnx_model(file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/multiple_slices_last_layer.onnx")); auto test_case = test::TestCase(function, s_device); std::vector o1(1 * 320 * 320 * 21); - std::fill(o1.begin(), o1.end(), 1); + std::fill(o1.begin(), o1.end(), 1.f); std::vector o2(1 * 320 * 320 * 9); - std::fill(o2.begin(), o2.end(), 1); + std::fill(o2.begin(), o2.end(), 1.f); test_case.add_input(data); test_case.add_expected_output(Shape{1, 320, 320, 21}, o1); @@ -4613,23 +4621,23 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_softmax_crossentropy_loss_mean) { "onnx/softmax_crossentropy_loss_mean.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.54881352186203, - 0.7151893377304077, - 0.6027633547782898, - 0.5448831915855408, - 0.42365479469299316, - 0.6458941102027893, - 0.4375872015953064, - 0.891772985458374, - 0.9636627435684204, - 0.3834415078163147, - 0.7917250394821167, - 0.5288949012756348, - 0.5680445432662964, - 0.9255966544151306, - 0.07103605568408966}); + test_case.add_input({0.54881352186203f, + 0.7151893377304077f, + 0.6027633547782898f, + 0.5448831915855408f, + 0.42365479469299316f, + 0.6458941102027893f, + 0.4375872015953064f, + 0.891772985458374f, + 0.9636627435684204f, + 0.3834415078163147f, + 0.7917250394821167f, + 0.5288949012756348f, + 0.5680445432662964f, + 0.9255966544151306f, + 0.07103605568408966f}); test_case.add_input({1, 4, 3}); - test_case.add_expected_output(Shape{}, {1.561384797096252441}); + test_case.add_expected_output(Shape{}, {1.561384797096252441f}); test_case.run(); } @@ -4640,15 +4648,15 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_negativelog_likelihood_loss) { auto test_case = test::TestCase(function, s_device); test_case.add_input({ - 0.54881352186203, 0.7151893377304077, 0.6027633547782898, 0.5448831915855408, 0.42365479469299316, - 0.6458941102027893, 0.4375872015953064, 0.891772985458374, 0.9636627435684204, 0.3834415078163147, - 0.7917250394821167, 0.5288949012756348, 0.5680445432662964, 0.9255966544151306, 0.07103605568408966, - 0.08712930232286453, 0.020218396559357643, 0.832619845867157, 0.7781567573547363, 0.8700121641159058, - 0.978618323802948, 0.7991585731506348, 0.4614793658256531, 0.7805292010307312, 0.11827442795038223, - 0.6399210095405579, 0.14335328340530396, 0.9446688890457153, 0.5218483209609985, 0.4146619439125061, + 0.54881352186203f, 0.7151893377304077f, 0.6027633547782898f, 0.5448831915855408f, 0.42365479469299316f, + 0.6458941102027893f, 0.4375872015953064f, 0.891772985458374f, 0.9636627435684204f, 0.3834415078163147f, + 0.7917250394821167f, 0.5288949012756348f, 0.5680445432662964f, 0.9255966544151306f, 0.07103605568408966f, + 0.08712930232286453f, 0.020218396559357643f, 0.832619845867157f, 
0.7781567573547363f, 0.8700121641159058f,
+ 0.978618323802948f, 0.7991585731506348f, 0.4614793658256531f, 0.7805292010307312f, 0.11827442795038223f,
+ 0.6399210095405579f, 0.14335328340530396f, 0.9446688890457153f, 0.5218483209609985f, 0.4146619439125061f,
 });
 test_case.add_input({3, 3, 2, 4, 2, 0});
- test_case.add_expected_output(Shape{}, {-0.531306922435760498});
+ test_case.add_expected_output(Shape{}, {-0.531306922435760498f});
 test_case.run();
 }

@@ -4958,19 +4966,19 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_einsum_sum) {
 file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/einsum_sum.onnx"));
 auto test_case = test::TestCase(function, s_device);
 test_case.add_input(Shape{3, 4},
- {1.764052345967664,
- 0.4001572083672233,
- 0.9787379841057392,
- 2.240893199201458,
- 1.8675579901499675,
- -0.977277879876411,
- 0.9500884175255894,
- -0.1513572082976979,
- -0.10321885179355784,
- 0.41059850193837233,
- 0.144043571160878,
- 1.454273506962975});
- test_case.add_expected_output(Shape{3}, {5.3838407376420845, 1.689011319501448, 1.9056967282686674});
+ {1.764052345967664f,
+ 0.4001572083672233f,
+ 0.9787379841057392f,
+ 2.240893199201458f,
+ 1.8675579901499675f,
+ -0.977277879876411f,
+ 0.9500884175255894f,
+ -0.1513572082976979f,
+ -0.10321885179355784f,
+ 0.41059850193837233f,
+ 0.144043571160878f,
+ 1.454273506962975f});
+ test_case.add_expected_output(Shape{3}, {5.3838407376420845f, 1.689011319501448f, 1.9056967282686674f});
 test_case.run();
 }

@@ -5074,7 +5082,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_random_uniform) {
 file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/random_uniform.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_expected_output(Shape{2, 2}, {43.45518, 48.67585, 42.227386, 40.86294});
+ test_case.add_expected_output(Shape{2, 2}, {43.45518f, 48.67585f, 42.227386f, 40.86294f});
 test_case.run();
 }

@@ -5085,7 +5093,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_random_uniform_like) {
 auto test_case = test::TestCase(function, s_device);
 test_case.add_input(Shape{2, 2}, {41, 42, 43, 44});

- test_case.add_expected_output(Shape{2, 2}, {43.45518, 48.67585, 42.227386, 40.86294});
+ test_case.add_expected_output(Shape{2, 2}, {43.45518f, 48.67585f, 42.227386f, 40.86294f});
 test_case.run();
 }

@@ -5094,7 +5102,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_random_normal) {
 file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/random_normal.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_expected_output(Shape{2, 2}, {13.459274, 41.75028, -19.311913, 131.79282});
+ test_case.add_expected_output(Shape{2, 2}, {13.459274f, 41.75028f, -19.311913f, 131.79282f});
 test_case.run();
 }

@@ -5105,50 +5113,50 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_random_normal_like) {
 auto test_case = test::TestCase(function, s_device);
 test_case.add_input(Shape{2, 2}, {0, 0, 0, 0});

- test_case.add_expected_output(Shape{2, 2}, {13.459274, 41.75028, -19.311913, 131.79282});
+ test_case.add_expected_output(Shape{2, 2}, {13.459274f, 41.75028f, -19.311913f, 131.79282f});
 test_case.run();
 }

 NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_packed_sum_2in) {
 const auto function =
 onnx_import::import_onnx_model(file_util::path_join(CommonTestUtils::getExecutableDirectory(),
 SERIALIZED_ZOO,
 "onnx/aten_embedding_sum_packed_2in.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7});
+ test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f});
 test_case.add_input(Shape{3, 2}, {0, 2, 1, 2, 3, 4}); // indices

- test_case.add_expected_output(Shape{3, 2}, {-2.1, -2.4, -2., -2.2, -0.19999999, 0.8});
+ test_case.add_expected_output(Shape{3, 2}, {-2.1f, -2.4f, -2.f, -2.2f, -0.19999999f, 0.8f});
 test_case.run();
 }

 NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_packed_sum_3in_offsets_none) {
 const auto function =
 onnx_import::import_onnx_model(file_util::path_join(CommonTestUtils::getExecutableDirectory(),
 SERIALIZED_ZOO,
 "onnx/aten_embedding_sum_packed_3in_offset_none.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7});
+ test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f});
 test_case.add_input(Shape{3, 2}, {0, 2, 1, 2, 3, 4}); // indices

- test_case.add_expected_output(Shape{3, 2}, {-2.1, -2.4, -2., -2.2, -0.19999999, 0.8});
+ test_case.add_expected_output(Shape{3, 2}, {-2.1f, -2.4f, -2.f, -2.2f, -0.19999999f, 0.8f});
 test_case.run();
 }

 NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_packed_sum_4in_per_sample_weights) {
 const auto function = onnx_import::import_onnx_model(
 file_util::path_join(CommonTestUtils::getExecutableDirectory(),
 SERIALIZED_ZOO,
 "onnx/aten_embedding_sum_packed_4in_per_sample_weights.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7});
- test_case.add_input(Shape{3, 2}, {0, 2, 1, 2, 3, 4}); // indices
- test_case.add_input(Shape{3, 2}, {0.5, 0.5, 0.5, 0.5, 0.5, 0.5}); // per_sample_weights
+ test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f});
+ test_case.add_input(Shape{3, 2}, {0, 2, 1, 2, 3, 4});            // indices
+ test_case.add_input(Shape{3, 2}, {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}); // per_sample_weights

- test_case.add_expected_output(Shape{3, 2}, {-1.05, -1.2, -1., -1.1, -0.09999999, 0.4});
+ test_case.add_expected_output(Shape{3, 2}, {-1.05f, -1.2f, -1.f, -1.1f, -0.09999999f, 0.4f});
 test_case.run();
 }

@@ -5159,10 +5167,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_packed_sum_4in_two_none) {
 "onnx/aten_embedding_sum_packed_4in_two_none.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7});
+ test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f});
 test_case.add_input(Shape{3, 2}, {0, 2, 1, 2, 3, 4}); // indices

- test_case.add_expected_output(Shape{3, 2}, {-2.1, -2.4, -2., -2.2, -0.19999999, 0.8});
+ test_case.add_expected_output(Shape{3, 2}, {-2.1f, -2.4f, -2.f, -2.2f, -0.19999999f, 0.8f});
 test_case.run();
 }

@@ -5173,11 +5181,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_offsets_sum_3in) {
 const auto function =
 onnx_import::import_onnx_model(file_util::path_join(CommonTestUtils::getExecutableDirectory(),
 SERIALIZED_ZOO,
 "onnx/aten_embedding_sum_offset_3in.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7});
+ test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f});
 test_case.add_input(Shape{4}, {0, 2, 3, 4}); // indices
 test_case.add_input(Shape{3}, {0, 2, 2});    // offsets

- test_case.add_expected_output(Shape{3, 2}, {-2.1, -2.4, 0, 0, -0.2, 0.8});
+ test_case.add_expected_output(Shape{3, 2}, {-2.1f, -2.4f, 0.f, 0.f, -0.2f, 0.8f});
 test_case.run();
 }

@@ -5188,12 +5196,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_offsets_sum_4in) {
 "onnx/aten_embedding_sum_offset_4in.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7});
- test_case.add_input(Shape{4}, {0, 2, 3, 4}); // indices
- test_case.add_input(Shape{3}, {0, 2, 2}); // offsets
- test_case.add_input(Shape{4}, {0.5, 0.5, 0.5, 0.5}); // per_sample_weights
+ test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f});
+ test_case.add_input(Shape{4}, {0, 2, 3, 4});          // indices
+ test_case.add_input(Shape{3}, {0, 2, 2});             // offsets
+ test_case.add_input(Shape{4}, {0.5f, 0.5f, 0.5f, 0.5f}); // per_sample_weights

- test_case.add_expected_output(Shape{3, 2}, {-1.05, -1.2, 0., 0., -0.09999999, 0.4});
+ test_case.add_expected_output(Shape{3, 2}, {-1.05f, -1.2f, 0.f, 0.f, -0.09999999f, 0.4f});
 test_case.run();
 }

@@ -5208,11 +5216,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_aten_embedding_bag_many_node_outputs) {
 EXPECT_EQ(function->get_results().size(), 1);

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{5, 2}, {-0.2, -0.6, -0.1, -0.4, -1.9, -1.8, -1., 1.5, 0.8, -0.7});
+ test_case.add_input(Shape{5, 2}, {-0.2f, -0.6f, -0.1f, -0.4f, -1.9f, -1.8f, -1.f, 1.5f, 0.8f, -0.7f});
 test_case.add_input(Shape{4}, {0, 2, 3, 4}); // indices
 test_case.add_input(Shape{3}, {0, 2, 2});    // offsets

- test_case.add_expected_output(Shape{3, 2}, {-2.1, -2.4, 0, 0, -0.2, 0.8});
+ test_case.add_expected_output(Shape{3, 2}, {-2.1f, -2.4f, 0.f, 0.f, -0.2f, 0.8f});
 test_case.run();
 }

@@ -5388,13 +5396,14 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_fib_like_input_rev) {
 auto test_case = test::TestCase(function, s_device);
 test_case.add_input(Shape{}, {0});
 test_case.add_input(Shape{}, {1});
- test_case.add_input(Shape{10}, std::vector{0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9});
+ test_case.add_input(Shape{10},
+ std::vector{0.f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f});

- test_case.add_expected_output(Shape{}, {0.14897026});
- test_case.add_expected_output(Shape{}, {0.});
+ test_case.add_expected_output(Shape{}, {0.14897026f});
+ test_case.add_expected_output(Shape{}, {0.f});
 test_case.add_expected_output(
 Shape{10},
- {0.9, 1.52, 1.694, 1.9284, 1.8112, 1.4958401, 0.9921121, 0.49759045, 0.14897026, 0.});
+ {0.9f, 1.52f, 1.694f, 1.9284f, 1.8112f, 1.4958401f, 0.9921121f, 0.49759045f, 0.14897026f, 0.f});
 test_case.run();
 }

@@ -5407,13 +5416,14 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_fib_like_input_out_rev) {
 auto test_case = test::TestCase(function, s_device);
 test_case.add_input(Shape{}, {0});
 test_case.add_input(Shape{}, {1});
- test_case.add_input(Shape{10}, std::vector{0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9});
+ test_case.add_input(Shape{10},
+ std::vector{0.f, 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f});

- test_case.add_expected_output(Shape{}, {0.14897026});
+ test_case.add_expected_output(Shape{}, {0.14897026f});
 test_case.add_expected_output(Shape{}, {0.});
 test_case.add_expected_output(
 Shape{10},
- {0., 0.14897026, 0.49759045, 0.9921121, 1.4958401, 1.8112, 1.9284, 1.694, 1.52,
0.9});
+ {0.f, 0.14897026f, 0.49759045f, 0.9921121f, 1.4958401f, 1.8112f, 1.9284f, 1.694f, 1.52f, 0.9f});
 test_case.run();
 }

@@ -5435,27 +5445,29 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_ND_mixed_ones) {
 test_case.run();
 }

 NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_ND_mixed_vals) {
 const auto function = onnx_import::import_onnx_model(
 file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/scan15_ND_mixed.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{1, 3, 2}, {0, 0, 0, 0, 0, 0});
- test_case.add_input(Shape{1, 3, 2}, {1, 1, 1, 1, 1, 1});
- std::vector sequence_vals{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3, 1.4, 1.5,
- 1.6, 1.7, 1.8, 1.9, 2., 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.};
+ test_case.add_input(Shape{1, 3, 2}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f});
+ test_case.add_input(Shape{1, 3, 2}, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f});
+ std::vector sequence_vals{0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f,
+ 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, 1.6f, 1.7f, 1.8f, 1.9f, 2.f,
+ 2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f, 3.f};
 test_case.add_input(Shape{1, 3, 5, 2}, sequence_vals); // multiply factor (reverse)
 test_case.add_input(Shape{1, 5, 3, 2}, sequence_vals); // div factor

 test_case.add_expected_output(Shape{1, 3, 2},
- {2.7327938, 2.1428573, 21.070545, 16.92727, 49.765778, 41.444443});
+ {2.7327938f, 2.1428573f, 21.070545f, 16.92727f, 49.765778f, 41.444443f});
 test_case.add_expected_output(Shape{1, 3, 2},
- {0.40161943, 0.5274726, 16.80789, 14.025973, 59.98805, 50.518517});
+ {0.40161943f, 0.5274726f, 16.80789f, 14.025973f, 59.98805f, 50.518517f});
 test_case.add_expected_output(
 Shape{1, 3, 2, 5},
- {0.40161943, 2.7327938, 7.3076925, 10., 9., 0.5274726, 2.1428573, 4.714286, 6., 5.,
- 16.80789, 21.070545, 20.185184, 13.851851, 6.333333, 14.025973, 16.92727, 15.799998, 10.799999, 5.,
- 59.98805, 49.765778, 33.074867, 16.690908, 5.8, 50.518517, 41.444443, 27.444445, 14., 5.});
+ {0.40161943f, 2.7327938f, 7.3076925f, 10.f, 9.f, 0.5274726f, 2.1428573f, 4.714286f,
+ 6.f, 5.f, 16.80789f, 21.070545f, 20.185184f, 13.851851f, 6.333333f, 14.025973f,
+ 16.92727f, 15.799998f, 10.799999f, 5.f, 59.98805f, 49.765778f, 33.074867f, 16.690908f,
+ 5.8f, 50.518517f, 41.444443f, 27.444445f, 14.f, 5.f});
 test_case.run();
 }

@@ -5466,22 +5478,24 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_ND_mixed_vals_neg_axes) {
 "onnx/scan15_ND_mixed_neg_axes.onnx"));

 auto test_case = test::TestCase(function, s_device);
- test_case.add_input(Shape{1, 3, 2}, {0, 0, 0, 0, 0, 0});
- test_case.add_input(Shape{1, 3, 2}, {1, 1, 1, 1, 1, 1});
- std::vector sequence_vals{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3, 1.4, 1.5,
- 1.6, 1.7, 1.8, 1.9, 2., 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.};
+ test_case.add_input(Shape{1, 3, 2}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f});
+ test_case.add_input(Shape{1, 3, 2}, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f});
+ std::vector sequence_vals{0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f,
+ 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, 1.6f, 1.7f, 1.8f, 1.9f, 2.f,
+ 2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f, 3.f};
 test_case.add_input(Shape{1, 3, 5, 2}, sequence_vals); // multiply factor (reverse)
 test_case.add_input(Shape{1, 5, 3, 2}, sequence_vals); // div factor

 test_case.add_expected_output(Shape{1, 3, 2},
- {2.7327938, 2.1428573, 21.070545, 16.92727, 49.765778, 41.444443});
16.92727f, 49.765778f, 41.444443f}); test_case.add_expected_output(Shape{1, 3, 2}, - {0.40161943, 0.5274726, 16.80789, 14.025973, 59.98805, 50.518517}); + {0.40161943f, 0.5274726f, 16.80789f, 14.025973f, 59.98805f, 50.518517f}); test_case.add_expected_output( Shape{1, 3, 2, 5}, - {0.40161943, 2.7327938, 7.3076925, 10., 9., 0.5274726, 2.1428573, 4.714286, 6., 5., - 16.80789, 21.070545, 20.185184, 13.851851, 6.333333, 14.025973, 16.92727, 15.799998, 10.799999, 5., - 59.98805, 49.765778, 33.074867, 16.690908, 5.8, 50.518517, 41.444443, 27.444445, 14., 5.}); + {0.40161943f, 2.7327938f, 7.3076925f, 10.f, 9.f, 0.5274726f, 2.1428573f, 4.714286f, + 6.f, 5.f, 16.80789f, 21.070545f, 20.185184f, 13.851851f, 6.333333f, 14.025973f, + 16.92727f, 15.799998f, 10.799999f, 5.f, 59.98805f, 49.765778f, 33.074867f, 16.690908f, + 5.8f, 50.518517f, 41.444443f, 27.444445f, 14.f, 5.f}); test_case.run(); } @@ -5490,22 +5504,24 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_dyn_rank_vals) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/scan15_dyn_rank.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 3, 2}, {0, 0, 0, 0, 0, 0}); - test_case.add_input(Shape{1, 3, 2}, {1, 1, 1, 1, 1, 1}); - std::vector sequence_vals{0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3, 1.4, 1.5, - 1.6, 1.7, 1.8, 1.9, 2., 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.}; + test_case.add_input(Shape{1, 3, 2}, {0.f, 0.f, 0.f, 0.f, 0.f, 0.f}); + test_case.add_input(Shape{1, 3, 2}, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + std::vector sequence_vals{0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, + 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, 1.6f, 1.7f, 1.8f, 1.9f, 2.f, + 2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f, 3.f}; test_case.add_input(Shape{1, 3, 5, 2}, sequence_vals); // multiply factor (reverse) test_case.add_input(Shape{1, 5, 3, 2}, sequence_vals); // div factor test_case.add_expected_output(Shape{1, 3, 2}, - {2.7327938, 2.1428573, 21.070545, 16.92727, 49.765778, 41.444443}); + {2.7327938f, 2.1428573f, 21.070545f, 16.92727f, 49.765778f, 41.444443f}); test_case.add_expected_output(Shape{1, 3, 2}, - {0.40161943, 0.5274726, 16.80789, 14.025973, 59.98805, 50.518517}); + {0.40161943f, 0.5274726f, 16.80789f, 14.025973f, 59.98805f, 50.518517f}); test_case.add_expected_output( Shape{1, 3, 2, 5}, - {0.40161943, 2.7327938, 7.3076925, 10., 9., 0.5274726, 2.1428573, 4.714286, 6., 5., - 16.80789, 21.070545, 20.185184, 13.851851, 6.333333, 14.025973, 16.92727, 15.799998, 10.799999, 5., - 59.98805, 49.765778, 33.074867, 16.690908, 5.8, 50.518517, 41.444443, 27.444445, 14., 5.}); + {0.40161943f, 2.7327938f, 7.3076925f, 10.f, 9.f, 0.5274726f, 2.1428573f, 4.714286f, + 6.f, 5.f, 16.80789f, 21.070545f, 20.185184f, 13.851851f, 6.333333f, 14.025973f, + 16.92727f, 15.799998f, 10.799999f, 5.f, 59.98805f, 49.765778f, 33.074867f, 16.690908f, + 5.8f, 50.518517f, 41.444443f, 27.444445f, 14.f, 5.f}); test_case.run(); } @@ -5529,43 +5545,46 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan15_ND_b4_input_rev_vals) { "onnx/scan15_ND_b4_input_rev.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{4, 3, 2}, std::vector(24, 0)); - test_case.add_input(Shape{4, 3, 2}, std::vector(24, 1)); + test_case.add_input(Shape{4, 3, 2}, std::vector(24, 0.f)); + test_case.add_input(Shape{4, 3, 2}, std::vector(24, 1.f)); std::vector sequence_vals{ - 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, - 1.9, 2., 2.1, 2.2, 2.3, 
2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3., 3.1, 3.2, 3.3, 3.4, 3.5, 3.6,
- 3.7, 3.8, 3.9, 4., 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5., 5.1, 5.2, 5.3, 5.4,
- 5.5, 5.6, 5.7, 5.8, 5.9, 6., 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7., 7.1, 7.2,
- 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8., 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9, 9.,
- 9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9, 10., 10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8,
- 10.9, 11., 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7, 11.8, 11.9, 12.};
- test_case.add_input(Shape{4, 5, 3, 2}, sequence_vals); // multiply factor (reverse)
+ 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, 1.1f, 1.2f, 1.3f, 1.4f, 1.5f,
+ 1.6f, 1.7f, 1.8f, 1.9f, 2.f, 2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f, 3.f,
+ 3.1f, 3.2f, 3.3f, 3.4f, 3.5f, 3.6f, 3.7f, 3.8f, 3.9f, 4.f, 4.1f, 4.2f, 4.3f, 4.4f, 4.5f,
+ 4.6f, 4.7f, 4.8f, 4.9f, 5.f, 5.1f, 5.2f, 5.3f, 5.4f, 5.5f, 5.6f, 5.7f, 5.8f, 5.9f, 6.f,
+ 6.1f, 6.2f, 6.3f, 6.4f, 6.5f, 6.6f, 6.7f, 6.8f, 6.9f, 7.f, 7.1f, 7.2f, 7.3f, 7.4f, 7.5f,
+ 7.6f, 7.7f, 7.8f, 7.9f, 8.f, 8.1f, 8.2f, 8.3f, 8.4f, 8.5f, 8.6f, 8.7f, 8.8f, 8.9f, 9.f,
+ 9.1f, 9.2f, 9.3f, 9.4f, 9.5f, 9.6f, 9.7f, 9.8f, 9.9f, 10.f, 10.1f, 10.2f, 10.3f, 10.4f, 10.5f,
+ 10.6f, 10.7f, 10.8f, 10.9f, 11.f, 11.1f, 11.2f, 11.3f, 11.4f, 11.5f, 11.6f, 11.7f, 11.8f, 11.9f, 12.f};
+ test_case.add_input(Shape{4, 5, 3, 2}, sequence_vals); // multiply factor (reverse)
 test_case.add_input(Shape{4, 5, 3, 2}, sequence_vals); // div factor

 test_case.add_expected_output(
 Shape{4, 3, 2},
- {61.210526, 33.2, 23.857145, 19.181818, 16.373913, 14.5, 6.8880844, 6.83,
- 6.7754016, 6.7239814, 6.6754713, 6.6296296, 5.9686656, 5.953226, 5.9382715, 5.9237804,
- 5.9097314, 5.896105, 5.652082, 5.645059, 5.638186, 5.6314588, 5.624872, 5.618421});
+ {61.210526f, 33.2f, 23.857145f, 19.181818f, 16.373913f, 14.5f, 6.8880844f, 6.83f,
+ 6.7754016f, 6.7239814f, 6.6754713f, 6.6296296f, 5.9686656f, 5.953226f, 5.9382715f, 5.9237804f,
+ 5.9097314f, 5.896105f, 5.652082f, 5.645059f, 5.638186f, 5.6314588f, 5.624872f, 5.618421f});
 test_case.add_expected_output(
 Shape{4, 3, 2},
- {6.271278, 6.2461543, 6.2433867, 6.2545457, 6.2744985, 6.3, 6.9531364, 6.970527,
- 6.987378, 7.003712, 7.019554, 7.034921, 7.30868, 7.3164845, 7.324116, 7.3315806,
- 7.338885, 7.346032, 7.485426, 7.489783, 7.494067, 7.49828, 7.5024257, 7.506502});
+ {6.271278f, 6.2461543f, 6.2433867f, 6.2545457f, 6.2744985f, 6.3f, 6.9531364f, 6.970527f,
+ 6.987378f, 7.003712f, 7.019554f, 7.034921f, 7.30868f, 7.3164845f, 7.324116f, 7.3315806f,
+ 7.338885f, 7.346032f, 7.485426f, 7.489783f, 7.494067f, 7.49828f, 7.5024257f, 7.506502f});
 test_case.add_expected_output(
 Shape{5, 4, 3, 2},
- {25., 13., 9., 7., 5.8, 5., 1.7741936, 1.75, 1.7272727, 1.7058823,
- 1.6857144, 1.6666667, 1.3934426, 1.3870969, 1.3809522, 1.375, 1.3692307, 1.3636364, 1.2637362, 1.2608696,
- 1.2580644, 1.2553192, 1.2526315, 1.25, 70.57143, 35., 23.333334, 17.6, 14.218181, 12.,
- 3.6739323, 3.618421, 3.5664334, 3.5176468, 3.471777, 3.4285717, 2.822119, 2.8083491, 2.7950313, 2.7821426,
- 2.7696643, 2.757576, 2.543786, 2.5377107, 2.5317693, 2.5259573, 2.520271, 2.514706, 95.57143, 47.999996,
- 32.333336, 24.6, 20.01818, 17., 5.448126, 5.368421, 5.293706, 5.223529, 5.157491, 5.0952387,
- 4.215562, 4.195446, 4.1759834, 4.1571426, 4.138895, 4.1212125, 3.8075223, 3.7985802, 3.7898335, 3.7812767,
- 3.7729027, 3.764706, 61.210526, 33.2, 23.857145, 19.181818, 16.373913, 14.5, 6.8880844, 6.83,
- 6.7754016, 6.7239814, 6.6754713, 6.6296296, 5.9686656, 5.953226, 5.9382715, 
5.9237804, 5.9097314, 5.896105, - 5.652082, 5.645059, 5.638186, 5.6314588, 5.624872, 5.618421, 6.271278, 6.2461543, 6.2433867, 6.2545457, - 6.2744985, 6.3, 6.9531364, 6.970527, 6.987378, 7.003712, 7.019554, 7.034921, 7.30868, 7.3164845, - 7.324116, 7.3315806, 7.338885, 7.346032, 7.485426, 7.489783, 7.494067, 7.49828, 7.5024257, 7.506502}); + {25.f, 13.f, 9.f, 7.f, 5.8f, 5.f, 1.7741936f, 1.75f, 1.7272727f, + 1.7058823f, 1.6857144f, 1.6666667f, 1.3934426f, 1.3870969f, 1.3809522f, 1.375f, 1.3692307f, 1.3636364f, + 1.2637362f, 1.2608696f, 1.2580644f, 1.2553192f, 1.2526315f, 1.25f, 70.57143f, 35.f, 23.333334f, + 17.6f, 14.218181f, 12.f, 3.6739323f, 3.618421f, 3.5664334f, 3.5176468f, 3.471777f, 3.4285717f, + 2.822119f, 2.8083491f, 2.7950313f, 2.7821426f, 2.7696643f, 2.757576f, 2.543786f, 2.5377107f, 2.5317693f, + 2.5259573f, 2.520271f, 2.514706f, 95.57143f, 47.999996f, 32.333336f, 24.6f, 20.01818f, 17.f, + 5.448126f, 5.368421f, 5.293706f, 5.223529f, 5.157491f, 5.0952387f, 4.215562f, 4.195446f, 4.1759834f, + 4.1571426f, 4.138895f, 4.1212125f, 3.8075223f, 3.7985802f, 3.7898335f, 3.7812767f, 3.7729027f, 3.764706f, + 61.210526f, 33.2f, 23.857145f, 19.181818f, 16.373913f, 14.5f, 6.8880844f, 6.83f, 6.7754016f, + 6.7239814f, 6.6754713f, 6.6296296f, 5.9686656f, 5.953226f, 5.9382715f, 5.9237804f, 5.9097314f, 5.896105f, + 5.652082f, 5.645059f, 5.638186f, 5.6314588f, 5.624872f, 5.618421f, 6.271278f, 6.2461543f, 6.2433867f, + 6.2545457f, 6.2744985f, 6.3f, 6.9531364f, 6.970527f, 6.987378f, 7.003712f, 7.019554f, 7.034921f, + 7.30868f, 7.3164845f, 7.324116f, 7.3315806f, 7.338885f, 7.346032f, 7.485426f, 7.489783f, 7.494067f, + 7.49828f, 7.5024257f, 7.506502f}); test_case.run(); } @@ -5600,43 +5619,46 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_scan8_ND_b4_input_rev_vals) { "onnx/scan8_ND_b4_input_rev.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{4, 3, 2}, std::vector(24, 0)); - test_case.add_input(Shape{4, 3, 2}, std::vector(24, 1)); + test_case.add_input(Shape{4, 3, 2}, std::vector(24, 0.f)); + test_case.add_input(Shape{4, 3, 2}, std::vector(24, 1.f)); std::vector sequence_vals{ - 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1., 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, - 1.9, 2., 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3., 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, - 3.7, 3.8, 3.9, 4., 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5., 5.1, 5.2, 5.3, 5.4, - 5.5, 5.6, 5.7, 5.8, 5.9, 6., 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7., 7.1, 7.2, - 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8., 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9, 9., - 9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9, 10., 10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, - 10.9, 11., 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7, 11.8, 11.9, 12.}; + 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f, 1.f, 1.1f, 1.2f, 1.3f, 1.4f, 1.5f, + 1.6f, 1.7f, 1.8f, 1.9f, 2.f, 2.1f, 2.2f, 2.3f, 2.4f, 2.5f, 2.6f, 2.7f, 2.8f, 2.9f, 3.f, + 3.1f, 3.2f, 3.3f, 3.4f, 3.5f, 3.6f, 3.7f, 3.8f, 3.9f, 4.f, 4.1f, 4.2f, 4.3f, 4.4f, 4.5f, + 4.6f, 4.7f, 4.8f, 4.9f, 5.f, 5.1f, 5.2f, 5.3f, 5.4f, 5.5f, 5.6f, 5.7f, 5.8f, 5.9f, 6.f, + 6.1f, 6.2f, 6.3f, 6.4f, 6.5f, 6.6f, 6.7f, 6.8f, 6.9f, 7.f, 7.1f, 7.2f, 7.3f, 7.4f, 7.5f, + 7.6f, 7.7f, 7.8f, 7.9f, 8.f, 8.1f, 8.2f, 8.3f, 8.4f, 8.5f, 8.6f, 8.7f, 8.8f, 8.9f, 9.f, + 9.1f, 9.2f, 9.3f, 9.4f, 9.5f, 9.6f, 9.7f, 9.8f, 9.9f, 10.f, 10.1f, 10.2f, 10.3f, 10.4f, 10.5f, + 10.6f, 10.7f, 10.8f, 10.9f, 11.f, 11.1f, 11.2f, 11.3f, 11.4f, 11.5f, 11.6f, 11.7f, 11.8f, 11.9f, 12.f}; test_case.add_input(Shape{4, 5, 3, 2}, 
sequence_vals); // multiply factor (reverse) test_case.add_input(Shape{4, 5, 3, 2}, sequence_vals); // div factor test_case.add_expected_output( Shape{4, 3, 2}, - {61.210526, 33.2, 23.857145, 19.181818, 16.373913, 14.5, 6.8880844, 6.83, - 6.7754016, 6.7239814, 6.6754713, 6.6296296, 5.9686656, 5.953226, 5.9382715, 5.9237804, - 5.9097314, 5.896105, 5.652082, 5.645059, 5.638186, 5.6314588, 5.624872, 5.618421}); + {61.210526f, 33.2f, 23.857145f, 19.181818f, 16.373913f, 14.5f, 6.8880844f, 6.83f, + 6.7754016f, 6.7239814f, 6.6754713f, 6.6296296f, 5.9686656f, 5.953226f, 5.9382715f, 5.9237804f, + 5.9097314f, 5.896105f, 5.652082f, 5.645059f, 5.638186f, 5.6314588f, 5.624872f, 5.618421f}); test_case.add_expected_output( Shape{4, 3, 2}, - {6.271278, 6.2461543, 6.2433867, 6.2545457, 6.2744985, 6.3, 6.9531364, 6.970527, - 6.987378, 7.003712, 7.019554, 7.034921, 7.30868, 7.3164845, 7.324116, 7.3315806, - 7.338885, 7.346032, 7.485426, 7.489783, 7.494067, 7.49828, 7.5024257, 7.506502}); + {6.271278f, 6.2461543f, 6.2433867f, 6.2545457f, 6.2744985f, 6.3f, 6.9531364f, 6.970527f, + 6.987378f, 7.003712f, 7.019554f, 7.034921f, 7.30868f, 7.3164845f, 7.324116f, 7.3315806f, + 7.338885f, 7.346032f, 7.485426f, 7.489783f, 7.494067f, 7.49828f, 7.5024257f, 7.506502f}); test_case.add_expected_output( Shape{4, 5, 3, 2}, - {25., 13., 9., 7., 5.8, 5., 70.57143, 35., 23.333334, 17.6, - 14.218181, 12., 95.57143, 47.999996, 32.333336, 24.6, 20.01818, 17., 61.210526, 33.2, - 23.857145, 19.181818, 16.373913, 14.5, 6.271278, 6.2461543, 6.2433867, 6.2545457, 6.2744985, 6.3, - 1.7741936, 1.75, 1.7272727, 1.7058823, 1.6857144, 1.6666667, 3.6739323, 3.618421, 3.5664334, 3.5176468, - 3.471777, 3.4285717, 5.448126, 5.368421, 5.293706, 5.223529, 5.157491, 5.0952387, 6.8880844, 6.83, - 6.7754016, 6.7239814, 6.6754713, 6.6296296, 6.9531364, 6.970527, 6.987378, 7.003712, 7.019554, 7.034921, - 1.3934426, 1.3870969, 1.3809522, 1.375, 1.3692307, 1.3636364, 2.822119, 2.8083491, 2.7950313, 2.7821426, - 2.7696643, 2.757576, 4.215562, 4.195446, 4.1759834, 4.1571426, 4.138895, 4.1212125, 5.9686656, 5.953226, - 5.9382715, 5.9237804, 5.9097314, 5.896105, 7.30868, 7.3164845, 7.324116, 7.3315806, 7.338885, 7.346032, - 1.2637362, 1.2608696, 1.2580644, 1.2553192, 1.2526315, 1.25, 2.543786, 2.5377107, 2.5317693, 2.5259573, - 2.520271, 2.514706, 3.8075223, 3.7985802, 3.7898335, 3.7812767, 3.7729027, 3.764706, 5.652082, 5.645059, - 5.638186, 5.6314588, 5.624872, 5.618421, 7.485426, 7.489783, 7.494067, 7.49828, 7.5024257, 7.506502}); + {25.f, 13.f, 9.f, 7.f, 5.8f, 5.f, 70.57143f, 35.f, 23.333334f, + 17.6f, 14.218181f, 12.f, 95.57143f, 47.999996f, 32.333336f, 24.6f, 20.01818f, 17.f, + 61.210526f, 33.2f, 23.857145f, 19.181818f, 16.373913f, 14.5f, 6.271278f, 6.2461543f, 6.2433867f, + 6.2545457f, 6.2744985f, 6.3f, 1.7741936f, 1.75f, 1.7272727f, 1.7058823f, 1.6857144f, 1.6666667f, + 3.6739323f, 3.618421f, 3.5664334f, 3.5176468f, 3.471777f, 3.4285717f, 5.448126f, 5.368421f, 5.293706f, + 5.223529f, 5.157491f, 5.0952387f, 6.8880844f, 6.83f, 6.7754016f, 6.7239814f, 6.6754713f, 6.6296296f, + 6.9531364f, 6.970527f, 6.987378f, 7.003712f, 7.019554f, 7.034921f, 1.3934426f, 1.3870969f, 1.3809522f, + 1.375f, 1.3692307f, 1.3636364f, 2.822119f, 2.8083491f, 2.7950313f, 2.7821426f, 2.7696643f, 2.757576f, + 4.215562f, 4.195446f, 4.1759834f, 4.1571426f, 4.138895f, 4.1212125f, 5.9686656f, 5.953226f, 5.9382715f, + 5.9237804f, 5.9097314f, 5.896105f, 7.30868f, 7.3164845f, 7.324116f, 7.3315806f, 7.338885f, 7.346032f, + 1.2637362f, 1.2608696f, 1.2580644f, 1.2553192f, 1.2526315f, 1.25f, 
2.543786f, 2.5377107f, 2.5317693f, + 2.5259573f, 2.520271f, 2.514706f, 3.8075223f, 3.7985802f, 3.7898335f, 3.7812767f, 3.7729027f, 3.764706f, + 5.652082f, 5.645059f, 5.638186f, 5.6314588f, 5.624872f, 5.618421f, 7.485426f, 7.489783f, 7.494067f, + 7.49828f, 7.5024257f, 7.506502f}); test_case.run(); } @@ -5658,10 +5680,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softsign) { auto model = onnx_import::import_onnx_model( file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/softsign.onnx")); - Inputs inputs{std::vector{1.0, 0.1, 20.0, 12.0, -12.0, -0.2, 0.5, 100.0, 0.0, -1.0}}; + Inputs inputs{std::vector{1.0f, 0.1f, 20.0f, 12.0f, -12.0f, -0.2f, 0.5f, 100.0f, 0.0f, -1.0f}}; - std::vector - output{0.5, 0.09090909, 0.95238096, 0.9230769, -0.9230769, -0.16666666, 0.33333334, 0.990099, 0., -0.5}; + std::vector output{0.5f, + 0.09090909f, + 0.95238096f, + 0.9230769f, + -0.9230769f, + -0.16666666f, + 0.33333334f, + 0.990099f, + 0.f, + -0.5f}; auto test_case = test::TestCase(model, s_device); test_case.add_multiple_inputs(inputs); @@ -6215,7 +6245,7 @@ NGRAPH_TEST(${BACKEND_NAME}, castlike_float16_to_uint32) { auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 1, 2, 2}, std::vector{1.5, 2.3, 3, 4}); + test_case.add_input(Shape{1, 1, 2, 2}, std::vector{1.5f, 2.3f, 3.f, 4.f}); test_case.add_input(Shape{4}, {1, 2, 3, 4}); test_case.add_expected_output(std::vector{1, 2, 3, 4}); @@ -6229,7 +6259,7 @@ NGRAPH_TEST(${BACKEND_NAME}, castlike_float16_to_int64) { auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 1, 2, 2}, std::vector{1.5, 2.3, 3, 4}); + test_case.add_input(Shape{1, 1, 2, 2}, std::vector{1.5f, 2.3f, 3.f, 4.f}); test_case.add_input(Shape{4}, {1, 2, 3, 4}); test_case.add_expected_output(std::vector{1, 2, 3, 4}); @@ -6287,7 +6317,7 @@ NGRAPH_TEST(${BACKEND_NAME}, castlike_int32_to_float) { test_case.add_input(Shape{1, 1, 2, 2}, std::vector{-1, 2, 3, 4}); test_case.add_input(Shape{4}, {1, 2, 3, 4}); - test_case.add_expected_output(std::vector{-1.0, 2.0, 3.0, 4.0}); + test_case.add_expected_output(std::vector{-1.0f, 2.0f, 3.0f, 4.0f}); test_case.run(); } @@ -6299,7 +6329,7 @@ NGRAPH_TEST(${BACKEND_NAME}, castlike_float64_to_int32) { auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{1, 1, 2, 2}, std::vector{-107374.9876543, -2.2, 3.3, 4.4}); + test_case.add_input(Shape{1, 1, 2, 2}, std::vector{-107374.9876543f, -2.2f, 3.3f, 4.4f}); test_case.add_input(Shape{4}, {1, 2, 3, 4}); test_case.add_expected_output(std::vector{-107374, -2, 3, 4}); @@ -6313,9 +6343,13 @@ NGRAPH_TEST(${BACKEND_NAME}, DISABLED_castlike_float32_to_bfloat16) { auto test_case = test::TestCase(function, s_device); - test_case.add_input(Shape{3, 4}, std::vector{121.5, 122.7, 3, 4, 5, 6, 7, 8.8, 9, 10, 11, 12}); - test_case.add_input(Shape{3, 4}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - test_case.add_expected_output(std::vector{121.5, 122.7, 3, 4, 5, 6, 7, 8.8, 9, 10, 11, 12}); + test_case.add_input( + Shape{3, 4}, + std::vector{121.5f, 122.7f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.8f, 9.f, 10.f, 11.f, 12.f}); + test_case.add_input(Shape{3, 4}, + {1.5f, 2.5f, 3.5f, 4.5f, 5.5f, 6.5f, 7.5f, 8.5f, 9.5f, 10.5f, 11.5f, 12.5f}); + test_case.add_expected_output( + std::vector{121.5f, 122.7f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.8f, 9.f, 10.f, 11.f, 12.f}); test_case.run(); } @@ -6327,9 +6361,12 @@ NGRAPH_TEST(${BACKEND_NAME}, DISABLED_castlike_bfloat16_to_float32) { auto test_case = test::TestCase(function, s_device); - 
test_case.add_input(Shape{3, 4}, std::vector{121.5, 122.7, 3, 4, 5, 6, 7, 8.8, 9, 10, 11, 12}); + test_case.add_input( + Shape{3, 4}, + std::vector{121.5f, 122.7f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.8f, 9.f, 10.f, 11.f, 12.f}); test_case.add_input(Shape{3, 4}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); - test_case.add_expected_output(std::vector{121.5, 122.7, 3, 4, 5, 6, 7, 8.75, 9, 10, 11, 12}); + test_case.add_expected_output( + std::vector{121.5f, 122.7f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.75f, 9.f, 10.f, 11.f, 12.f}); test_case.run(); } diff --git a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp index 45e4876998acc5..e231334cfc87e8 100644 --- a/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp @@ -33,19 +33,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_bias_gelu) { "onnx/com.microsoft/bias_gelu.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.5488135, - 0.71518934, - 0.60276335, - 0.5448832, - 0.4236548, - 0.6458941, - 0.4375872, - 0.891773, - 0.96366274, - 0.3834415}); - test_case.add_input({0.79172504, 0.5288949, 0.56804454, 0.92559665, 0.07103606}); - test_case.add_expected_output( - {1.2198428, 1.1112978, 1.0293297, 1.366493, 0.3411342, 1.329408, 0.8051748, 1.354462, 1.8336612, 0.3068893}); + test_case.add_input({0.5488135f, + 0.71518934f, + 0.60276335f, + 0.5448832f, + 0.4236548f, + 0.6458941f, + 0.4375872f, + 0.891773f, + 0.96366274f, + 0.3834415f}); + test_case.add_input({0.79172504f, 0.5288949f, 0.56804454f, 0.92559665f, 0.07103606f}); + test_case.add_expected_output({1.2198428f, + 1.1112978f, + 1.0293297f, + 1.366493f, + 0.3411342f, + 1.329408f, + 0.8051748f, + 1.354462f, + 1.8336612f, + 0.3068893f}); test_case.run(); } @@ -56,19 +64,19 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta "onnx/com.microsoft/skip_layer_normalization_with_gamma_beta_bias.onnx")); std::vector input = { - 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, - 0.96366274, 0.38344151, 0.79172504, 0.52889490, 0.56804454, 0.92559665, 0.07103606, 0.08712930, - 0.02021840, 0.83261985, 0.77815676, 0.87001216, 0.97861832, 0.79915857, 0.46147937, 0.78052920, + 0.54881352f, 0.71518934f, 0.60276335f, 0.54488319f, 0.42365479f, 0.64589411f, 0.43758720f, 0.89177299f, + 0.96366274f, 0.38344151f, 0.79172504f, 0.52889490f, 0.56804454f, 0.92559665f, 0.07103606f, 0.08712930f, + 0.02021840f, 0.83261985f, 0.77815676f, 0.87001216f, 0.97861832f, 0.79915857f, 0.46147937f, 0.78052920f, }; std::vector skip = { - 0.11827443, 0.63992101, 0.14335328, 0.94466889, 0.52184832, 0.41466194, 0.26455560, 0.77423370, - 0.45615032, 0.56843394, 0.01878980, 0.61763549, 0.61209571, 0.61693400, 0.94374806, 0.68182027, - 0.35950789, 0.43703195, 0.69763118, 0.06022547, 0.66676670, 0.67063785, 0.21038257, 0.12892629, + 0.11827443f, 0.63992101f, 0.14335328f, 0.94466889f, 0.52184832f, 0.41466194f, 0.26455560f, 0.77423370f, + 0.45615032f, 0.56843394f, 0.01878980f, 0.61763549f, 0.61209571f, 0.61693400f, 0.94374806f, 0.68182027f, + 0.35950789f, 0.43703195f, 0.69763118f, 0.06022547f, 0.66676670f, 0.67063785f, 0.21038257f, 0.12892629f, }; std::vector expected = { - -0.19721794, -0.42944565, 0.18620640, 0.61282152, -0.11097327, -0.59518522, 0.13393641, 0.66901535, - 0.04256713, -0.71902490, 0.23107991, 0.17300847, -0.04390603, -0.31109563, 0.51021838, -0.66914201, - -0.20009395, -0.43313017, 0.67281967, 
-0.01712347, 0.09767530, -0.43024653, -0.01836969, -0.29238200, + -0.19721794f, -0.42944565f, 0.18620640f, 0.61282152f, -0.11097327f, -0.59518522f, 0.13393641f, 0.66901535f, + 0.04256713f, -0.71902490f, 0.23107991f, 0.17300847f, -0.04390603f, -0.31109563f, 0.51021838f, -0.66914201f, + -0.20009395f, -0.43313017f, 0.67281967f, -0.01712347f, 0.09767530f, -0.43024653f, -0.01836969f, -0.29238200f, }; auto test_case = test::TestCase(function, s_device); test_case.add_input(input); @@ -84,19 +92,19 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma_beta "onnx/com.microsoft/skip_layer_normalization_with_gamma_beta.onnx")); std::vector input = { - 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, - 0.96366274, 0.38344151, 0.79172504, 0.52889490, 0.56804454, 0.92559665, 0.07103606, 0.08712930, - 0.02021840, 0.83261985, 0.77815676, 0.87001216, 0.97861832, 0.79915857, 0.46147937, 0.78052920, + 0.54881352f, 0.71518934f, 0.60276335f, 0.54488319f, 0.42365479f, 0.64589411f, 0.43758720f, 0.89177299f, + 0.96366274f, 0.38344151f, 0.79172504f, 0.52889490f, 0.56804454f, 0.92559665f, 0.07103606f, 0.08712930f, + 0.02021840f, 0.83261985f, 0.77815676f, 0.87001216f, 0.97861832f, 0.79915857f, 0.46147937f, 0.78052920f, }; std::vector skip = { - 0.11827443, 0.63992101, 0.14335328, 0.94466889, 0.52184832, 0.41466194, 0.26455560, 0.77423370, - 0.45615032, 0.56843394, 0.01878980, 0.61763549, 0.61209571, 0.61693400, 0.94374806, 0.68182027, - 0.35950789, 0.43703195, 0.69763118, 0.06022547, 0.66676670, 0.67063785, 0.21038257, 0.12892629, + 0.11827443f, 0.63992101f, 0.14335328f, 0.94466889f, 0.52184832f, 0.41466194f, 0.26455560f, 0.77423370f, + 0.45615032f, 0.56843394f, 0.01878980f, 0.61763549f, 0.61209571f, 0.61693400f, 0.94374806f, 0.68182027f, + 0.35950789f, 0.43703195f, 0.69763118f, 0.06022547f, 0.66676670f, 0.67063785f, 0.21038257f, 0.12892629f, }; std::vector expected = { - -0.17974678, -0.23946194, -0.04376268, 0.46959469, -0.11171167, -0.41859278, -0.11082965, 0.64513868, - 0.07773457, -0.51403606, -0.13661698, 0.11262375, -0.05096011, -0.10416907, 0.10070466, -0.50876135, - -0.22290939, -0.27663514, 0.55416691, -0.08064821, 0.04857478, -0.25121087, -0.15912610, -0.26637587, + -0.17974678f, -0.23946194f, -0.04376268f, 0.46959469f, -0.11171167f, -0.41859278f, -0.11082965f, 0.64513868f, + 0.07773457f, -0.51403606f, -0.13661698f, 0.11262375f, -0.05096011f, -0.10416907f, 0.10070466f, -0.50876135f, + -0.22290939f, -0.27663514f, 0.55416691f, -0.08064821f, 0.04857478f, -0.25121087f, -0.15912610f, -0.26637587f, }; auto test_case = test::TestCase(function, s_device); test_case.add_input(input); @@ -112,19 +120,19 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_with_gamma) { "onnx/com.microsoft/skip_layer_normalization_with_gamma.onnx")); std::vector input = { - 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, - 0.96366274, 0.38344151, 0.79172504, 0.52889490, 0.56804454, 0.92559665, 0.07103606, 0.08712930, - 0.02021840, 0.83261985, 0.77815676, 0.87001216, 0.97861832, 0.79915857, 0.46147937, 0.78052920, + 0.54881352f, 0.71518934f, 0.60276335f, 0.54488319f, 0.42365479f, 0.64589411f, 0.43758720f, 0.89177299f, + 0.96366274f, 0.38344151f, 0.79172504f, 0.52889490f, 0.56804454f, 0.92559665f, 0.07103606f, 0.08712930f, + 0.02021840f, 0.83261985f, 0.77815676f, 0.87001216f, 0.97861832f, 0.79915857f, 0.46147937f, 0.78052920f, }; std::vector skip = { - 0.11827443, 0.63992101, 0.14335328, 0.94466889, 
0.52184832, 0.41466194, 0.26455560, 0.77423370, - 0.45615032, 0.56843394, 0.01878980, 0.61763549, 0.61209571, 0.61693400, 0.94374806, 0.68182027, - 0.35950789, 0.43703195, 0.69763118, 0.06022547, 0.66676670, 0.67063785, 0.21038257, 0.12892629, + 0.11827443f, 0.63992101f, 0.14335328f, 0.94466889f, 0.52184832f, 0.41466194f, 0.26455560f, 0.77423370f, + 0.45615032f, 0.56843394f, 0.01878980f, 0.61763549f, 0.61209571f, 0.61693400f, 0.94374806f, 0.68182027f, + 0.35950789f, 0.43703195f, 0.69763118f, 0.06022547f, 0.66676670f, 0.67063785f, 0.21038257f, 0.12892629f, }; std::vector expected = { - -0.10974677, 0.16053806, -0.26376268, 0.46959469, -0.04171166, -0.01859277, -0.33082965, 0.64513868, - 0.14773457, -0.11403608, -0.35661697, 0.11262375, 0.01903989, 0.29583094, -0.11929534, -0.50876135, - -0.15290938, 0.12336487, 0.33416691, -0.08064821, 0.11857478, 0.14878914, -0.37912610, -0.26637587, + -0.10974677f, 0.16053806f, -0.26376268f, 0.46959469f, -0.04171166f, -0.01859277f, -0.33082965f, 0.64513868f, + 0.14773457f, -0.11403608f, -0.35661697f, 0.11262375f, 0.01903989f, 0.29583094f, -0.11929534f, -0.50876135f, + -0.15290938f, 0.12336487f, 0.33416691f, -0.08064821f, 0.11857478f, 0.14878914f, -0.37912610f, -0.26637587f, }; auto test_case = test::TestCase(function, s_device); test_case.add_input(input); @@ -140,37 +148,37 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_skip_layer_normalization_dynamic_shapes) "onnx/com.microsoft/skip_layer_normalization_dynamic_shapes.onnx")); std::vector input = { - 0.54881352, 0.71518934, 0.60276335, 0.54488319, 0.42365479, 0.64589411, 0.43758720, 0.89177299, - 0.96366274, 0.38344151, 0.79172504, 0.52889490, 0.56804454, 0.92559665, 0.07103606, 0.08712930, - 0.02021840, 0.83261985, 0.77815676, 0.87001216, 0.97861832, 0.79915857, 0.46147937, 0.78052920, + 0.54881352f, 0.71518934f, 0.60276335f, 0.54488319f, 0.42365479f, 0.64589411f, 0.43758720f, 0.89177299f, + 0.96366274f, 0.38344151f, 0.79172504f, 0.52889490f, 0.56804454f, 0.92559665f, 0.07103606f, 0.08712930f, + 0.02021840f, 0.83261985f, 0.77815676f, 0.87001216f, 0.97861832f, 0.79915857f, 0.46147937f, 0.78052920f, }; std::vector skip = { - 0.11827443, 0.63992101, 0.14335328, 0.94466889, 0.52184832, 0.41466194, 0.26455560, 0.77423370, - 0.45615032, 0.56843394, 0.01878980, 0.61763549, 0.61209571, 0.61693400, 0.94374806, 0.68182027, - 0.35950789, 0.43703195, 0.69763118, 0.06022547, 0.66676670, 0.67063785, 0.21038257, 0.12892629, + 0.11827443f, 0.63992101f, 0.14335328f, 0.94466889f, 0.52184832f, 0.41466194f, 0.26455560f, 0.77423370f, + 0.45615032f, 0.56843394f, 0.01878980f, 0.61763549f, 0.61209571f, 0.61693400f, 0.94374806f, 0.68182027f, + 0.35950789f, 0.43703195f, 0.69763118f, 0.06022547f, 0.66676670f, 0.67063785f, 0.21038257f, 0.12892629f, }; std::vector gamma = { - 0.31542835, - 0.36371076, - 0.57019675, - 0.43860152, + 0.31542835f, + 0.36371076f, + 0.57019675f, + 0.43860152f, }; std::vector beta = { - 0.98837382, - 0.10204481, - 0.20887676, - 0.16130951, + 0.98837382f, + 0.10204481f, + 0.20887676f, + 0.16130951f, }; std::vector bias = { - 0.65310830, - 0.25329161, - 0.46631077, - 0.24442559, + 0.65310830f, + 0.25329161f, + 0.46631077f, + 0.24442559f, }; std::vector expected = { - 0.76600611, 0.34308332, -0.48470584, 0.71335256, 1.10028172, -0.13354334, -0.45232186, 0.79840088, - 1.52454257, -0.19450217, -0.13759643, 0.03988872, 1.27861762, 0.39529073, 0.12247884, -0.52944231, - 0.64228040, 0.21059875, 1.05966032, -0.14278713, 1.46366918, 0.21215858, -0.31640187, -0.22832340, + 0.76600611f, 0.34308332f, -0.48470584f, 
0.71335256, 1.10028172, -0.13354334, -0.45232186, 0.79840088,
- 1.52454257, -0.19450217, -0.13759643, 0.03988872, 1.27861762, 0.39529073, 0.12247884, -0.52944231,
- 0.64228040, 0.21059875, 1.05966032, -0.14278713, 1.46366918, 0.21215858, -0.31640187, -0.22832340,
+ 0.76600611f, 0.34308332f, -0.48470584f, 
0.71335256f, 1.10028172f, -0.13354334f, -0.45232186f, 0.79840088f, + 1.52454257f, -0.19450217f, -0.13759643f, 0.03988872f, 1.27861762f, 0.39529073f, 0.12247884f, -0.52944231f, + 0.64228040f, 0.21059875f, 1.05966032f, -0.14278713f, 1.46366918f, 0.21215858f, -0.31640187f, -0.22832340f, }; auto test_case = test::TestCase(function, s_device); @@ -193,21 +201,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization) { 8, 1, 5, 9, 8, 9, 4, 3, 0, 3, 5, 0, 2, 3, 8, 1, 3, 3, 3, 7, 0, 1, 9, 9, }; std::vector expected_output = { - -0.06615843, -0.18040463, 0.02199928, 0.01868065, 0.05397778, -0.11761580, -0.09138932, -0.02506775, - -0.02368510, -0.10373901, -0.05551499, -0.20972314, 0.01365213, 0.01132561, -0.08603337, -0.08906764, - 0.09692993, -0.04444099, -0.02037602, -0.03453060, -0.10214549, -0.13331436, -0.02665862, -0.01228805, - -0.14232540, -0.07032782, 0.05511986, -0.00120272, -0.04875736, -0.13051267, -0.05709254, 0.17854357, - -0.01759873, -0.01819968, 0.07573269, 0.00557164, 0.06232717, 0.00530490, -0.01565807, -0.14841977, - -0.02299280, 0.02038561, -0.00049481, 0.02575402, 0.10081697, -0.12517214, -0.09316762, -0.00974943, - -0.03093284, -0.06309240, -0.05551499, -0.20972314, 0.01365213, 0.01132561, -0.08603337, -0.06176658, - 0.08304203, -0.05025182, 0.00383657, -0.02288112, -0.11407227, -0.01386134, -0.04411830, -0.00537948, - 0.00164397, -0.03739140, 0.09941526, 0.00333974, -0.04251949, -0.12992151, -0.09509478, -0.11811313, - -0.03307065, -0.00866115, -0.15162414, 0.01106802, 0.06037656, 0.00035292, -0.00223284, -0.11215645, - -0.01390734, 0.07064321, 0.04028325, -0.00290875, 0.12875907, -0.12517214, -0.09316762, -0.00974943, - -0.03093284, -0.06309240, -0.08723789, 0.03130914, 0.03131931, -0.01526242, 0.20811458, -0.05696163, - 0.16304255, -0.02407495, -0.02955675, -0.03086288, -0.08130091, -0.05001551, -0.04875683, 0.00143666, - -0.12153473, -0.00018507, 0.10957482, -0.00416618, -0.01612359, -0.11605026, -0.08593204, 0.09055272, - -0.03054028, -0.03603891, -0.08479506, -0.00034568, 0.03713699, 0.00163411, -0.01738501, -0.18267182, + -0.06615843f, -0.18040463f, 0.02199928f, 0.01868065f, 0.05397778f, -0.11761580f, -0.09138932f, -0.02506775f, + -0.02368510f, -0.10373901f, -0.05551499f, -0.20972314f, 0.01365213f, 0.01132561f, -0.08603337f, -0.08906764f, + 0.09692993f, -0.04444099f, -0.02037602f, -0.03453060f, -0.10214549f, -0.13331436f, -0.02665862f, -0.01228805f, + -0.14232540f, -0.07032782f, 0.05511986f, -0.00120272f, -0.04875736f, -0.13051267f, -0.05709254f, 0.17854357f, + -0.01759873f, -0.01819968f, 0.07573269f, 0.00557164f, 0.06232717f, 0.00530490f, -0.01565807f, -0.14841977f, + -0.02299280f, 0.02038561f, -0.00049481f, 0.02575402f, 0.10081697f, -0.12517214f, -0.09316762f, -0.00974943f, + -0.03093284f, -0.06309240f, -0.05551499f, -0.20972314f, 0.01365213f, 0.01132561f, -0.08603337f, -0.06176658f, + 0.08304203f, -0.05025182f, 0.00383657f, -0.02288112f, -0.11407227f, -0.01386134f, -0.04411830f, -0.00537948f, + 0.00164397f, -0.03739140f, 0.09941526f, 0.00333974f, -0.04251949f, -0.12992151f, -0.09509478f, -0.11811313f, + -0.03307065f, -0.00866115f, -0.15162414f, 0.01106802f, 0.06037656f, 0.00035292f, -0.00223284f, -0.11215645f, + -0.01390734f, 0.07064321f, 0.04028325f, -0.00290875f, 0.12875907f, -0.12517214f, -0.09316762f, -0.00974943f, + -0.03093284f, -0.06309240f, -0.08723789f, 0.03130914f, 0.03131931f, -0.01526242f, 0.20811458f, -0.05696163f, + 0.16304255f, -0.02407495f, -0.02955675f, -0.03086288f, -0.08130091f, -0.05001551f, -0.04875683f, 0.00143666f, + 
-0.12153473f, -0.00018507f, 0.10957482f, -0.00416618f, -0.01612359f, -0.11605026f, -0.08593204f, 0.09055272f, + -0.03054028f, -0.03603891f, -0.08479506f, -0.00034568f, 0.03713699f, 0.00163411f, -0.01738501f, -0.18267182f, }; auto test_case = test::TestCase(function, s_device); @@ -229,21 +237,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 1, }; std::vector expected_output = { - -0.06044213, -0.14845914, 0.02457689, 0.02091519, 0.09514004, -0.10280035, -0.02087995, -0.03323204, - -0.02967127, -0.13447416, -0.05191760, -0.16518904, 0.02340531, 0.02176395, 0.04972410, -0.07360736, - 0.12192874, -0.04081530, -0.02338044, -0.05671440, -0.09475864, -0.08944942, -0.03362993, -0.01683486, - -0.16770349, -0.07382569, 0.06230322, 0.02215859, -0.05212611, -0.03934773, -0.04748865, 0.18134241, - -0.01965741, -0.02202452, 0.01973994, 0.01575558, 0.04300199, 0.01436110, -0.00198062, -0.09065692, - -0.02923042, -0.00748686, 0.00717049, 0.02638642, 0.12174864, -0.12973398, -0.11872391, -0.00549398, - -0.02386289, -0.02210563, -0.03590920, -0.13728066, -0.01337939, 0.01538021, -0.14687485, -0.05033565, - 0.03818212, -0.04939338, 0.00961064, -0.07407621, -0.09624685, 0.05594898, -0.04948713, -0.01305631, - -0.03779668, -0.01469170, 0.12346989, 0.02082030, -0.03449103, -0.06029151, -0.09300473, -0.16308543, - -0.02370042, 0.01066893, -0.06523034, 0.00497636, 0.01933458, -0.00900802, 0.00430878, -0.13999483, - -0.02377289, 0.01760014, 0.03896973, 0.00831112, 0.15634246, -0.11109130, -0.11997811, -0.02304414, - -0.01989413, -0.12763791, -0.05698400, 0.17125534, 0.00499324, -0.02953288, 0.09178342, -0.05001877, - 0.16157132, -0.02312993, -0.02932195, -0.04914058, -0.07994118, -0.07199102, -0.04517454, 0.01249476, - -0.07525793, -0.00207180, 0.03993115, -0.01676321, -0.00214832, -0.16074482, -0.05012497, -0.00552153, - -0.04302063, -0.00549224, -0.18399858, -0.00767871, -0.02209404, -0.01383207, -0.00082931, -0.19533031, + -0.06044213f, -0.14845914f, 0.02457689f, 0.02091519f, 0.09514004f, -0.10280035f, -0.02087995f, -0.03323204f, + -0.02967127f, -0.13447416f, -0.05191760f, -0.16518904f, 0.02340531f, 0.02176395f, 0.04972410f, -0.07360736f, + 0.12192874f, -0.04081530f, -0.02338044f, -0.05671440f, -0.09475864f, -0.08944942f, -0.03362993f, -0.01683486f, + -0.16770349f, -0.07382569f, 0.06230322f, 0.02215859f, -0.05212611f, -0.03934773f, -0.04748865f, 0.18134241f, + -0.01965741f, -0.02202452f, 0.01973994f, 0.01575558f, 0.04300199f, 0.01436110f, -0.00198062f, -0.09065692f, + -0.02923042f, -0.00748686f, 0.00717049f, 0.02638642f, 0.12174864f, -0.12973398f, -0.11872391f, -0.00549398f, + -0.02386289f, -0.02210563f, -0.03590920f, -0.13728066f, -0.01337939f, 0.01538021f, -0.14687485f, -0.05033565f, + 0.03818212f, -0.04939338f, 0.00961064f, -0.07407621f, -0.09624685f, 0.05594898f, -0.04948713f, -0.01305631f, + -0.03779668f, -0.01469170f, 0.12346989f, 0.02082030f, -0.03449103f, -0.06029151f, -0.09300473f, -0.16308543f, + -0.02370042f, 0.01066893f, -0.06523034f, 0.00497636f, 0.01933458f, -0.00900802f, 0.00430878f, -0.13999483f, + -0.02377289f, 0.01760014f, 0.03896973f, 0.00831112f, 0.15634246f, -0.11109130f, -0.11997811f, -0.02304414f, + -0.01989413f, -0.12763791f, -0.05698400f, 0.17125534f, 0.00499324f, -0.02953288f, 0.09178342f, -0.05001877f, + 0.16157132f, -0.02312993f, -0.02932195f, -0.04914058f, -0.07994118f, -0.07199102f, -0.04517454f, 0.01249476f, + -0.07525793f, -0.00207180f, 0.03993115f, -0.01676321f, 
-0.00214832f, -0.16074482f, -0.05012497f, -0.00552153f, + -0.04302063f, -0.00549224f, -0.18399858f, -0.00767871f, -0.02209404f, -0.01383207f, -0.00082931f, -0.19533031f, }; std::vector expected_mask_index = { @@ -257,7 +265,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e test_case.add_input(segment_ids); test_case.add_expected_output(expected_output); test_case.add_expected_output(expected_mask_index); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_embedding_and_mask) { @@ -276,21 +284,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, }; std::vector expected_output = { - -0.06044213, -0.14845914, 0.02457689, 0.02091519, 0.09514004, -0.10280035, -0.02087995, -0.03323204, - -0.02967127, -0.13447416, -0.05191760, -0.16518904, 0.02340531, 0.02176395, 0.04972410, -0.07360736, - 0.12192874, -0.04081530, -0.02338044, -0.05671440, -0.09475864, -0.08944942, -0.03362993, -0.01683486, - -0.16770349, -0.07382569, 0.06230322, 0.02215859, -0.05212611, -0.03934773, -0.04748865, 0.18134241, - -0.01965741, -0.02202452, 0.01973994, 0.01575558, 0.04300199, 0.01436110, -0.00198062, -0.09065692, - -0.02923042, -0.00748686, 0.00717049, 0.02638642, 0.12174864, -0.12973398, -0.11872391, -0.00549398, - -0.02386289, -0.02210563, -0.03590920, -0.13728066, -0.01337939, 0.01538021, -0.14687485, -0.05033565, - 0.03818212, -0.04939338, 0.00961064, -0.07407621, -0.09624685, 0.05594898, -0.04948713, -0.01305631, - -0.03779668, -0.01469170, 0.12346989, 0.02082030, -0.03449103, -0.06029151, -0.09300473, -0.16308543, - -0.02370042, 0.01066893, -0.06523034, 0.00497636, 0.01933458, -0.00900802, 0.00430878, -0.13999483, - -0.02377289, 0.01760014, 0.03896973, 0.00831112, 0.15634246, -0.11109130, -0.11997811, -0.02304414, - -0.01989413, -0.12763791, -0.05698400, 0.17125534, 0.00499324, -0.02953288, 0.09178342, -0.05001877, - 0.16157132, -0.02312993, -0.02932195, -0.04914058, -0.07994118, -0.07199102, -0.04517454, 0.01249476, - -0.07525793, -0.00207180, 0.03993115, -0.01676321, -0.00214832, -0.16074482, -0.05012497, -0.00552153, - -0.04302063, -0.00549224, -0.18399858, -0.00767871, -0.02209404, -0.01383207, -0.00082931, -0.19533031, + -0.06044213f, -0.14845914f, 0.02457689f, 0.02091519f, 0.09514004f, -0.10280035f, -0.02087995f, -0.03323204f, + -0.02967127f, -0.13447416f, -0.05191760f, -0.16518904f, 0.02340531f, 0.02176395f, 0.04972410f, -0.07360736f, + 0.12192874f, -0.04081530f, -0.02338044f, -0.05671440f, -0.09475864f, -0.08944942f, -0.03362993f, -0.01683486f, + -0.16770349f, -0.07382569f, 0.06230322f, 0.02215859f, -0.05212611f, -0.03934773f, -0.04748865f, 0.18134241f, + -0.01965741f, -0.02202452f, 0.01973994f, 0.01575558f, 0.04300199f, 0.01436110f, -0.00198062f, -0.09065692f, + -0.02923042f, -0.00748686f, 0.00717049f, 0.02638642f, 0.12174864f, -0.12973398f, -0.11872391f, -0.00549398f, + -0.02386289f, -0.02210563f, -0.03590920f, -0.13728066f, -0.01337939f, 0.01538021f, -0.14687485f, -0.05033565f, + 0.03818212f, -0.04939338f, 0.00961064f, -0.07407621f, -0.09624685f, 0.05594898f, -0.04948713f, -0.01305631f, + -0.03779668f, -0.01469170f, 0.12346989f, 0.02082030f, -0.03449103f, -0.06029151f, -0.09300473f, -0.16308543f, + -0.02370042f, 0.01066893f, -0.06523034f, 0.00497636f, 0.01933458f, -0.00900802f, 0.00430878f, -0.13999483f, + -0.02377289f, 0.01760014f, 0.03896973f, 
0.00831112f, 0.15634246f, -0.11109130f, -0.11997811f, -0.02304414f, + -0.01989413f, -0.12763791f, -0.05698400f, 0.17125534f, 0.00499324f, -0.02953288f, 0.09178342f, -0.05001877f, + 0.16157132f, -0.02312993f, -0.02932195f, -0.04914058f, -0.07994118f, -0.07199102f, -0.04517454f, 0.01249476f, + -0.07525793f, -0.00207180f, 0.03993115f, -0.01676321f, -0.00214832f, -0.16074482f, -0.05012497f, -0.00552153f, + -0.04302063f, -0.00549224f, -0.18399858f, -0.00767871f, -0.02209404f, -0.01383207f, -0.00082931f, -0.19533031f, }; std::vector expected_mask_index = { 5, @@ -304,7 +312,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_segment_e test_case.add_input(mask); test_case.add_expected_output(expected_output); test_case.add_expected_output(expected_mask_index); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_dynamic_shapes) { @@ -320,70 +328,71 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_dynamic_shapes 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 1, }; std::vector word_embeddings = { - 0.96980906, 0.65314001, 0.17090958, 0.35815218, 0.75068617, 0.60783064, 0.32504722, 0.03842543, 0.63427407, - 0.95894927, 0.65279031, 0.63505888, 0.99529958, 0.58185035, 0.41436860, 0.47469750, 0.62351012, 0.33800763, - 0.67475230, 0.31720173, 0.77834547, 0.94957107, 0.66252685, 0.01357164, 0.62284607, 0.67365962, 0.97194499, - 0.87819350, 0.50962436, 0.05571469, 0.45115921, 0.01998767, 0.44171092, 0.97958672, 0.35944447, 0.48089352, - 0.68866116, 0.88047588, 0.91823548, 0.21682213, 0.56518888, 0.86510259, 0.50896895, 0.91672295, 0.92115760, - 0.08311249, 0.27771857, 0.00935670, 0.84234208, 0.64717412, + 0.96980906f, 0.65314001f, 0.17090958f, 0.35815218f, 0.75068617f, 0.60783064f, 0.32504722f, 0.03842543f, + 0.63427407f, 0.95894927f, 0.65279031f, 0.63505888f, 0.99529958f, 0.58185035f, 0.41436860f, 0.47469750f, + 0.62351012f, 0.33800763f, 0.67475230f, 0.31720173f, 0.77834547f, 0.94957107f, 0.66252685f, 0.01357164f, + 0.62284607f, 0.67365962f, 0.97194499f, 0.87819350f, 0.50962436f, 0.05571469f, 0.45115921f, 0.01998767f, + 0.44171092f, 0.97958672f, 0.35944447f, 0.48089352f, 0.68866116f, 0.88047588f, 0.91823548f, 0.21682213f, + 0.56518888f, 0.86510259f, 0.50896895f, 0.91672295f, 0.92115760f, 0.08311249f, 0.27771857f, 0.00935670f, + 0.84234208f, 0.64717412f, }; std::vector position_embeddings = { - 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, 0.36980811, 0.14644176, 0.56961840, - 0.70373726, 0.28847644, 0.43328807, 0.75610667, 0.39609829, 0.89603841, 0.63892108, 0.89155442, - 0.68005556, 0.44919774, 0.97857094, 0.11620191, 0.76702368, 0.41182014, 0.67543906, 0.24979627, - 0.31321833, 0.96541619, 0.58846509, 0.65966839, 0.53320622, 0.23053302, 0.39486930, 0.61880857, - 0.47486752, 0.47013220, 0.71607453, 0.28799102, 0.38346222, 0.74916983, 0.87845218, 0.10286336, + 0.84138614f, 0.26473016f, 0.39782074f, 0.55282146f, 0.16494046f, 0.36980811f, 0.14644176f, 0.56961840f, + 0.70373726f, 0.28847644f, 0.43328807f, 0.75610667f, 0.39609829f, 0.89603841f, 0.63892108f, 0.89155442f, + 0.68005556f, 0.44919774f, 0.97857094f, 0.11620191f, 0.76702368f, 0.41182014f, 0.67543906f, 0.24979627f, + 0.31321833f, 0.96541619f, 0.58846509f, 0.65966839f, 0.53320622f, 0.23053302f, 0.39486930f, 0.61880857f, + 0.47486752f, 0.47013220f, 0.71607453f, 0.28799102f, 0.38346222f, 0.74916983f, 0.87845218f, 0.10286336f, }; std::vector segment_embeddings = { - 
0.09237389, - 0.35404667, - 0.55181628, - 0.03362509, - 0.96896178, - 0.32099724, - 0.22126268, - 0.14126390, - 0.09725992, - 0.98404223, - 0.26034093, - 0.53702253, - 0.44792616, - 0.09956909, - 0.35231167, + 0.09237389f, + 0.35404667f, + 0.55181628f, + 0.03362509f, + 0.96896178f, + 0.32099724f, + 0.22126268f, + 0.14126390f, + 0.09725992f, + 0.98404223f, + 0.26034093f, + 0.53702253f, + 0.44792616f, + 0.09956909f, + 0.35231167f, }; std::vector gamma = { - 0.46924916, - 0.84114015, - 0.90464777, - 0.03755938, - 0.50831544, + 0.46924916f, + 0.84114015f, + 0.90464777f, + 0.03755938f, + 0.50831544f, }; std::vector beta = { - 0.16684751, - 0.77905101, - 0.86493331, - 0.41139671, - 0.13997258, + 0.16684751f, + 0.77905101f, + 0.86493331f, + 0.41139671f, + 0.13997258f, }; std::vector mask = { 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, }; std::vector expected_output = { - -0.04089922, 0.35108989, 0.30442458, 0.39546335, 1.15422225, 0.10419128, -0.19301927, 0.01070970, - 0.43977541, 0.89119899, -0.51436460, 1.99256825, 1.41077507, 0.38642293, 0.17583044, 0.03320138, - 1.16508031, -0.24356931, 0.47440714, -0.17844005, 0.20463173, 1.90038323, 1.14138567, 0.34504607, - 0.16403235, -0.24976699, 0.29362509, 0.34502214, 0.41751838, 1.09390712, 0.12354189, 1.83025289, - 1.05569196, 0.34413773, 0.35469764, -0.69760042, 0.76338542, 1.75443077, 0.44126555, 0.18181801, - 0.73277575, 0.45443264, 0.17068321, 0.36591727, 0.72869974, -0.56090516, 0.14415455, 1.47314119, - 0.42908576, 0.73084539, -0.22373237, 2.26550221, 0.05606699, 0.39417523, 0.35234636, 0.78569502, - 0.77521765, -0.65131050, 0.40168875, 0.45527256, 0.38715565, 0.98521245, 2.21446753, 0.36345237, - -0.33269632, 0.36558092, 1.36846578, 1.37523413, 0.33698002, 0.28889543, -0.40639281, 1.01643157, - 0.59668219, 0.39197800, 1.03101778, 0.02551098, -0.03612846, -0.01371557, 0.43444607, 0.96746695, - 0.60583955, -0.10362893, 0.40574494, 0.38046724, 0.87445319, -0.00880148, -0.15437943, 0.08118075, - 0.44650543, 0.85956848, -0.27865338, 2.10837507, 0.04798460, 0.43948367, -0.10185169, 0.19978794, - 1.32323360, 1.20525467, 0.44288942, -0.84200430, 0.52563053, 0.69949460, 0.73987913, 0.34668452, - 0.74545687, 0.57696682, 0.22452033, -0.27099937, 0.39649010, 0.87083614, -0.18965788, 0.58206403, - -0.08108193, 0.42067638, 1.05117214, -0.34287399, 0.20424896, 0.27994895, 0.46011117, 0.70890665, + -0.04089922f, 0.35108989f, 0.30442458f, 0.39546335f, 1.15422225f, 0.10419128f, -0.19301927f, 0.01070970f, + 0.43977541f, 0.89119899f, -0.51436460f, 1.99256825f, 1.41077507f, 0.38642293f, 0.17583044f, 0.03320138f, + 1.16508031f, -0.24356931f, 0.47440714f, -0.17844005f, 0.20463173f, 1.90038323f, 1.14138567f, 0.34504607f, + 0.16403235f, -0.24976699f, 0.29362509f, 0.34502214f, 0.41751838f, 1.09390712f, 0.12354189f, 1.83025289f, + 1.05569196f, 0.34413773f, 0.35469764f, -0.69760042f, 0.76338542f, 1.75443077f, 0.44126555f, 0.18181801f, + 0.73277575f, 0.45443264f, 0.17068321f, 0.36591727f, 0.72869974f, -0.56090516f, 0.14415455f, 1.47314119f, + 0.42908576f, 0.73084539f, -0.22373237f, 2.26550221f, 0.05606699f, 0.39417523f, 0.35234636f, 0.78569502f, + 0.77521765f, -0.65131050f, 0.40168875f, 0.45527256f, 0.38715565f, 0.98521245f, 2.21446753f, 0.36345237f, + -0.33269632f, 0.36558092f, 1.36846578f, 1.37523413f, 0.33698002f, 0.28889543f, -0.40639281f, 1.01643157f, + 0.59668219f, 0.39197800f, 1.03101778f, 0.02551098f, -0.03612846f, -0.01371557f, 0.43444607f, 0.96746695f, + 0.60583955f, -0.10362893f, 0.40574494f, 0.38046724f, 0.87445319f, -0.00880148f, 
-0.15437943f, 0.08118075f, + 0.44650543f, 0.85956848f, -0.27865338f, 2.10837507f, 0.04798460f, 0.43948367f, -0.10185169f, 0.19978794f, + 1.32323360f, 1.20525467f, 0.44288942f, -0.84200430f, 0.52563053f, 0.69949460f, 0.73987913f, 0.34668452f, + 0.74545687f, 0.57696682f, 0.22452033f, -0.27099937f, 0.39649010f, 0.87083614f, -0.18965788f, 0.58206403f, + -0.08108193f, 0.42067638f, 1.05117214f, -0.34287399f, 0.20424896f, 0.27994895f, 0.46011117f, 0.70890665f, }; std::vector expected_mask_index = { 6, @@ -402,7 +411,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_dynamic_shapes test_case.add_input(Shape{3, 8}, mask); test_case.add_expected_output(Shape{3, 8, 5}, expected_output); test_case.add_expected_output(Shape{3}, expected_mask_index); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_diff_seq_len_pos_embed_len) { @@ -418,70 +427,72 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_diff_seq_len_p 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 1, }; std::vector word_embeddings = { - 0.96980906, 0.65314001, 0.17090958, 0.35815218, 0.75068617, 0.60783064, 0.32504722, 0.03842543, 0.63427407, - 0.95894927, 0.65279031, 0.63505888, 0.99529958, 0.58185035, 0.41436860, 0.47469750, 0.62351012, 0.33800763, - 0.67475230, 0.31720173, 0.77834547, 0.94957107, 0.66252685, 0.01357164, 0.62284607, 0.67365962, 0.97194499, - 0.87819350, 0.50962436, 0.05571469, 0.45115921, 0.01998767, 0.44171092, 0.97958672, 0.35944447, 0.48089352, - 0.68866116, 0.88047588, 0.91823548, 0.21682213, 0.56518888, 0.86510259, 0.50896895, 0.91672295, 0.92115760, - 0.08311249, 0.27771857, 0.00935670, 0.84234208, 0.64717412, + 0.96980906f, 0.65314001f, 0.17090958f, 0.35815218f, 0.75068617f, 0.60783064f, 0.32504722f, 0.03842543f, + 0.63427407f, 0.95894927f, 0.65279031f, 0.63505888f, 0.99529958f, 0.58185035f, 0.41436860f, 0.47469750f, + 0.62351012f, 0.33800763f, 0.67475230f, 0.31720173f, 0.77834547f, 0.94957107f, 0.66252685f, 0.01357164f, + 0.62284607f, 0.67365962f, 0.97194499f, 0.87819350f, 0.50962436f, 0.05571469f, 0.45115921f, 0.01998767f, + 0.44171092f, 0.97958672f, 0.35944447f, 0.48089352f, 0.68866116f, 0.88047588f, 0.91823548f, 0.21682213f, + 0.56518888f, 0.86510259f, 0.50896895f, 0.91672295f, 0.92115760f, 0.08311249f, 0.27771857f, 0.00935670f, + 0.84234208f, 0.64717412f, }; std::vector position_embeddings = { - 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, 0.36980811, 0.14644176, 0.56961840, 0.70373726, - 0.28847644, 0.43328807, 0.75610667, 0.39609829, 0.89603841, 0.63892108, 0.89155442, 0.68005556, 0.44919774, - 0.97857094, 0.11620191, 0.76702368, 0.41182014, 0.67543906, 0.24979627, 0.31321833, 0.96541619, 0.58846509, - 0.65966839, 0.53320622, 0.23053302, 0.39486930, 0.61880857, 0.47486752, 0.47013220, 0.71607453, 0.28799102, - 0.38346222, 0.74916983, 0.87845218, 0.10286336, 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, + 0.84138614f, 0.26473016f, 0.39782074f, 0.55282146f, 0.16494046f, 0.36980811f, 0.14644176f, 0.56961840f, + 0.70373726f, 0.28847644f, 0.43328807f, 0.75610667f, 0.39609829f, 0.89603841f, 0.63892108f, 0.89155442f, + 0.68005556f, 0.44919774f, 0.97857094f, 0.11620191f, 0.76702368f, 0.41182014f, 0.67543906f, 0.24979627f, + 0.31321833f, 0.96541619f, 0.58846509f, 0.65966839f, 0.53320622f, 0.23053302f, 0.39486930f, 0.61880857f, + 0.47486752f, 0.47013220f, 0.71607453f, 0.28799102f, 0.38346222f, 0.74916983f, 0.87845218f, 
0.10286336f, + 0.84138614f, 0.26473016f, 0.39782074f, 0.55282146f, 0.16494046f, }; std::vector segment_embeddings = { - 0.09237389, - 0.35404667, - 0.55181628, - 0.03362509, - 0.96896178, - 0.32099724, - 0.22126268, - 0.14126390, - 0.09725992, - 0.98404223, - 0.26034093, - 0.53702253, - 0.44792616, - 0.09956909, - 0.35231167, + 0.09237389f, + 0.35404667f, + 0.55181628f, + 0.03362509f, + 0.96896178f, + 0.32099724f, + 0.22126268f, + 0.14126390f, + 0.09725992f, + 0.98404223f, + 0.26034093f, + 0.53702253f, + 0.44792616f, + 0.09956909f, + 0.35231167f, }; std::vector gamma = { - 0.46924916, - 0.84114015, - 0.90464777, - 0.03755938, - 0.50831544, + 0.46924916f, + 0.84114015f, + 0.90464777f, + 0.03755938f, + 0.50831544f, }; std::vector beta = { - 0.16684751, - 0.77905101, - 0.86493331, - 0.41139671, - 0.13997258, + 0.16684751f, + 0.77905101f, + 0.86493331f, + 0.41139671f, + 0.13997258f, }; std::vector mask = { 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, }; std::vector expected_output = { - -0.04089922, 0.35108989, 0.30442458, 0.39546335, 1.15422225, 0.10419128, -0.19301927, 0.01070970, - 0.43977541, 0.89119899, -0.51436460, 1.99256825, 1.41077507, 0.38642293, 0.17583044, 0.03320138, - 1.16508031, -0.24356931, 0.47440714, -0.17844005, 0.20463173, 1.90038323, 1.14138567, 0.34504607, - 0.16403235, -0.24976699, 0.29362509, 0.34502214, 0.41751838, 1.09390712, 0.12354189, 1.83025289, - 1.05569196, 0.34413773, 0.35469764, -0.69760042, 0.76338542, 1.75443077, 0.44126555, 0.18181801, - 0.73277575, 0.45443264, 0.17068321, 0.36591727, 0.72869974, -0.56090516, 0.14415455, 1.47314119, - 0.42908576, 0.73084539, -0.22373237, 2.26550221, 0.05606699, 0.39417523, 0.35234636, 0.78569502, - 0.77521765, -0.65131050, 0.40168875, 0.45527256, 0.38715565, 0.98521245, 2.21446753, 0.36345237, - -0.33269632, 0.36558092, 1.36846578, 1.37523413, 0.33698002, 0.28889543, -0.40639281, 1.01643157, - 0.59668219, 0.39197800, 1.03101778, 0.02551098, -0.03612846, -0.01371557, 0.43444607, 0.96746695, - 0.60583955, -0.10362893, 0.40574494, 0.38046724, 0.87445319, -0.00880148, -0.15437943, 0.08118075, - 0.44650543, 0.85956848, -0.27865338, 2.10837507, 0.04798460, 0.43948367, -0.10185169, 0.19978794, - 1.32323360, 1.20525467, 0.44288942, -0.84200430, 0.52563053, 0.69949460, 0.73987913, 0.34668452, - 0.74545687, 0.57696682, 0.22452033, -0.27099937, 0.39649010, 0.87083614, -0.18965788, 0.58206403, - -0.08108193, 0.42067638, 1.05117214, -0.34287399, 0.20424896, 0.27994895, 0.46011117, 0.70890665, + -0.04089922f, 0.35108989f, 0.30442458f, 0.39546335f, 1.15422225f, 0.10419128f, -0.19301927f, 0.01070970f, + 0.43977541f, 0.89119899f, -0.51436460f, 1.99256825f, 1.41077507f, 0.38642293f, 0.17583044f, 0.03320138f, + 1.16508031f, -0.24356931f, 0.47440714f, -0.17844005f, 0.20463173f, 1.90038323f, 1.14138567f, 0.34504607f, + 0.16403235f, -0.24976699f, 0.29362509f, 0.34502214f, 0.41751838f, 1.09390712f, 0.12354189f, 1.83025289f, + 1.05569196f, 0.34413773f, 0.35469764f, -0.69760042f, 0.76338542f, 1.75443077f, 0.44126555f, 0.18181801f, + 0.73277575f, 0.45443264f, 0.17068321f, 0.36591727f, 0.72869974f, -0.56090516f, 0.14415455f, 1.47314119f, + 0.42908576f, 0.73084539f, -0.22373237f, 2.26550221f, 0.05606699f, 0.39417523f, 0.35234636f, 0.78569502f, + 0.77521765f, -0.65131050f, 0.40168875f, 0.45527256f, 0.38715565f, 0.98521245f, 2.21446753f, 0.36345237f, + -0.33269632f, 0.36558092f, 1.36846578f, 1.37523413f, 0.33698002f, 0.28889543f, -0.40639281f, 1.01643157f, + 0.59668219f, 0.39197800f, 1.03101778f, 0.02551098f, -0.03612846f, 
-0.01371557f, 0.43444607f, 0.96746695f, + 0.60583955f, -0.10362893f, 0.40574494f, 0.38046724f, 0.87445319f, -0.00880148f, -0.15437943f, 0.08118075f, + 0.44650543f, 0.85956848f, -0.27865338f, 2.10837507f, 0.04798460f, 0.43948367f, -0.10185169f, 0.19978794f, + 1.32323360f, 1.20525467f, 0.44288942f, -0.84200430f, 0.52563053f, 0.69949460f, 0.73987913f, 0.34668452f, + 0.74545687f, 0.57696682f, 0.22452033f, -0.27099937f, 0.39649010f, 0.87083614f, -0.18965788f, 0.58206403f, + -0.08108193f, 0.42067638f, 1.05117214f, -0.34287399f, 0.20424896f, 0.27994895f, 0.46011117f, 0.70890665f, }; std::vector expected_mask_index = { 6, @@ -500,7 +511,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_diff_seq_len_p test_case.add_input(Shape{3, 8}, mask); test_case.add_expected_output(Shape{3, 8, 5}, expected_output); test_case.add_expected_output(Shape{3}, expected_mask_index); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_position_ids) { @@ -516,50 +527,52 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_position_ 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 1, 1, 2, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 1, }; std::vector word_embeddings = { - 0.96980906, 0.65314001, 0.17090958, 0.35815218, 0.75068617, 0.60783064, 0.32504722, 0.03842543, 0.63427407, - 0.95894927, 0.65279031, 0.63505888, 0.99529958, 0.58185035, 0.41436860, 0.47469750, 0.62351012, 0.33800763, - 0.67475230, 0.31720173, 0.77834547, 0.94957107, 0.66252685, 0.01357164, 0.62284607, 0.67365962, 0.97194499, - 0.87819350, 0.50962436, 0.05571469, 0.45115921, 0.01998767, 0.44171092, 0.97958672, 0.35944447, 0.48089352, - 0.68866116, 0.88047588, 0.91823548, 0.21682213, 0.56518888, 0.86510259, 0.50896895, 0.91672295, 0.92115760, - 0.08311249, 0.27771857, 0.00935670, 0.84234208, 0.64717412, + 0.96980906f, 0.65314001f, 0.17090958f, 0.35815218f, 0.75068617f, 0.60783064f, 0.32504722f, 0.03842543f, + 0.63427407f, 0.95894927f, 0.65279031f, 0.63505888f, 0.99529958f, 0.58185035f, 0.41436860f, 0.47469750f, + 0.62351012f, 0.33800763f, 0.67475230f, 0.31720173f, 0.77834547f, 0.94957107f, 0.66252685f, 0.01357164f, + 0.62284607f, 0.67365962f, 0.97194499f, 0.87819350f, 0.50962436f, 0.05571469f, 0.45115921f, 0.01998767f, + 0.44171092f, 0.97958672f, 0.35944447f, 0.48089352f, 0.68866116f, 0.88047588f, 0.91823548f, 0.21682213f, + 0.56518888f, 0.86510259f, 0.50896895f, 0.91672295f, 0.92115760f, 0.08311249f, 0.27771857f, 0.00935670f, + 0.84234208f, 0.64717412f, }; std::vector position_embeddings = { - 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, 0.43328807, 0.75610667, 0.39609829, 0.89603841, - 0.63892108, 0.36980811, 0.14644176, 0.56961840, 0.70373726, 0.28847644, 0.89155442, 0.68005556, 0.44919774, - 0.97857094, 0.11620191, 0.76702368, 0.41182014, 0.67543906, 0.24979627, 0.31321833, 0.28799102, 0.38346222, - 0.74916983, 0.87845218, 0.10286336, 0.96541619, 0.58846509, 0.65966839, 0.53320622, 0.23053302, 0.39486930, - 0.61880857, 0.47486752, 0.47013220, 0.71607453, 0.84138614, 0.26473016, 0.39782074, 0.55282146, 0.16494046, + 0.84138614f, 0.26473016f, 0.39782074f, 0.55282146f, 0.16494046f, 0.43328807f, 0.75610667f, 0.39609829f, + 0.89603841f, 0.63892108f, 0.36980811f, 0.14644176f, 0.56961840f, 0.70373726f, 0.28847644f, 0.89155442f, + 0.68005556f, 0.44919774f, 0.97857094f, 0.11620191f, 0.76702368f, 0.41182014f, 0.67543906f, 0.24979627f, + 0.31321833f, 0.28799102f, 0.38346222f, 0.74916983f, 0.87845218f, 0.10286336f, 0.96541619f, 
0.58846509f, + 0.65966839f, 0.53320622f, 0.23053302f, 0.39486930f, 0.61880857f, 0.47486752f, 0.47013220f, 0.71607453f, + 0.84138614f, 0.26473016f, 0.39782074f, 0.55282146f, 0.16494046f, }; std::vector segment_embeddings = { - 0.09237389, - 0.35404667, - 0.55181628, - 0.03362509, - 0.96896178, - 0.32099724, - 0.22126268, - 0.14126390, - 0.09725992, - 0.98404223, - 0.26034093, - 0.53702253, - 0.44792616, - 0.09956909, - 0.35231167, + 0.09237389f, + 0.35404667f, + 0.55181628f, + 0.03362509f, + 0.96896178f, + 0.32099724f, + 0.22126268f, + 0.14126390f, + 0.09725992f, + 0.98404223f, + 0.26034093f, + 0.53702253f, + 0.44792616f, + 0.09956909f, + 0.35231167f, }; std::vector gamma = { - 0.46924916, - 0.84114015, - 0.90464777, - 0.03755938, - 0.50831544, + 0.46924916f, + 0.84114015f, + 0.90464777f, + 0.03755938f, + 0.50831544f, }; std::vector beta = { - 0.16684751, - 0.77905101, - 0.86493331, - 0.41139671, - 0.13997258, + 0.16684751f, + 0.77905101f, + 0.86493331f, + 0.41139671f, + 0.13997258f, }; std::vector mask = { 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, @@ -568,21 +581,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_position_ 0, 2, 1, 3, 4, 6, 7, 5, 8, 2, 1, 3, 4, 6, 7, 5, 0, 2, 1, 3, 4, 6, 7, 5, }; std::vector expected_output = { - -0.04089922, 0.35108989, 0.30442458, 0.39546335, 1.15422225, 0.10419128, -0.19301927, 0.01070970, - 0.43977541, 0.89119899, -0.51436460, 1.99256825, 1.41077507, 0.38642293, 0.17583044, 0.03320138, - 1.16508031, -0.24356931, 0.47440714, -0.17844005, 0.20463173, 1.90038323, 1.14138567, 0.34504607, - 0.16403235, -0.24976699, 0.29362509, 0.34502214, 0.41751838, 1.09390712, 0.12354189, 1.83025289, - 1.05569196, 0.34413773, 0.35469764, -0.69760042, 0.76338542, 1.75443077, 0.44126555, 0.18181801, - 0.73277575, 0.45443264, 0.17068321, 0.36591727, 0.72869974, -0.56090516, 0.14415455, 1.47314119, - 0.42908576, 0.73084539, -0.22373237, 2.26550221, 0.05606699, 0.39417523, 0.35234636, 0.78569502, - 0.77521765, -0.65131050, 0.40168875, 0.45527256, 0.38715565, 0.98521245, 2.21446753, 0.36345237, - -0.33269632, 0.36558092, 1.36846578, 1.37523413, 0.33698002, 0.28889543, -0.40639281, 1.01643157, - 0.59668219, 0.39197800, 1.03101778, 0.02551098, -0.03612846, -0.01371557, 0.43444607, 0.96746695, - 0.60583955, -0.10362893, 0.40574494, 0.38046724, 0.87445319, -0.00880148, -0.15437943, 0.08118075, - 0.44650543, 0.85956848, -0.27865338, 2.10837507, 0.04798460, 0.43948367, -0.10185169, 0.19978794, - 1.32323360, 1.20525467, 0.44288942, -0.84200430, 0.52563053, 0.69949460, 0.73987913, 0.34668452, - 0.74545687, 0.57696682, 0.22452033, -0.27099937, 0.39649010, 0.87083614, -0.18965788, 0.58206403, - -0.08108193, 0.42067638, 1.05117214, -0.34287399, 0.20424896, 0.27994895, 0.46011117, 0.70890665, + -0.04089922f, 0.35108989f, 0.30442458f, 0.39546335f, 1.15422225f, 0.10419128f, -0.19301927f, 0.01070970f, + 0.43977541f, 0.89119899f, -0.51436460f, 1.99256825f, 1.41077507f, 0.38642293f, 0.17583044f, 0.03320138f, + 1.16508031f, -0.24356931f, 0.47440714f, -0.17844005f, 0.20463173f, 1.90038323f, 1.14138567f, 0.34504607f, + 0.16403235f, -0.24976699f, 0.29362509f, 0.34502214f, 0.41751838f, 1.09390712f, 0.12354189f, 1.83025289f, + 1.05569196f, 0.34413773f, 0.35469764f, -0.69760042f, 0.76338542f, 1.75443077f, 0.44126555f, 0.18181801f, + 0.73277575f, 0.45443264f, 0.17068321f, 0.36591727f, 0.72869974f, -0.56090516f, 0.14415455f, 1.47314119f, + 0.42908576f, 0.73084539f, -0.22373237f, 2.26550221f, 0.05606699f, 0.39417523f, 0.35234636f, 0.78569502f, + 
0.77521765f, -0.65131050f, 0.40168875f, 0.45527256f, 0.38715565f, 0.98521245f, 2.21446753f, 0.36345237f, + -0.33269632f, 0.36558092f, 1.36846578f, 1.37523413f, 0.33698002f, 0.28889543f, -0.40639281f, 1.01643157f, + 0.59668219f, 0.39197800f, 1.03101778f, 0.02551098f, -0.03612846f, -0.01371557f, 0.43444607f, 0.96746695f, + 0.60583955f, -0.10362893f, 0.40574494f, 0.38046724f, 0.87445319f, -0.00880148f, -0.15437943f, 0.08118075f, + 0.44650543f, 0.85956848f, -0.27865338f, 2.10837507f, 0.04798460f, 0.43948367f, -0.10185169f, 0.19978794f, + 1.32323360f, 1.20525467f, 0.44288942f, -0.84200430f, 0.52563053f, 0.69949460f, 0.73987913f, 0.34668452f, + 0.74545687f, 0.57696682f, 0.22452033f, -0.27099937f, 0.39649010f, 0.87083614f, -0.18965788f, 0.58206403f, + -0.08108193f, 0.42067638f, 1.05117214f, -0.34287399f, 0.20424896f, 0.27994895f, 0.46011117f, 0.70890665f, }; std::vector expected_mask_index = { 6, @@ -602,7 +615,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_embed_layer_normalization_with_position_ test_case.add_input(Shape{3, 8}, position_ids); test_case.add_expected_output(Shape{3, 8, 5}, expected_output); test_case.add_expected_output(Shape{3}, expected_mask_index); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention) { @@ -612,20 +625,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.91475844, 0.91523546, 0.82536930, 0.37491974, 0.22384071, 0.05941105, 0.01902100, 0.70131350, - 0.09603709, 0.44200060, 0.53106076, 0.79464376, 0.35469049, 0.25225943, 0.25179818, 0.29592562, - 0.24836586, 0.65088797, 0.93126643, 0.67980725, 0.85708112, 0.59808528, 0.46321425, 0.19301885, + 0.91475844f, 0.91523546f, 0.82536930f, 0.37491974f, 0.22384071f, 0.05941105f, 0.01902100f, 0.70131350f, + 0.09603709f, 0.44200060f, 0.53106076f, 0.79464376f, 0.35469049f, 0.25225943f, 0.25179818f, 0.29592562f, + 0.24836586f, 0.65088797f, 0.93126643f, 0.67980725f, 0.85708112f, 0.59808528f, 0.46321425f, 0.19301885f, }; std::vector output = { - 0.07966283, 0.10783536, -0.19424979, 0.54514766, 0.07965867, 0.10783093, -0.19424866, 0.54510003, - 0.07965846, 0.10783067, -0.19424550, 0.54509139, 0.07966217, 0.10783640, -0.19424903, 0.54512268, - 0.06940663, 0.10962760, -0.19698445, 0.53492010, 0.06940675, 0.10962828, -0.19698484, 0.53492326, - 0.06940714, 0.10963022, -0.19698712, 0.53494006, 0.06940673, 0.10962812, -0.19698519, 0.53492481, + 0.07966283f, 0.10783536f, -0.19424979f, 0.54514766f, 0.07965867f, 0.10783093f, -0.19424866f, 0.54510003f, + 0.07965846f, 0.10783067f, -0.19424550f, 0.54509139f, 0.07966217f, 0.10783640f, -0.19424903f, 0.54512268f, + 0.06940663f, 0.10962760f, -0.19698445f, 0.53492010f, 0.06940675f, 0.10962828f, -0.19698484f, 0.53492326f, + 0.06940714f, 0.10963022f, -0.19698712f, 0.53494006f, 0.06940673f, 0.10962812f, -0.19698519f, 0.53492481f, }; test_case.add_input(input); test_case.add_expected_output(output); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_qkv_hidden_sizes) { @@ -636,24 +649,24 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_qkv_hidden_sizes) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.56477863, 0.60309958, 0.35158035, 0.03123519, 0.81918180, 0.76905495, 0.47219241, 0.72016627, - 0.59377003, 0.91380632, 0.56797302, 0.34846428, 0.83839595, 0.16394103, 0.34676281, 0.09161621, - 0.45562279, 
0.23317528, 0.37197968, 0.06727808, 0.08500192, 0.84915495, 0.68266946, 0.00227691, + 0.56477863f, 0.60309958f, 0.35158035f, 0.03123519f, 0.81918180f, 0.76905495f, 0.47219241f, 0.72016627f, + 0.59377003f, 0.91380632f, 0.56797302f, 0.34846428f, 0.83839595f, 0.16394103f, 0.34676281f, 0.09161621f, + 0.45562279f, 0.23317528f, 0.37197968f, 0.06727808f, 0.08500192f, 0.84915495f, 0.68266946f, 0.00227691f, }; std::vector output = { - -0.59370947, -0.30300471, 0.12048547, -0.09029539, 0.08041390, 0.10250041, -0.19381392, 0.55126983, - -0.59370828, -0.30301332, 0.12049319, -0.09029691, 0.08041921, 0.10250521, -0.19381438, 0.55127531, - -0.59370869, -0.30301058, 0.12049074, -0.09029643, 0.08041564, 0.10250199, -0.19381410, 0.55127168, - -0.59370929, -0.30300608, 0.12048667, -0.09029562, 0.08041184, 0.10249855, -0.19381374, 0.55126774, - -0.59681994, -0.26327702, 0.07638434, -0.06311120, 0.06671587, 0.10916986, -0.19412412, 0.51977092, - -0.59682053, -0.26328400, 0.07638102, -0.06311222, 0.06671817, 0.10917170, -0.19412397, 0.51977223, - -0.59682077, -0.26328647, 0.07637984, -0.06311259, 0.06671739, 0.10917108, -0.19412403, 0.51977175, - -0.59682101, -0.26328778, 0.07637922, -0.06311278, 0.06671065, 0.10916568, -0.19412443, 0.51976782, + -0.59370947f, -0.30300471f, 0.12048547f, -0.09029539f, 0.08041390f, 0.10250041f, -0.19381392f, 0.55126983f, + -0.59370828f, -0.30301332f, 0.12049319f, -0.09029691f, 0.08041921f, 0.10250521f, -0.19381438f, 0.55127531f, + -0.59370869f, -0.30301058f, 0.12049074f, -0.09029643f, 0.08041564f, 0.10250199f, -0.19381410f, 0.55127168f, + -0.59370929f, -0.30300608f, 0.12048667f, -0.09029562f, 0.08041184f, 0.10249855f, -0.19381374f, 0.55126774f, + -0.59681994f, -0.26327702f, 0.07638434f, -0.06311120f, 0.06671587f, 0.10916986f, -0.19412412f, 0.51977092f, + -0.59682053f, -0.26328400f, 0.07638102f, -0.06311222f, 0.06671817f, 0.10917170f, -0.19412397f, 0.51977223f, + -0.59682077f, -0.26328647f, 0.07637984f, -0.06311259f, 0.06671739f, 0.10917108f, -0.19412403f, 0.51977175f, + -0.59682101f, -0.26328778f, 0.07637922f, -0.06311278f, 0.06671065f, 0.10916568f, -0.19412443f, 0.51976782f, }; test_case.add_input(input); test_case.add_expected_output(output); - test_case.run_with_tolerance_as_fp(1e-4); + test_case.run_with_tolerance_as_fp(1e-4f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_unidirectional) { @@ -664,34 +677,34 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_unidirectional) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.89578921, 0.42421508, 0.35630688, 0.77461642, 0.65753633, 0.09723099, 0.62597734, 0.72117692, - 0.57636845, 0.17104276, 0.13245547, 0.59879875, 0.15624641, 0.44903454, 0.50483286, 0.92975074, - 0.36934483, 0.29919949, 0.57185954, 0.83036488, 0.08384345, 0.20378476, 0.74684393, 0.46716982, + 0.89578921f, 0.42421508f, 0.35630688f, 0.77461642f, 0.65753633f, 0.09723099f, 0.62597734f, 0.72117692f, + 0.57636845f, 0.17104276f, 0.13245547f, 0.59879875f, 0.15624641f, 0.44903454f, 0.50483286f, 0.92975074f, + 0.36934483f, 0.29919949f, 0.57185954f, 0.83036488f, 0.08384345f, 0.20378476f, 0.74684393f, 0.46716982f, }; std::vector output = { - 0.05604819, 0.09000472, -0.19437021, 0.52487367, 0.06211422, 0.08740954, -0.19139624, 0.52762908, - 0.06708897, 0.08992603, -0.19214047, 0.53631783, 0.06896879, 0.10248676, -0.19485690, 0.53477794, - 0.08577005, 0.12807365, -0.19762954, 0.54432857, 0.06929274, 0.10893210, -0.19599904, 0.53184807, - 0.07348281, 0.10215081, -0.19280069, 0.53552240, 0.07861833, 0.10517240, -0.19285706, 
0.54126489,
+        0.05604819f, 0.09000472f, -0.19437021f, 0.52487367f, 0.06211422f, 0.08740954f, -0.19139624f, 0.52762908f,
+        0.06708897f, 0.08992603f, -0.19214047f, 0.53631783f, 0.06896879f, 0.10248676f, -0.19485690f, 0.53477794f,
+        0.08577005f, 0.12807365f, -0.19762954f, 0.54432857f, 0.06929274f, 0.10893210f, -0.19599904f, 0.53184807f,
+        0.07348281f, 0.10215081f, -0.19280069f, 0.53552240f, 0.07861833f, 0.10517240f, -0.19285706f, 0.54126489f,
     };
     std::vector<float> present = {
-        -0.60427380, -0.25958878, -0.59609234, -0.24055196, -0.59613681, -0.30088067, -0.59633607, -0.33270463,
-        0.06899665, -0.09284544, 0.08059876, -0.06146053, 0.11841078, -0.10019838, 0.10605468, -0.09273906,
-        -0.59036821, -0.32410735, -0.60532302, -0.25127757, -0.58926487, -0.25271094, -0.58640373, -0.31730092,
-        0.12509561, -0.07968873, 0.06005794, -0.08937149, 0.10523240, -0.05083811, 0.14162725, -0.07438751,
-        0.05604819, 0.09000472, 0.06819826, 0.08480665, 0.07700446, 0.09494394, 0.07459175, 0.14003153,
-        -0.19437021, 0.52487367, -0.18843602, 0.53037173, -0.19362189, 0.55360907, -0.20299932, 0.53020388,
-        0.08577005, 0.12807365, 0.05276009, 0.08972625, 0.08190014, 0.08852972, 0.09400313, 0.11423884,
-        -0.19762954, 0.54432857, -0.19435294, 0.51924801, -0.18643703, 0.54280555, -0.19302703, 0.55837619,
+        -0.60427380f, -0.25958878f, -0.59609234f, -0.24055196f, -0.59613681f, -0.30088067f, -0.59633607f, -0.33270463f,
+        0.06899665f, -0.09284544f, 0.08059876f, -0.06146053f, 0.11841078f, -0.10019838f, 0.10605468f, -0.09273906f,
+        -0.59036821f, -0.32410735f, -0.60532302f, -0.25127757f, -0.58926487f, -0.25271094f, -0.58640373f, -0.31730092f,
+        0.12509561f, -0.07968873f, 0.06005794f, -0.08937149f, 0.10523240f, -0.05083811f, 0.14162725f, -0.07438751f,
+        0.05604819f, 0.09000472f, 0.06819826f, 0.08480665f, 0.07700446f, 0.09494394f, 0.07459175f, 0.14003153f,
+        -0.19437021f, 0.52487367f, -0.18843602f, 0.53037173f, -0.19362189f, 0.55360907f, -0.20299932f, 0.53020388f,
+        0.08577005f, 0.12807365f, 0.05276009f, 0.08972625f, 0.08190014f, 0.08852972f, 0.09400313f, 0.11423884f,
+        -0.19762954f, 0.54432857f, -0.19435294f, 0.51924801f, -0.18643703f, 0.54280555f, -0.19302703f, 0.55837619f,
     };
 
     test_case.add_input(input);
     test_case.add_expected_output(output);
     test_case.add_expected_output(present);
-    test_case.run_with_tolerance_as_fp(1e-7);
+    test_case.run_with_tolerance_as_fp(1e-7f);
 }
 
 NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_1) {
     const auto function =
         onnx_import::import_onnx_model(file_util::path_join(CommonTestUtils::getExecutableDirectory(),
                                                             SERIALIZED_ZOO,
@@ -699,29 +712,29 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_1) {
     auto test_case = test::TestCase(function, s_device);
 
     std::vector<float> input = {
-        0.02841483, 0.47845092, 0.14633700, 0.54597300, 0.40160629, 0.55281311, 0.14931096, 0.64483738,
-        0.96559167, 0.05262021, 0.12391864, 0.20093553, 0.74290562, 0.19367455, 0.19253619, 0.41593507,
-        0.91188699, 0.61606920, 0.72673517, 0.86981291, 0.19963337, 0.22747350, 0.34308898, 0.57267183,
+        0.02841483f, 0.47845092f, 0.14633700f, 0.54597300f, 0.40160629f, 0.55281311f, 0.14931096f, 0.64483738f,
+        0.96559167f, 0.05262021f, 0.12391864f, 0.20093553f, 0.74290562f, 0.19367455f, 0.19253619f, 0.41593507f,
+        0.91188699f, 0.61606920f, 0.72673517f, 0.86981291f, 0.19963337f, 0.22747350f, 0.34308898f, 0.57267183f,
     };
     std::vector<int> mask_index = {
         0,
         1,
     };
     std::vector<float> output = {
-        0.08298690, 0.12711772, -0.19757506, 0.54029012, 0.08298548, 0.12711433,
-0.19757731, 0.54031140, - 0.08298430, 0.12711799, -0.19757695, 0.54031777, 0.08298548, 0.12711433, -0.19757444, 0.54028159, - 0.05380550, 0.10459180, -0.19593412, 0.50907606, 0.05380550, 0.10459180, -0.19593412, 0.50907606, - 0.05380550, 0.10459180, -0.19593412, 0.50907606, 0.05380550, 0.10459180, -0.19593412, 0.50907606, + 0.08298690f, 0.12711772f, -0.19757506f, 0.54029012f, 0.08298548f, 0.12711433f, -0.19757731f, 0.54031140f, + 0.08298430f, 0.12711799f, -0.19757695f, 0.54031777f, 0.08298548f, 0.12711433f, -0.19757444f, 0.54028159f, + 0.05380550f, 0.10459180f, -0.19593412f, 0.50907606f, 0.05380550f, 0.10459180f, -0.19593412f, 0.50907606f, + 0.05380550f, 0.10459180f, -0.19593412f, 0.50907606f, 0.05380550f, 0.10459180f, -0.19593412f, 0.50907606f, }; std::vector present = { - -0.58437425, -0.29483819, -0.59927911, -0.30336475, -0.59104657, -0.37327260, -0.59078789, -0.29863101, - 0.11751597, -0.04114649, 0.09933343, -0.09884726, 0.16250694, -0.12028439, 0.09319257, -0.05129660, - -0.60341775, -0.25221461, -0.58933026, -0.31912822, -0.59271193, -0.25470981, -0.59399152, -0.32643768, - 0.05398282, -0.07468132, 0.14743008, -0.09407346, 0.10399222, -0.06682440, 0.11632499, -0.08986320, - 0.09104910, 0.12973849, 0.06917210, 0.11059431, 0.09356256, 0.12594685, 0.07814129, 0.14221822, - -0.19329809, 0.53526556, -0.19787431, 0.53673857, -0.20045389, 0.57165766, -0.19869246, 0.51749766, - 0.05380550, 0.10459180, 0.09169570, 0.09892380, 0.07746917, 0.08042616, 0.07953370, 0.12909687, - -0.19593412, 0.50907606, -0.19202785, 0.56904894, -0.18689045, 0.54643762, -0.19969353, 0.53976399, + -0.58437425f, -0.29483819f, -0.59927911f, -0.30336475f, -0.59104657f, -0.37327260f, -0.59078789f, -0.29863101f, + 0.11751597f, -0.04114649f, 0.09933343f, -0.09884726f, 0.16250694f, -0.12028439f, 0.09319257f, -0.05129660f, + -0.60341775f, -0.25221461f, -0.58933026f, -0.31912822f, -0.59271193f, -0.25470981f, -0.59399152f, -0.32643768f, + 0.05398282f, -0.07468132f, 0.14743008f, -0.09407346f, 0.10399222f, -0.06682440f, 0.11632499f, -0.08986320f, + 0.09104910f, 0.12973849f, 0.06917210f, 0.11059431f, 0.09356256f, 0.12594685f, 0.07814129f, 0.14221822f, + -0.19329809f, 0.53526556f, -0.19787431f, 0.53673857f, -0.20045389f, 0.57165766f, -0.19869246f, 0.51749766f, + 0.05380550f, 0.10459180f, 0.09169570f, 0.09892380f, 0.07746917f, 0.08042616f, 0.07953370f, 0.12909687f, + -0.19593412f, 0.50907606f, -0.19202785f, 0.56904894f, -0.18689045f, 0.54643762f, -0.19969353f, 0.53976399f, }; test_case.add_input(input); @@ -739,9 +752,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_2) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.75259578, 0.81492645, 0.46713001, 0.29483622, 0.06768602, 0.95105755, 0.32065326, 0.52417183, - 0.73136383, 0.77176476, 0.60997742, 0.64625764, 0.16311000, 0.89680773, 0.01331447, 0.42468646, - 0.58711547, 0.00345124, 0.13053808, 0.46278623, 0.13786320, 0.65182054, 0.74864876, 0.81506181, + 0.75259578f, 0.81492645f, 0.46713001f, 0.29483622f, 0.06768602f, 0.95105755f, 0.32065326f, 0.52417183f, + 0.73136383f, 0.77176476f, 0.60997742f, 0.64625764f, 0.16311000f, 0.89680773f, 0.01331447f, 0.42468646f, + 0.58711547f, 0.00345124f, 0.13053808f, 0.46278623f, 0.13786320f, 0.65182054f, 0.74864876f, 0.81506181f, }; std::vector mask_index = { 3, @@ -750,20 +763,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_2) { 1, }; std::vector output = { - 0.07524174, 0.11320241, -0.19909523, 0.54785377, 0.06825337, 0.13981669, -0.20774621, 0.53718704, - 0.07531278, 
0.12957911, -0.20330518, 0.54547405, 0.07531209, 0.12958010, -0.20330583, 0.54547292, - 0.08900890, 0.11150353, -0.18931937, 0.53757656, 0.07915881, 0.10416336, -0.18914750, 0.52921104, - 0.08285815, 0.11462159, -0.19115375, 0.53077918, 0.08285838, 0.11462225, -0.19115454, 0.53077984, + 0.07524174f, 0.11320241f, -0.19909523f, 0.54785377f, 0.06825337f, 0.13981669f, -0.20774621f, 0.53718704f, + 0.07531278f, 0.12957911f, -0.20330518f, 0.54547405f, 0.07531209f, 0.12958010f, -0.20330583f, 0.54547292f, + 0.08900890f, 0.11150353f, -0.18931937f, 0.53757656f, 0.07915881f, 0.10416336f, -0.18914750f, 0.52921104f, + 0.08285815f, 0.11462159f, -0.19115375f, 0.53077918f, 0.08285838f, 0.11462225f, -0.19115454f, 0.53077984f, }; std::vector present = { - -0.59630549, -0.28110915, -0.60274345, -0.36154836, -0.59437746, -0.33717164, -0.60134649, -0.29849592, - 0.11169122, -0.09345293, 0.11103803, -0.13096604, 0.13131849, -0.10597084, 0.10463209, -0.11332577, - -0.57949269, -0.27235535, -0.58941406, -0.25372508, -0.58658379, -0.28718373, -0.59821802, -0.32433146, - 0.13244939, -0.02865628, 0.09308393, -0.04083736, 0.10948701, -0.04423397, 0.13060363, -0.12316251, - 0.07509718, 0.08392500, 0.06825337, 0.13981669, 0.08239168, 0.11931328, 0.06770951, 0.09240761, - -0.19074154, 0.55260652, -0.20774621, 0.53718704, -0.19888818, 0.55371630, -0.19559640, 0.54754448, - 0.09983939, 0.10603377, 0.07915881, 0.10416336, 0.08655046, 0.12505992, 0.07738422, 0.09509270, - -0.18571433, 0.55095005, -0.18914750, 0.52921104, -0.19315663, 0.53234470, -0.19601485, 0.56322992, + -0.59630549f, -0.28110915f, -0.60274345f, -0.36154836f, -0.59437746f, -0.33717164f, -0.60134649f, -0.29849592f, + 0.11169122f, -0.09345293f, 0.11103803f, -0.13096604f, 0.13131849f, -0.10597084f, 0.10463209f, -0.11332577f, + -0.57949269f, -0.27235535f, -0.58941406f, -0.25372508f, -0.58658379f, -0.28718373f, -0.59821802f, -0.32433146f, + 0.13244939f, -0.02865628f, 0.09308393f, -0.04083736f, 0.10948701f, -0.04423397f, 0.13060363f, -0.12316251f, + 0.07509718f, 0.08392500f, 0.06825337f, 0.13981669f, 0.08239168f, 0.11931328f, 0.06770951f, 0.09240761f, + -0.19074154f, 0.55260652f, -0.20774621f, 0.53718704f, -0.19888818f, 0.55371630f, -0.19559640f, 0.54754448f, + 0.09983939f, 0.10603377f, 0.07915881f, 0.10416336f, 0.08655046f, 0.12505992f, 0.07738422f, 0.09509270f, + -0.18571433f, 0.55095005f, -0.18914750f, 0.52921104f, -0.19315663f, 0.53234470f, -0.19601485f, 0.56322992f, }; test_case.add_input(input); @@ -781,9 +794,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_3) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.33093750, 0.39181390, 0.14586255, 0.39709702, 0.98086524, 0.03891133, 0.72234219, 0.21966648, - 0.79986620, 0.97251678, 0.04131543, 0.43971965, 0.50185394, 0.11452501, 0.88111717, 0.76076663, - 0.31870860, 0.54107893, 0.91756296, 0.58112669, 0.99117357, 0.00256292, 0.58885485, 0.93481058, + 0.33093750f, 0.39181390f, 0.14586255f, 0.39709702f, 0.98086524f, 0.03891133f, 0.72234219f, 0.21966648f, + 0.79986620f, 0.97251678f, 0.04131543f, 0.43971965f, 0.50185394f, 0.11452501f, 0.88111717f, 0.76076663f, + 0.31870860f, 0.54107893f, 0.91756296f, 0.58112669f, 0.99117357f, 0.00256292f, 0.58885485f, 0.93481058f, }; std::vector mask = { 1, @@ -796,27 +809,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_3) { 1, }; std::vector output = { - 0.07551830, 0.10666487, -0.19357042, 0.53683108, 0.07551410, 0.10666656, -0.19356072, 0.53684169, - 0.07552745, 0.10666100, -0.19358172, 0.53682435, 0.07552218, 
0.10666317, -0.19358677, 0.53681952, - 0.09727416, 0.13513327, -0.20121223, 0.57003713, 0.09727416, 0.13513327, -0.20121223, 0.57003713, - 0.09727416, 0.13513327, -0.20121223, 0.57003713, 0.09727416, 0.13513327, -0.20121223, 0.57003713, + 0.07551830f, 0.10666487f, -0.19357042f, 0.53683108f, 0.07551410f, 0.10666656f, -0.19356072f, 0.53684169f, + 0.07552745f, 0.10666100f, -0.19358172f, 0.53682435f, 0.07552218f, 0.10666317f, -0.19358677f, 0.53681952f, + 0.09727416f, 0.13513327f, -0.20121223f, 0.57003713f, 0.09727416f, 0.13513327f, -0.20121223f, 0.57003713f, + 0.09727416f, 0.13513327f, -0.20121223f, 0.57003713f, 0.09727416f, 0.13513327f, -0.20121223f, 0.57003713f, }; std::vector present = { - -0.59174627, -0.27471560, -0.58307797, -0.25967693, -0.60766846, -0.31754097, -0.61241394, -0.26291698, - 0.09206123, -0.05307099, 0.12491645, -0.03853742, 0.08732655, -0.13050151, 0.04073093, -0.10792807, - -0.60556883, -0.34055573, -0.60474855, -0.28785610, -0.60757709, -0.32514900, -0.58872569, -0.37967020, - 0.09779400, -0.13136166, 0.07915612, -0.10649752, 0.11043755, -0.15124020, 0.16626491, -0.11274654, - 0.07639833, 0.11762549, 0.09370039, 0.09133558, 0.05661478, 0.11096847, 0.04019671, 0.10117501, - -0.19371650, 0.52530587, -0.18429738, 0.55240726, -0.20283231, 0.53265429, -0.20036045, 0.50568837, - 0.06171235, 0.12687264, 0.05802051, 0.10266830, 0.06172965, 0.08967118, 0.09727416, 0.13513327, - -0.20576829, 0.53365225, -0.19832623, 0.52809310, -0.19971462, 0.55584043, -0.20121223, 0.57003713, + -0.59174627f, -0.27471560f, -0.58307797f, -0.25967693f, -0.60766846f, -0.31754097f, -0.61241394f, -0.26291698f, + 0.09206123f, -0.05307099f, 0.12491645f, -0.03853742f, 0.08732655f, -0.13050151f, 0.04073093f, -0.10792807f, + -0.60556883f, -0.34055573f, -0.60474855f, -0.28785610f, -0.60757709f, -0.32514900f, -0.58872569f, -0.37967020f, + 0.09779400f, -0.13136166f, 0.07915612f, -0.10649752f, 0.11043755f, -0.15124020f, 0.16626491f, -0.11274654f, + 0.07639833f, 0.11762549f, 0.09370039f, 0.09133558f, 0.05661478f, 0.11096847f, 0.04019671f, 0.10117501f, + -0.19371650f, 0.52530587f, -0.18429738f, 0.55240726f, -0.20283231f, 0.53265429f, -0.20036045f, 0.50568837f, + 0.06171235f, 0.12687264f, 0.05802051f, 0.10266830f, 0.06172965f, 0.08967118f, 0.09727416f, 0.13513327f, + -0.20576829f, 0.53365225f, -0.19832623f, 0.52809310f, -0.19971462f, 0.55584043f, -0.20121223f, 0.57003713f, }; test_case.add_input(input); test_case.add_input(mask); test_case.add_expected_output(output); test_case.add_expected_output(present); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_4) { @@ -827,35 +840,35 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_mask_index_4) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.23565151, 0.58627969, 0.75137484, 0.68586946, 0.62750375, 0.13284931, 0.13347220, 0.36357051, - 0.56910241, 0.48275986, 0.49440190, 0.45483324, 0.63547862, 0.97893149, 0.40630588, 0.38783622, - 0.07172249, 0.46385381, 0.99764502, 0.22219376, 0.67735291, 0.40799847, 0.74337566, 0.87263006, + 0.23565151f, 0.58627969f, 0.75137484f, 0.68586946f, 0.62750375f, 0.13284931f, 0.13347220f, 0.36357051f, + 0.56910241f, 0.48275986f, 0.49440190f, 0.45483324f, 0.63547862f, 0.97893149f, 0.40630588f, 0.38783622f, + 0.07172249f, 0.46385381f, 0.99764502f, 0.22219376f, 0.67735291f, 0.40799847f, 0.74337566f, 0.87263006f, }; std::vector mask = { 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 
1, 1, 1, 1, 0, 0, 1, 1, 1, }; std::vector output = { - 0.07771622, 0.10724538, -0.19453585, 0.54342043, 0.07459468, 0.10934003, -0.19561143, 0.53936625, - 0.07927690, 0.10619678, -0.19399606, 0.54543519, 0.07459468, 0.10934003, -0.19561143, 0.53936625, - 0.05485561, 0.11278091, -0.20117569, 0.52096349, 0.06629646, 0.10195158, -0.19900991, 0.54654449, - 0.06491723, 0.10292297, -0.19678673, 0.53451663, 0.06549793, 0.11126325, -0.19989857, 0.53717279, + 0.07771622f, 0.10724538f, -0.19453585f, 0.54342043f, 0.07459468f, 0.10934003f, -0.19561143f, 0.53936625f, + 0.07927690f, 0.10619678f, -0.19399606f, 0.54543519f, 0.07459468f, 0.10934003f, -0.19561143f, 0.53936625f, + 0.05485561f, 0.11278091f, -0.20117569f, 0.52096349f, 0.06629646f, 0.10195158f, -0.19900991f, 0.54654449f, + 0.06491723f, 0.10292297f, -0.19678673f, 0.53451663f, 0.06549793f, 0.11126325f, -0.19989857f, 0.53717279f, }; std::vector present = { - -0.59188855, -0.34495637, -0.59508181, -0.25013468, -0.59176934, -0.33229247, -0.59576762, -0.29731843, - 0.14217430, -0.10403840, 0.08584045, -0.06193545, 0.12358667, -0.08588549, 0.10515238, -0.08629489, - -0.59092808, -0.28260738, -0.60047609, -0.30411413, -0.61210287, -0.28645760, -0.59391296, -0.34649473, - 0.12789863, -0.08159252, 0.08122411, -0.08866425, 0.06395009, -0.12896645, 0.14855847, -0.11978809, - 0.08783118, 0.12152332, 0.07067389, 0.09078297, 0.08385989, 0.13306075, 0.07459468, 0.10934003, - -0.19849420, 0.55928540, -0.18948570, 0.53154731, -0.19960676, 0.54237455, -0.19561143, 0.53936625, - 0.08509844, 0.08314656, 0.06388859, 0.12990499, 0.04582624, 0.09566365, 0.08674107, 0.10823163, - -0.18808734, 0.56137776, -0.20168513, 0.51830697, -0.20066255, 0.52363914, -0.19737384, 0.56921995, + -0.59188855f, -0.34495637f, -0.59508181f, -0.25013468f, -0.59176934f, -0.33229247f, -0.59576762f, -0.29731843f, + 0.14217430f, -0.10403840f, 0.08584045f, -0.06193545f, 0.12358667f, -0.08588549f, 0.10515238f, -0.08629489f, + -0.59092808f, -0.28260738f, -0.60047609f, -0.30411413f, -0.61210287f, -0.28645760f, -0.59391296f, -0.34649473f, + 0.12789863f, -0.08159252f, 0.08122411f, -0.08866425f, 0.06395009f, -0.12896645f, 0.14855847f, -0.11978809f, + 0.08783118f, 0.12152332f, 0.07067389f, 0.09078297f, 0.08385989f, 0.13306075f, 0.07459468f, 0.10934003f, + -0.19849420f, 0.55928540f, -0.18948570f, 0.53154731f, -0.19960676f, 0.54237455f, -0.19561143f, 0.53936625f, + 0.08509844f, 0.08314656f, 0.06388859f, 0.12990499f, 0.04582624f, 0.09566365f, 0.08674107f, 0.10823163f, + -0.18808734f, 0.56137776f, -0.20168513f, 0.51830697f, -0.20066255f, 0.52363914f, -0.19737384f, 0.56921995f, }; test_case.add_input(input); test_case.add_input(mask); test_case.add_expected_output(output); test_case.add_expected_output(present); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_past) { @@ -866,9 +879,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_past) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.82966000, 0.77751911, 0.08977074, 0.06076468, 0.40659550, 0.19995944, 0.55544919, 0.83971608, - 0.86254036, 0.30894691, 0.80156928, 0.83092463, 0.14506543, 0.32196075, 0.42209163, 0.24465553, - 0.93944097, 0.73528159, 0.23347616, 0.60544974, 0.93329269, 0.67604774, 0.56349903, 0.26199624, + 0.82966000f, 0.77751911f, 0.08977074f, 0.06076468f, 0.40659550f, 0.19995944f, 0.55544919f, 0.83971608f, + 0.86254036f, 0.30894691f, 0.80156928f, 0.83092463f, 0.14506543f, 0.32196075f, 0.42209163f, 0.24465553f, + 
0.93944097f, 0.73528159f, 0.23347616f, 0.60544974f, 0.93329269f, 0.67604774f, 0.56349903f, 0.26199624f, }; std::vector mask = { 1, @@ -891,41 +904,42 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_past) { 1, }; std::vector past = { - 0.92467678, 0.79873562, 0.00939191, 0.34891853, 0.35521412, 0.21872006, 0.89974332, 0.74132687, 0.73566031, - 0.75168055, 0.06773245, 0.85702997, 0.76256698, 0.51739877, 0.91567177, 0.66617578, 0.88056499, 0.08436447, - 0.54744655, 0.25466520, 0.08500137, 0.19271941, 0.86525357, 0.21717627, 0.97158766, 0.42288730, 0.09890039, - 0.01148765, 0.97024685, 0.19697112, 0.67671591, 0.67960924, 0.46656516, 0.30850092, 0.73536104, 0.73938161, - 0.91650903, 0.57628596, 0.51164514, 0.11695814, 0.79792547, 0.97192264, 0.29246020, 0.41030061, 0.19014873, - 0.90233624, 0.84986305, 0.26141909, 0.84528726, 0.81416380, 0.00429944, 0.31476986, 0.00440918, 0.77413058, - 0.13409913, 0.20965169, 0.61764991, 0.55266041, 0.56107825, 0.42051074, 0.16804738, 0.80362344, 0.52392679, - 0.27550557, 0.66738850, 0.39348483, 0.31801429, 0.30325863, 0.37068403, 0.92767614, 0.60799408, 0.01458820, - 0.24194679, 0.59596598, 0.81762302, 0.38094005, 0.16618672, 0.92488551, 0.84298438, 0.21752745, + 0.92467678f, 0.79873562f, 0.00939191f, 0.34891853f, 0.35521412f, 0.21872006f, 0.89974332f, 0.74132687f, + 0.73566031f, 0.75168055f, 0.06773245f, 0.85702997f, 0.76256698f, 0.51739877f, 0.91567177f, 0.66617578f, + 0.88056499f, 0.08436447f, 0.54744655f, 0.25466520f, 0.08500137f, 0.19271941f, 0.86525357f, 0.21717627f, + 0.97158766f, 0.42288730f, 0.09890039f, 0.01148765f, 0.97024685f, 0.19697112f, 0.67671591f, 0.67960924f, + 0.46656516f, 0.30850092f, 0.73536104f, 0.73938161f, 0.91650903f, 0.57628596f, 0.51164514f, 0.11695814f, + 0.79792547f, 0.97192264f, 0.29246020f, 0.41030061f, 0.19014873f, 0.90233624f, 0.84986305f, 0.26141909f, + 0.84528726f, 0.81416380f, 0.00429944f, 0.31476986f, 0.00440918f, 0.77413058f, 0.13409913f, 0.20965169f, + 0.61764991f, 0.55266041f, 0.56107825f, 0.42051074f, 0.16804738f, 0.80362344f, 0.52392679f, 0.27550557f, + 0.66738850f, 0.39348483f, 0.31801429f, 0.30325863f, 0.37068403f, 0.92767614f, 0.60799408f, 0.01458820f, + 0.24194679f, 0.59596598f, 0.81762302f, 0.38094005f, 0.16618672f, 0.92488551f, 0.84298438f, 0.21752745f, }; std::vector output = { - 0.26186451, 0.45950246, -0.04001215, 0.47680017, 0.26333901, 0.46158865, -0.04006424, 0.47588652, - 0.26875457, 0.47031689, -0.03951600, 0.47674999, 0.26851410, 0.46987134, -0.03919901, 0.47629333, - 0.18083976, 0.16579385, -0.05161894, 0.63075018, 0.18228555, 0.16642828, -0.04873618, 0.63316816, - 0.18362364, 0.16702136, -0.05045432, 0.63178891, 0.18000112, 0.16541445, -0.05139139, 0.63105792, + 0.26186451f, 0.45950246f, -0.04001215f, 0.47680017f, 0.26333901f, 0.46158865f, -0.04006424f, 0.47588652f, + 0.26875457f, 0.47031689f, -0.03951600f, 0.47674999f, 0.26851410f, 0.46987134f, -0.03919901f, 0.47629333f, + 0.18083976f, 0.16579385f, -0.05161894f, 0.63075018f, 0.18228555f, 0.16642828f, -0.04873618f, 0.63316816f, + 0.18362364f, 0.16702136f, -0.05045432f, 0.63178891f, 0.18000112f, 0.16541445f, -0.05139139f, 0.63105792f, }; std::vector present = { - 0.92467678, 0.79873562, 0.00939191, 0.34891853, 0.35521412, 0.21872006, 0.89974332, 0.74132687, - 0.73566031, 0.75168055, -0.59527576, -0.23625080, -0.58657664, -0.29827437, -0.59528387, -0.33578828, - -0.59068960, -0.34870598, 0.06773245, 0.85702997, 0.76256698, 0.51739877, 0.91567177, 0.66617578, - 0.88056499, 0.08436447, 0.54744655, 0.25466520, 0.08536442, -0.06134639, 0.11295843, 
-0.04818217, - 0.14562836, -0.12305059, 0.15695867, -0.11161390, 0.08500137, 0.19271941, 0.86525357, 0.21717627, - 0.97158766, 0.42288730, 0.09890039, 0.01148765, 0.97024685, 0.19697112, -0.59141791, -0.31600696, - -0.58647990, -0.34302223, -0.59306550, -0.36427227, -0.59695083, -0.26431620, 0.67671591, 0.67960924, - 0.46656516, 0.30850092, 0.73536104, 0.73938161, 0.91650903, 0.57628596, 0.51164514, 0.11695814, - 0.11255538, -0.07302766, 0.16620418, -0.09871224, 0.15272795, -0.12076923, 0.08827571, -0.07442430, - 0.79792547, 0.97192264, 0.29246020, 0.41030061, 0.19014873, 0.90233624, 0.84986305, 0.26141909, - 0.84528726, 0.81416380, 0.07014155, 0.07749540, 0.08745074, 0.13131952, 0.08430066, 0.09709007, - 0.09247591, 0.11065811, 0.00429944, 0.31476986, 0.00440918, 0.77413058, 0.13409913, 0.20965169, - 0.61764991, 0.55266041, 0.56107825, 0.42051074, -0.18658412, 0.53568852, -0.19482780, 0.53271860, - -0.19558203, 0.57155901, -0.19633618, 0.57260245, 0.16804738, 0.80362344, 0.52392679, 0.27550557, - 0.66738850, 0.39348483, 0.31801429, 0.30325863, 0.37068403, 0.92767614, 0.08172131, 0.13249113, - 0.09947956, 0.10781212, 0.08890627, 0.12280971, 0.06911418, 0.09499176, 0.60799408, 0.01458820, - 0.24194679, 0.59596598, 0.81762302, 0.38094005, 0.16618672, 0.92488551, 0.84298438, 0.21752745, - -0.19839945, 0.53462923, -0.19349247, 0.57778782, -0.20039621, 0.56689924, -0.19190890, 0.53286803, + 0.92467678f, 0.79873562f, 0.00939191f, 0.34891853f, 0.35521412f, 0.21872006f, 0.89974332f, 0.74132687f, + 0.73566031f, 0.75168055f, -0.59527576f, -0.23625080f, -0.58657664f, -0.29827437f, -0.59528387f, -0.33578828f, + -0.59068960f, -0.34870598f, 0.06773245f, 0.85702997f, 0.76256698f, 0.51739877f, 0.91567177f, 0.66617578f, + 0.88056499f, 0.08436447f, 0.54744655f, 0.25466520f, 0.08536442f, -0.06134639f, 0.11295843f, -0.04818217f, + 0.14562836f, -0.12305059f, 0.15695867f, -0.11161390f, 0.08500137f, 0.19271941f, 0.86525357f, 0.21717627f, + 0.97158766f, 0.42288730f, 0.09890039f, 0.01148765f, 0.97024685f, 0.19697112f, -0.59141791f, -0.31600696f, + -0.58647990f, -0.34302223f, -0.59306550f, -0.36427227f, -0.59695083f, -0.26431620f, 0.67671591f, 0.67960924f, + 0.46656516f, 0.30850092f, 0.73536104f, 0.73938161f, 0.91650903f, 0.57628596f, 0.51164514f, 0.11695814f, + 0.11255538f, -0.07302766f, 0.16620418f, -0.09871224f, 0.15272795f, -0.12076923f, 0.08827571f, -0.07442430f, + 0.79792547f, 0.97192264f, 0.29246020f, 0.41030061f, 0.19014873f, 0.90233624f, 0.84986305f, 0.26141909f, + 0.84528726f, 0.81416380f, 0.07014155f, 0.07749540f, 0.08745074f, 0.13131952f, 0.08430066f, 0.09709007f, + 0.09247591f, 0.11065811f, 0.00429944f, 0.31476986f, 0.00440918f, 0.77413058f, 0.13409913f, 0.20965169f, + 0.61764991f, 0.55266041f, 0.56107825f, 0.42051074f, -0.18658412f, 0.53568852f, -0.19482780f, 0.53271860f, + -0.19558203f, 0.57155901f, -0.19633618f, 0.57260245f, 0.16804738f, 0.80362344f, 0.52392679f, 0.27550557f, + 0.66738850f, 0.39348483f, 0.31801429f, 0.30325863f, 0.37068403f, 0.92767614f, 0.08172131f, 0.13249113f, + 0.09947956f, 0.10781212f, 0.08890627f, 0.12280971f, 0.06911418f, 0.09499176f, 0.60799408f, 0.01458820f, + 0.24194679f, 0.59596598f, 0.81762302f, 0.38094005f, 0.16618672f, 0.92488551f, 0.84298438f, 0.21752745f, + -0.19839945f, 0.53462923f, -0.19349247f, 0.57778782f, -0.20039621f, 0.56689924f, -0.19190890f, 0.53286803f, }; test_case.add_input(input); @@ -933,7 +947,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_past) { test_case.add_input(past); test_case.add_expected_output(output); 
test_case.add_expected_output(present); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_extra_add) { @@ -944,9 +958,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_extra_add) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.14930259, 0.11199699, 0.81292826, 0.08368169, 0.05704883, 0.41276145, 0.38760167, 0.00146112, - 0.14275745, 0.54254925, 0.07962929, 0.31023681, 0.09597706, 0.60583973, 0.90233743, 0.33360451, - 0.18193199, 0.19159532, 0.07869831, 0.86026299, 0.20683478, 0.40150928, 0.93124926, 0.31805834, + 0.14930259f, 0.11199699f, 0.81292826f, 0.08368169f, 0.05704883f, 0.41276145f, 0.38760167f, 0.00146112f, + 0.14275745f, 0.54254925f, 0.07962929f, 0.31023681f, 0.09597706f, 0.60583973f, 0.90233743f, 0.33360451f, + 0.18193199f, 0.19159532f, 0.07869831f, 0.86026299f, 0.20683478f, 0.40150928f, 0.93124926f, 0.31805834f, }; std::vector mask = { 0, @@ -959,30 +973,30 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_extra_add) { 0, }; std::vector extra_add = { - 0.73230380, 0.61824518, 0.19738488, 0.57034588, 0.22331032, 0.53262889, 0.60098642, 0.72943515, - 0.09009175, 0.81116527, 0.47240964, 0.49679127, 0.41110733, 0.29418564, 0.93818313, 0.64175284, - 0.06807775, 0.66733366, 0.78848422, 0.48788327, 0.38806340, 0.14002480, 0.72263688, 0.22772972, - 0.24000823, 0.75820386, 0.64254439, 0.19385594, 0.95595860, 0.59840417, 0.93769604, 0.62474734, - 0.36690548, 0.76047903, 0.62352085, 0.58574778, 0.64251810, 0.78072041, 0.43344691, 0.75383639, - 0.73950553, 0.92625278, 0.05066428, 0.08448382, 0.25980917, 0.50312829, 0.97800279, 0.05422170, - 0.05171391, 0.82828254, 0.42234898, 0.95752198, 0.96325767, 0.97909677, 0.35578200, 0.48091716, - 0.03637243, 0.91552693, 0.43403026, 0.94275808, 0.51182085, 0.86773109, 0.38459453, 0.87822068, + 0.73230380f, 0.61824518f, 0.19738488f, 0.57034588f, 0.22331032f, 0.53262889f, 0.60098642f, 0.72943515f, + 0.09009175f, 0.81116527f, 0.47240964f, 0.49679127f, 0.41110733f, 0.29418564f, 0.93818313f, 0.64175284f, + 0.06807775f, 0.66733366f, 0.78848422f, 0.48788327f, 0.38806340f, 0.14002480f, 0.72263688f, 0.22772972f, + 0.24000823f, 0.75820386f, 0.64254439f, 0.19385594f, 0.95595860f, 0.59840417f, 0.93769604f, 0.62474734f, + 0.36690548f, 0.76047903f, 0.62352085f, 0.58574778f, 0.64251810f, 0.78072041f, 0.43344691f, 0.75383639f, + 0.73950553f, 0.92625278f, 0.05066428f, 0.08448382f, 0.25980917f, 0.50312829f, 0.97800279f, 0.05422170f, + 0.05171391f, 0.82828254f, 0.42234898f, 0.95752198f, 0.96325767f, 0.97909677f, 0.35578200f, 0.48091716f, + 0.03637243f, 0.91552693f, 0.43403026f, 0.94275808f, 0.51182085f, 0.86773109f, 0.38459453f, 0.87822068f, }; std::vector output = { - 0.06090815, 0.12919067, -0.19883196, 0.50295448, 0.06090815, 0.12919067, -0.19883196, 0.50295448, - 0.06090815, 0.12919067, -0.19883196, 0.50295448, 0.06090815, 0.12919067, -0.19883196, 0.50295448, - 0.08714182, 0.12259886, -0.19516067, 0.54010558, 0.08671370, 0.12369543, -0.19658084, 0.54502594, - 0.08458151, 0.12488046, -0.19519810, 0.53906947, 0.09063499, 0.12088943, -0.19583938, 0.54266596, + 0.06090815f, 0.12919067f, -0.19883196f, 0.50295448f, 0.06090815f, 0.12919067f, -0.19883196f, 0.50295448f, + 0.06090815f, 0.12919067f, -0.19883196f, 0.50295448f, 0.06090815f, 0.12919067f, -0.19883196f, 0.50295448f, + 0.08714182f, 0.12259886f, -0.19516067f, 0.54010558f, 0.08671370f, 0.12369543f, -0.19658084f, 0.54502594f, + 0.08458151f, 0.12488046f, -0.19519810f, 0.53906947f, 
0.09063499f, 0.12088943f, -0.19583938f, 0.54266596f, }; std::vector present = { - -0.59800303, -0.35666457, -0.59420627, -0.31881350, -0.59887993, -0.27025288, -0.60216135, -0.27772796, - 0.11659990, -0.11224300, 0.09693416, -0.07304113, 0.06023501, -0.05941332, 0.06434284, -0.07978789, - -0.59005713, -0.37009716, -0.59542215, -0.27914333, -0.57998544, -0.29826957, -0.58625919, -0.28872511, - 0.15994480, -0.11288825, 0.07906821, -0.05991337, 0.14479136, -0.04415035, 0.13493451, -0.06541853, - 0.07513385, 0.14411135, 0.07505661, 0.14532046, 0.06090815, 0.12919067, 0.05788904, 0.12018456, - -0.20586906, 0.53715372, -0.20203318, 0.52092510, -0.19883196, 0.50295448, -0.19937295, 0.51055026, - 0.09417956, 0.12943678, 0.06923291, 0.12574309, 0.10221909, 0.11366953, 0.09235901, 0.09584601, - -0.20036517, 0.56818324, -0.19709785, 0.51547027, -0.18871340, 0.55736589, -0.18826833, 0.55965197, + -0.59800303f, -0.35666457f, -0.59420627f, -0.31881350f, -0.59887993f, -0.27025288f, -0.60216135f, -0.27772796f, + 0.11659990f, -0.11224300f, 0.09693416f, -0.07304113f, 0.06023501f, -0.05941332f, 0.06434284f, -0.07978789f, + -0.59005713f, -0.37009716f, -0.59542215f, -0.27914333f, -0.57998544f, -0.29826957f, -0.58625919f, -0.28872511f, + 0.15994480f, -0.11288825f, 0.07906821f, -0.05991337f, 0.14479136f, -0.04415035f, 0.13493451f, -0.06541853f, + 0.07513385f, 0.14411135f, 0.07505661f, 0.14532046f, 0.06090815f, 0.12919067f, 0.05788904f, 0.12018456f, + -0.20586906f, 0.53715372f, -0.20203318f, 0.52092510f, -0.19883196f, 0.50295448f, -0.19937295f, 0.51055026f, + 0.09417956f, 0.12943678f, 0.06923291f, 0.12574309f, 0.10221909f, 0.11366953f, 0.09235901f, 0.09584601f, + -0.20036517f, 0.56818324f, -0.19709785f, 0.51547027f, -0.18871340f, 0.55736589f, -0.18826833f, 0.55965197f, }; test_case.add_input(input); @@ -990,7 +1004,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_extra_add) { test_case.add_input(extra_add); test_case.add_expected_output(output); test_case.add_expected_output(present); - test_case.run_with_tolerance_as_fp(1e-7); + test_case.run_with_tolerance_as_fp(1e-7f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_dynamic_shapes) { @@ -1001,29 +1015,30 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_dynamic_shapes) { auto test_case = test::TestCase(function, s_device); std::vector input = { - 0.42226878, 0.50984067, 0.80440795, 0.68040705, 0.93614250, 0.45104721, 0.71767306, 0.48596525, - 0.70076728, 0.04500086, 0.28930107, 0.77435863, 0.19392140, 0.90290719, 0.91955870, 0.58811885, - 0.76795286, 0.62884814, 0.23377730, 0.49212688, 0.87256873, 0.11944817, 0.57715887, 0.91886938, + 0.42226878f, 0.50984067f, 0.80440795f, 0.68040705f, 0.93614250f, 0.45104721f, 0.71767306f, 0.48596525f, + 0.70076728f, 0.04500086f, 0.28930107f, 0.77435863f, 0.19392140f, 0.90290719f, 0.91955870f, 0.58811885f, + 0.76795286f, 0.62884814f, 0.23377730f, 0.49212688f, 0.87256873f, 0.11944817f, 0.57715887f, 0.91886938f, }; std::vector weights = { - 0.99377930, 0.22733542, 0.43217131, 0.60717988, 0.97224706, 0.70020503, 0.92439449, 0.41512674, 0.47728160, - 0.40306625, 0.72619593, 0.37954643, 0.36950976, 0.84305370, 0.61671126, 0.22251014, 0.73839295, 0.73471880, - 0.37428924, 0.80240524, 0.23120961, 0.06072779, 0.92840081, 0.71558088, 0.08719950, 0.51666921, 0.53768843, - 0.48113129, 0.46389169, 0.01036468, 0.37341005, 0.67195475, 0.53599644, 0.41795707, 0.58081782, 0.97939289, + 0.99377930f, 0.22733542f, 0.43217131f, 0.60717988f, 0.97224706f, 0.70020503f, 0.92439449f, 0.41512674f, + 0.47728160f, 0.40306625f, 0.72619593f, 
0.37954643f, 0.36950976f, 0.84305370f, 0.61671126f, 0.22251014f, + 0.73839295f, 0.73471880f, 0.37428924f, 0.80240524f, 0.23120961f, 0.06072779f, 0.92840081f, 0.71558088f, + 0.08719950f, 0.51666921f, 0.53768843f, 0.48113129f, 0.46389169f, 0.01036468f, 0.37341005f, 0.67195475f, + 0.53599644f, 0.41795707f, 0.58081782f, 0.97939289f, }; std::vector bias = { - 0.77122736, - 0.75600564, - 0.86177206, - 0.69982684, - 0.74719858, - 0.78054035, - 0.80007398, - 0.74902135, - 0.81258053, - 0.01575289, - 0.08463049, - 0.39671996, + 0.77122736f, + 0.75600564f, + 0.86177206f, + 0.69982684f, + 0.74719858f, + 0.78054035f, + 0.80007398f, + 0.74902135f, + 0.81258053f, + 0.01575289f, + 0.08463049f, + 0.39671996f, }; std::vector mask = { 0, @@ -1046,39 +1061,42 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_dynamic_shapes) { 0, }; std::vector past = { - 0.27759778, 0.18458818, 0.63114458, 0.09953160, 0.59739488, 0.63917851, 0.18828323, 0.65625650, 0.84574437, - 0.91846281, 0.55102497, 0.27506110, 0.06816208, 0.82616585, 0.85912132, 0.88682729, 0.14730524, 0.61618829, - 0.89891797, 0.27753425, 0.57438278, 0.33753166, 0.88768929, 0.35533753, 0.30193496, 0.81678063, 0.26569194, - 0.62769043, 0.61990744, 0.59077013, 0.11058200, 0.97370809, 0.81339806, 0.57207322, 0.80417949, 0.54185718, - 0.80831683, 0.29390740, 0.29051417, 0.51964313, 0.04341308, 0.05925354, 0.82397246, 0.55753845, 0.61247689, - 0.98571628, 0.07566493, 0.37537411, 0.42080343, 0.21715857, 0.57869565, 0.55962265, 0.82500041, 0.60776925, - 0.19367239, 0.88382334, 0.20328504, 0.58192456, 0.94542676, 0.98562658, 0.64355153, 0.69856495, 0.30377558, - 0.02857198, 0.96969068, 0.48450547, 0.98341352, 0.03546083, 0.84963584, 0.94460547, 0.90907097, 0.22525074, - 0.12530145, 0.52223104, 0.09549426, 0.93127102, 0.93429947, 0.01428344, 0.74249738, 0.22606593, + 0.27759778f, 0.18458818f, 0.63114458f, 0.09953160f, 0.59739488f, 0.63917851f, 0.18828323f, 0.65625650f, + 0.84574437f, 0.91846281f, 0.55102497f, 0.27506110f, 0.06816208f, 0.82616585f, 0.85912132f, 0.88682729f, + 0.14730524f, 0.61618829f, 0.89891797f, 0.27753425f, 0.57438278f, 0.33753166f, 0.88768929f, 0.35533753f, + 0.30193496f, 0.81678063f, 0.26569194f, 0.62769043f, 0.61990744f, 0.59077013f, 0.11058200f, 0.97370809f, + 0.81339806f, 0.57207322f, 0.80417949f, 0.54185718f, 0.80831683f, 0.29390740f, 0.29051417f, 0.51964313f, + 0.04341308f, 0.05925354f, 0.82397246f, 0.55753845f, 0.61247689f, 0.98571628f, 0.07566493f, 0.37537411f, + 0.42080343f, 0.21715857f, 0.57869565f, 0.55962265f, 0.82500041f, 0.60776925f, 0.19367239f, 0.88382334f, + 0.20328504f, 0.58192456f, 0.94542676f, 0.98562658f, 0.64355153f, 0.69856495f, 0.30377558f, 0.02857198f, + 0.96969068f, 0.48450547f, 0.98341352f, 0.03546083f, 0.84963584f, 0.94460547f, 0.90907097f, 0.22525074f, + 0.12530145f, 0.52223104f, 0.09549426f, 0.93127102f, 0.93429947f, 0.01428344f, 0.74249738f, 0.22606593f, }; std::vector output = { - 1.47439122, 0.50951630, 1.17974961, 1.58501005, 1.49403512, 0.51560062, 1.18972027, 1.59668207, - 1.48384988, 0.51248586, 1.18596375, 1.59219086, 1.44181466, 0.50219649, 1.15537691, 1.55348074, - 0.83429223, 0.59521818, 0.87688094, 0.13611843, 0.82936716, 0.61004817, 0.87633312, 0.13887596, - 0.83155584, 0.59382534, 0.87496555, 0.14041223, 0.83309680, 0.58982348, 0.87517864, 0.13930768, + 1.47439122f, 0.50951630f, 1.17974961f, 1.58501005f, 1.49403512f, 0.51560062f, 1.18972027f, 1.59668207f, + 1.48384988f, 0.51248586f, 1.18596375f, 1.59219086f, 1.44181466f, 0.50219649f, 1.15537691f, 1.55348074f, + 0.83429223f, 0.59521818f, 
0.87688094f, 0.13611843f, 0.82936716f, 0.61004817f, 0.87633312f, 0.13887596f, + 0.83155584f, 0.59382534f, 0.87496555f, 0.14041223f, 0.83309680f, 0.58982348f, 0.87517864f, 0.13930768f, }; std::vector present = { - 0.27759778, 0.18458818, 0.63114458, 0.09953160, 0.59739488, 0.63917851, 0.18828323, 0.65625650, 0.84574437, - 0.91846281, 1.90736914, 1.45914197, 2.30920029, 1.94944119, 2.12886763, 1.64736962, 1.36378694, 1.03263116, - 0.55102497, 0.27506110, 0.06816208, 0.82616585, 0.85912132, 0.88682729, 0.14730524, 0.61618829, 0.89891797, - 0.27753425, 1.68161881, 1.87394094, 1.94785213, 2.08572555, 1.90705216, 1.90777159, 1.23910809, 1.52017307, - 0.57438278, 0.33753166, 0.88768929, 0.35533753, 0.30193496, 0.81678063, 0.26569194, 0.62769043, 0.61990744, - 0.59077013, 2.02901411, 1.58923888, 2.17776394, 1.76309133, 1.74264824, 1.31485105, 1.71575761, 1.29775190, - 0.11058200, 0.97370809, 0.81339806, 0.57207322, 0.80417949, 0.54185718, 0.80831683, 0.29390740, 0.29051417, - 0.51964313, 1.66065478, 2.17192268, 1.86598253, 2.03193212, 1.52620018, 1.82728052, 1.46963060, 1.87916136, - 0.04341308, 0.05925354, 0.82397246, 0.55753845, 0.61247689, 0.98571628, 0.07566493, 0.37537411, 0.42080343, - 0.21715857, 1.56316149, 0.55312467, 1.59553123, 0.53537023, 1.64308119, 0.62742490, 1.31600118, 0.37510848, - 0.57869565, 0.55962265, 0.82500041, 0.60776925, 0.19367239, 0.88382334, 0.20328504, 0.58192456, 0.94542676, - 0.98562658, 1.33183134, 1.70965421, 1.70983100, 1.76660407, 1.46399045, 1.70318413, 0.83565855, 1.37921953, - 0.64355153, 0.69856495, 0.30377558, 0.02857198, 0.96969068, 0.48450547, 0.98341352, 0.03546083, 0.84963584, - 0.94460547, 1.60677671, 0.53308368, 1.60789728, 0.56227136, 1.50563633, 0.50456268, 1.49554634, 0.48299593, - 0.90907097, 0.22525074, 0.12530145, 0.52223104, 0.09549426, 0.93127102, 0.93429947, 0.01428344, 0.74249738, - 0.22606593, 1.59781134, 2.01703453, 1.58993423, 1.78536010, 1.21809304, 1.69219351, 1.24090374, 1.75499403, + 0.27759778f, 0.18458818f, 0.63114458f, 0.09953160f, 0.59739488f, 0.63917851f, 0.18828323f, 0.65625650f, + 0.84574437f, 0.91846281f, 1.90736914f, 1.45914197f, 2.30920029f, 1.94944119f, 2.12886763f, 1.64736962f, + 1.36378694f, 1.03263116f, 0.55102497f, 0.27506110f, 0.06816208f, 0.82616585f, 0.85912132f, 0.88682729f, + 0.14730524f, 0.61618829f, 0.89891797f, 0.27753425f, 1.68161881f, 1.87394094f, 1.94785213f, 2.08572555f, + 1.90705216f, 1.90777159f, 1.23910809f, 1.52017307f, 0.57438278f, 0.33753166f, 0.88768929f, 0.35533753f, + 0.30193496f, 0.81678063f, 0.26569194f, 0.62769043f, 0.61990744f, 0.59077013f, 2.02901411f, 1.58923888f, + 2.17776394f, 1.76309133f, 1.74264824f, 1.31485105f, 1.71575761f, 1.29775190f, 0.11058200f, 0.97370809f, + 0.81339806f, 0.57207322f, 0.80417949f, 0.54185718f, 0.80831683f, 0.29390740f, 0.29051417f, 0.51964313f, + 1.66065478f, 2.17192268f, 1.86598253f, 2.03193212f, 1.52620018f, 1.82728052f, 1.46963060f, 1.87916136f, + 0.04341308f, 0.05925354f, 0.82397246f, 0.55753845f, 0.61247689f, 0.98571628f, 0.07566493f, 0.37537411f, + 0.42080343f, 0.21715857f, 1.56316149f, 0.55312467f, 1.59553123f, 0.53537023f, 1.64308119f, 0.62742490f, + 1.31600118f, 0.37510848f, 0.57869565f, 0.55962265f, 0.82500041f, 0.60776925f, 0.19367239f, 0.88382334f, + 0.20328504f, 0.58192456f, 0.94542676f, 0.98562658f, 1.33183134f, 1.70965421f, 1.70983100f, 1.76660407f, + 1.46399045f, 1.70318413f, 0.83565855f, 1.37921953f, 0.64355153f, 0.69856495f, 0.30377558f, 0.02857198f, + 0.96969068f, 0.48450547f, 0.98341352f, 0.03546083f, 0.84963584f, 0.94460547f, 1.60677671f, 
0.53308368f, + 1.60789728f, 0.56227136f, 1.50563633f, 0.50456268f, 1.49554634f, 0.48299593f, 0.90907097f, 0.22525074f, + 0.12530145f, 0.52223104f, 0.09549426f, 0.93127102f, 0.93429947f, 0.01428344f, 0.74249738f, 0.22606593f, + 1.59781134f, 2.01703453f, 1.58993423f, 1.78536010f, 1.21809304f, 1.69219351f, 1.24090374f, 1.75499403f, }; test_case.add_input(Shape{2, 4, 3}, input); @@ -1088,7 +1106,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_attention_dynamic_shapes) { test_case.add_input(Shape{2, 2, 2, 5, 2}, past); test_case.add_expected_output(Shape{2, 4, 4}, output); test_case.add_expected_output(Shape{2, 2, 2, 9, 2}, present); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_fusedgemm_abc) { @@ -1098,67 +1116,68 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_fusedgemm_abc) { auto test_case = test::TestCase(function, s_device); std::vector inputA = { - 0.760289272, - 0.155913759, - 0.781790674, - -0.916164881, - -0.599392663, - 0.264654594, - 0.793851873, - 0.177088557, - 0.082737454, - 0.070692121, - -0.811413035, - -0.098108588, - 0.650090827, - -0.987659751, - -0.815909968, - -0.375566031, - -0.192777789, - -0.843511765, + 0.760289272f, + 0.155913759f, + 0.781790674f, + -0.916164881f, + -0.599392663f, + 0.264654594f, + 0.793851873f, + 0.177088557f, + 0.082737454f, + 0.070692121f, + -0.811413035f, + -0.098108588f, + 0.650090827f, + -0.987659751f, + -0.815909968f, + -0.375566031f, + -0.192777789f, + -0.843511765f, }; std::vector inputB = { - -0.599338344, -0.893724541, -0.362130441, -0.510642812, -0.943908814, -0.247790266, -0.732624930, 0.660286910, - -0.264866660, -0.907203793, 0.339617010, -0.322529173, 0.714601048, 0.581729832, -0.609115490, -0.369882312, - -0.462432785, -0.554824440, -0.833489997, -0.899945507, -0.088337136, -0.253637339, -0.443307744, -0.677004897, + -0.599338344f, -0.893724541f, -0.362130441f, -0.510642812f, -0.943908814f, -0.247790266f, + -0.732624930f, 0.660286910f, -0.264866660f, -0.907203793f, 0.339617010f, -0.322529173f, + 0.714601048f, 0.581729832f, -0.609115490f, -0.369882312f, -0.462432785f, -0.554824440f, + -0.833489997f, -0.899945507f, -0.088337136f, -0.253637339f, -0.443307744f, -0.677004897f, }; std::vector inputC = { - -0.540039918, - -0.235745675, - -0.337291175, - -0.702340580, - 0.532629731, - -0.794515569, - -0.532012999, - 0.372558416, - 0.582367524, - -0.483044018, - 0.656635884, - -0.655929499, + -0.540039918f, + -0.235745675f, + -0.337291175f, + -0.702340580f, + 0.532629731f, + -0.794515569f, + -0.532012999f, + 0.372558416f, + 0.582367524f, + -0.483044018f, + 0.656635884f, + -0.655929499f, }; std::vector output = { - -8.75421E-05, - -9.65321E-05, - 0.239491309, - -2.70329E-05, - 0.151090653, - -5.53371E-05, - -1.22197E-05, - 0.413963711, - 0.618195780, - 0.011654445, - 0.815541101, - -2.46706E-05, + -8.75421E-05f, + -9.65321E-05f, + 0.239491309f, + -2.70329E-05f, + 0.151090653f, + -5.53371E-05f, + -1.22197E-05f, + 0.413963711f, + 0.618195780f, + 0.011654445f, + 0.815541101f, + -2.46706E-05f, }; test_case.add_input(Shape{3, 6}, inputA); test_case.add_input(Shape{6, 4}, inputB); test_case.add_input(Shape{3, 4}, inputC); test_case.add_expected_output(Shape{3, 4}, output); - test_case.run_with_tolerance_as_fp(1e-6); + test_case.run_with_tolerance_as_fp(1e-6f); } NGRAPH_TEST(${BACKEND_NAME}, onnx_com_microsoft_fused_conv_hard_sigmoid) { diff --git a/src/frontends/onnx/tests/onnx_import_const_folding.in.cpp b/src/frontends/onnx/tests/onnx_import_const_folding.in.cpp index 
1c9a1054bf674d..a6c151d0ee526d 100644
--- a/src/frontends/onnx/tests/onnx_import_const_folding.in.cpp
+++ b/src/frontends/onnx/tests/onnx_import_const_folding.in.cpp
@@ -56,7 +56,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_const_folding_model_scatter_elements) {
                                                   SERIALIZED_ZOO,
                                                   "onnx/scatter_elements_opset11.onnx"));
 
-    test_constant_folding(fn, {1.0, 1.1, 3.0, 2.1, 5.0}, Shape{1, 5});
+    test_constant_folding(fn, {1.0f, 1.1f, 3.0f, 2.1f, 5.0f}, Shape{1, 5});
 }
 
 NGRAPH_TEST(${BACKEND_NAME}, onnx_const_folding_model_non_zero_scalar) {
diff --git a/src/frontends/onnx/tests/onnx_import_controlflow.in.cpp b/src/frontends/onnx/tests/onnx_import_controlflow.in.cpp
index 0c1d0648d8abee..3cf38e223d07ed 100644
--- a/src/frontends/onnx/tests/onnx_import_controlflow.in.cpp
+++ b/src/frontends/onnx/tests/onnx_import_controlflow.in.cpp
@@ -537,7 +537,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_branches_with_same_inputs) {
     auto test_case = test::TestCase(function, s_device);
     std::vector<float> x(40, 2);
     std::vector<float> y(40);
-    std::iota(y.begin(), y.end(), -20);
+    std::iota(y.begin(), y.end(), -20.f);
 
     // condition
     test_case.add_input<bool>({true});
@@ -577,7 +577,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_branches_with_different_inputs) {
     auto test_case = test::TestCase(function, s_device);
     std::vector<float> x(40, 2);
     std::vector<float> y(40);
-    std::iota(y.begin(), y.end(), -20);
+    std::iota(y.begin(), y.end(), -20.f);
 
     // condition
     test_case.add_input<bool>({true});
@@ -649,7 +649,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_inside_if) {
     // expected value == x * y
     std::vector<float> x(40, 2);
     std::vector<float> y(40);
-    std::iota(y.begin(), y.end(), -20);
+    std::iota(y.begin(), y.end(), -20.f);
     std::vector<float> expected;
     std::transform(x.begin(), x.end(), y.begin(), std::back_inserter(expected), [](float i, float j) -> float {
         return i * j;
@@ -663,7 +663,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_inside_if) {
     // case when condition == true and all(x < y)
     // expected value == x + y
     std::iota(x.begin(), x.end(), -static_cast<float>(x.size()));
-    std::iota(y.begin(), y.end(), 1);
+    std::iota(y.begin(), y.end(), 1.f);
     std::transform(x.begin(), x.end(), y.begin(), expected.begin(), [](float i, float j) -> float {
         return i + j;
     });
@@ -703,13 +703,13 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_branches_with_multiple_outputs) {
     // case when condition == true so split is along axis 0
     std::vector<float> x(36);
-    std::iota(x.begin(), x.end(), 0);
+    std::iota(x.begin(), x.end(), 0.f);
     std::vector<float> expected1(12);
-    std::iota(expected1.begin(), expected1.end(), 0);
+    std::iota(expected1.begin(), expected1.end(), 0.f);
     std::vector<float> expected2(12);
-    std::iota(expected2.begin(), expected2.end(), 12);
+    std::iota(expected2.begin(), expected2.end(), 12.f);
     std::vector<float> expected3(12);
-    std::iota(expected3.begin(), expected3.end(), 24);
+    std::iota(expected3.begin(), expected3.end(), 24.f);
 
     test_case.add_input<bool>({true});  // condition
     test_case.add_input(x);
     test_case.add_expected_output(expected1);
@@ -768,9 +768,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_with_only_indentity_in_else_branch) {
     auto test_case = test::TestCase(function, s_device);
 
     std::vector<float> x(shape_size(Shape{1, 5, 2, 2}));
-    std::iota(x.begin(), x.end(), 0);
-    std::vector<float> expected{1.333333, 3, 4.666666, 6.333333, 8, 10, 12, 14, 16, 18,
-                                20, 22, 24, 26, 28, 30, 25.33333, 27, 28.666667, 30.33333};
+    std::iota(x.begin(), x.end(), 0.f);
+    std::vector<float> expected{1.333333f, 3.f, 4.666666f, 6.333333f, 8.f, 10.f, 12.f,
+                                14.f, 16.f, 18.f, 20.f, 22.f, 24.f, 26.f,
+                                28.f, 30.f, 25.33333f, 27.f, 28.666667f, 30.33333f};
     test_case.add_input(x);
     test_case.add_expected_output(expected);
     test_case.run();
@@ -820,7 +821,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_if_dynamic_inputs) {
     auto test_case = test::TestCase(function, s_device);
     std::vector<float> x(40, 2);
     std::vector<float> y(40);
-    std::iota(y.begin(), y.end(), -20);
+    std::iota(y.begin(), y.end(), -20.f);
     std::vector<float> expected;
     std::transform(x.begin(), x.end(), y.begin(), std::back_inserter(expected), [](float i, float j) -> float {
         return i + j;
diff --git a/src/frontends/onnx/tests/onnx_import_dyn_shapes.in.cpp b/src/frontends/onnx/tests/onnx_import_dyn_shapes.in.cpp
index 2afbdef3999159..dfa888bb943ce9 100644
--- a/src/frontends/onnx/tests/onnx_import_dyn_shapes.in.cpp
+++ b/src/frontends/onnx/tests/onnx_import_dyn_shapes.in.cpp
@@ -220,7 +220,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_model_asinh_3_2) {
     auto test_case = test::TestCase(function, s_device);
     test_case.add_input<float>(Shape{3, 2}, {-1.5f, 0.0f, 1.5f, -1.5f, 0.0f, 1.5f});
-    test_case.add_expected_output<float>(Shape{3, 2}, {-1.1947632f, 0.0f, 1.1947632f, -1.1947632, 0.0f, 1.1947632f});
+    test_case.add_expected_output<float>(Shape{3, 2}, {-1.1947632f, 0.0f, 1.1947632f, -1.1947632f, 0.0f, 1.1947632f});
     test_case.run();
 }
 
@@ -713,7 +713,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_transpose) {
     const auto elems_in_tensor = shape_size(shape);
 
     std::vector<float> input_values(elems_in_tensor);
-    std::iota(std::begin(input_values), std::end(input_values), 1);
+    std::iota(std::begin(input_values), std::end(input_values), 1.f);
 
     test_case.add_input(shape, input_values);
@@ -729,9 +729,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_transpose) {
 
 namespace {
 Shape get_flattened_shape(const Shape& in_shape, size_t axis) {
-    size_t first_dim_size =
-        std::accumulate(begin(in_shape), next(begin(in_shape), axis), 1UL, std::multiplies<size_t>());
-    size_t last_dim_size = std::accumulate(next(begin(in_shape), axis), end(in_shape), 1UL, std::multiplies<size_t>());
+    size_t first_dim_size = std::accumulate(begin(in_shape),
+                                            next(begin(in_shape), axis),
+                                            static_cast<size_t>(1),
+                                            std::multiplies<size_t>());
+    size_t last_dim_size =
+        std::accumulate(next(begin(in_shape), axis), end(in_shape), static_cast<size_t>(1), std::multiplies<size_t>());
     return Shape{first_dim_size, last_dim_size};
 }
 }  // namespace
@@ -751,7 +754,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_flatten_axis_0) {
     const auto elems_in_tensor = shape_size(shape);
 
     std::vector<float> input_values(elems_in_tensor);
-    std::iota(input_values.begin(), input_values.end(), 1);
+    std::iota(input_values.begin(), input_values.end(), 1.f);
 
     test_case.add_input(shape, input_values);
@@ -778,7 +781,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_flatten_axis) {
     const auto elems_in_tensor = shape_size(shape);
 
     std::vector<float> input_values(elems_in_tensor);
-    std::iota(input_values.begin(), input_values.end(), 1);
+    std::iota(input_values.begin(), input_values.end(), 1.f);
 
     test_case.add_input(shape, input_values);
@@ -805,7 +808,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_flatten_neg_axis) {
     const auto elems_in_tensor = shape_size(shape);
 
     std::vector<float> input_values(elems_in_tensor);
-    std::iota(input_values.begin(), input_values.end(), 1);
+    std::iota(input_values.begin(), input_values.end(), 1.f);
 
     test_case.add_input(shape, input_values);
@@ -907,7 +910,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_3d_input) {
     const Shape input_shape{3, 4, 1};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({0, 0});
     test_case.add_input<int64_t>({2, 3});
@@ -926,7 +929,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_3d_input_neg_axes) {
     const Shape input_shape{3, 4, 1};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({0, 0});
     test_case.add_input<int64_t>({2, 3});
@@ -945,7 +948,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_3d_input_12_axes) {
     const Shape input_shape{4, 3, 2};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({0, 0});
     test_case.add_input<int64_t>({2, 1});
@@ -963,7 +966,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_3d_input_20_axes) {
     const Shape input_shape{4, 3, 2};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_shape, input_values);
     test_case.add_input<int64_t>({0, 1});
     test_case.add_input<int64_t>({1, 3});
@@ -982,7 +985,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_4d_input_23_axes) {
     const Shape input_shape{2, 2, 2, 2};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({0, 0});
     test_case.add_input<int64_t>({1, 1});
@@ -1000,7 +1003,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_4d_input_0231_axes_ends_ma
     const Shape input_shape{2, 2, 2, 2};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({0, 1, 1, 0});
     test_case.add_input<int64_t>({std::numeric_limits<int64_t>::max(),
@@ -1021,7 +1024,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_4d_input_2103_axes_ends_ma
     const Shape input_shape{2, 2, 2, 5};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({1, 0, 0, 1});
     test_case.add_input<int64_t>({2,
@@ -1043,7 +1046,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_4d_input_23_axes_21_steps)
     const Shape input_shape{2, 2, 6, 2};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({0, 1});
     test_case.add_input<int64_t>({5, 2});
@@ -1060,7 +1063,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_10_default_axes) {
     const Shape input_shape{4, 3, 2};
     std::vector<float> input_values(shape_size(input_shape));
-    std::iota(input_values.begin(), input_values.end(), 0);
+    std::iota(input_values.begin(), input_values.end(), 0.f);
     test_case.add_input(input_values);
     test_case.add_input<int64_t>({1, 1, 1});
     test_case.add_input<int64_t>({2, 2, 2});
@@ -1116,34 +1119,34 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_model_softmax_axis_2) {
         file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/softmax_axis_2.onnx"));
 
     const std::vector<float> input = {
-        2.75793882, -0.50841322, 0.82013929, -0.62409912, -0.96136118,
0.21004745, 1.38337255, - 1.19030397, 2.0940445, -0.03551657, -0.78686039, 1.992782, 0.04300319, -0.29230777, - -0.56797112, -1.26732165, -0.61935399, 0.57670432, 0.92844898, 2.82469233, + 2.75793882f, -0.50841322f, 0.82013929f, -0.62409912f, -0.96136118f, 0.21004745f, 1.38337255f, + 1.19030397f, 2.0940445f, -0.03551657f, -0.78686039f, 1.992782f, 0.04300319f, -0.29230777f, + -0.56797112f, -1.26732165f, -0.61935399f, 0.57670432f, 0.92844898f, 2.82469233f, - 0.98721677, -0.05100663, -1.21178917, -0.17530157, 1.40051805, -0.13259761, -1.14313018, - 0.2673723, -0.87996154, 1.29053106, 1.55, 0.8396538, 1.20729817, 0.23727845, - -0.89113606, -1.70909842, 0.26460363, -0.70566808, 2.383518, 1.07024615, + 0.98721677f, -0.05100663f, -1.21178917f, -0.17530157f, 1.40051805f, -0.13259761f, -1.14313018f, + 0.2673723f, -0.87996154f, 1.29053106f, 1.55f, 0.8396538f, 1.20729817f, 0.23727845f, + -0.89113606f, -1.70909842f, 0.26460363f, -0.70566808f, 2.383518f, 1.07024615f, - -1.21722605, 0.82919357, 0.55765697, 0.12657686, 0.63432172, 0.75425957, -2.43721014, - -1.24478184, 2.65316853, 1.19509542, -0.95523998, 0.5149006, -0.01151649, 0.68327026, - -0.4589638, -0.46554745, 0.21055324, 0.39266729, 2.05098086, 1.83207919}; + -1.21722605f, 0.82919357f, 0.55765697f, 0.12657686f, 0.63432172f, 0.75425957f, -2.43721014f, + -1.24478184f, 2.65316853f, 1.19509542f, -0.95523998f, 0.5149006f, -0.01151649f, 0.68327026f, + -0.4589638f, -0.46554745f, 0.21055324f, 0.39266729f, 2.05098086f, 1.83207919f}; auto test_case = test::TestCase(function, s_device); test_case.add_input(input); test_case.add_expected_output( Shape{3, 4, 5}, - {0.80619486, 0.03075257, 0.1161086, 0.027393, 0.01955098, 0.07012682, 0.22670066, - 0.18689779, 0.4614171, 0.05485763, 0.04486172, 0.72286838, 0.10286818, 0.07356265, - 0.05583908, 0.01280724, 0.02448298, 0.08096658, 0.11509768, 0.76664552, + {0.80619486f, 0.03075257f, 0.1161086f, 0.027393f, 0.01955098f, 0.07012682f, 0.22670066f, + 0.18689779f, 0.4614171f, 0.05485763f, 0.04486172f, 0.72286838f, 0.10286818f, 0.07356265f, + 0.05583908f, 0.01280724f, 0.02448298f, 0.08096658f, 0.11509768f, 0.76664552f, - 0.30399806, 0.1076406, 0.03371745, 0.0950595, 0.4595844, 0.13369873, 0.04866969, - 0.19944906, 0.06332151, 0.55486101, 0.39101105, 0.19217177, 0.27755913, 0.10521588, - 0.03404216, 0.01150354, 0.08279411, 0.03137732, 0.68902071, 0.18530432, + 0.30399806f, 0.1076406f, 0.03371745f, 0.0950595f, 0.4595844f, 0.13369873f, 0.04866969f, + 0.19944906f, 0.06332151f, 0.55486101f, 0.39101105f, 0.19217177f, 0.27755913f, 0.10521588f, + 0.03404216f, 0.01150354f, 0.08279411f, 0.03137732f, 0.68902071f, 0.18530432f, - 0.0402528, 0.31156222, 0.23747503, 0.1543129, 0.25639705, 0.10627912, 0.00436928, - 0.01439711, 0.70979614, 0.16515835, 0.06798343, 0.2957175, 0.17468555, 0.34994439, - 0.11166912, 0.03615172, 0.07108136, 0.08527994, 0.44775794, 0.35972905}); + 0.0402528f, 0.31156222f, 0.23747503f, 0.1543129f, 0.25639705f, 0.10627912f, 0.00436928f, + 0.01439711f, 0.70979614f, 0.16515835f, 0.06798343f, 0.2957175f, 0.17468555f, 0.34994439f, + 0.11166912f, 0.03615172f, 0.07108136f, 0.08527994f, 0.44775794f, 0.35972905f}); test_case.run(3); } @@ -1184,7 +1187,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_instance_normalization_dyn_shape) { Shape data_shape{1, 2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); @@ -1207,7 +1210,7 @@ NGRAPH_TEST(${BACKEND_NAME}, 
onnx_model_instance_normalization_dyn_shape2) { Shape data_shape{1, 2, 3, 4}; std::vector data(shape_size(data_shape)); - std::iota(std::begin(data), std::end(data), 1); + std::iota(std::begin(data), std::end(data), 1.f); auto test_case = test::TestCase(function, s_device); @@ -1275,7 +1278,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_dyn_shapes_slice_1_3d_input_21_axes_ends_max) const Shape input_shape{1, 2, 3, 4}; std::vector input_values(shape_size(input_shape)); - std::iota(input_values.begin(), input_values.end(), 0); + std::iota(input_values.begin(), input_values.end(), 0.f); test_case.add_input(input_shape, input_values); test_case.add_expected_output(Shape{1, 1, 3, 3}, {13, 14, 15, 17, 18, 19, 21, 22, 23}); test_case.run(); @@ -1315,7 +1318,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_max_pool_dyn_rank_without_default_attrs) Shape input_shape{1, 1, 4, 4}; std::vector input(shape_size(input_shape)); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); test_case.add_input(input_shape, input); test_case.add_expected_output(Shape{1, 1, 3, 3}, {5, 6, 7, 9, 10, 11, 13, 14, 15}); test_case.run(); @@ -1327,7 +1330,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_depth_to_space_dynamic_input) { "onnx/dynamic_shapes/depth_to_space.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{0.f, 8.f, 1.f, 9.f, 16.f, 24.f, 17.f, 25.f, 2.f, 10.f, 3.f, 11.f, 18.f, 26.f, 19.f, 27.f, 4.f, 12.f, 5.f, 13.f, 20.f, 28.f, @@ -1345,7 +1348,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_space_to_depth_dynamic_input) { "onnx/dynamic_shapes/space_to_depth.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{ 0.f, 2.f, 8.f, 10.f, 16.f, 18.f, 24.f, 26.f, 1.f, 3.f, 9.f, 11.f, 17.f, 19.f, 25.f, 27.f, diff --git a/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp b/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp index fe85ef213db8ae..909eb4bb7bf2fb 100644 --- a/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_org_openvino.in.cpp @@ -57,14 +57,14 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_prior_box) { std::vector A(3 * 2 * 2); std::vector B(3 * 6 * 6); std::vector output = { - -2.3200002, -2.3200002, 3.6533334, 3.6533334, -3.7053659, -3.7053659, 5.0386992, 5.0386992, - -0.98666668, -2.3200002, 4.9866667, 3.6533334, -2.3720326, -3.7053659, 6.3720322, 5.0386992, - -2.3200002, -0.98666668, 3.6533334, 4.9866667, -3.7053659, -2.3720326, 5.0386992, 6.3720322, - -0.98666668, -0.98666668, 4.9866667, 4.9866667, -2.3720326, -2.3720326, 6.3720322, 6.3720322, - 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, - 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, - 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, - 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, + -2.3200002f, -2.3200002f, 3.6533334f, 3.6533334f, -3.7053659f, -3.7053659f, 5.0386992f, 5.0386992f, + -0.98666668f, -2.3200002f, 4.9866667f, 3.6533334f, -2.3720326f, -3.7053659f, 6.3720322f, 5.0386992f, + -2.3200002f, -0.98666668f, 3.6533334f, 4.9866667f, -3.7053659f, -2.3720326f, 5.0386992f, 6.3720322f, + -0.98666668f, -0.98666668f, 4.9866667f, 4.9866667f, -2.3720326f, -2.3720326f, 6.3720322f, 6.3720322f, + 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, + 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, + 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, + 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, }; test_case.add_input(A); 
test_case.add_input(B); @@ -78,11 +78,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_priorbox_clustered) { "onnx/priorbox_clustered.onnx")); auto test_case = test::TestCase(function, s_device); - std::vector A{15.0}; - std::vector B{10.0}; + std::vector A{15.0f}; + std::vector B{10.0f}; std::vector output = { - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, 0.1f, 0.1f, 0.2f, 0.2f, }; test_case.add_input(A); test_case.add_input(B); @@ -101,22 +101,22 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_priorbox_clustered_most_attrs_default) { std::iota(std::begin(A), std::end(A), 0.0f); std::vector B(1 * 1 * 3 * 3); std::iota(std::begin(B), std::end(B), 0.0f); - std::vector output = {-0.1666666716337203979, - -0.1666666716337203979, - 0.1666666716337203979, - 0.1666666716337203979, - -0.1666666716337203979, - 0.3333333432674407959, - 0.1666666716337203979, - 0.6666666865348815918, - 0.1, - 0.1, - 0.2, - 0.2, - 0.1, - 0.1, - 0.2, - 0.2}; + std::vector output = {-0.1666666716337203979f, + -0.1666666716337203979f, + 0.1666666716337203979f, + 0.1666666716337203979f, + -0.1666666716337203979f, + 0.3333333432674407959f, + 0.1666666716337203979f, + 0.6666666865348815918f, + 0.1f, + 0.1f, + 0.2f, + 0.2f, + 0.1f, + 0.1f, + 0.2f, + 0.2f}; test_case.add_input(A); test_case.add_input(B); test_case.add_expected_output(Shape{1, 2, 8}, output); @@ -170,11 +170,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_detection_output) { std::vector logits = gen_vector(12, -2, 2); std::vector class_preds = gen_vector(9, 0, 1); std::vector proposals = gen_vector(12 * 2, 0, 1); - std::vector output = {0, 1, 0.777778, 0.279849, 0.283779, 0.562743, 0.695387, - 0, 1, 0.444444, 0.12963, 0.176075, 0.212963, 0.284573, - 0, 2, 0.888889, 0.279849, 0.283779, 0.562743, 0.695387, - 0, 2, 0.555556, 0.12963, 0.176075, 0.212963, 0.284573, - 0, 2, 0.222222, -0.0608094, -0.0142007, -0.0225239, 0.0304044}; + std::vector output = {0, 1, 0.777778f, 0.279849f, 0.283779f, 0.562743f, 0.695387f, + 0, 1, 0.444444f, 0.12963f, 0.176075f, 0.212963f, 0.284573f, + 0, 2, 0.888889f, 0.279849f, 0.283779f, 0.562743f, 0.695387f, + 0, 2, 0.555556f, 0.12963f, 0.176075f, 0.212963f, 0.284573f, + 0, 2, 0.222222f, -0.0608094f, -0.0142007f, -0.0225239f, 0.0304044f}; test_case.add_input(logits); test_case.add_input(class_preds); test_case.add_input(proposals); @@ -188,18 +188,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_group_norm) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/group_norm.onnx")); auto test_case = test::TestCase(function, s_device); Shape shape{2, 8, 2, 2}; - int size = shape_size(shape); + const auto size = shape_size(shape); std::vector data(size); - std::iota(data.begin(), data.end(), 0); + std::iota(data.begin(), data.end(), 0.f); std::vector output = { - -0.52752507, -0.09108937, 0.3453464, 0.78178215, 2.4364357, 3.309307, 4.1821785, 5.05505, - -1.5825753, -0.27326822, 1.0360391, 2.3453465, 4.8728714, 6.618614, 8.364357, 10.1101, - -2.6376252, -0.45544672, 1.726732, 3.9089108, 7.309307, 9.927921, 12.546536, 15.165151, - -3.6926756, -0.6376257, 2.4174247, 5.472475, 9.745743, 13.237228, 16.728714, 20.2202, - -0.52752507, -0.09108937, 0.3453464, 0.78178215, 2.4364357, 3.309307, 4.1821785, 5.05505, - -1.5825753, -0.27326822, 1.0360391, 2.3453465, 
4.8728714, 6.618614, 8.364357, 10.1101, - -2.6376252, -0.45544672, 1.726732, 3.9089108, 7.309307, 9.927921, 12.546536, 15.165151, - -3.6926756, -0.6376257, 2.4174247, 5.472475, 9.745743, 13.237228, 16.728714, 20.2202, + -0.52752507f, -0.09108937f, 0.3453464f, 0.78178215f, 2.4364357f, 3.309307f, 4.1821785f, 5.05505f, + -1.5825753f, -0.27326822f, 1.0360391f, 2.3453465f, 4.8728714f, 6.618614f, 8.364357f, 10.1101f, + -2.6376252f, -0.45544672f, 1.726732f, 3.9089108f, 7.309307f, 9.927921f, 12.546536f, 15.165151f, + -3.6926756f, -0.6376257f, 2.4174247f, 5.472475f, 9.745743f, 13.237228f, 16.728714f, 20.2202f, + -0.52752507f, -0.09108937f, 0.3453464f, 0.78178215f, 2.4364357f, 3.309307f, 4.1821785f, 5.05505f, + -1.5825753f, -0.27326822f, 1.0360391f, 2.3453465f, 4.8728714f, 6.618614f, 8.364357f, 10.1101f, + -2.6376252f, -0.45544672f, 1.726732f, 3.9089108f, 7.309307f, 9.927921f, 12.546536f, 15.165151f, + -3.6926756f, -0.6376257f, 2.4174247f, 5.472475f, 9.745743f, 13.237228f, 16.728714f, 20.2202f, }; test_case.add_input(data); @@ -212,15 +212,16 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_group_norm_5d) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/group_norm_5d.onnx")); auto test_case = test::TestCase(function, s_device); Shape shape{2, 8, 1, 2, 1}; - int size = shape_size(shape); + const auto size = shape_size(shape); std::vector data(size); - std::iota(data.begin(), data.end(), 0); - std::vector output = { - -0.34163546562, 0.55278813838, 2.89442372322, 4.68327093124, -1.02490639686, 1.65836453437, 5.78884744644, - 9.36654186248, -1.70817732810, 2.76394081115, 8.68327140808, 14.04981231689, -2.39144825935, 3.86951708793, - 11.57769489288, 18.73308372497, -0.34163546562, 0.55278813838, 2.89442372322, 4.68327093124, -1.02490639686, - 1.65836453437, 5.78884744644, 9.36654186248, -1.70817732810, 2.76394081115, 8.68327140808, 14.04981231689, - -2.39144825935, 3.86951708793, 11.57769489288, 18.73308372497}; + std::iota(data.begin(), data.end(), 0.f); + std::vector output = {-0.34163546562f, 0.55278813838f, 2.89442372322f, 4.68327093124f, -1.02490639686f, + 1.65836453437f, 5.78884744644f, 9.36654186248f, -1.70817732810f, 2.76394081115f, + 8.68327140808f, 14.04981231689f, -2.39144825935f, 3.86951708793f, 11.57769489288f, + 18.73308372497f, -0.34163546562f, 0.55278813838f, 2.89442372322f, 4.68327093124f, + -1.02490639686f, 1.65836453437f, 5.78884744644f, 9.36654186248f, -1.70817732810f, + 2.76394081115f, 8.68327140808f, 14.04981231689f, -2.39144825935f, 3.86951708793f, + 11.57769489288f, 18.73308372497f}; test_case.add_input(data); test_case.add_expected_output(shape, output); @@ -232,20 +233,20 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_normalize) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/normalize.onnx")); auto test_case = test::TestCase(function, s_device); std::vector data(12); - std::iota(data.begin(), data.end(), 1); + std::iota(data.begin(), data.end(), 1.f); std::vector output = { - 0.19334731, - 0.33806169, - 0.44846106, - 0.53452247, - 1.4501048, - 1.5212777, - 1.5696137, - 1.6035674, - 3.4802516, - 3.3806169, - 3.2887144, - 3.2071347, + 0.19334731f, + 0.33806169f, + 0.44846106f, + 0.53452247f, + 1.4501048f, + 1.5212777f, + 1.5696137f, + 1.6035674f, + 3.4802516f, + 3.3806169f, + 3.2887144f, + 3.2071347f, }; test_case.add_input(data); test_case.add_expected_output(Shape{1, 3, 2, 2}, output); @@ -260,7 +261,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_swish_with_beta) { auto test_case = test::TestCase(function, s_device); 
std::vector input_data{-0.5f, 0, 0.5f}; test_case.add_input(input_data); - test_case.add_expected_output(expected_output_shape, {-0.2036667, 0.0, 0.2963333}); + test_case.add_expected_output(expected_output_shape, {-0.2036667f, 0.0f, 0.2963333f}); test_case.run_with_tolerance_as_fp(2.0e-5f); } @@ -274,7 +275,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_swish_without_beta) { auto test_case = test::TestCase(function, s_device); std::vector input_data{-0.5f, 0, 0.5f}; test_case.add_input(input_data); - test_case.add_expected_output(expected_output_shape, {-0.18877034, 0.0, 0.31122968}); + test_case.add_expected_output(expected_output_shape, {-0.18877034f, 0.0f, 0.31122968f}); test_case.run_with_tolerance_as_fp(2.0e-5f); } @@ -313,9 +314,9 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_experimental_detectron_detection_output) test_case.add_expected_output(Shape{5, 4}, { 0.8929862f, - 0.892986297607421875, - 12.10701370239257812, - 12.10701370239257812, + 0.892986297607421875f, + 12.10701370239257812f, + 12.10701370239257812f, 0, 0.0f, 0.0f, @@ -440,18 +441,18 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_experimental_detectron_group_norm) { auto test_case = test::TestCase(function, s_device); Shape shape{2, 8, 2, 2}; - int size = shape_size(shape); + const auto size = shape_size(shape); std::vector data(size); - std::iota(data.begin(), data.end(), 0); + std::iota(data.begin(), data.end(), 0.f); std::vector output = { - -0.52752507, -0.09108937, 0.3453464, 0.78178215, 2.4364357, 3.309307, 4.1821785, 5.05505, - -1.5825753, -0.27326822, 1.0360391, 2.3453465, 4.8728714, 6.618614, 8.364357, 10.1101, - -2.6376252, -0.45544672, 1.726732, 3.9089108, 7.309307, 9.927921, 12.546536, 15.165151, - -3.6926756, -0.6376257, 2.4174247, 5.472475, 9.745743, 13.237228, 16.728714, 20.2202, - -0.52752507, -0.09108937, 0.3453464, 0.78178215, 2.4364357, 3.309307, 4.1821785, 5.05505, - -1.5825753, -0.27326822, 1.0360391, 2.3453465, 4.8728714, 6.618614, 8.364357, 10.1101, - -2.6376252, -0.45544672, 1.726732, 3.9089108, 7.309307, 9.927921, 12.546536, 15.165151, - -3.6926756, -0.6376257, 2.4174247, 5.472475, 9.745743, 13.237228, 16.728714, 20.2202, + -0.52752507f, -0.09108937f, 0.3453464f, 0.78178215f, 2.4364357f, 3.309307f, 4.1821785f, 5.05505f, + -1.5825753f, -0.27326822f, 1.0360391f, 2.3453465f, 4.8728714f, 6.618614f, 8.364357f, 10.1101f, + -2.6376252f, -0.45544672f, 1.726732f, 3.9089108f, 7.309307f, 9.927921f, 12.546536f, 15.165151f, + -3.6926756f, -0.6376257f, 2.4174247f, 5.472475f, 9.745743f, 13.237228f, 16.728714f, 20.2202f, + -0.52752507f, -0.09108937f, 0.3453464f, 0.78178215f, 2.4364357f, 3.309307f, 4.1821785f, 5.05505f, + -1.5825753f, -0.27326822f, 1.0360391f, 2.3453465f, 4.8728714f, 6.618614f, 8.364357f, 10.1101f, + -2.6376252f, -0.45544672f, 1.726732f, 3.9089108f, 7.309307f, 9.927921f, 12.546536f, 15.165151f, + -3.6926756f, -0.6376257f, 2.4174247f, 5.472475f, 9.745743f, 13.237228f, 16.728714f, 20.2202f, }; test_case.add_input(data); @@ -468,13 +469,13 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_experimental_detectron_prior_grid_genera auto test_case = test::TestCase(function, s_device); std::vector priors(shape_size(Shape{3, 4})); - std::iota(priors.begin(), priors.end(), 0); + std::iota(priors.begin(), priors.end(), 0.f); std::vector feature_map(shape_size(Shape{1, 1, 1, 3})); - std::iota(feature_map.begin(), feature_map.end(), 0); + std::iota(feature_map.begin(), feature_map.end(), 0.f); std::vector im_data(shape_size(Shape{1, 3, 4, 7})); - std::iota(im_data.begin(), im_data.end(), 0); + std::iota(im_data.begin(), 
im_data.end(), 0.f); test_case.add_input(priors); test_case.add_input(feature_map); @@ -495,51 +496,51 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_experimental_detectron_roi_feature_extra auto test_case = test::TestCase(function, s_device); std::vector rois(shape_size(Shape{2, 4})); - std::iota(rois.begin(), rois.end(), 0); + std::iota(rois.begin(), rois.end(), 0.f); std::vector pyramid_layer_0(shape_size(Shape{1, 2, 2, 3})); - std::iota(pyramid_layer_0.begin(), pyramid_layer_0.end(), 0); + std::iota(pyramid_layer_0.begin(), pyramid_layer_0.end(), 0.f); test_case.add_input(rois); test_case.add_input(pyramid_layer_0); test_case.add_expected_output(Shape{2, 2, 3, 3}, - {1.416666746139526367, - 1.750000119209289551, - 2.083333492279052734, - 2.416666746139526367, - 2.75, - 3.083333492279052734, - 3.166666507720947266, - 3.5, - 3.833333492279052734, - 7.416666507720947266, - 7.75, - 8.083333015441894531, - 8.416666984558105469, - 8.75, - 9.083333969116210938, - 9.166666030883789062, - 9.5, - 9.833333969116210938, - 4.166666984558105469, - 4.5, - 4.833333492279052734, - 4.166666984558105469, - 4.5, - 4.833333492279052734, - 2.083333492279052734, - 2.25, - 2.416666746139526367, - 10.16666603088378906, - 10.5, - 10.83333206176757812, - 10.16666603088378906, - 10.5, - 10.83333206176757812, - 5.083333015441894531, - 5.25, - 5.416666507720947266}); + {1.416666746139526367f, + 1.750000119209289551f, + 2.083333492279052734f, + 2.416666746139526367f, + 2.75f, + 3.083333492279052734f, + 3.166666507720947266f, + 3.5f, + 3.833333492279052734f, + 7.416666507720947266f, + 7.75f, + 8.083333015441894531f, + 8.416666984558105469f, + 8.75f, + 9.083333969116210938f, + 9.166666030883789062f, + 9.5f, + 9.833333969116210938f, + 4.166666984558105469f, + 4.5f, + 4.833333492279052734f, + 4.166666984558105469f, + 4.5f, + 4.833333492279052734f, + 2.083333492279052734f, + 2.25f, + 2.416666746139526367f, + 10.16666603088378906f, + 10.5f, + 10.83333206176757812f, + 10.16666603088378906f, + 10.5f, + 10.83333206176757812f, + 5.083333015441894531f, + 5.25f, + 5.416666507720947266f}); test_case.add_expected_output(Shape{2, 4}, {0, 1, 2, 3, 4, 5, 6, 7}); test_case.run(); @@ -593,29 +594,32 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_generate_proposals) { // scores test_case.add_input( Shape{1, 3, 2, 6}, - {0.56637216, 0.90457034, 0.69827306, 0.4353543, 0.47985056, 0.42658508, 0.14516132, 0.08081771, 0.1799732, - 0.9229515, 0.42420176, 0.50857586, 0.82664067, 0.4972319, 0.3752427, 0.56731623, 0.18241242, 0.33252355, - 0.30608943, 0.6572437, 0.69185436, 0.88646156, 0.36985755, 0.5590753, 0.5256446, 0.03342898, 0.1344396, - 0.68642473, 0.37953874, 0.32575172, 0.21108444, 0.5661886, 0.45378175, 0.62126315, 0.26799858, 0.37272978}); + {0.56637216f, 0.90457034f, 0.69827306f, 0.4353543f, 0.47985056f, 0.42658508f, 0.14516132f, 0.08081771f, + 0.1799732f, 0.9229515f, 0.42420176f, 0.50857586f, 0.82664067f, 0.4972319f, 0.3752427f, 0.56731623f, + 0.18241242f, 0.33252355f, 0.30608943f, 0.6572437f, 0.69185436f, 0.88646156f, 0.36985755f, 0.5590753f, + 0.5256446f, 0.03342898f, 0.1344396f, 0.68642473f, 0.37953874f, 0.32575172f, 0.21108444f, 0.5661886f, + 0.45378175f, 0.62126315f, 0.26799858f, 0.37272978f}); // deltas test_case.add_input( Shape{1, 12, 2, 6}, - {0.5337073, 0.86607957, 0.55151343, 0.21626699, 0.4462629, 0.03985678, 0.5157072, 0.9932138, 0.7565954, - 0.43803605, 0.802818, 0.14834064, 0.53932905, 0.14314, 0.3817048, 0.95075196, 0.05516243, 0.2567484, - 0.25508744, 0.77438325, 0.43561, 0.2094628, 0.8299043, 0.44982538, 0.95615596, 
0.5651084, 0.11801951, - 0.05352486, 0.9774733, 0.14439464, 0.62644225, 0.14370479, 0.54161614, 0.557915, 0.53102225, 0.0840179, - 0.7249888, 0.9843559, 0.5490522, 0.53788143, 0.822474, 0.3278008, 0.39688024, 0.3286012, 0.5117038, - 0.04743988, 0.9408995, 0.29885054, 0.81039643, 0.85277915, 0.06807619, 0.86430097, 0.36225632, 0.16606331, - 0.5401001, 0.7541649, 0.11998601, 0.5131829, 0.40606487, 0.327888, 0.27721855, 0.6378373, 0.22795396, - 0.4961256, 0.3215895, 0.15607187, 0.14782153, 0.8908137, 0.8835288, 0.834191, 0.29907143, 0.7983525, - 0.755875, 0.30837986, 0.0839176, 0.26624718, 0.04371626, 0.09472824, 0.20689541, 0.37622106, 0.1083321, - 0.1342548, 0.05815459, 0.7676379, 0.8105144, 0.92348766, 0.26761323, 0.7183306, 0.8947588, 0.19020908, - 0.42731014, 0.7473663, 0.85775334, 0.9340091, 0.3278848, 0.755993, 0.05307213, 0.39705503, 0.21003333, - 0.5625373, 0.66188884, 0.80521655, 0.6125863, 0.44678232, 0.97802377, 0.0204936, 0.02686367, 0.7390654, - 0.74631, 0.58399844, 0.5988792, 0.37413648, 0.5946692, 0.6955776, 0.36377597, 0.7891322, 0.40900692, - 0.99139464, 0.50169915, 0.41435778, 0.17142445, 0.26761186, 0.31591868, 0.14249913, 0.12919712, 0.5418711, - 0.6523203, 0.50259084, 0.7379765, 0.01171071, 0.94423133, 0.00841132, 0.97486794, 0.2921785, 0.7633071, - 0.88477814, 0.03563205, 0.50833166, 0.01354555, 0.535081, 0.41366324, 0.0694767, 0.9944055, 0.9981207}); + {0.5337073f, 0.86607957f, 0.55151343f, 0.21626699f, 0.4462629f, 0.03985678f, 0.5157072f, 0.9932138f, + 0.7565954f, 0.43803605f, 0.802818f, 0.14834064f, 0.53932905f, 0.14314f, 0.3817048f, 0.95075196f, + 0.05516243f, 0.2567484f, 0.25508744f, 0.77438325f, 0.43561f, 0.2094628f, 0.8299043f, 0.44982538f, + 0.95615596f, 0.5651084f, 0.11801951f, 0.05352486f, 0.9774733f, 0.14439464f, 0.62644225f, 0.14370479f, + 0.54161614f, 0.557915f, 0.53102225f, 0.0840179f, 0.7249888f, 0.9843559f, 0.5490522f, 0.53788143f, + 0.822474f, 0.3278008f, 0.39688024f, 0.3286012f, 0.5117038f, 0.04743988f, 0.9408995f, 0.29885054f, + 0.81039643f, 0.85277915f, 0.06807619f, 0.86430097f, 0.36225632f, 0.16606331f, 0.5401001f, 0.7541649f, + 0.11998601f, 0.5131829f, 0.40606487f, 0.327888f, 0.27721855f, 0.6378373f, 0.22795396f, 0.4961256f, + 0.3215895f, 0.15607187f, 0.14782153f, 0.8908137f, 0.8835288f, 0.834191f, 0.29907143f, 0.7983525f, + 0.755875f, 0.30837986f, 0.0839176f, 0.26624718f, 0.04371626f, 0.09472824f, 0.20689541f, 0.37622106f, + 0.1083321f, 0.1342548f, 0.05815459f, 0.7676379f, 0.8105144f, 0.92348766f, 0.26761323f, 0.7183306f, + 0.8947588f, 0.19020908f, 0.42731014f, 0.7473663f, 0.85775334f, 0.9340091f, 0.3278848f, 0.755993f, + 0.05307213f, 0.39705503f, 0.21003333f, 0.5625373f, 0.66188884f, 0.80521655f, 0.6125863f, 0.44678232f, + 0.97802377f, 0.0204936f, 0.02686367f, 0.7390654f, 0.74631f, 0.58399844f, 0.5988792f, 0.37413648f, + 0.5946692f, 0.6955776f, 0.36377597f, 0.7891322f, 0.40900692f, 0.99139464f, 0.50169915f, 0.41435778f, + 0.17142445f, 0.26761186f, 0.31591868f, 0.14249913f, 0.12919712f, 0.5418711f, 0.6523203f, 0.50259084f, + 0.7379765f, 0.01171071f, 0.94423133f, 0.00841132f, 0.97486794f, 0.2921785f, 0.7633071f, 0.88477814f, + 0.03563205f, 0.50833166f, 0.01354555f, 0.535081f, 0.41366324f, 0.0694767f, 0.9944055f, 0.9981207f}); // im_info test_case.add_input(Shape{1, 3}, {200, 200, 0}); // anchors @@ -623,11 +627,11 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_generate_proposals) { test_case.add_expected_output( Shape{6, 4}, - {0.12904608, 1.3703424, 3.6230984, 3.4675088, 0.9725206, 0., 4.4917974, 4.9623675, - 4.882682, 5.1236916, 7.1700497, 10.213073, 
4.4913187, 4.305372, 8.750267, 8.803502, - 0.9777608, 1.0317986, 3.228293, 4.495021, 4.125554, 5.4091997, 6.35439, 10.124915}); + {0.12904608f, 1.3703424f, 3.6230984f, 3.4675088f, 0.9725206f, 0., 4.4917974f, 4.9623675f, + 4.882682f, 5.1236916f, 7.1700497f, 10.213073f, 4.4913187f, 4.305372f, 8.750267f, 8.803502f, + 0.9777608f, 1.0317986f, 3.228293f, 4.495021f, 4.125554f, 5.4091997f, 6.35439f, 10.124915f}); test_case.add_expected_output(Shape{6}, - {0.9229515, 0.90457034, 0.88646156, 0.82664067, 0.69827306, 0.69185436}); + {0.9229515f, 0.90457034f, 0.88646156f, 0.82664067f, 0.69827306f, 0.69185436f}); test_case.add_expected_output(Shape{1}, {6}); test_case.run(); } diff --git a/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp b/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp index 110fbe656614de..d57b9c6ad64b32 100644 --- a/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_org_pytorch.in.cpp @@ -34,23 +34,23 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_adaptive_avg_pooling2d_nchw) { "onnx/org.pytorch/adaptive_avg_pooling2d_nchw.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.9945, - 0.3466, - 0.2894, - 0.9318, - 0.0115, - 0.4867, - 0.7608, - 0.1550, - 0.8485, - 0.4971, - 0.8833, - 0.4579, - 0.3673, - 0.5410, - 0.2004, - 0.1519}); - test_case.add_expected_output(Shape{1, 1, 2, 2}, {0.4598249, 0.5342500, 0.5634750, 0.4233750}); + test_case.add_input({0.9945f, + 0.3466f, + 0.2894f, + 0.9318f, + 0.0115f, + 0.4867f, + 0.7608f, + 0.1550f, + 0.8485f, + 0.4971f, + 0.8833f, + 0.4579f, + 0.3673f, + 0.5410f, + 0.2004f, + 0.1519f}); + test_case.add_expected_output(Shape{1, 1, 2, 2}, {0.4598249f, 0.5342500f, 0.5634750f, 0.4233750f}); test_case.run(); } @@ -61,8 +61,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_adaptive_avg_pooling2d_chw) { "onnx/org.pytorch/adaptive_avg_pooling2d_chw.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({12.0, -1.0, -56.0, 20.0, 1.0, -8.0, 7.0, 9.0}); + test_case.add_input({12.0f, -1.0f, -56.0f, 20.0f, 1.0f, -8.0f, 7.0f, 9.0f}); - test_case.add_expected_output(Shape{1, 2, 2}, {5.5, -18.0, -3.5, 8.0}); + test_case.add_expected_output(Shape{1, 2, 2}, {5.5f, -18.0f, -3.5f, 8.0f}); test_case.run(); } diff --git a/src/frontends/onnx/tests/onnx_import_quant.in.cpp b/src/frontends/onnx/tests/onnx_import_quant.in.cpp index 784bb8a0ae2ad3..d49b96c22b7f91 100644 --- a/src/frontends/onnx/tests/onnx_import_quant.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_quant.in.cpp @@ -1061,7 +1061,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_fake_quantize_const_inputs_infer) { const Shape data_shape{1, 2, 3, 4}; const auto n_elements = shape_size(data_shape); std::vector input_data(n_elements); - std::iota(std::begin(input_data), std::end(input_data), 0); + std::iota(std::begin(input_data), std::end(input_data), 0.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(input_data); @@ -1081,7 +1081,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_fake_quantize_nonconst_inputs_infer) { const Shape data_shape{1, 2, 3, 4}; const size_t n_elements = shape_size(data_shape); std::vector input_data(n_elements); - std::iota(std::begin(input_data), std::end(input_data), 0); + std::iota(std::begin(input_data), std::end(input_data), 0.f); auto test_case = test::TestCase(function, s_device); test_case.add_input(input_data); diff --git a/src/frontends/onnx/tests/onnx_import_reshape.in.cpp b/src/frontends/onnx/tests/onnx_import_reshape.in.cpp index 
771000f8091db8..645ccd8cc4cea7 100644 --- a/src/frontends/onnx/tests/onnx_import_reshape.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_reshape.in.cpp @@ -129,29 +129,29 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_reshape_negative_dim) { "onnx/reshape_negative_dim.onnx")); // 2x3x4 - auto input = test::NDArray({{{0.5488135, 0.71518934, 0.60276335, 0.5448832}, - {0.4236548, 0.6458941, 0.4375872, 0.891773}, - {0.96366274, 0.3834415, 0.79172504, 0.5288949}}, + auto input = test::NDArray({{{0.5488135f, 0.71518934f, 0.60276335f, 0.5448832f}, + {0.4236548f, 0.6458941f, 0.4375872f, 0.891773f}, + {0.96366274f, 0.3834415f, 0.79172504f, 0.5288949f}}, - {{0.56804454, 0.92559665, 0.07103606, 0.0871293}, - {0.0202184, 0.83261985, 0.77815676, 0.87001216}, - {0.9786183, 0.7991586, 0.46147937, 0.7805292}}}) + {{0.56804454f, 0.92559665f, 0.07103606f, 0.0871293f}, + {0.0202184f, 0.83261985f, 0.77815676f, 0.87001216f}, + {0.9786183f, 0.7991586f, 0.46147937f, 0.7805292f}}}) .get_vector(); // 2x6x2 - auto expected_output = test::NDArray({{{0.5488135, 0.71518934}, - {0.60276335, 0.5448832}, - {0.4236548, 0.6458941}, - {0.4375872, 0.891773}, - {0.96366274, 0.3834415}, - {0.79172504, 0.5288949}}, - - {{0.56804454, 0.92559665}, - {0.07103606, 0.0871293}, - {0.0202184, 0.83261985}, - {0.77815676, 0.87001216}, - {0.9786183, 0.7991586}, - {0.46147937, 0.7805292}}}) + auto expected_output = test::NDArray({{{0.5488135f, 0.71518934f}, + {0.60276335f, 0.5448832f}, + {0.4236548f, 0.6458941f}, + {0.4375872f, 0.891773f}, + {0.96366274f, 0.3834415f}, + {0.79172504f, 0.5288949f}}, + + {{0.56804454f, 0.92559665f}, + {0.07103606f, 0.0871293f}, + {0.0202184f, 0.83261985f}, + {0.77815676f, 0.87001216f}, + {0.9786183f, 0.7991586f}, + {0.46147937f, 0.7805292f}}}) .get_vector(); auto test_case = test::TestCase(function, s_device); @@ -207,7 +207,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_depth_to_space) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/depth_to_space.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{0.f, 8.f, 1.f, 9.f, 16.f, 24.f, 17.f, 25.f, 2.f, 10.f, 3.f, 11.f, 18.f, 26.f, 19.f, 27.f, 4.f, 12.f, 5.f, 13.f, 20.f, 28.f, @@ -224,7 +224,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_depth_to_space_v1) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/depth_to_space_v1.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{0.f, 8.f, 1.f, 9.f, 16.f, 24.f, 17.f, 25.f, 2.f, 10.f, 3.f, 11.f, 18.f, 26.f, 19.f, 27.f, 4.f, 12.f, 5.f, 13.f, 20.f, 28.f, @@ -242,7 +242,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_depth_to_space_crd) { "onnx/depth_to_space_crd.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{0.f, 4.f, 1.f, 5.f, 8.f, 12.f, 9.f, 13.f, 2.f, 6.f, 3.f, 7.f, 10.f, 14.f, 11.f, 15.f, 16.f, 20.f, 17.f, 21.f, 24.f, 28.f, @@ -304,7 +304,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_space_to_depth) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/space_to_depth.onnx")); std::vector input(32); - std::iota(input.begin(), input.end(), 0); + std::iota(input.begin(), input.end(), 0.f); std::vector expected_output{ 0.f, 2.f, 8.f, 10.f, 16.f, 18.f, 24.f, 26.f, 1.f, 3.f, 9.f, 11.f, 17.f, 19.f, 25.f, 27.f, diff --git 
a/src/frontends/onnx/tests/onnx_import_rnn.in.cpp b/src/frontends/onnx/tests/onnx_import_rnn.in.cpp index fbe25a2b0b0227..06b28aa76a88e6 100644 --- a/src/frontends/onnx/tests/onnx_import_rnn.in.cpp +++ b/src/frontends/onnx/tests/onnx_import_rnn.in.cpp @@ -39,12 +39,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_default_const) { "onnx/lstm_fwd_default_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 1, 1, 2}, - {-0.063373, -0.20347191, -0.07230289, -0.13298286}); // Y_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.07230289, -0.13298286}); // Y_h_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.1557954, -0.24502525}); // Y_c_data + {-0.063373f, -0.20347191f, -0.07230289f, -0.13298286f}); // Y_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.07230289f, -0.13298286f}); // Y_h_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.1557954f, -0.24502525f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -55,12 +55,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_reverse_const) { "onnx/lstm_reverse_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 1, 1, 2}, - {-0.06082131, -0.19985214, 0.00860566, 0.00920492}); // Y_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.06082131, -0.19985214}); // Y_h_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.25917438, -0.3832652}); // Y_c_data + {-0.06082131f, -0.19985214f, 0.00860566f, 0.00920492f}); // Y_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.06082131f, -0.19985214f}); // Y_h_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.25917438f, -0.3832652f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -70,21 +70,21 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_bidir_const) { file_util::path_join(CommonTestUtils::getExecutableDirectory(), SERIALIZED_ZOO, "onnx/lstm_bidir_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 2, 1, 2}, - {-0.063373, - -0.20347191, - -0.06082131, - -0.19985214, - -0.07230289, - -0.13298286, - 0.00860566, - 0.00920492}); // Y_data + {-0.063373f, + -0.20347191f, + -0.06082131f, + -0.19985214f, + -0.07230289f, + -0.13298286f, + 0.00860566f, + 0.00920492f}); // Y_data test_case.add_expected_output(Shape{2, 1, 2}, - {-0.07230289, -0.13298286, -0.06082131, -0.19985214}); // Y_h_data + {-0.07230289f, -0.13298286f, -0.06082131f, -0.19985214f}); // Y_h_data test_case.add_expected_output(Shape{2, 1, 2}, - {-0.1557954, -0.24502525, -0.25917438, -0.3832652}); // Y_c_data + {-0.1557954f, -0.24502525f, -0.25917438f, -0.3832652f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -95,12 +95,12 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_with_clip_const) { "onnx/lstm_fwd_clip_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, 
-0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 1, 1, 2}, - {-0.02391884, -0.02744377, -0.01024176, -0.01188637}); // Y_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.01024176, -0.01188637}); // Y_h_data - test_case.add_expected_output(Shape{1, 1, 2}, {-0.02039271, -0.02353566}); // Y_c_data + {-0.02391884f, -0.02744377f, -0.01024176f, -0.01188637f}); // Y_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.01024176f, -0.01188637f}); // Y_h_data + test_case.add_expected_output(Shape{1, 1, 2}, {-0.02039271f, -0.02353566f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -111,27 +111,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_mixed_seq_const) { "onnx/lstm_fwd_mixed_seq_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 1, 2, 3}, - {0.13528088, - -0.1779867, - -0.07448981, - 0.14769037, - -0.16327181, - -0.10419653, + {0.13528088f, + -0.1779867f, + -0.07448981f, + 0.14769037f, + -0.16327181f, + -0.10419653f, 0., 0., 0., - 0.08759661, - -0.04002844, - -0.08617793}); // Y_data + 0.08759661f, + -0.04002844f, + -0.08617793f}); // Y_data test_case.add_expected_output( Shape{1, 2, 3}, - {0.13528088, -0.1779867, -0.07448981, 0.08759661, -0.04002844, -0.08617793}); // Y_h_data + {0.13528088f, -0.1779867f, -0.07448981f, 0.08759661f, -0.04002844f, -0.08617793f}); // Y_h_data test_case.add_expected_output( Shape{1, 2, 3}, - {0.367563, -0.43762812, -0.20435227, 0.17330585, -0.0732716, -0.18809439}); // Y_c_data + {0.367563f, -0.43762812f, -0.20435227f, 0.17330585f, -0.0732716f, -0.18809439f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -142,27 +142,27 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_reverse_mixed_seq_const) { "onnx/lstm_reverse_mixed_seq_const.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({0.68172926, 1.1405563, -0.03931177, -0.03759607}); // X + test_case.add_input({0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f}); // X test_case.add_expected_output(Shape{2, 1, 2, 3}, - {0.13528088, - -0.1779867, - -0.07448981, - 0.14696799, - -0.15571019, - -0.10270946, + {0.13528088f, + -0.1779867f, + -0.07448981f, + 0.14696799f, + -0.15571019f, + -0.10270946f, 0., 0., 0., - -0.01110403, - 0.0228607, - 0.00397353}); // Y_data + -0.01110403f, + 0.0228607f, + 0.00397353f}); // Y_data test_case.add_expected_output( Shape{1, 2, 3}, - {0.13528088, -0.1779867, -0.07448981, 0.14696799, -0.15571019, -0.10270946}); // Y_h_data + {0.13528088f, -0.1779867f, -0.07448981f, 0.14696799f, -0.15571019f, -0.10270946f}); // Y_h_data test_case.add_expected_output( Shape{1, 2, 3}, - {0.367563, -0.43762812, -0.20435227, 0.50598085, -0.42627674, -0.3641275}); // Y_c_data + {0.367563f, -0.43762812f, -0.20435227f, 0.50598085f, -0.42627674f, -0.3641275f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -174,43 +174,43 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_bidir_mixed_seq_const) { auto test_case = test::TestCase(function, s_device); test_case.add_input( - {0.68172926, 1.1405563, -0.03931177, -0.03759607, 1.1397027, 0.60444903, 1.3246384, -0.28191715}); // X + {0.68172926f, 1.1405563f, -0.03931177f, -0.03759607f, 1.1397027f, 0.60444903f, 1.3246384f, -0.28191715f}); // X test_case.add_expected_output(Shape{2, 2, 2, 2}, - {-0.063373, - -0.20347191, - 
0.00860566, - 0.00920492, - -0.063373, - -0.20347191, - -0.12004475, - -0.12800421, + {-0.063373f, + -0.20347191f, + 0.00860566f, + 0.00920492f, + -0.063373f, + -0.20347191f, + -0.12004475f, + -0.12800421f, 0., 0., - -0.19095606, - -0.12459831, + -0.19095606f, + -0.12459831f, 0., 0., - -0.1911628, - -0.12813942}); // Y_data + -0.1911628f, + -0.12813942f}); // Y_data test_case.add_expected_output(Shape{2, 2, 2}, - {-0.063373, - -0.20347191, - -0.19095606, - -0.12459831, - -0.063373, - -0.20347191, - -0.12004475, - -0.12800421}); // Y_h_data + {-0.063373f, + -0.20347191f, + -0.19095606f, + -0.12459831f, + -0.063373f, + -0.20347191f, + -0.12004475f, + -0.12800421f}); // Y_h_data test_case.add_expected_output(Shape{2, 2, 2}, - {-0.2732999, - -0.38956356, - -0.48170844, - -0.34701264, - -0.2732999, - -0.38956356, - -0.27130172, - -0.253659}); // Y_c_data + {-0.2732999f, + -0.38956356f, + -0.48170844f, + -0.34701264f, + -0.2732999f, + -0.38956356f, + -0.27130172f, + -0.253659f}); // Y_c_data test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -221,8 +221,8 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_fwd_with_clip_peepholes) { "onnx/lstm_fwd_with_clip_peepholes.onnx")); auto test_case = test::TestCase(function, s_device); - test_case.add_input({-0.455351, -0.276391, -0.185934, -0.269585}); // X - test_case.add_input({-0.494659f, // W + test_case.add_input({-0.455351f, -0.276391f, -0.185934f, -0.269585f}); // X + test_case.add_input({-0.494659f, // W 0.0453352f, -0.487793f, 0.417264f, @@ -560,10 +560,10 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_lstm_dynamic_batch_size_and_seq_len) { test_case.add_input({1, 2, 3, 4, 5, 6}); test_case.add_expected_output(Shape{1, 1, 3, 2}, - {0.761594, 0.761594, 0.761594, 0.761594, 0.761594, 0.761594}); // Y + {0.761594f, 0.761594f, 0.761594f, 0.761594f, 0.761594f, 0.761594f}); // Y test_case.add_expected_output(Shape{1, 3, 2}, - {0.761594, 0.761594, 0.761594, 0.761594, 0.761594, 0.761594}); // Y_c - test_case.add_expected_output(Shape{1, 3, 2}, {1, 1, 1, 1, 1, 1}); // Y_h + {0.761594f, 0.761594f, 0.761594f, 0.761594f, 0.761594f, 0.761594f}); // Y_c + test_case.add_expected_output(Shape{1, 3, 2}, {1, 1, 1, 1, 1, 1}); // Y_h test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 1); } @@ -769,32 +769,32 @@ NGRAPH_TEST_F(${BACKEND_NAME}, GRUSequenceOp, onnx_model_gru_fwd_activations_con // Y test_case.add_expected_output( Shape{4, 1, 3, 5}, - std::vector{0.30736187, 0.10271017, 0.91698503, 0.3471303, -0.0123809, 0.51264125, 0.51235366, - 0.45471948, 0.50601995, 0.49260828, 0.4781971, 0.0668709, 0.89421916, 0.33762455, - -0.19021586, 0.6881336, 0.7331965, 0.8887774, 0.34048334, 0.38408905, 0.49962956, - 0.2948451, 0.3651103, 0.33406913, 0.57418096, 0.49882296, 0.4321446, 0.97142136, - 0.20714557, 0.66270787, 0.53192705, 0.46424377, 0.9647801, 0.19583187, 0.7362316, - 0.48205143, -0.04748845, 0.27395952, 0.35897565, 0.5801568, 0.5889811, 0.36110958, - 1.3433081, 0.29702073, 0.5709667, 0.936689, 0.84129435, 1.1782551, 0.23925206, - 0.57521456, 0.43502977, -0.5664091, 0.6758457, 0.2958132, 0.70932186, 0.4411352, - -0.1717428, 1.7761463, 0.14413449, 0.73801273}); + std::vector{0.30736187f, 0.10271017f, 0.91698503f, 0.3471303f, -0.0123809f, 0.51264125f, 0.51235366f, + 0.45471948f, 0.50601995f, 0.49260828f, 0.4781971f, 0.0668709f, 0.89421916f, 0.33762455f, + -0.19021586f, 0.6881336f, 0.7331965f, 0.8887774f, 0.34048334f, 0.38408905f, 0.49962956f, + 0.2948451f, 0.3651103f, 0.33406913f, 0.57418096f, 0.49882296f, 0.4321446f, 0.97142136f, + 0.20714557f, 0.66270787f, 0.53192705f, 
0.46424377f, 0.9647801f, 0.19583187f, 0.7362316f, + 0.48205143f, -0.04748845f, 0.27395952f, 0.35897565f, 0.5801568f, 0.5889811f, 0.36110958f, + 1.3433081f, 0.29702073f, 0.5709667f, 0.936689f, 0.84129435f, 1.1782551f, 0.23925206f, + 0.57521456f, 0.43502977f, -0.5664091f, 0.6758457f, 0.2958132f, 0.70932186f, 0.4411352f, + -0.1717428f, 1.7761463f, 0.14413449f, 0.73801273f}); // Y_h test_case.add_expected_output(Shape{1, 3, 5}, - std::vector{0.936689, - 0.84129435, - 1.1782551, - 0.23925206, - 0.57521456, - 0.43502977, - -0.5664091, - 0.6758457, - 0.2958132, - 0.70932186, - 0.4411352, - -0.1717428, - 1.7761463, - 0.14413449, - 0.73801273}); + std::vector{0.936689f, + 0.84129435f, + 1.1782551f, + 0.23925206f, + 0.57521456f, + 0.43502977f, + -0.5664091f, + 0.6758457f, + 0.2958132f, + 0.70932186f, + 0.4411352f, + -0.1717428f, + 1.7761463f, + 0.14413449f, + 0.73801273f}); test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 5); } @@ -908,32 +908,33 @@ NGRAPH_TEST_F(${BACKEND_NAME}, GRUSequenceOp, onnx_model_gru_fwd_mixed_seq_len_c // Y test_case.add_expected_output( Shape{4, 1, 3, 5}, - std::vector{-0.9559332, 0.4372494, 0.9967716, -0.9079381, -1.2538278, 1.9265908, -0.8437393, - -1.2057271, -0.25887525, -0.52679026, -0.3619178, 0.67928517, 0.9486744, -0.12006134, - -1.3862017, -0.98941356, 0.80389524, 0.97586197, -0.9343586, -0.74858856, 1.797039, - -0.7873732, -0.72469383, -0.5866635, -0.42103744, -0.8406298, 0.85877097, 0.6349921, - -0.55897295, -0.6168443, 0., 0., 0., 0., 0., - 1.577129, -0.6935871, -0.304804, -0.75392795, -0.20703818, -0.93796504, 0.9220495, - 0.36017662, -0.7007159, 0.06962098, 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., -0.96323603, - 0.9265786, 0.54976916, -0.8037839, 0.73501444}); + std::vector{-0.9559332f, 0.4372494f, 0.9967716f, -0.9079381f, -1.2538278f, 1.9265908f, + -0.8437393f, -1.2057271f, -0.25887525f, -0.52679026f, -0.3619178f, 0.67928517f, + 0.9486744f, -0.12006134f, -1.3862017f, -0.98941356f, 0.80389524f, 0.97586197f, + -0.9343586f, -0.74858856f, 1.797039f, -0.7873732f, -0.72469383f, -0.5866635f, + -0.42103744f, -0.8406298f, 0.85877097f, 0.6349921f, -0.55897295f, -0.6168443f, + 0., 0., 0., 0., 0., 1.577129f, + -0.6935871f, -0.304804f, -0.75392795f, -0.20703818f, -0.93796504f, 0.9220495f, + 0.36017662f, -0.7007159f, 0.06962098f, 0., 0., 0., + 0., 0., 0., 0., 0., 0., + 0., -0.96323603f, 0.9265786f, 0.54976916f, -0.8037839f, 0.73501444f}); // Y_h test_case.add_expected_output(Shape{1, 3, 5}, - std::vector{-0.98941356, - 0.80389524, - 0.97586197, - -0.9343586, - -0.74858856, - 1.577129, - -0.6935871, - -0.304804, - -0.75392795, - -0.20703818, - -0.96323603, - 0.9265786, - 0.54976916, - -0.8037839, - 0.73501444}); + std::vector{-0.98941356f, + 0.80389524f, + 0.97586197f, + -0.9343586f, + -0.74858856f, + 1.577129f, + -0.6935871f, + -0.304804f, + -0.75392795f, + -0.20703818f, + -0.96323603f, + 0.9265786f, + 0.54976916f, + -0.8037839f, + 0.73501444f}); test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3); } @@ -949,32 +950,33 @@ NGRAPH_TEST_F(${BACKEND_NAME}, GRUSequenceOp, onnx_model_gru_reverse_mixed_seq_l // Y test_case.add_expected_output( Shape{4, 1, 3, 5}, - std::vector{-0.9917215, 0.07583051, 0.997975, -0.9315585, -0.7483002, 1.536813, -0.59922504, - -0.33637103, -0.7565539, -0.23930266, -0.7844553, 1.0393485, 0.73516595, -0.5616293, - -0.09489207, -0.9501128, 0.7905356, 0.9928266, -0.9153729, -1.1781745, 1.7955453, - -0.77754307, -0.6831806, -0.6266324, -0.39791372, -0.8030517, 1.3107346, 0.3700709, - -0.49808976, 0.52939236, 0., 0., 0., 0., 0., - 1.9345565, -0.83817405, 
-1.1433047, -0.35640514, -0.5191339, -0.655544, 1.3520991, - 0.42289692, -0.3171452, -0.3922639, 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., -0.24612205, - 1.6415757, 0.79883975, -0.18640287, -1.0134869}); + std::vector{-0.9917215f, 0.07583051f, 0.997975f, -0.9315585f, -0.7483002f, 1.536813f, + -0.59922504f, -0.33637103f, -0.7565539f, -0.23930266f, -0.7844553f, 1.0393485f, + 0.73516595f, -0.5616293f, -0.09489207f, -0.9501128f, 0.7905356f, 0.9928266f, + -0.9153729f, -1.1781745f, 1.7955453f, -0.77754307f, -0.6831806f, -0.6266324f, + -0.39791372f, -0.8030517f, 1.3107346f, 0.3700709f, -0.49808976f, 0.52939236f, + 0., 0., 0., 0., 0., 1.9345565f, + -0.83817405f, -1.1433047f, -0.35640514f, -0.5191339f, -0.655544f, 1.3520991f, + 0.42289692f, -0.3171452f, -0.3922639f, 0., 0., 0., + 0., 0., 0., 0., 0., 0., + 0., -0.24612205f, 1.6415757f, 0.79883975f, -0.18640287f, -1.0134869f}); // Y_h test_case.add_expected_output(Shape{1, 3, 5}, - std::vector{-0.9917215, - 0.07583051, - 0.997975, - -0.9315585, - -0.7483002, - 1.536813, - -0.59922504, - -0.33637103, - -0.7565539, - -0.23930266, - -0.7844553, - 1.0393485, - 0.73516595, - -0.5616293, - -0.09489207}); + std::vector{-0.9917215f, + 0.07583051f, + 0.997975f, + -0.9315585f, + -0.7483002f, + 1.536813f, + -0.59922504f, + -0.33637103f, + -0.7565539f, + -0.23930266f, + -0.7844553f, + 1.0393485f, + 0.73516595f, + -0.5616293f, + -0.09489207f}); test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 3); } @@ -991,29 +993,32 @@ NGRAPH_TEST_F(${BACKEND_NAME}, GRUSequenceOp, onnx_model_gru_bidir_mixed_seq_len test_case.add_expected_output( Shape{4, 2, 3, 5}, std::vector{ - -0.3224981, -0.44282594, 0.7499796, -0.12240417, 0.12079421, 0.02534254, 0.02504561, -0.0463777, - 0.01204535, -0.01497037, -0.04651929, -0.6264307, 0.7236632, 0.06250653, 0.02594197, 0.0595789, - 0.40258542, -0.40646964, 0.70320284, -0.02962421, 0.10372428, -0.38378227, -0.4331268, -0.15696645, - -0.3451503, 0.20918667, -0.59024405, -0.845524, 0.60705113, -0.6336088, -0.0833023, -0.40062034, - 0.7579466, -0.12340625, 0.04415433, -0.24662054, 0.27420586, -0.09122991, -0.22768986, 0.19980887, - -0.218649, -0.5560231, 0.56177044, -0.25098884, 0.15462328, 0.0409361, 0.17866893, -0.2782218, - 0.27396634, -0.04992082, 0.15353821, -0.4497267, -0.44631857, -0.478926, -0.23017275, 0.25369287, - -0.7369056, -0.73285, -0.5750758, -0.533177, 0., 0., 0., 0., - 0., -0.45753813, 0.5987347, -0.07046632, -0.35819566, 0.3916747, -0.18096107, -0.24415034, - 0.38435352, -0.29881003, 0.07738188, 0., 0., 0., 0., 0., - 0.10390212, -0.29646862, -0.20532897, -0.31521815, 0.01049522, 0.19370168, -0.6386781, -0.42919028, - -0.47081998, -0.2954276, 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., -0.50112087, -0.11085765, 0.5155622, -0.5635352, - 0.54762024, 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0.17058733, -0.6941011, -0.27862304, -0.27050856, -0.03864266}); + -0.3224981f, -0.44282594f, 0.7499796f, -0.12240417f, 0.12079421f, 0.02534254f, 0.02504561f, + -0.0463777f, 0.01204535f, -0.01497037f, -0.04651929f, -0.6264307f, 0.7236632f, 0.06250653f, + 0.02594197f, 0.0595789f, 0.40258542f, -0.40646964f, 0.70320284f, -0.02962421f, 0.10372428f, + -0.38378227f, -0.4331268f, -0.15696645f, -0.3451503f, 0.20918667f, -0.59024405f, -0.845524f, + 0.60705113f, -0.6336088f, -0.0833023f, -0.40062034f, 0.7579466f, -0.12340625f, 0.04415433f, + -0.24662054f, 0.27420586f, -0.09122991f, -0.22768986f, 0.19980887f, -0.218649f, -0.5560231f, + 0.56177044f, -0.25098884f, 0.15462328f, 0.0409361f, 0.17866893f, -0.2782218f, 0.27396634f, + -0.04992082f, 0.15353821f, 
-0.4497267f, -0.44631857f, -0.478926f, -0.23017275f, 0.25369287f, + -0.7369056f, -0.73285f, -0.5750758f, -0.533177f, 0., 0., 0., + 0., 0., -0.45753813f, 0.5987347f, -0.07046632f, -0.35819566f, 0.3916747f, + -0.18096107f, -0.24415034f, 0.38435352f, -0.29881003f, 0.07738188f, 0., 0., + 0., 0., 0., 0.10390212f, -0.29646862f, -0.20532897f, -0.31521815f, + 0.01049522f, 0.19370168f, -0.6386781f, -0.42919028f, -0.47081998f, -0.2954276f, 0., + 0., 0., 0., 0., 0., 0., 0., + 0., 0., -0.50112087f, -0.11085765f, 0.5155622f, -0.5635352f, 0.54762024f, + 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., 0.17058733f, -0.6941011f, -0.27862304f, -0.27050856f, + -0.03864266f}); // Y_h test_case.add_expected_output( Shape{2, 3, 5}, - std::vector{-0.0833023, -0.40062034, 0.7579466, -0.12340625, 0.04415433, -0.45753813, - 0.5987347, -0.07046632, -0.35819566, 0.3916747, -0.50112087, -0.11085765, - 0.5155622, -0.5635352, 0.54762024, 0.0595789, 0.40258542, -0.40646964, - 0.70320284, -0.02962421, 0.10372428, -0.38378227, -0.4331268, -0.15696645, - -0.3451503, 0.20918667, -0.59024405, -0.845524, 0.60705113, -0.6336088}); + std::vector{-0.0833023f, -0.40062034f, 0.7579466f, -0.12340625f, 0.04415433f, -0.45753813f, + 0.5987347f, -0.07046632f, -0.35819566f, 0.3916747f, -0.50112087f, -0.11085765f, + 0.5155622f, -0.5635352f, 0.54762024f, 0.0595789f, 0.40258542f, -0.40646964f, + 0.70320284f, -0.02962421f, 0.10372428f, -0.38378227f, -0.4331268f, -0.15696645f, + -0.3451503f, 0.20918667f, -0.59024405f, -0.845524f, 0.60705113f, -0.6336088f}); test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 4); } @@ -1901,32 +1906,32 @@ NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_reverse_mixed_seq_l // Y test_case.add_expected_output( Shape{4, 1, 3, 5}, - std::vector{-0.27398264, 0.96948624, 0.26404798, 0.8068119, 0.99935544, 0.73694086, 0.44305325, - -0.9964632, 0.7063714, 0.9999049, -0.7241098, 0.08538079, -0.785874, 0.60833323, - 0.99999666, 0.53703666, 0.0267657, 0.37151086, -0.68740594, 0.9992448, 0.3254757, - 0.7716811, -0.9996745, 0.9957807, 0.9995338, 0.9997339, 0.9888724, -0.8992324, - -0.797282, 0.98666525, 0., 0., 0., 0., 0., - 0.95711637, -0.8986079, -0.99998885, 0.96265936, 0.9380511, -0.86523867, 0.3528558, - -0.99675506, 0.946875, 0.79539406, 0., 0., 0., 0., - 0., 0., 0., 0., 0., 0., 0.99903, - 0.9998094, 0.9499353, 0.6077225, -0.9921822}); + std::vector{ + -0.27398264f, 0.96948624f, 0.26404798f, 0.8068119f, 0.99935544f, 0.73694086f, 0.44305325f, -0.9964632f, + 0.7063714f, 0.9999049f, -0.7241098f, 0.08538079f, -0.785874f, 0.60833323f, 0.99999666f, 0.53703666f, + 0.0267657f, 0.37151086f, -0.68740594f, 0.9992448f, 0.3254757f, 0.7716811f, -0.9996745f, 0.9957807f, + 0.9995338f, 0.9997339f, 0.9888724f, -0.8992324f, -0.797282f, 0.98666525f, 0., 0., + 0., 0., 0., 0.95711637f, -0.8986079f, -0.99998885f, 0.96265936f, 0.9380511f, + -0.86523867f, 0.3528558f, -0.99675506f, 0.946875f, 0.79539406f, 0., 0., 0., + 0., 0., 0., 0., 0., 0., 0., 0.99903f, + 0.9998094f, 0.9499353f, 0.6077225f, -0.9921822f}); // Y_h test_case.add_expected_output(Shape{1, 3, 5}, - std::vector{-0.27398264, - 0.96948624, - 0.26404798, - 0.8068119, - 0.99935544, - 0.73694086, - 0.44305325, - -0.9964632, - 0.7063714, - 0.9999049, - -0.7241098, - 0.08538079, - -0.785874, - 0.60833323, - 0.99999666}); + std::vector{-0.27398264f, + 0.96948624f, + 0.26404798f, + 0.8068119f, + 0.99935544f, + 0.73694086f, + 0.44305325f, + -0.9964632f, + 0.7063714f, + 0.9999049f, + -0.7241098f, + 0.08538079f, + -0.785874f, + 0.60833323f, + 0.99999666f}); 
test_case.run(DEFAULT_FLOAT_TOLERANCE_BITS + 4); } @@ -1942,29 +1947,32 @@ NGRAPH_TEST_F(${BACKEND_NAME}, RNNSequenceOp, onnx_model_rnn_bidir_mixed_seq_len test_case.add_expected_output( Shape{4, 2, 3, 5}, std::vector{ - 0.02254748, 0.15776646, -0.8229023, 0.19205809, 0.76984656, -0.00603169, -0.0286147, 0.04512155, - -0.0011912, -0.02572936, -0.13703543, -0.49651444, -0.78868157, 0.3566854, 0.8758509, -0.99602485, - -0.8151508, -0.5803147, 0.4985683, 0.30210292, 0.11550081, -0.30236644, 0.99622667, -0.8732492, - -0.43772405, -0.9284624, -0.5595875, 0.9986867, -0.18373811, 0.8451735, -0.43823165, -0.1904698, - 0.8320786, 0.9830735, 0.61861455, 0.19109797, 0.64407, 0.00962067, -0.32752877, -0.5050589, - -0.23455954, 0.9517933, 0.9050665, 0.91091585, -0.77941567, -0.71390504, -0.24422187, -0.38115412, - 0.3462553, 0.44084883, -0.81455964, -0.23556596, 0.85043025, -0.7840209, -0.82087713, -0.8349008, - -0.7880142, 0.99017143, -0.9816452, -0.93827677, 0., 0., 0., 0., - 0., 0.28117967, 0.20685148, 0.01166701, -0.5441828, -0.5463747, -0.85301256, 0.52109087, - -0.8317892, -0.9676957, -0.30258918, 0., 0., 0., 0., 0., - -0.7010546, -0.3106169, -0.04788882, -0.21822351, -0.33518708, -0.9073148, 0.16276085, 0.9518349, - -0.8635942, -0.92539954, 0., 0., 0., 0., 0., 0., - 0., 0., 0., 0., 0.9948462, -0.6242633, -0.19065344, -0.36072153, - -0.99407107, 0., 0., 0., 0., 0., 0., 0., - 0., 0., 0., -0.9957684, -0.7924, -0.40261805, -0.34061068, -0.55580306}); + 0.02254748f, 0.15776646f, -0.8229023f, 0.19205809f, 0.76984656f, -0.00603169f, -0.0286147f, + 0.04512155f, -0.0011912f, -0.02572936f, -0.13703543f, -0.49651444f, -0.78868157f, 0.3566854f, + 0.8758509f, -0.99602485f, -0.8151508f, -0.5803147f, 0.4985683f, 0.30210292f, 0.11550081f, + -0.30236644f, 0.99622667f, -0.8732492f, -0.43772405f, -0.9284624f, -0.5595875f, 0.9986867f, + -0.18373811f, 0.8451735f, -0.43823165f, -0.1904698f, 0.8320786f, 0.9830735f, 0.61861455f, + 0.19109797f, 0.64407f, 0.00962067f, -0.32752877f, -0.5050589f, -0.23455954f, 0.9517933f, + 0.9050665f, 0.91091585f, -0.77941567f, -0.71390504f, -0.24422187f, -0.38115412f, 0.3462553f, + 0.44084883f, -0.81455964f, -0.23556596f, 0.85043025f, -0.7840209f, -0.82087713f, -0.8349008f, + -0.7880142f, 0.99017143f, -0.9816452f, -0.93827677f, 0., 0., 0., + 0., 0., 0.28117967f, 0.20685148f, 0.01166701f, -0.5441828f, -0.5463747f, + -0.85301256f, 0.52109087f, -0.8317892f, -0.9676957f, -0.30258918f, 0., 0., + 0., 0., 0., -0.7010546f, -0.3106169f, -0.04788882f, -0.21822351f, + -0.33518708f, -0.9073148f, 0.16276085f, 0.9518349f, -0.8635942f, -0.92539954f, 0., + 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0.9948462f, -0.6242633f, -0.19065344f, -0.36072153f, -0.99407107f, + 0., 0., 0., 0., 0., 0., 0., + 0., 0., 0., -0.9957684f, -0.7924f, -0.40261805f, -0.34061068f, + -0.55580306f}); // Y_h test_case.add_expected_output( Shape{2, 3, 5}, - std::vector{-0.43823165, -0.1904698, 0.8320786, 0.9830735, 0.61861455, 0.28117967, - 0.20685148, 0.01166701, -0.5441828, -0.5463747, 0.9948462, -0.6242633, - -0.19065344, -0.36072153, -0.99407107, -0.99602485, -0.8151508, -0.5803147, - 0.4985683, 0.30210292, 0.11550081, -0.30236644, 0.99622667, -0.8732492, - -0.43772405, -0.9284624, -0.5595875, 0.9986867, -0.18373811, 0.8451735}); + std::vector{-0.43823165f, -0.1904698f, 0.8320786f, 0.9830735f, 0.61861455f, 0.28117967f, + 0.20685148f, 0.01166701f, -0.5441828f, -0.5463747f, 0.9948462f, -0.6242633f, + -0.19065344f, -0.36072153f, -0.99407107f, -0.99602485f, -0.8151508f, -0.5803147f, + 0.4985683f, 0.30210292f, 0.11550081f, -0.30236644f, 
0.99622667f, -0.8732492f, + -0.43772405f, -0.9284624f, -0.5595875f, 0.9986867f, -0.18373811f, 0.8451735f}); // loosest match @ mantissa bit: // 16 or next bit (0.01166688557714223862 vs 0.01166701037436723709) diff --git a/src/frontends/onnx/tests/onnx_transformations.cpp b/src/frontends/onnx/tests/onnx_transformations.cpp index 76420f41e314c3..ca4535c921ff76 100644 --- a/src/frontends/onnx/tests/onnx_transformations.cpp +++ b/src/frontends/onnx/tests/onnx_transformations.cpp @@ -42,7 +42,7 @@ bool after_func_expand_name_comp(std::string lhs, std::string rhs) { if (is_hex_symbol(name[i])) { ++founded_hex; if (cut_begin == -1) { - cut_begin = i; + cut_begin = static_cast<int>(i); } if (founded_hex >= min_address) { cut_length = founded_hex; From 9d0749a5b7f16121bc80059e760e342572abc2ca Mon Sep 17 00:00:00 2001 From: Sofya Balandina Date: Thu, 23 Mar 2023 10:59:31 +0000 Subject: [PATCH 056/296] [conformanceTests] Add key to manage pipeline after crashes (#16123) * [conformanceTests] Add key to manage pipeline after crashes * Move crash_handler to funcTestsUtils --- .../plugin/conformance/test_runner/README.md | 1 + .../conformance_infra/include/gflag_config.hpp | 5 +++++ .../test_runner/conformance_infra/src/main.cpp | 3 ++- .../src/read_ir_test/read_ir.cpp | 2 +- .../include/base/ov_behavior_test_utils.hpp | 2 +- .../shared/include/behavior/plugin/life_time.hpp | 2 +- .../op_impl_check/op_impl_check_compile_model.hpp | 2 +- .../op_impl_check/op_impl_check_query_model.hpp | 2 +- .../op_impl_check/op_impl_check.cpp | 2 +- .../shared_test_classes/base/layer_test_utils.hpp | 2 +- .../shared_test_classes/src/base/ov_subgraph.cpp | 2 +- .../functional_test_utils}/crash_handler.hpp | 4 +++- .../src}/crash_handler.cpp | 15 ++++++++++++++- 13 files changed, 33 insertions(+), 11 deletions(-) rename src/tests/ie_test_utils/{common_test_utils => functional_test_utils/include/functional_test_utils}/crash_handler.hpp (78%) rename src/tests/ie_test_utils/{common_test_utils => functional_test_utils/src}/crash_handler.cpp (83%) diff --git a/src/tests/functional/plugin/conformance/test_runner/README.md b/src/tests/functional/plugin/conformance/test_runner/README.md index 4c67da79667285..628567d23aab41 100644 --- a/src/tests/functional/plugin/conformance/test_runner/README.md +++ b/src/tests/functional/plugin/conformance/test_runner/README.md @@ -129,6 +129,7 @@ The target is able to take the following command-line arguments: * `--shape_mode` is optional. It allows you to run `static`, `dynamic` , or both scenarios. The default value is an empty string, which allows running both scenarios. Possible values are `static`, `dynamic`, `` * `--test_timeout` specifies setup timeout for each test in seconds. The default timeout is 900 seconds (15 minutes). +* `--ignore_crash` is optional. It allows the run to continue from the next test instead of terminating the whole run after a crash. This is organized with a custom crash handler. Note that the handler works only for the test body; if a crash happens at the SetUp/TearDown stage, the process is still terminated (see the example below).
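+ For example, a crash-tolerant run might look like this (the binary name and the other flag values here are illustrative, not taken from this patch): `./conformanceTests --device=CPU --shape_mode=static --test_timeout=600 --ignore_crash`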
 * All `gtest` command-line parameters
 
 > **NOTE**:
diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/gflag_config.hpp b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/gflag_config.hpp
index 5ce146ba16b698..04c5dd2e28b2b2 100644
--- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/gflag_config.hpp
+++ b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/gflag_config.hpp
@@ -46,6 +46,9 @@ static const char extract_body_message[] = "Optional. Allows to count extracted
 static const char shape_mode_message[] = "Optional. Allows to run `static`, `dynamic` or both scenarios. Default value is empty string allows to run both"
                                          " scenarios. Possible values are `static`, `dynamic`, ``";
 static const char test_timeout_message[] = "Optional. Setup timeout for each test in seconds, default timeout 900seconds (15 minutes).";
+static const char ignore_crash_message[] = "Optional. Allows the run to continue from the next test instead of terminating after a crash."
+                                           " This is implemented with a custom crash handler. Please note that the handler only covers the test body;"
+                                           " if a crash happens at the SetUp/TearDown stage, the process will be terminated.";
 
 static const char reference_cache_dir_message[] = "Optional. Set the directory with reference cache";
 
@@ -63,6 +66,7 @@ DEFINE_bool(report_unique_name, false, report_unique_name_message);
 DEFINE_bool(extract_body, false, extract_body_message);
 DEFINE_string(shape_mode, "", shape_mode_message);
 DEFINE_uint32(test_timeout, UINT_MAX, test_timeout_message);
+DEFINE_bool(ignore_crash, false, ignore_crash_message);
 DEFINE_string(ref_dir, "", reference_cache_dir_message);
 
 /**
@@ -87,6 +91,7 @@ static void showUsage() {
     std::cout << "    --plugin_lib_name " << output_folder_message << std::endl;
     std::cout << "    --shape_mode  \"\" " << shape_mode_message << std::endl;
     std::cout << "    --test_timeout  \"\" " << test_timeout_message << std::endl;
+    std::cout << "    --ignore_crash " << ignore_crash_message << std::endl;
     std::cout << "    --ref_dir  \"\" " << reference_cache_dir_message << std::endl;
 }
 
diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp
index 8c3567cafa1a82..82ca2ec898afbf 100644
--- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp
+++ b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp
@@ -17,7 +17,7 @@
 #include "gflag_config.hpp"
 #include "conformance.hpp"
-#include "common_test_utils/crash_handler.hpp"
+#include "functional_test_utils/crash_handler.hpp"
 
 using namespace ov::test::conformance;
 
@@ -63,6 +63,7 @@ int main(int argc, char* argv[]) {
     }
 
     CommonTestUtils::CrashHandler::SetUpTimeout(FLAGS_test_timeout);
+    CommonTestUtils::CrashHandler::SetUpPipelineAfterCrash(FLAGS_ignore_crash);
 
     // ---------------------------Initialization of Gtest env -----------------------------------------------
     ov::test::conformance::targetDevice = FLAGS_device.c_str();
diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp
index 397dcb9e6762f8..246051b7f7e543 100644
--- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp
+++ 
b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp @@ -13,7 +13,7 @@ #include "common_test_utils/file_utils.hpp" #include "common_test_utils/data_utils.hpp" #include "common_test_utils/common_utils.hpp" -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" #include "functional_test_utils/summary/op_info.hpp" #include "functional_test_utils/skip_tests_config.hpp" diff --git a/src/tests/functional/plugin/shared/include/base/ov_behavior_test_utils.hpp b/src/tests/functional/plugin/shared/include/base/ov_behavior_test_utils.hpp index 73fc178a7209e8..010ec941e5db90 100644 --- a/src/tests/functional/plugin/shared/include/base/ov_behavior_test_utils.hpp +++ b/src/tests/functional/plugin/shared/include/base/ov_behavior_test_utils.hpp @@ -18,7 +18,7 @@ #include "common_test_utils/test_common.hpp" #include "common_test_utils/test_constants.hpp" #include "common_test_utils/common_utils.hpp" -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" #include "common_test_utils/file_utils.hpp" #include "functional_test_utils/plugin_cache.hpp" diff --git a/src/tests/functional/plugin/shared/include/behavior/plugin/life_time.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/life_time.hpp index 6ef031d096a15a..010ed38672c643 100644 --- a/src/tests/functional/plugin/shared/include/behavior/plugin/life_time.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/plugin/life_time.hpp @@ -15,7 +15,7 @@ #include #include #include "gtest/gtest.h" -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" #include "functional_test_utils/skip_tests_config.hpp" #include "functional_test_utils/precision_utils.hpp" #include "base/behavior_test_utils.hpp" diff --git a/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_compile_model.hpp b/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_compile_model.hpp index 9450bc62dc1065..76c839ecbab1ba 100644 --- a/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_compile_model.hpp +++ b/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_compile_model.hpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" #include "single_layer_tests/op_impl_check/op_impl_check.hpp" namespace ov { diff --git a/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_query_model.hpp b/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_query_model.hpp index 5aa0df75dfaf48..a1c18f05f33521 100644 --- a/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_query_model.hpp +++ b/src/tests/functional/plugin/shared/include/single_layer_tests/op_impl_check/op_impl_check_query_model.hpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" #include "single_layer_tests/op_impl_check/op_impl_check.hpp" namespace ov { diff --git a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/op_impl_check.cpp b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/op_impl_check.cpp index 99fd2c24a55cac..2a6c23f2c004cb 
100644 --- a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/op_impl_check.cpp +++ b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/op_impl_check.cpp @@ -7,7 +7,7 @@ #endif #include "single_layer_tests/op_impl_check/op_impl_check.hpp" -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" namespace ov { namespace test { diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp index c2d415b04b893f..a4dda85d675d25 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp @@ -21,8 +21,8 @@ #include "common_test_utils/ngraph_test_utils.hpp" #include "common_test_utils/common_utils.hpp" #include "common_test_utils/test_common.hpp" -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" #include "functional_test_utils/skip_tests_config.hpp" #include "functional_test_utils/plugin_cache.hpp" #include "functional_test_utils/blob_utils.hpp" diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index ed59d6e0d743b3..ee6c57ca694222 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -23,7 +23,7 @@ #include "ngraph_functions/utils/ngraph_helpers.hpp" #include "common_test_utils/file_utils.hpp" -#include "common_test_utils/crash_handler.hpp" +#include "functional_test_utils/crash_handler.hpp" #include "common_test_utils/ov_tensor_utils.hpp" #include "functional_test_utils/skip_tests_config.hpp" diff --git a/src/tests/ie_test_utils/common_test_utils/crash_handler.hpp b/src/tests/ie_test_utils/functional_test_utils/include/functional_test_utils/crash_handler.hpp similarity index 78% rename from src/tests/ie_test_utils/common_test_utils/crash_handler.hpp rename to src/tests/ie_test_utils/functional_test_utils/include/functional_test_utils/crash_handler.hpp index f06ede67c61993..75b2c47cff1d2c 100644 --- a/src/tests/ie_test_utils/common_test_utils/crash_handler.hpp +++ b/src/tests/ie_test_utils/functional_test_utils/include/functional_test_utils/crash_handler.hpp @@ -6,7 +6,7 @@ #include -#include "common_utils.hpp" +#include "common_test_utils/common_utils.hpp" #include #include @@ -20,10 +20,12 @@ enum JMP_STATUS { ok = 0, anyError = 1, alarmErr = 2 }; class CrashHandler { private: static unsigned int MAX_TEST_WORK_TIME; + static bool IGNORE_CRASH; public: CrashHandler(); ~CrashHandler(); static void SetUpTimeout(unsigned int timeout); + static void SetUpPipelineAfterCrash(bool ignore_crash); void StartTimer(); }; diff --git a/src/tests/ie_test_utils/common_test_utils/crash_handler.cpp b/src/tests/ie_test_utils/functional_test_utils/src/crash_handler.cpp similarity index 83% rename from src/tests/ie_test_utils/common_test_utils/crash_handler.cpp rename to src/tests/ie_test_utils/functional_test_utils/src/crash_handler.cpp index 4372f75a4dc5b5..3134df4317578d 100644 --- a/src/tests/ie_test_utils/common_test_utils/crash_handler.cpp +++ b/src/tests/ie_test_utils/functional_test_utils/src/crash_handler.cpp @@ -2,7 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // -#include 
"crash_handler.hpp" +#include "functional_test_utils/summary/op_summary.hpp" + +#include "functional_test_utils/crash_handler.hpp" #include namespace CommonTestUtils { @@ -10,6 +12,7 @@ namespace CommonTestUtils { // enviroment to restore in case of crash jmp_buf env; unsigned int CrashHandler::MAX_TEST_WORK_TIME = UINT_MAX; +bool CrashHandler::IGNORE_CRASH = false; CrashHandler::CrashHandler() { // setup default value for timeout in 15 minutes @@ -31,6 +34,12 @@ CrashHandler::CrashHandler() { signal(SIGALRM, SIG_DFL); #endif + if (!CrashHandler::IGNORE_CRASH) { + auto &s = ov::test::utils::OpSummary::getInstance(); + s.saveReport(); + std::abort(); + } + #ifdef _WIN32 longjmp(env, JMP_STATUS::anyError); #else @@ -84,4 +93,8 @@ void CrashHandler::SetUpTimeout(unsigned int timeout) { MAX_TEST_WORK_TIME = timeout; } +void CrashHandler::SetUpPipelineAfterCrash(bool ignore_crash) { + IGNORE_CRASH = ignore_crash; +} + } // namespace CommonTestUtils \ No newline at end of file From c89da1aee2018dd28085dcbc4018e8a02d56cfc7 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Thu, 23 Mar 2023 12:02:01 +0100 Subject: [PATCH 057/296] DOCS shift to rst - Install OpenVINO on macOS, Raspbian (#16506) --- .../installing-openvino-brew.md | 81 +++++------- .../installing-openvino-from-archive-macos.md | 123 +++++++++--------- .../installing-openvino-macos-header.md | 13 +- .../installing-openvino-macos.md | 12 +- .../installing-openvino-raspbian.md | 65 +++------ 5 files changed, 133 insertions(+), 161 deletions(-) diff --git a/docs/install_guides/installing-openvino-brew.md b/docs/install_guides/installing-openvino-brew.md index 557e38872e84c2..1bbf98042015a2 100644 --- a/docs/install_guides/installing-openvino-brew.md +++ b/docs/install_guides/installing-openvino-brew.md @@ -2,58 +2,52 @@ @sphinxdirective -With the OpenVINO™ 2022.3 release, you can install OpenVINO Runtime on macOS and Linux via `Homebrew `_. OpenVINO™ Development Tools can be installed via PyPI only. See :ref:`Installing Additional Components ` for more information. +With the OpenVINO™ 2022.3 release, you can install OpenVINO Runtime on macOS and Linux via `Homebrew `_. OpenVINO™ Development Tools can be installed via PyPI only. See `Installing Additional Components <#optional-installing-additional-components>`__ for more information. -See the `Release Notes `_ for more information on updates in the latest release. +See the `Release Notes `__ for more information on updates in the latest release. Installing OpenVINO Runtime from Homebrew is recommended for C++ developers. If you are working with Python, the PyPI package has everything needed for Python development and deployment on CPU and GPUs. Visit the :doc:`Install OpenVINO from PyPI ` page for instructions on how to install OpenVINO Runtime for Python using PyPI. -.. note:: +.. note:: Only CPU is supported for inference if you install OpenVINO via HomeBrew. -.. warning:: +.. warning:: - By downloading and using this container and the included software, you agree to the terms and conditions of the `software license agreements `_. + By downloading and using this container and the included software, you agree to the terms and conditions of the `software license agreements `__. 
-@endsphinxdirective - -## Prerequisites - -### System Requirements -@sphinxdirective - -Full requirement listing is available on the `System Requirements Page `_ +Prerequisites +#################### -@endsphinxdirective +System Requirements +++++++++++++++++++++ -### Software Requirements +Full requirement listing is available on the `System Requirements Page `__ -@sphinxdirective +Software Requirements ++++++++++++++++++++++ .. tab:: macOS * `Homebrew `_ - * `CMake 3.13 or higher `_ (choose "macOS 10.13 or later"). Add `/Applications/CMake.app/Contents/bin` to path (for default installation). - * `Python 3.7 - 3.10 `_ (choose 3.7 - 3.10). Install and add it to path. - * Apple Xcode Command Line Tools. In the terminal, run `xcode-select --install` from any directory to install it. + * `CMake 3.13 or higher `__ (choose "macOS 10.13 or later"). Add ``/Applications/CMake.app/Contents/bin`` to path (for default installation). + * `Python 3.7 - 3.10 `__ (choose 3.7 - 3.10). Install and add it to path. + * Apple Xcode Command Line Tools. In the terminal, run ``xcode-select --install`` from any directory to install it. * (Optional) Apple Xcode IDE (not required for OpenVINO™, but useful for development) .. tab:: Linux * `Homebrew `_ - * `CMake 3.13 or higher, 64-bit `_ + * `CMake 3.13 or higher, 64-bit `__ * GCC 7.5.0 (for Ubuntu 18.04) or GCC 9.3.0 (for Ubuntu 20.04) - * `Python 3.7 - 3.10, 64-bit `_ - -@endsphinxdirective + * `Python 3.7 - 3.10, 64-bit `__ -## Installing OpenVINO Runtime -@sphinxdirective +Installing OpenVINO Runtime +########################### -1. Make sure that you have installed HomeBrew on your system. If not, follow the instructions on `the Homebrew website `_ to install and configure it. +1. Make sure that you have installed HomeBrew on your system. If not, follow the instructions on `the Homebrew website `__ to install and configure it. 2. Open a command prompt terminal window, and run the following command to install OpenVINO Runtime: @@ -61,15 +55,11 @@ Full requirement listing is available on the `System Requirements Page `_. -@endsphinxdirective - -## Uninstalling OpenVINO +Uninstalling OpenVINO +##################### To uninstall OpenVINO via HomeBrew, use the following command: -```sh -brew uninstall openvino -``` -## What's Next? +.. code-block:: sh -@sphinxdirective + brew uninstall openvino + + +What's Next? +#################### -Now that you've installed OpenVINO Runtime, you can try the following things: +Now that you've installed OpenVINO Runtime, you can try the following things: * Learn more about :doc:`OpenVINO Workflow `. * To prepare your models for working with OpenVINO, see :doc:`Model Preparation `. @@ -99,8 +89,9 @@ Now that you've installed OpenVINO Runtime, you can try the following things: * See sample applications in :doc:`OpenVINO toolkit Samples Overview `. * Take a glance at the OpenVINO product home page: https://software.intel.com/en-us/openvino-toolkit. 
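Since a Homebrew installation supports CPU inference only, a quick smoke test can confirm that the freshly installed runtime loads and exposes the CPU device. The sketch below is illustrative only: the file name, build setup, and exact output are assumptions rather than part of the original guide.

```cpp
// smoke_test.cpp: illustrative sketch, not part of the official guide.
#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;  // loads the runtime installed via Homebrew
    std::cout << ov::get_openvino_version() << std::endl;
    // A Homebrew build supports CPU inference only, so "CPU" is the device to expect.
    for (const auto& device : core.get_available_devices())
        std::cout << "Available device: " << device << std::endl;
    return 0;
}
```

Building it against the installed runtime, for example with CMake's `find_package(OpenVINO REQUIRED)` and the `openvino::runtime` target, and seeing `CPU` in the output confirms the installation.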
-@endsphinxdirective +Additional Resources +#################### -## Additional Resources +* `OpenVINO Installation Selector Tool `__ -- [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) \ No newline at end of file +@endsphinxdirective diff --git a/docs/install_guides/installing-openvino-from-archive-macos.md b/docs/install_guides/installing-openvino-from-archive-macos.md index 23d20d6e666130..2e3793bce92e3f 100644 --- a/docs/install_guides/installing-openvino-from-archive-macos.md +++ b/docs/install_guides/installing-openvino-from-archive-macos.md @@ -1,53 +1,59 @@ # Install OpenVINO™ Runtime on macOS from an Archive File {#openvino_docs_install_guides_installing_openvino_from_archive_macos} +@sphinxdirective + With the OpenVINO™ 2022.3 release, you can download and use archive files to install OpenVINO Runtime. The archive files contain pre-built binaries and library files needed for OpenVINO Runtime, as well as code samples. -Installing OpenVINO Runtime from archive files is recommended for C++ developers. If you are working with Python, the PyPI package has everything needed for Python development and deployment on CPU and GPUs. Visit the [Install OpenVINO from PyPI](installing-openvino-pip.md) page for instructions on how to install OpenVINO Runtime for Python using PyPI. +Installing OpenVINO Runtime from archive files is recommended for C++ developers. If you are working with Python, the PyPI package has everything needed for Python development and deployment on CPU and GPUs. Visit the :doc:`Install OpenVINO from PyPI ` page for instructions on how to install OpenVINO Runtime for Python using PyPI. -See the [Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNotes) for more information on updates in the latest release. +See the `Release Notes `__ for more information on updates in the latest release. -> **NOTE**: Since the OpenVINO™ 2022.1 release, the following development tools: Model Optimizer, Post-Training Optimization Tool, Model Downloader and other Open Model Zoo tools, Accuracy Checker, and Annotation Converter can be installed via [pypi.org](https://pypi.org/project/openvino-dev/) only. +.. note:: + + Since the OpenVINO™ 2022.1 release, the following development tools: Model Optimizer, Post-Training Optimization Tool, Model Downloader and other Open Model Zoo tools, Accuracy Checker, and Annotation Converter can be installed via `pypi.org `__ only. -@sphinxdirective .. tab:: System Requirements | Full requirement listing is available in: - | `System Requirements Page `_ + | `System Requirements Page `__ .. tab:: Software Requirements - * `CMake 3.13 or higher `_ (choose "macOS 10.13 or later"). Add `/Applications/CMake.app/Contents/bin` to path (for default install). - * `Python 3.7 - 3.10 `_ (choose 3.7 - 3.10). Install and add to path. - * Apple Xcode Command Line Tools. In the terminal, run `xcode-select --install` from any directory + * `CMake 3.13 or higher `__ (choose "macOS 10.13 or later"). Add ``/Applications/CMake.app/Contents/bin`` to path (for default install). + * `Python 3.7 - 3.10 `__ (choose 3.7 - 3.10). Install and add to path. + * Apple Xcode Command Line Tools. 
In the terminal, run ``xcode-select --install`` from any directory * (Optional) Apple Xcode IDE (not required for OpenVINO™, but useful for development) -@endsphinxdirective -## Installing OpenVINO Runtime +Installing OpenVINO Runtime +########################### -### Step 1: Install OpenVINO Core Components +Step 1: Install OpenVINO Core Components +++++++++++++++++++++++++++++++++++++++++ -@sphinxdirective -1. Open a command prompt terminal window. -2. Create the `/opt/intel` folder for OpenVINO by using the following command. If the folder already exists, skip this command. +1. Open a command prompt terminal window. +2. Create the ``/opt/intel`` folder for OpenVINO by using the following command. If the folder already exists, skip this command. .. code-block:: sh sudo mkdir /opt/intel - - .. note:: - - The `/opt/intel` path is the recommended folder path for installing OpenVINO. You may use a different path if desired. -3. Browse to the current user's `Downloads` folder: + + .. note:: + + The ``/opt/intel`` path is the recommended folder path for installing OpenVINO. You may use a different path if desired. + + +3. Browse to the current user's ``Downloads`` folder: .. code-block:: sh cd /Downloads - -4. Download the `OpenVINO Runtime archive file for macOS `_, extract the files, rename the extracted folder and move it to the desired path: + + +4. Download the `OpenVINO Runtime archive file for macOS `__, extract the files, rename the extracted folder and move it to the desired path: .. tab:: x86, 64-bit @@ -65,55 +71,62 @@ See the [Release Notes](https://software.intel.com/en-us/articles/OpenVINO-RelNo tar -xf openvino_2022.3.0.tgz sudo mv m_openvino_toolkit_macos_11_0_2022.3.0.9052.9752fafe8eb_arm64 /opt/intel/openvino_2022.3.0 + 5. For simplicity, it is useful to create a symbolic link as below: .. code-block:: sh sudo ln -s openvino_2022.3.0 openvino_2022 - - .. note:: - - If you have already installed a previous release of OpenVINO 2022, a symbolic link to the `openvino_2022` folder may already exist. Unlink the previous link with `sudo unlink openvino_2022`, and then re-run the command above. -@endsphinxdirective + .. note:: + + If you have already installed a previous release of OpenVINO 2022, a symbolic link to the ``openvino_2022`` folder may already exist. Unlink the previous link with ``sudo unlink openvino_2022``, and then re-run the command above. + -Congratulations, you finished the installation! The `/opt/intel/openvino_2022` folder now contains the core components for OpenVINO. If you used a different path in Step 2, you will find the `openvino_2022` folder there. The path to the `openvino_2022` directory is also referred as `` throughout the OpenVINO documentation. +Congratulations, you finished the installation! The ``/opt/intel/openvino_2022`` folder now contains the core components for OpenVINO. If you used a different path in Step 2, you will find the ``openvino_2022`` folder there. The path to the ``openvino_2022`` directory is also referred as ```` throughout the OpenVINO documentation. -### Step 2: Configure the Environment +Step 2: Configure the Environment ++++++++++++++++++++++++++++++++++ -You must update several environment variables before you can compile and run OpenVINO applications. Open a terminal window and run the `setupvars.sh` script as shown below to temporarily set your environment variables. If your is not `/opt/intel/openvino_2022`, use the correct one instead. 
+You must update several environment variables before you can compile and run OpenVINO applications. Open a terminal window and run the ``setupvars.sh`` script as shown below to temporarily set your environment variables. If your ```` is not ``/opt/intel/openvino_2022``, use the correct one instead. -```sh -source /opt/intel/openvino_2022/setupvars.sh -``` +.. code-block:: sh -If you have more than one OpenVINO™ version on your machine, you can easily switch its version by sourcing the `setupvars.sh` of your choice. + source /opt/intel/openvino_2022/setupvars.sh -> **NOTE**: The above command must be re-run every time you start a new terminal session. To set up macOS to automatically run the command every time a new terminal is opened, open `~/.zshrc` in your favorite editor and add `source /opt/intel/openvino_2022/setupvars.sh` after the last line. Next time when you open a terminal, you will see `[setupvars.sh] OpenVINO™ environment initialized`. Changing `~/.zshrc` is not recommended when you have multiple OpenVINO versions on your machine and want to switch among them. + +If you have more than one OpenVINO™ version on your machine, you can easily switch its version by sourcing the ``setupvars.sh`` of your choice. + +.. note:: + + The above command must be re-run every time you start a new terminal session. To set up macOS to automatically run the command every time a new terminal is opened, open ``~/.zshrc`` in your favorite editor and add ``source /opt/intel/openvino_2022/setupvars.sh`` after the last line. Next time when you open a terminal, you will see ``[setupvars.sh] OpenVINO™ environment initialized``. Changing ``~/.zshrc`` is not recommended when you have multiple OpenVINO versions on your machine and want to switch among them. The environment variables are set. Continue to the next section if you want to download any additional components. -### Step 3 (Optional): Install Additional Components +Step 3 (Optional): Install Additional Components +++++++++++++++++++++++++++++++++++++++++++++++++ OpenVINO Development Tools is a set of utilities for working with OpenVINO and OpenVINO models. It provides tools like Model Optimizer, Benchmark Tool, Post-Training Optimization Tool, and Open Model Zoo Downloader. If you install OpenVINO Runtime using archive files, OpenVINO Development Tools must be installed separately. -See the [Install OpenVINO Development Tools](installing-model-dev-tools.md) page for step-by-step installation instructions. +See the :doc:`Install OpenVINO Development Tools ` page for step-by-step installation instructions. + +OpenCV is necessary to run demos from Open Model Zoo (OMZ). Some OpenVINO samples can also extend their capabilities when compiled with OpenCV as a dependency. To install OpenCV for OpenVINO, see the `instructions on GitHub `__. -OpenCV is necessary to run demos from Open Model Zoo (OMZ). Some OpenVINO samples can also extend their capabilities when compiled with OpenCV as a dependency. To install OpenCV for OpenVINO, see the [instructions on GitHub](https://github.com/opencv/opencv/wiki/BuildOpenCV4OpenVINO). +What's Next? +#################### -## What's Next? Now that you've installed OpenVINO Runtime, you're ready to run your own machine learning applications! Learn more about how to integrate a model in OpenVINO applications by trying out the following tutorials. -@sphinxdirective + .. 
tab:: Get started with Python Try the `Python Quick Start Example `_ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser. - + .. image:: https://user-images.githubusercontent.com/15709723/127752390-f6aa371f-31b5-4846-84b9-18dd4f662406.gif :width: 400 Visit the :ref:`Tutorials ` page for more Jupyter Notebooks to get you started with OpenVINO, such as: - + * `OpenVINO Python API Tutorial `_ * `Basic image classification program with Hello Image Classification `_ * `Convert a PyTorch model and use it for image background removal `_ @@ -121,44 +134,38 @@ Now that you've installed OpenVINO Runtime, you're ready to run your own machine .. tab:: Get started with C++ Try the `C++ Quick Start Example `_ for step-by-step instructions on building and running a basic image classification C++ application. - + .. image:: https://user-images.githubusercontent.com/36741649/127170593-86976dc3-e5e4-40be-b0a6-206379cd7df5.jpg :width: 400 Visit the :ref:`Samples ` page for other C++ example applications to get you started with OpenVINO, such as: - + * `Basic object detection with the Hello Reshape SSD C++ sample `_ * `Automatic speech recognition C++ sample `_ -@endsphinxdirective - -## Uninstalling Intel® Distribution of OpenVINO™ Toolkit +Uninstalling Intel® Distribution of OpenVINO™ Toolkit +##################################################### -To uninstall the toolkit, follow the steps on the [Uninstalling page](uninstalling-openvino.md). +To uninstall the toolkit, follow the steps on the :doc:`Uninstalling page `. -## Additional Resources - -@sphinxdirective +Additional Resources +#################### +* `OpenVINO Installation Selector Tool `__ * :ref:`Troubleshooting Guide for OpenVINO Installation & Configuration ` * Converting models for use with OpenVINO™: :ref:`Model Optimizer User Guide ` * Writing your own OpenVINO™ applications: :ref:`OpenVINO™ Runtime User Guide ` * Sample applications: :ref:`OpenVINO™ Toolkit Samples Overview ` * Pre-trained deep learning models: :ref:`Overview of OpenVINO™ Toolkit Pre-Trained Models ` -* IoT libraries and code samples in the GitHUB repository: `Intel® IoT Developer Kit`_ +* IoT libraries and code samples in the GitHUB repository: `Intel® IoT Developer Kit `__ -.. _Intel® IoT Developer Kit: https://github.com/intel-iot-devkit +---> @endsphinxdirective - -## Additional Resources - -- [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) \ No newline at end of file diff --git a/docs/install_guides/installing-openvino-macos-header.md b/docs/install_guides/installing-openvino-macos-header.md index 7054d0c6e0dbc3..69b1e93df437c3 100644 --- a/docs/install_guides/installing-openvino-macos-header.md +++ b/docs/install_guides/installing-openvino-macos-header.md @@ -10,12 +10,13 @@ From PyPI Using HomeBrew -@endsphinxdirective +If you want to install OpenVINO™ Runtime on macOS, there are a few ways to accomplish this. We prepared following options for you: -If you want to install OpenVINO™ Runtime on macOS, there are a few ways to accomplish this. 
We prepared following options for you: +* :doc:`Install OpenVINO Runtime from an Archive File ` +* :doc:`Install OpenVINO Runtime via HomeBrew ` +* :doc:`Install OpenVINO from PyPI ` -* [Install OpenVINO Runtime from an Archive File](installing-openvino-from-archive-macos.md) -* [Install OpenVINO Runtime via HomeBrew](installing-openvino-brew.md) -* [Install OpenVINO from PyPI](installing-openvino-pip.md) +For a full selection of distribution channels, +see the `OpenVINO Installation Selector Tool `__ -For a full selection of distribution channels, see the [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) +@endsphinxdirective \ No newline at end of file diff --git a/docs/install_guides/installing-openvino-macos.md b/docs/install_guides/installing-openvino-macos.md index 9a98f9335e1ded..21c5053e082821 100644 --- a/docs/install_guides/installing-openvino-macos.md +++ b/docs/install_guides/installing-openvino-macos.md @@ -1,12 +1,16 @@ # Install OpenVINO™ Runtime for macOS from Installer +@sphinxdirective + Currently only the following ways are provided to install OpenVINO™: -* [Install OpenVINO Runtime from an Archive File](installing-openvino-from-archive-macos.md) -* [Install OpenVINO Runtime via HomeBrew](installing-openvino-brew.md) -* [Install OpenVINO from PyPI](installing-openvino-pip.md) -* [Build From Source](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/build.md) +* :doc:`Install OpenVINO Runtime from an Archive File ` +* :doc:`Install OpenVINO Runtime via HomeBrew ` +* :doc:`Install OpenVINO from PyPI ` +* `Build From Source `__ The other installation methods are temporarily unavailable. For a full selection of distribution channels, see the [OpenVINO Installation Selector Tool](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) + +@endsphinxdirective \ No newline at end of file diff --git a/docs/install_guides/installing-openvino-raspbian.md b/docs/install_guides/installing-openvino-raspbian.md index 8e87595aca6022..d47cea800092e6 100644 --- a/docs/install_guides/installing-openvino-raspbian.md +++ b/docs/install_guides/installing-openvino-raspbian.md @@ -8,12 +8,8 @@ * These steps have been validated with Raspberry Pi 3. * There is also an open-source version of OpenVINO™ that can be compiled for arch64 (see `build instructions `_). -@endsphinxdirective - - -## Development and Target Systems - -@sphinxdirective +Development and Target Systems +############################### .. tab:: System Requirements @@ -28,12 +24,8 @@ .. _install-openvino: -@endsphinxdirective - - -## Step 1: Download and Install OpenVINO Runtime - -@sphinxdirective +Step 1: Download and Install OpenVINO Runtime +############################################# #. Open the Terminal or your preferred console application. #. Create an installation folder for OpenVINO. If the folder already exists, skip this step. @@ -99,12 +91,8 @@ Congratulations, you finished the installation! The ``/opt/intel/openvino_2022`` .. _install-external-dependencies: -@endsphinxdirective - - -## Step 2: Install External Software Dependencies - -@sphinxdirective +Step 2: Install External Software Dependencies +############################################## CMake version 3.10 or higher is required for building the OpenVINO™ toolkit sample application. To install, open a Terminal window and run the following command: @@ -117,12 +105,8 @@ CMake is installed. 
Continue to the next section to set the environment variable .. _set-the-environment-variables-raspbian: -@endsphinxdirective - - -## Step 3: Set the Environment Variables - -@sphinxdirective +Step 3: Set the Environment Variables +##################################### You must update several environment variables before you can compile and run OpenVINO applications. Open a terminal window and run the ``setupvars.sh`` script as shown below to temporarily set your environment variables. If your is not ``/opt/intel/openvino_2022``, use the correct one instead. @@ -141,12 +125,8 @@ The environment variables are set. Continue to the next section if you want to d .. _model-optimizer: -@endsphinxdirective - - -## Step 4 (Optional): Install Additional Components - -@sphinxdirective +Step 4 (Optional): Install Additional Components +################################################ If you want to use your model for inference, the model must be converted to the ``.bin`` and ``.xml`` Intermediate Representation (IR) files that are used as input by OpenVINO Runtime. To get the optimized models, you can use one of the following options: @@ -158,16 +138,11 @@ If you want to use your model for inference, the model must be converted to the * OpenVINO Development Tools is a set of utilities for working with OpenVINO and OpenVINO models. It provides tools like Model Optimizer, Benchmark Tool, Post-Training Optimization Tool, and Open Model Zoo Downloader. See the :doc:`Install OpenVINO Development Tools ` page for step-by-step installation instructions. -@endsphinxdirective - - -## What's Next? - -@sphinxdirective +What's Next? +#################### Now that you've installed OpenVINO Runtime, you're ready to run your own machine learning applications! Learn more about how to integrate a model in OpenVINO applications by trying out the following tutorials. - .. tab:: Get started with Python Try the `Python Quick Start Example `_ to estimate depth in a scene using an OpenVINO monodepth model in a Jupyter Notebook inside your web browser. @@ -181,6 +156,7 @@ Now that you've installed OpenVINO Runtime, you're ready to run your own machine * `Basic image classification program with Hello Image Classification `_ * `Convert a PyTorch model and use it for image background removal `_ + .. tab:: Get started with C++ Try the `C++ Quick Start Example `_ for step-by-step instructions on building and running a basic image classification C++ application. @@ -193,24 +169,17 @@ Now that you've installed OpenVINO Runtime, you're ready to run your own machine * `Basic object detection with the Hello Reshape SSD C++ sample `_ * `Automatic speech recognition C++ sample `_ - To uninstall the toolkit, follow the steps on the :doc:`Uninstalling page `. -@endsphinxdirective - - -## Additional Resources - -@sphinxdirective +Additional Resources +#################### * :ref:`Troubleshooting Guide for OpenVINO Installation & Configuration ` * Converting models for use with OpenVINO™: :ref:`Model Optimizer User Guide ` * Writing your own OpenVINO™ applications: :ref:`OpenVINO™ Runtime User Guide ` * Sample applications: :ref:`OpenVINO™ Toolkit Samples Overview ` * Pre-trained deep learning models: :ref:`Overview of OpenVINO™ Toolkit Pre-Trained Models ` -* IoT libraries and code samples in the GitHUB repository: `Intel® IoT Developer Kit`_ +* IoT libraries and code samples in the GitHUB repository: `Intel® IoT Developer Kit `__ * :ref:`OpenVINO Installation Selector Tool ` -.. 
_Intel® IoT Developer Kit: https://github.com/intel-iot-devkit - -@endsphinxdirective \ No newline at end of file +@endsphinxdirective From 448654ea650c683eab7611fd416b36ef0558da16 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Thu, 23 Mar 2023 15:08:18 +0400 Subject: [PATCH 058/296] [CONFORMANCE] Fix report gewneration in case of mixed reports: rel and abs (#16505) --- .../functional_test_utils/layer_tests_summary/summarize.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py index 6abb27405217d8..8814de1d34285e 100644 --- a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py +++ b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/summarize.py @@ -101,6 +101,9 @@ def merge_xmls(xml_paths: list): continue xml_value = None if "relative_" in attr_name: + value = op_result.attrib.get(attr_name) + if value is None: + continue xml_value = float(op_result.attrib.get(attr_name)) else: xml_value = int(op_result.attrib.get(attr_name)) From 3b8d9c568c731e909e735a83cb0ec3a83611ee9d Mon Sep 17 00:00:00 2001 From: Nadezhda Ageeva Date: Thu, 23 Mar 2023 16:09:13 +0400 Subject: [PATCH 059/296] Allow skip LoadNetworkToDefaultDeviceNoThrow tests (#16507) --- .../plugin/shared/include/behavior/plugin/core_integration.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp index a519fc11e3f0ce..5a2bce5b9b6097 100644 --- a/src/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp @@ -905,6 +905,7 @@ TEST_P(IEClassQueryNetworkTest, QueryNetworkHETEROWithBigDeviceIDThrows) { // TEST(IEClassBasicTest, smoke_LoadNetworkToDefaultDeviceNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() InferenceEngine::CNNNetwork actualCnnNetwork; std::shared_ptr actualNetwork = ngraph::builder::subgraph::makeSplitConvConcat(); ASSERT_NO_THROW(actualCnnNetwork = InferenceEngine::CNNNetwork(actualNetwork)); From 8a246a8bf20e18c100500de572c191a7e2fa6277 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Thu, 23 Mar 2023 13:25:39 +0100 Subject: [PATCH 060/296] [CPU] Use Dnnl executor to avoid extra dnnl primitve desc query (#16372) --- .../src/memory_desc/dnnl_memory_desc.cpp | 9 +++- .../src/memory_desc/dnnl_memory_desc.h | 1 + src/plugins/intel_cpu/src/node.cpp | 6 --- src/plugins/intel_cpu/src/node.h | 10 ++-- .../src/nodes/common/dnnl_executor.cpp | 39 +++++++-------- .../src/nodes/common/dnnl_executor.h | 40 +++++++++++++-- src/plugins/intel_cpu/src/nodes/concat.h | 1 + src/plugins/intel_cpu/src/nodes/conv.cpp | 19 +++---- src/plugins/intel_cpu/src/nodes/deconv.cpp | 23 ++++----- .../intel_cpu/src/nodes/fullyconnected.cpp | 49 +++++++------------ .../intel_cpu/src/nodes/fullyconnected.h | 10 ---- src/plugins/intel_cpu/src/nodes/input.h | 1 + src/plugins/intel_cpu/src/nodes/interaction.h | 1 + src/plugins/intel_cpu/src/nodes/lrn.cpp | 27 ++++++---- src/plugins/intel_cpu/src/nodes/lrn.h | 4 ++ src/plugins/intel_cpu/src/nodes/matmul.cpp | 22 ++++++--- src/plugins/intel_cpu/src/nodes/matmul.h | 4 ++ src/plugins/intel_cpu/src/nodes/pooling.cpp | 27 ++++++---- src/plugins/intel_cpu/src/nodes/pooling.h | 5 ++ src/plugins/intel_cpu/src/nodes/reorder.cpp | 6 ++- 
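The executor refactoring above repeats one pattern in every `prepareParams()`: build a key that captures everything the primitive depends on (memory descriptors, attributes, implementation type), then ask the parameter cache for an executor via `getOrCreate(key, builder)`, so the expensive oneDNN primitive construction runs only on a cache miss. The sketch below condenses that key-plus-builder idea; the `ParamsCache` template, its locking, and the boolean hit flag are simplified stand-ins for the real cache, which reports a `CacheEntryBase::LookUpStatus` instead.

```cpp
#include <functional>
#include <mutex>
#include <unordered_map>
#include <utility>

// Condensed sketch of the getOrCreate() pattern used by the nodes above.
// Key must describe everything the built Value depends on and be hashable.
template <typename Key, typename Value, typename Hash = std::hash<Key>>
class ParamsCache {
public:
    using Builder = std::function<Value(const Key&)>;

    // Returns the value and whether it came from the cache (hit) or the builder (miss).
    std::pair<Value, bool> getOrCreate(const Key& key, const Builder& builder) {
        std::lock_guard<std::mutex> guard(m_mutex);
        auto it = m_map.find(key);
        if (it != m_map.end())
            return {it->second, true};  // hit: reuse the previously built executor
        Value value = builder(key);     // miss: run the expensive builder once
        m_map.emplace(key, value);
        return {value, false};
    }

private:
    std::mutex m_mutex;
    std::unordered_map<Key, Value, Hash> m_map;
};
```

In the diffs above, `result.first` is the shared executor the node keeps in `execPtr`, and a null result (builder failure) is what triggers the "Primitive descriptor was not found" error paths.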
src/plugins/intel_cpu/src/nodes/reorder.h | 1 + src/plugins/intel_cpu/src/nodes/rnn.cpp | 19 ++++--- src/plugins/intel_cpu/src/nodes/rnn.h | 5 ++ src/plugins/intel_cpu/src/nodes/softmax.cpp | 26 ++++++---- src/plugins/intel_cpu/src/nodes/softmax.h | 5 ++ src/plugins/intel_cpu/src/nodes/transpose.h | 1 + 26 files changed, 205 insertions(+), 156 deletions(-) diff --git a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp index 1f2a17189a31cc..0458f93836779d 100644 --- a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp +++ b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp @@ -36,13 +36,18 @@ MemoryDescPtr DnnlMemoryDesc::cloneWithNewPrecision(const InferenceEngine::Preci } bool DnnlMemoryDesc::isCompatible(const MemoryDesc &rhs) const { - if (MemoryDescType::Dnnl == rhs.getType()) { - return this->desc == rhs.as()->desc; + if (MemoryDescType::Dnnl & rhs.getType()) { + auto* dnnMemDesc = rhs.as(); + return isCompatible(*dnnMemDesc); } else { return false; } } +bool DnnlMemoryDesc::isCompatible(const DnnlMemoryDesc& rhs) const { + return this->desc == rhs.desc; +} + std::string DnnlMemoryDesc::serializeFormat() const { dnnl::impl::memory_desc_wrapper wrapped(desc.get()); if (wrapped.is_wino_desc()) { diff --git a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h index c6a88794485c40..373e66679f8824 100644 --- a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h +++ b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h @@ -26,6 +26,7 @@ class DnnlMemoryDesc : public virtual MemoryDesc { MemoryDescPtr cloneWithNewPrecision(const InferenceEngine::Precision prec) const override; bool isCompatible(const MemoryDesc& rhs) const override; + bool isCompatible(const DnnlMemoryDesc& rhs) const; bool hasLayoutType(LayoutType layoutType) const override { return false; } diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 03529f39d1c003..64752ea8692fdd 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -550,12 +550,6 @@ std::vector Node::getAvailableFormatsForDims(const Shape &di return {memory::format_tag::any}; } -void Node::execute(dnnl::stream strm) { - if (prim) { - prim.execute(strm, primArgs); - } -} - void Node::updateShapes() { IE_ASSERT(isDynamicNode()) << "Node::updateShapes() is called to a static shape node of type: " << getTypeStr() << " with name: " << getName(); if (needShapeInfer()) { diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 0d15441972af92..dd78bfd0159b85 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -334,7 +334,7 @@ class Node { void resolveInPlaceEdges(); - virtual void execute(dnnl::stream strm); + virtual void execute(dnnl::stream strm) = 0; void updateShapes(); void updateDynamicParams(); void executeDynamic(dnnl::stream strm); @@ -578,7 +578,6 @@ class Node { std::vector supportedPrimitiveDescriptors; std::unordered_map primArgs; std::unordered_map postOpsArgs; - dnnl::primitive prim; std::vector descs; const GraphContext::CPtr context; @@ -649,9 +648,10 @@ class Node { IE_THROW(NotImplemented) << "[DS] prapareParams not implemented for node with type " << NameFromType(getType()); } - MemoryPtr getScratchPadMem(const const_dnnl_primitive_desc_t& pd) { - auto scratchpadMemoryDesc = DnnlExtensionUtils::query_md(pd, dnnl::query::scratchpad_md); - scratchpadMem 
= context->getScratchPad()->createScratchPadMem(scratchpadMemoryDesc); + MemoryPtr getScratchPadMem(const DnnlMemoryDescPtr& desc) { + if (!scratchpadMem || !scratchpadMem->getDesc().isCompatible(*desc)) { + scratchpadMem = context->getScratchPad()->createScratchPadMem(desc); + } return scratchpadMem; } diff --git a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp index 3f055cc63fe039..7d337457494de9 100644 --- a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp @@ -9,6 +9,14 @@ using namespace dnnl; namespace ov { namespace intel_cpu { +DnnlExecutor::DnnlExecutor(const dnnl::primitive_desc& pd) { + execPrim = dnnl::primitive(pd); + src_md = DnnlExtensionUtils::makeDescriptor(pd.src_desc()); + dst_md = DnnlExtensionUtils::makeDescriptor(pd.dst_desc()); + wghts_md = DnnlExtensionUtils::makeDescriptor(pd.weights_desc()); + scrch_md = DnnlExtensionUtils::makeDescriptor(pd.scratchpad_desc()); +} + DnnlExecutor::IntermReorder::IntermReorder(const dnnl::memory::desc& descSrc, const dnnl::memory::desc& descDst, const dnnl::engine& engine) : m_descSrc(descSrc), m_descDst(descDst) { @@ -20,7 +28,15 @@ void DnnlExecutor::IntermReorder::exec(dnnl::memory& memSrc, dnnl::memory& memDs m_reorder.execute(strm, memSrc, memDst); } -void DnnlExecutor::exec(std::unordered_map primArgs, dnnl::stream strm) { +void DnnlExecutor::exec(const std::unordered_map& primArgs, dnnl::stream strm) { + if (inputReorders.empty() && outputReorders.empty()) { + execPrim.execute(strm, primArgs); + } else { + reorder_exec(primArgs, strm); + } +} + +void DnnlExecutor::reorder_exec(std::unordered_map primArgs, dnnl::stream strm) { for (auto &inReorder : inputReorders) { if (primArgs.count(inReorder.first)) { dnnl::memory memDst(inReorder.second.getDstDesc(), strm.get_engine()); @@ -58,27 +74,6 @@ const_dnnl_primitive_desc_t DnnlExecutor::getPrimitiveDesc() const { return execPrim.get_primitive_desc(); } -dnnl::memory::desc DnnlExecutor::getSrcDesc() const { - auto pd = getPrimitiveDesc(); - auto md = DnnlExtensionUtils::query_md(pd, dnnl::query::src_md); - - return md->getDnnlDesc(); -} - -dnnl::memory::desc DnnlExecutor::getWeightDesc() const { - auto pd = getPrimitiveDesc(); - auto md = DnnlExtensionUtils::query_md(pd, dnnl::query::weights_md); - - return md->getDnnlDesc(); -} - -dnnl::memory::desc DnnlExecutor::getDstDesc() const { - auto pd = getPrimitiveDesc(); - auto md = DnnlExtensionUtils::query_md(pd, dnnl::query::dst_md); - - return md->getDnnlDesc(); -} - impl_desc_type DnnlExecutor::getImplementationType() const { auto pd = getPrimitiveDesc(); return parse_impl_name(DnnlExtensionUtils::query_impl_info_str(pd)); diff --git a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h index f824fd8146ecb6..0f3eff13797eef 100644 --- a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h +++ b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h @@ -26,22 +26,52 @@ class DnnlExecutor { }; public: - void exec(std::unordered_map primArgs, dnnl::stream strm); + explicit DnnlExecutor(const dnnl::primitive_desc& pd); + void exec(const std::unordered_map& primArgs, dnnl::stream strm); bool needReordering() const; virtual ~DnnlExecutor() = default; dnnl::primitive getExecPrim() const; const_dnnl_primitive_desc_t getPrimitiveDesc() const; - dnnl::memory::desc getSrcDesc() const; - dnnl::memory::desc getWeightDesc() const; - 
dnnl::memory::desc getDstDesc() const; impl_desc_type getImplementationType() const; + DnnlMemoryDescPtr getSrcDesc() const { + return src_md; + } + DnnlMemoryDescPtr getWeightDesc() const { + return wghts_md; + } + DnnlMemoryDescPtr getDstDesc() const { + return dst_md; + } + DnnlMemoryDescPtr getScratchPadDesc() const { + return scrch_md; + } + + const dnnl::memory::desc& getDnnlSrcDesc() const { + return src_md->getDnnlDesc(); + } + const dnnl::memory::desc& getDnnlWeightDesc() const { + return wghts_md->getDnnlDesc(); + } + const dnnl::memory::desc& getDnnlDstDesc() const { + return dst_md->getDnnlDesc(); + } + const dnnl::memory::desc& getDnnlScratchPadDesc() const { + return scrch_md->getDnnlDesc(); + } + + protected: + void reorder_exec(std::unordered_map primArgs, dnnl::stream strm); + protected: - DnnlExecutor() = default; dnnl::primitive execPrim; // key is the port number for the primitive that needs memory reordering std::unordered_map inputReorders; std::unordered_map outputReorders; + DnnlMemoryDescPtr src_md; + DnnlMemoryDescPtr wghts_md; + DnnlMemoryDescPtr dst_md; + DnnlMemoryDescPtr scrch_md; }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/nodes/concat.h b/src/plugins/intel_cpu/src/nodes/concat.h index 9a0a8a66274321..32831bcede332a 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.h +++ b/src/plugins/intel_cpu/src/nodes/concat.h @@ -52,6 +52,7 @@ class Concat : public Node { InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32; bool canExecRef = false; static constexpr size_t MAX_RANK_REF = 6; + dnnl::primitive prim; }; } // namespace node diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index ab07b6521e71f7..ab2f07c5d9ca10 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -1490,8 +1490,7 @@ void Convolution::prepareParams() { Node::appendPostOpArgs(*pAttrLocal, primArgs, convPostOpsArgs[preferLegacyPostOps]); - auto pd = execPtr->getPrimitiveDesc(); - auto scratchpadMem = getScratchPadMem(pd); + auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); #ifdef CPU_DEBUG_CAPS @@ -1508,19 +1507,17 @@ Convolution::ConvolutionExecutor::ConvolutionExecutor(const dnnl::convolution_fo const dnnl::memory::desc& inMemDesc, const dnnl::memory::desc& weightMemDesc, const dnnl::memory::desc& outMemDesc, - const dnnl::engine& engine) { - execPrim = dnnl::convolution_forward(pd); - - if (inMemDesc != pd.src_desc()) { - inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, pd.src_desc(), engine)}); + const dnnl::engine& engine) : DnnlExecutor(pd) { + if (inMemDesc != getDnnlSrcDesc()) { + inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, getDnnlSrcDesc(), engine)}); } - if (weightMemDesc != pd.weights_desc()) { - inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, pd.weights_desc(), engine)}); + if (weightMemDesc != getDnnlWeightDesc()) { + inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, getDnnlWeightDesc(), engine)}); } - if (outMemDesc != pd.dst_desc()) { - outputReorders.insert({DNNL_ARG_DST, IntermReorder(pd.dst_desc(), outMemDesc, engine)}); + if (outMemDesc != getDnnlDstDesc()) { + outputReorders.insert({DNNL_ARG_DST, IntermReorder(getDnnlDstDesc(), outMemDesc, engine)}); } } diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp index 
db013ced146e6d..2395a4a6af2a8d 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.cpp +++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp @@ -991,8 +991,7 @@ void Deconvolution::prepareParams() { } Node::appendPostOpArgs(*pAttrLocal, primArgs, postOpsArgs); - auto pd = execPtr->getPrimitiveDesc(); - auto scratchpadMem = getScratchPadMem(pd); + auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); #ifdef CPU_DEBUG_CAPS if (result.second == CacheEntryBase::LookUpStatus::Miss) { @@ -1094,9 +1093,7 @@ Deconvolution::DeconvExecutorDefault::DeconvExecutorDefault(const dnnl::convolut const dnnl::memory::desc& inMemDesc, const dnnl::memory::desc& weightMemDesc, const dnnl::memory::desc& outMemDesc, - const dnnl::engine& engine) { - execPrim = dnnl::convolution_backward_data(pd); - + const dnnl::engine& engine) : DnnlExecutor(pd) { if (inMemDesc != pd.diff_dst_desc()) { inputReorders.insert({DNNL_ARG_DIFF_DST, IntermReorder(inMemDesc, pd.diff_dst_desc(), engine)}); } @@ -1114,19 +1111,17 @@ Deconvolution::DeconvExecutorInt8::DeconvExecutorInt8(const dnnl::deconvolution_ const dnnl::memory::desc& inMemDesc, const dnnl::memory::desc& weightMemDesc, const dnnl::memory::desc& outMemDesc, - const dnnl::engine& engine) { - execPrim = dnnl::deconvolution_forward(pd); - - if (inMemDesc != pd.src_desc()) { - inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, pd.src_desc(), engine)}); + const dnnl::engine& engine) : DnnlExecutor(pd) { + if (inMemDesc != getDnnlSrcDesc()) { + inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, getDnnlSrcDesc(), engine)}); } - if (weightMemDesc != pd.weights_desc()) { - inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, pd.weights_desc(), engine)}); + if (weightMemDesc != getDnnlWeightDesc()) { + inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, getDnnlWeightDesc(), engine)}); } - if (outMemDesc != pd.dst_desc()) { - outputReorders.insert({DNNL_ARG_DST, IntermReorder(pd.dst_desc(), outMemDesc, engine)}); + if (outMemDesc != getDnnlDstDesc()) { + outputReorders.insert({DNNL_ARG_DST, IntermReorder(getDnnlDstDesc(), outMemDesc, engine)}); } } diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 6b4c8e43521426..3d9cb3035cdf55 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -311,7 +311,7 @@ void FullyConnected::prepareParams() { implementationTypeIP, useConv1x1}; - auto engine = getEngine(); + auto& engine = getEngine(); auto builder = [&engine](const FCKey& key) -> executorPtr { executorPtr execPtr = nullptr; @@ -333,7 +333,7 @@ void FullyConnected::prepareParams() { } if (prim_desc) { - execPtr = std::make_shared(prim_desc); + execPtr = std::make_shared(prim_desc); } } // fallback @@ -388,7 +388,7 @@ void FullyConnected::prepareParams() { } } - execPtr = std::make_shared(prim_desc); + execPtr = std::make_shared(prim_desc); } return execPtr; }; @@ -404,26 +404,20 @@ void FullyConnected::prepareParams() { execPtr = result.first; if (execPtr) { - // no executor yet or shapes changed - if (!prevExecPtr || prevExecPtr->getSrcDesc() != execPtr->getSrcDesc()) { - auto oldMem = srcMemPtr->GetPrimitive(); - // fast path: wanted is same with parent node output, typical is static shape with inner product - if (execPtr->getSrcDesc() == inDesc->getDnnlDesc()) { - primArgs[DNNL_ARG_SRC] = std::move(oldMem); - } else { - 
primArgs[DNNL_ARG_SRC] = dnnl::memory(execPtr->getSrcDesc(), oldMem.get_engine(), oldMem.get_data_handle()); - } + if (execPtr->getSrcDesc()->isCompatible(*inDesc)) { + primArgs[DNNL_ARG_SRC] = srcMemPtr->GetPrimitive(); + } else { + primArgs[DNNL_ARG_SRC] = dnnl::memory(execPtr->getDnnlSrcDesc(), engine, srcMemPtr->GetData()); } - if (!prevExecPtr || prevExecPtr->getDstDesc() != execPtr->getDstDesc()) { - auto oldMem = dstMemPtr->GetPrimitive(); - if (execPtr->getDstDesc() == outDesc->getDnnlDesc()) { - primArgs[DNNL_ARG_DST] = std::move(oldMem); - } else { - primArgs[DNNL_ARG_DST] = dnnl::memory(execPtr->getDstDesc(), oldMem.get_engine(), oldMem.get_data_handle()); - } + + if (execPtr->getDstDesc()->isCompatible(*outDesc)) { + primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive(); + } else { + primArgs[DNNL_ARG_DST] = dnnl::memory(execPtr->getDnnlDstDesc(), engine, dstMemPtr->GetData()); } - if (!prevExecPtr || prevExecPtr->getWeightDesc() != execPtr->getWeightDesc()) { - primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(DnnlExtensionUtils::makeDescriptor(execPtr->getWeightDesc()))->GetPrimitive(); + + if (!prevExecPtr || !execPtr->getWeightDesc()->isCompatible(*(prevExecPtr->getWeightDesc()))) { + primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(execPtr->getWeightDesc())->GetPrimitive(); } // changed shapes may also cause the kernel type changed selected_pd->setImplementationType(execPtr->getImplementationType()); @@ -438,9 +432,8 @@ void FullyConnected::prepareParams() { primArgs[DNNL_ARG_BIAS] = biasMemPtr->GetPrimitive(); } - auto pd = execPtr->getPrimitiveDesc(); - auto scratchpadMem = getScratchPadMem(pd); - primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); + auto schratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); + primArgs[DNNL_ARG_SCRATCHPAD] = schratchpadMem->GetPrimitive(); #ifdef CPU_DEBUG_CAPS if (result.second == CacheEntryBase::LookUpStatus::Miss) { DEBUG_LOG("verbose##", getName(), "##", pd->info(), "\n"); @@ -919,14 +912,6 @@ bool FullyConnected::canBeExecutedInConv1x1() const { return retVal; } -FullyConnected::ExecutorInnerProduct::ExecutorInnerProduct(const dnnl::inner_product_forward::primitive_desc& pd) { - execPrim = dnnl::inner_product_forward(pd); -} - -FullyConnected::ExecutorConv1x1::ExecutorConv1x1(const dnnl::convolution_forward::primitive_desc& pd) { - execPrim = dnnl::convolution_forward(pd); -} - MemoryPtr FullyConnected::prepareWeightMemory(DnnlMemoryDescPtr weightDesc) { if (!getParentEdgeAt(1)->getParent()->isConstant()) IE_THROW() << "Weight input is not const for node " << getName() << "."; diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.h b/src/plugins/intel_cpu/src/nodes/fullyconnected.h index 4de5dff882649d..3f0983f2fc2a77 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.h +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.h @@ -90,16 +90,6 @@ class FullyConnected : public Node { std::unordered_map privateWeightCache; dnnl::primitive_attr attr; - class ExecutorInnerProduct : public DnnlExecutor { - public: - ExecutorInnerProduct(const dnnl::inner_product_forward::primitive_desc& pd); - }; - - class ExecutorConv1x1 : public DnnlExecutor { - public: - ExecutorConv1x1(const dnnl::convolution_forward::primitive_desc& pd); - }; - static dnnl::convolution_forward::primitive_desc createDescriptorInternalForConv(DnnlMemoryDescCPtr inputDescPtr, DnnlMemoryDescCPtr weightDescPtr, diff --git a/src/plugins/intel_cpu/src/nodes/input.h b/src/plugins/intel_cpu/src/nodes/input.h index d3c05b721da6f0..71ae6b91e7660c 
100644 --- a/src/plugins/intel_cpu/src/nodes/input.h +++ b/src/plugins/intel_cpu/src/nodes/input.h @@ -31,6 +31,7 @@ class Input : public Node { void withMeanImage(); MemoryCPtr getMemoryPtr() const; + void execute(dnnl::stream strm) override {} void executeDynamicImpl(dnnl::stream strm) override {} bool isExecutable() const override { return false; diff --git a/src/plugins/intel_cpu/src/nodes/interaction.h b/src/plugins/intel_cpu/src/nodes/interaction.h index 661cfc22de8b88..122ae3b2addc8c 100644 --- a/src/plugins/intel_cpu/src/nodes/interaction.h +++ b/src/plugins/intel_cpu/src/nodes/interaction.h @@ -60,6 +60,7 @@ class Interaction : public Node { private: void execRef(dnnl::stream strm); + dnnl::primitive prim; size_t batchSize = 0; size_t featureSize = 0; size_t inputSizes = 0; diff --git a/src/plugins/intel_cpu/src/nodes/lrn.cpp b/src/plugins/intel_cpu/src/nodes/lrn.cpp index 5cc0dce6230eae..f5f8995626d3e4 100644 --- a/src/plugins/intel_cpu/src/nodes/lrn.cpp +++ b/src/plugins/intel_cpu/src/nodes/lrn.cpp @@ -182,7 +182,7 @@ void Lrn::prepareParams() { LrnKey key = {inpDesc, selected_pd->getImplementationType(), alg, size, k, alpha, beta, attr}; auto engine = getEngine(); - auto builder = [&engine](const LrnKey& key) -> dnnl::primitive { + auto builder = [&engine](const LrnKey& key) -> executorPtr { auto desc = std::make_shared( engine, dnnl::prop_kind::forward_inference, @@ -205,25 +205,24 @@ void Lrn::prepareParams() { break; } if (!itpd.next_impl()) - return dnnl::lrn_forward(); + return nullptr; } - return dnnl::lrn_forward(prim_desc); + return std::make_shared(prim_desc); }; auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); - if (!result.first) { + execPtr = result.first; + if (!execPtr) { IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; } - prim = result.first; - auto pd = prim.get_primitive_desc(); - auto scratchpadMem = getScratchPadMem(pd); + auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); - auto src = srcMemPtr->GetPrimitive(); - auto dst = dstMemPtr->GetPrimitive(); - primArgs = { {DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}, {DNNL_ARG_SCRATCHPAD, scratchpadMem->GetPrimitive()} }; + primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); + primArgs[DNNL_ARG_SRC] = srcMemPtr->GetPrimitive(); + primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive(); } bool Lrn::created() const { @@ -250,6 +249,14 @@ void Lrn::createDescriptor(const std::vector &inputDesc, descs.push_back(desc); } +void Lrn::execute(dnnl::stream strm) { + if (execPtr) { + execPtr->exec(primArgs, strm); + } else { + IE_THROW() << errorPrefix << " doesn't have an initialized executor"; + } +} + void Lrn::executeDynamicImpl(dnnl::stream strm) { execute(strm); } diff --git a/src/plugins/intel_cpu/src/nodes/lrn.h b/src/plugins/intel_cpu/src/nodes/lrn.h index b821fa8b70e521..c1635261f70faf 100644 --- a/src/plugins/intel_cpu/src/nodes/lrn.h +++ b/src/plugins/intel_cpu/src/nodes/lrn.h @@ -9,6 +9,7 @@ #include #include #include +#include "common/dnnl_executor.h" namespace ov { namespace intel_cpu { @@ -31,11 +32,14 @@ class Lrn : public Node { } void prepareParams() override; + void execute(dnnl::stream strm) override; void executeDynamicImpl(dnnl::stream strm) override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: + using executorPtr = std::shared_ptr; + executorPtr execPtr = nullptr; dnnl::algorithm alg; size_t size = 1; int k = 1; diff --git 
a/src/plugins/intel_cpu/src/nodes/matmul.cpp b/src/plugins/intel_cpu/src/nodes/matmul.cpp index c1c1381e6631f2..4027c2d08e30b8 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.cpp +++ b/src/plugins/intel_cpu/src/nodes/matmul.cpp @@ -593,7 +593,7 @@ void MatMul::prepareParams() { auto engine = getEngine(); - auto builder = [&engine](const MatMulKey& key) -> dnnl::primitive { + auto builder = [&engine](const MatMulKey& key) -> executorPtr { dnnl::matmul::primitive_desc matmul_desc; if (key.bias) { @@ -633,22 +633,20 @@ void MatMul::prepareParams() { break; } } - return matmul(prim_desc); + return std::make_shared<DnnlExecutor>(prim_desc); }; auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); - if (!result.first) { + execPtr = result.first; + if (!execPtr) { IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; } - prim = result.first; + auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); - auto pd = prim.get_primitive_desc(); - auto scratchpadMem = getScratchPadMem(pd); - - primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); + primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); primArgs[DNNL_ARG_SRC_0] = src0MemPtr->GetPrimitive(); primArgs[DNNL_ARG_WEIGHTS_0] = src1MemPtr->GetPrimitive(); primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive(); @@ -658,6 +656,14 @@ void MatMul::prepareParams() { appendPostOpArgs(*attr, primArgs, postOpsArgs); } +void MatMul::execute(dnnl::stream strm) { + if (execPtr) { + execPtr->exec(primArgs, strm); + } else { + IE_THROW() << errorPrefix << " doesn't have an initialized executor"; + } +} + void MatMul::executeDynamicImpl(dnnl::stream strm) { execute(strm); } diff --git a/src/plugins/intel_cpu/src/nodes/matmul.h b/src/plugins/intel_cpu/src/nodes/matmul.h index 5c8902483972b8..16d2140cbe5eee 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.h +++ b/src/plugins/intel_cpu/src/nodes/matmul.h @@ -10,6 +10,7 @@ #include #include #include "memory_desc/dnnl_blocked_memory_desc.h" +#include "common/dnnl_executor.h" namespace ov { namespace intel_cpu { @@ -38,6 +39,7 @@ class MatMul : public Node { } void prepareParams() override; + void execute(dnnl::stream strm) override; void executeDynamicImpl(dnnl::stream strm) override; static bool isSupportedOperation(const std::shared_ptr<const ngraph::Node>& op, std::string& errorMessage) noexcept; @@ -48,6 +50,8 @@ class MatMul : public Node { AttrPtr initPrimitiveAttr(const VectorDims& dims); private: + using executorPtr = std::shared_ptr<DnnlExecutor>; + executorPtr execPtr = nullptr; dnnl::memory::desc getBiasDescFrom(const DnnlMemoryDescCPtr outMemDesc); std::pair<Shape, Shape> makeDummyInputShapes(const Shape& in0, const Shape& in1) const;
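Note how the builder's failure signalling changes in lockstep with the cached type: previously the params cache stored a raw dnnl primitive and an empty, default-constructed primitive meant "not found"; now it stores a shared DnnlExecutor and nullptr means "not found", which the callers test directly. Side by side, as a sketch only, with buildPrimDesc standing in for the per-node descriptor search (it is not a real helper in this patch):

// before: failure is an empty primitive, detected via the primitive's operator bool
auto builderOld = [&engine](const MatMulKey& key) -> dnnl::primitive {
    auto prim_desc = buildPrimDesc(engine, key);  // stand-in
    return prim_desc ? dnnl::matmul(prim_desc) : dnnl::matmul();
};

// after: failure is a null executor, detected as a plain shared_ptr check
auto builderNew = [&engine](const MatMulKey& key) -> std::shared_ptr<DnnlExecutor> {
    auto prim_desc = buildPrimDesc(engine, key);  // stand-in
    return prim_desc ? std::make_shared<DnnlExecutor>(prim_desc) : nullptr;
};

diff --git a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp index fc56f8d812ce54..b31c358911904a 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp @@ -369,7 +369,7 @@ void Pooling::prepareParams() { alg, selected_pd->getImplementationType()}; auto engine = getEngine(); - auto builder = [&engine](const PoolingKey& key) -> dnnl::primitive { + auto builder = [&engine](const PoolingKey& key) -> executorPtr { primitive_desc_iterator itpd = createDescriptorHelper(engine, key.inp->getDnnlDesc(), key.out->getDnnlDesc(), @@ -393,27 +393,34 @@ void Pooling::prepareParams() { break; } - return pooling_forward(prim_desc); + return std::make_shared<DnnlExecutor>(prim_desc); }; auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); -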
if (!result.first) { + execPtr = result.first; + + if (!execPtr) { IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; } - prim = result.first; - - auto pd = prim.get_primitive_desc(); - auto scratchpadMem = getScratchPadMem(pd); - auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}, {DNNL_ARG_SCRATCHPAD, scratchpadMem->GetPrimitive()}}; + auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); + primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); + primArgs[DNNL_ARG_SRC] = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); + primArgs[DNNL_ARG_DST] = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); Node::appendPostOpArgs(*attr, primArgs, postOpsArgs); } +void Pooling::execute(dnnl::stream strm) { + if (execPtr) { + execPtr->exec(primArgs, strm); + } else { + IE_THROW() << "Pooling node with name '" << getName() << "' doesn't have an initialized executor"; + } +} + void Pooling::executeDynamicImpl(dnnl::stream strm) { execute(strm); } diff --git a/src/plugins/intel_cpu/src/nodes/pooling.h b/src/plugins/intel_cpu/src/nodes/pooling.h index 2daaa3f9a528e8..6d76e3d48980a2 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.h +++ b/src/plugins/intel_cpu/src/nodes/pooling.h @@ -10,6 +10,7 @@ #include #include #include +#include "common/dnnl_executor.h" namespace ov { namespace intel_cpu { @@ -30,6 +31,7 @@ class Pooling : public Node { } void prepareParams() override; + void execute(dnnl::stream strm) override; void executeDynamicImpl(dnnl::stream strm) override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; @@ -38,6 +40,9 @@ class Pooling : public Node { AttrPtr initPrimitiveAttr() override; private: + using executorPtr = std::shared_ptr; + executorPtr execPtr = nullptr; + void setPostOps(dnnl::primitive_attr &attr); void initEffectiveAttributes(const Shape &inDims, const Shape &outDims); diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp index 2efcd0e44b6e69..5dd5674abd9814 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp @@ -336,7 +336,11 @@ void Reorder::execute(dnnl::stream strm) { src_blocked->setDataHandle(getParentEdgeAt(0)->getMemory().GetData()); dst_blocked->setDataHandle(getChildEdgeAt(0)->getMemory().GetData()); - Node::execute(strm); + if (prim) { + prim.execute(strm, primArgs); + } else { + IE_THROW() << "Reorder node with name " << getName() << " doesn't have an initialized primitive"; + } } } diff --git a/src/plugins/intel_cpu/src/nodes/reorder.h b/src/plugins/intel_cpu/src/nodes/reorder.h index f6091a6c91bd43..4bd3fa8fc3211b 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.h +++ b/src/plugins/intel_cpu/src/nodes/reorder.h @@ -66,6 +66,7 @@ class Reorder : public Node { static void reorderData(const Memory &input, const Memory &output, MultiCachePtr cache = nullptr); private: + dnnl::reorder::primitive prim; std::shared_ptr input; std::shared_ptr output; diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp index e7b97b9355d214..4ed7ed7a4e5550 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp @@ -1062,7 +1062,7 @@ void RNN::prepareParams() { RNNKey key = { inDataDescs, outDataDescs, wDescs, cell_type, cell_act, 
direction, *attr }; auto engine = getEngine(); - auto builder = [&engine](const RNNKey& key) -> dnnl::primitive { + auto builder = [&engine](const RNNKey& key) -> executorPtr { const auto descPtr = createPrimitiveDescriptor(engine, key.cellType, key.cellAct, @@ -1072,23 +1072,22 @@ void RNN::prepareParams() { key.wDescs, key.attr); - return dnnl::primitive(descPtr); + return std::make_shared(descPtr); }; auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); - if (!result.first) { + execPtr = result.first; + + if (!execPtr) { IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; } - prim = result.first; - - auto pd = prim.get_primitive_desc(); - scratchpadMem = getScratchPadMem(pd); + scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); if (!wasMemoryPrepared || wFormatWasChanged) { - auto pd = prim.get_primitive_desc(); + auto pd = execPtr->getPrimitiveDesc(); auto query_weights_md = [&](int idx = 0) -> dnnl::memory::desc { auto what = dnnl::convert_to_c(dnnl::query::weights_md); const_dnnl_memory_desc_t cdesc = dnnl_primitive_desc_query_md(pd, what, idx); @@ -1118,7 +1117,7 @@ std::shared_ptr RNN::getDstMemDesc(dnnl::primitive_desc_iterator& pr } void RNN::execute(dnnl::stream strm) { - if (!prim) + if (!execPtr) THROW_ERROR << "does not have initialized primitive to execute."; const auto src_data_mem = getParentEdgeAt(0)->getMemoryPtr(); @@ -1160,7 +1159,7 @@ void RNN::execute(dnnl::stream strm) { } } - prim.execute(strm, args); + execPtr->exec(args, strm); } void RNN::executeDynamicImpl(dnnl::stream strm) { diff --git a/src/plugins/intel_cpu/src/nodes/rnn.h b/src/plugins/intel_cpu/src/nodes/rnn.h index b94d026adcf75c..dbe4f9769d14b7 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.h +++ b/src/plugins/intel_cpu/src/nodes/rnn.h @@ -11,6 +11,8 @@ #include #include +#include "common/dnnl_executor.h" + namespace ov { namespace intel_cpu { namespace node { @@ -66,6 +68,9 @@ class RNN : public Node { void copyWeightsData(); + using executorPtr = std::shared_ptr; + executorPtr execPtr = nullptr; + /** Specify mode Cell or Seq. 
true - Cell, false - Seq */ bool is_cell = false; diff --git a/src/plugins/intel_cpu/src/nodes/softmax.cpp b/src/plugins/intel_cpu/src/nodes/softmax.cpp index 7f3d3c337e5792..65176e4a7c7907 100644 --- a/src/plugins/intel_cpu/src/nodes/softmax.cpp +++ b/src/plugins/intel_cpu/src/nodes/softmax.cpp @@ -170,7 +170,7 @@ void SoftMax::prepareParams() { SoftmaxKey key = {inpDesc, selected_pd->getImplementationType(), axis, *attr}; auto engine = getEngine(); - auto builder = [&engine](const SoftmaxKey& key) -> dnnl::primitive { + auto builder = [&engine](const SoftmaxKey& key) -> executorPtr { softmax_forward::primitive_desc prim_desc; auto desc = std::make_shared( engine, @@ -196,26 +196,32 @@ void SoftMax::prepareParams() { break; } if (!itpd.next_impl()) - return softmax_forward(); + return nullptr; } - return softmax_forward(prim_desc); + return std::make_shared(prim_desc); }; auto cache = context->getParamsCache(); auto result = cache->getOrCreate(key, builder); - if (!result.first) { + execPtr = result.first; + if (!execPtr) { IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; } - prim = result.first; + auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc()); - auto pd = prim.get_primitive_desc(); - auto scratchpadMem = getScratchPadMem(pd); + primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive(); + primArgs[DNNL_ARG_SRC] = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); + primArgs[DNNL_ARG_DST] = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); +} - auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}, {DNNL_ARG_SCRATCHPAD, scratchpadMem->GetPrimitive()}}; +void SoftMax::execute(dnnl::stream strm) { + if (execPtr) { + execPtr->exec(primArgs, strm); + } else { + IE_THROW() << "Softmax node with name '" << getName() << "' doesn't have an initialized executor"; + } } void SoftMax::executeDynamicImpl(dnnl::stream strm) { diff --git a/src/plugins/intel_cpu/src/nodes/softmax.h b/src/plugins/intel_cpu/src/nodes/softmax.h index 78fc51115a18d7..1a472075168406 100644 --- a/src/plugins/intel_cpu/src/nodes/softmax.h +++ b/src/plugins/intel_cpu/src/nodes/softmax.h @@ -11,6 +11,8 @@ #include #include +#include "common/dnnl_executor.h" + namespace ov { namespace intel_cpu { namespace node { @@ -26,11 +28,14 @@ class SoftMax : public Node { bool created() const override; AttrPtr initPrimitiveAttr() override; void prepareParams() override; + void execute(dnnl::stream strm) override; void executeDynamicImpl(dnnl::stream strm) override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: + using executorPtr = std::shared_ptr; + executorPtr execPtr = nullptr; size_t axis = 0; }; diff --git a/src/plugins/intel_cpu/src/nodes/transpose.h b/src/plugins/intel_cpu/src/nodes/transpose.h index b13bc1a0a745ab..03988d24fe8367 100644 --- a/src/plugins/intel_cpu/src/nodes/transpose.h +++ b/src/plugins/intel_cpu/src/nodes/transpose.h @@ -48,6 +48,7 @@ class Transpose : public Node { }; using executorPtr = std::shared_ptr; executorPtr execPtr = nullptr; + dnnl::primitive prim; struct TransposeJitExecutor : public TransposeExecutor { TransposeJitExecutor(const PermuteParams& params); From fb24e9141629451a92b7c814fa6738486bb3dc13 Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Thu, 23 Mar 2023 13:24:10 +0000 Subject: [PATCH 061/296] [LPT] NNCF 
GroupConvolution 5D on weights support (#16336) * [LPT] NNCF GroupConvolution 5D on weights support * PullReshapeThroughDequantization rollback --- .../src/convolution.cpp | 16 +- .../src/weightable_layer_transformation.cpp | 12 +- .../group_convolution_transformation.cpp | 1180 ++++++++++------- ..._through_dequantization_transformation.cpp | 165 ++- ..._through_dequantization_transformation.cpp | 86 +- .../groupconvolution_qdq_transformation.cpp | 121 ++ .../groupconvolution_qdq_transformation.cpp | 121 ++ .../group_convolution_function.hpp | 3 +- .../src/group_convolution_function.cpp | 60 +- 9 files changed, 1170 insertions(+), 594 deletions(-) diff --git a/src/common/low_precision_transformations/src/convolution.cpp b/src/common/low_precision_transformations/src/convolution.cpp index 46831f0586c5cf..4bd2dd31f24534 100644 --- a/src/common/low_precision_transformations/src/convolution.cpp +++ b/src/common/low_precision_transformations/src/convolution.cpp @@ -237,8 +237,15 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph Shape newScaleShape = newScalePShape.to_shape(); if (!newScaleShape.empty()) { - // that's all we need: [C, 1, 1, 1] => [C, 1, 1] - newScaleShape.pop_back(); + const auto input_shape = convolution->get_input_partial_shape(0); + const auto diff = newScaleShape.size() - input_shape.size(); + OPENVINO_ASSERT( + newScaleShape.empty() || (diff <= 2ull), + "unexpected shape size on weights"); + + for (size_t i = 0; i <= diff; ++i) { + newScaleShape.pop_back(); + } } if (reshapeFromWeights != nullptr) { @@ -282,7 +289,12 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph const size_t weightsRankValue = weightsPShape.rank().get_length(); Shape zeroPointShape(weightsRankValue, 1ul); + // output channel or group zeroPointShape[0] = static_cast<size_t>(weightsPShape[0].get_length()); + if ((reshapeFromWeights == nullptr) && (weightsRankValue == 5ull)) { + // output channel + zeroPointShape[1] = static_cast<size_t>(weightsPShape[1].get_length()); + } auto zeroPointConstant = fold<opset1::Broadcast>( subtractFromWeights->input_value(1),
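The net effect of the shape trimming above is easiest to see with concrete numbers. A worked example in the spirit of that code, using the shapes from the new 5D test case added further down (values are illustrative only; Shape is the ngraph shape type, a vector of size_t):

// NNCF-style 5D group weights {3, 8, 2, 7, 7}: 3 groups, 8 output channels per group
Shape newScaleShape{3, 8, 1, 1, 1};          // per-channel scale on the weights, rank 5
const auto diff = newScaleShape.size() - 4;  // data input is 4D, so diff == 1
for (size_t i = 0; i <= diff; ++i)           // pops diff + 1 trailing ones
    newScaleShape.pop_back();                // {3, 8, 1, 1, 1} -> {3, 8, 1}
// plain 4D weights keep the old behaviour: diff == 0, one pop, {C, 1, 1, 1} -> {C, 1, 1}

// zero point: with no Reshape on weights and rank-5 weights, both the group
// dimension and the per-group output channel dimension are preserved
Shape zeroPointShape(5, 1ul);
zeroPointShape[0] = 3;                       // groups
zeroPointShape[1] = 8;                       // output channels inside each group
// -> {3, 8, 1, 1, 1}, matching the expected Subtract constant in the 5D test case below

diff --git a/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp b/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp index 1cfe4bb51d3ac7..1837f21635235c 100644 --- a/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp +++ b/src/common/low_precision_transformations/src/weightable_layer_transformation.cpp @@ -230,16 +230,16 @@ bool WeightableLayerTransformation::isQuantizedStatic(const std::shared_ptrget_input_node_shared_ptr(1); - if (!ov::is_type(reshape)) { - return false; - } + std::shared_ptr parent = ov::is_type(reshape) ?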
+ reshape->get_input_node_shared_ptr(0) : + reshape; - if (ov::is_type(reshape->get_input_node_shared_ptr(0))) { - const std::shared_ptr fq = ov::as_type_ptr(reshape->get_input_node_shared_ptr(0)); + const auto fq = ov::as_type_ptr(parent); + if (fq != nullptr) { return NetworkHelper::isQuantizeSupported(fq); } - dequantizationOnWeights = NetworkHelper::getDequantization(reshape, defaultPrecisions, 0); + dequantizationOnWeights = NetworkHelper::getDequantization(parent, defaultPrecisions, 0, true); } else if (ov::is_type(layer->get_input_node_shared_ptr(1))) { const std::shared_ptr fq = ov::as_type_ptr(layer->get_input_node_shared_ptr(1)); return NetworkHelper::isQuantizeSupported(fq); diff --git a/src/common/low_precision_transformations/tests/group_convolution_transformation.cpp b/src/common/low_precision_transformations/tests/group_convolution_transformation.cpp index b20d211d651adc..d148c370dfd2f8 100644 --- a/src/common/low_precision_transformations/tests/group_convolution_transformation.cpp +++ b/src/common/low_precision_transformations/tests/group_convolution_transformation.cpp @@ -48,6 +48,7 @@ class GroupConvolutionTestValues { TestTransformationParams params; size_t group; int groupCalculationDimention; + bool addReshape; Actual actual; Expected expected; }; @@ -76,7 +77,8 @@ class GroupConvolutionTransformation : public LayerTransformation, testValues.actual.dequantizationOnWeights, ngraph::element::f32, {}, - ngraph::element::f32); + ngraph::element::f32, + testValues.addReshape); SimpleLowPrecisionTransformer transform; transform.add( @@ -101,7 +103,8 @@ class GroupConvolutionTransformation : public LayerTransformation, testValues.expected.dequantizationOnWeights, testValues.expected.precisionAfterOperation, testValues.expected.dequantizationAfter, - testValues.expected.precisionAfterDequantization); + testValues.expected.precisionAfterDequantization, + testValues.addReshape); } static std::string getTestCaseName(testing::TestParamInfo obj) { @@ -113,7 +116,9 @@ class GroupConvolutionTransformation : public LayerTransformation, result << toString(testValues.params) << "_" << inputShape << "_" << outputShape << "_" << testValues.group << "_" << testValues.groupCalculationDimention << "_" << testValues.actual.precisionBeforeDequantization << "_" << testValues.actual.dequantization << "_" - << "_weights_" << testValues.actual.weights->get_element_type() << "_" + << "_add_reshape:" << testValues.addReshape << "_" + << "_weights_type:" << testValues.actual.weights->get_element_type() << "_" + << "_weights_shape:" << testValues.actual.weights->get_shape() << "_" << "{ " << testValues.actual.weights->cast_vector()[0] << " }_" << testValues.actual.fakeQuantizeOnWeights << "_"; return result.str(); @@ -128,333 +133,520 @@ TEST_P(GroupConvolutionTransformation, CompareFunctions) { ASSERT_TRUE(LayerTransformation::allNamesAreUnique(actualFunction)) << "Not all names are unique"; } +// clang-format off namespace testValues1 { + const std::vector> shapesForGroupConv = { {{1, 6, 224, 224}, {1, 24, 218, 218}}, - {{-1, -1, -1, -1}, {-1, -1, -1, -1}}}; + {{-1, -1, -1, -1}, {-1, -1, -1, -1}} +}; const std::vector testValuesGroupConv = { // group convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, 
{254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) - }}, + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) + } + }, // group convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - 3ul, - 0, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) - }}, + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + 3ul, + 0, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) + } + }, // group convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - 3ul, - 1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) - }}, + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + 3ul, + 1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, 
{-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) + } + }, // group convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(false), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(false), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {}, + ngraph::element::f32, + {} + } + }, + // group convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setUpdatePrecisions(false), - 3ul, - -1, - // ActualValues - {ngraph::element::f32, - {{}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::f32, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) - }}, + { + LayerTransformation::createParamsU8I8().setUpdatePrecisions(false), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::f32, + {{}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::f32, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} // 0.0002 = 0.02 (on data) * 0.01 (on weights) + } + }, + // group convolution, per-channel quantization with different values, without zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - 
ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, - {}, - {{// 0.0002 = 0.02 (on data) * 0.01 (on weights) - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - // 0.0004 = 0.04 (on data) * 0.01 (on weights) - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - // 0.0008 = 0.08 (on data) * 0.01 (on weights) - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f}, - ngraph::element::f32, - {1, 24, 1, 1}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + { + {}, + {}, + { + { + // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + // 0.0008 = 0.08 (on data) * 0.01 (on weights) + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f + }, + ngraph::element::f32, + {1, 24, 1, 1} + } + }, + } + }, + // group convolution, per-channel quantization with the same values, without zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - { - ngraph::element::u8, - {{ngraph::element::f32}, {}, {{0.02f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}, - }, - // ExpectedValues - { - ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {{0.02f}, ngraph::element::f32, {1, 6, 1, 1}}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {}, + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}, + } + }, + // group convolution, without zero point, without convert - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::f32, - {{}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::f32, - {{}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-1.25f}), - {}, - {}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + 
ngraph::element::f32, + {{}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::f32, + {{}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-1.25f}), + {}, + {}, + ngraph::element::f32, + {} + } + }, + // group convolution, without zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{element::f32}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}}}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{element::f32}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} + } + }, + // per-channel quantization with different values, without zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), - {}, - {ngraph::element::f32, {}, {0.01f}}}, - // ExpectedValues - { - ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), - {}, - {}, - ngraph::element::f32, - {{}, - {}, - {{// 0.0002 = 0.02 (on data) * 0.01 (on weights) - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - // 0.0004 = 0.04 (on data) * 0.01 (on weights) - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - // 0.0008 = 0.08 (on data) * 0.01 (on weights) - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f}, - ngraph::element::f32, - {1, 24, 1, 1}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), + {}, + {ngraph::element::f32, {}, {0.01f}} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), + {}, + {}, + ngraph::element::f32, + { + {}, + {}, + { + { + // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + // 0.0008 = 0.08 (on data) * 0.01 (on weights) + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f + }, + ngraph::element::f32, + {1, 24, 1, 1} + } 
+ }, + } + }, // per-channel quantization with different values, without zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, - {{255}, ngraph::element::f32, {}, true, 1, ngraph::element::u8, true}, - {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), - {}, - {ngraph::element::f32, {{127}, ngraph::element::f32, {}, true, 1, ngraph::element::i8, true}, {0.01f}}}, - // ExpectedValues - { - ngraph::element::u8, - {{}, {std::vector(6ul, 255.f), ngraph::element::f32, {1, 6, 1, 1}, false, 1, ngraph::element::u8}, {}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), - {}, - {{}, - {std::vector(24ul, 127.f), - ngraph::element::f32, - {24, 1, 1, 1}, - false, - 1, - ngraph::element::i8, - false, - {{ov::pass::DisableConstantFolding::get_type_info_static(), ov::pass::DisableConstantFolding()}}}, - {}}, - ngraph::element::f32, - {{}, - {}, - {{// 0.0002 = 0.02 (on data) * 0.01 (on weights) - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - 0.0002f, - // 0.0004 = 0.04 (on data) * 0.01 (on weights) - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - 0.0004f, - // 0.0008 = 0.08 (on data) * 0.01 (on weights) - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f, - 0.0008f}, - ngraph::element::f32, - {1, 24, 1, 1}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + { + {ngraph::element::f32}, + {{255}, ngraph::element::f32, {}, true, 1, ngraph::element::u8, true}, + {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}} + }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), + {}, + { + ngraph::element::f32, + {{127}, ngraph::element::f32, {}, true, 1, ngraph::element::i8, true}, + {0.01f} + } + }, + // ExpectedValues + { + ngraph::element::u8, + { + {}, + {std::vector(6ul, 255.f), ngraph::element::f32, {1, 6, 1, 1}, false, 1, ngraph::element::u8}, + {} + }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{2.f}), + {}, + { + {}, + { + std::vector(24ul, 127.f), + ngraph::element::f32, + {24, 1, 1, 1}, + false, + 1, + ngraph::element::i8, + false, + {{ov::pass::DisableConstantFolding::get_type_info_static(), ov::pass::DisableConstantFolding()}} + }, + {} + }, + ngraph::element::f32, + { + {}, + {}, + { + { + // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + // 0.0008 = 0.08 (on data) * 0.01 (on weights) + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f + }, + ngraph::element::f32, + {1, 24, 1, 1} + } + }, + } + }, + + // per-channel quantization with different values, without zero point, no reshape - 5D weights + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + false, + // ActualValues + { + ngraph::element::u8, + { + {ngraph::element::f32}, + {{255}, ngraph::element::f32, {}, true, 1, ngraph::element::u8, true}, + {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}} + }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{3,8,2,7,7}, std::vector{2.f}), + {}, + 
{ + ngraph::element::f32, + {{127}, ngraph::element::f32, {}, true, 1, ngraph::element::i8, true}, + {0.01f} + } + }, + // ExpectedValues + { + ngraph::element::u8, + { + {}, + {std::vector(6ul, 255.f), ngraph::element::f32, {1, 6, 1, 1}, false, 1, ngraph::element::u8}, + {} + }, + op::Constant::create(ngraph::element::i8, ngraph::Shape{3,8,2,7,7}, std::vector{2.f}), + {}, + { + {}, + { + std::vector(24ul, 127.f), + ngraph::element::f32, + {3, 8, 1, 1, 1}, + false, + 1, + ngraph::element::i8, + false, + {{ov::pass::DisableConstantFolding::get_type_info_static(), ov::pass::DisableConstantFolding()}} + }, + {} + }, + ngraph::element::f32, + { + {}, + {}, + { + { + // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + 0.0002f, + // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + 0.0004f, + // 0.0008 = 0.08 (on data) * 0.01 (on weights) + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f, + 0.0008f + }, + ngraph::element::f32, + {1, 24, 1, 1} + } + }, + } + }, }; INSTANTIATE_TEST_SUITE_P(smoke_LPT, @@ -472,146 +664,201 @@ const std::vector> shapesF const std::vector testValuesForDepthWiseConv = { // depth-wise convolution, per-tensor quantization, with zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}}}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} + } + }, + // depth-wise convolution, tensor quantization, with zero point - {LayerTransformation::createParamsU8I8().setUpdatePrecisions(false), - 3ul, - -1, - // ActualValues - {ngraph::element::f32, - {{}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::f32, - {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}}}, + { + LayerTransformation::createParamsU8I8().setUpdatePrecisions(false), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::f32, + {{}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + 
ngraph::element::f32, + {{}, {{128.f}, ngraph::element::f32, {1, 6, 1, 1}, false}, {}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} + } + }, + // depth-wise convolution, per-channel quantization with different values, without zero point - {LayerTransformation::createParamsU8I8(), - 6ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, - {}, - {{ - 0.0002f, - 0.0002f, // 0.0002 = 0.02 (on data) * 0.01 (on weights) - 0.0004f, - 0.0004f, // 0.0004 = 0.04 (on data) * 0.01 (on weights) - 0.0008f, - 0.0008f // 0.0008 = 0.08 (on data) * 0.01 (on weights) - }, - ngraph::element::f32, - {1, 6, 1, 1}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 6ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {{0.02f, 0.02f, 0.04f, 0.04f, 0.08f, 0.08f}, ngraph::element::f32, {1, 6, 1, 1}}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + { + {}, + {}, + { + { + 0.0002f, + 0.0002f, // 0.0002 = 0.02 (on data) * 0.01 (on weights) + 0.0004f, + 0.0004f, // 0.0004 = 0.04 (on data) * 0.01 (on weights) + 0.0008f, + 0.0008f // 0.0008 = 0.08 (on data) * 0.01 (on weights) + }, + ngraph::element::f32, + {1, 6, 1, 1} + } + }, + } + }, + // depth-wise convolution, per-tensor quantization with the same values, without zero point - {LayerTransformation::createParamsU8I8(), - 6ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {}, {{0.02f}, ngraph::element::f32, {1, 6, 1, 1}}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - { - ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}, - }}, + { + LayerTransformation::createParamsU8I8(), + 6ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {{0.02f}, ngraph::element::f32, {1, 6, 1, 1}}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}, + } + }, + // depth-wise convolution, without zero point, without convert - {LayerTransformation::createParamsU8I8(), - 6ul, - -1, - // ActualValues - {ngraph::element::f32, - {{}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 
1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::f32, - {{}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-1.25f}), - {}, - {}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8(), + 6ul, + -1, + true, + // ActualValues + { + ngraph::element::f32, + {{}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::f32, + {{}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{-1.25f}), + {}, + {}, + ngraph::element::f32, + {} + } + }, + // depth-wise convolution, without zero point - {LayerTransformation::createParamsU8I8(), - 6ul, - -1, - // ActualValues - {ngraph::element::u8, - {{element::f32}, {}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::u8, - {}, - op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), - {}, - {}, - ngraph::element::f32, - {{}, {}, {{0.0002f}, ngraph::element::f32, {}}}}}, + { + LayerTransformation::createParamsU8I8(), + 6ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{element::f32}, {}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {}, + op::Constant::create(ngraph::element::i8, ngraph::Shape{}, std::vector{-125.f}), + {}, + {}, + ngraph::element::f32, + {{}, {}, {{0.0002f}, ngraph::element::f32, {}}} + } + }, + // without dequantization operations - {LayerTransformation::createParamsU8I8(), - 6ul, - -1, - // ActualValues - {ngraph::element::f32, - {}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::f32, - {}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8(), + 6ul, + -1, + true, + // ActualValues + { + ngraph::element::f32, + {}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::f32, + {}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {}, + ngraph::element::f32, + {} + } + }, }; INSTANTIATE_TEST_SUITE_P(smoke_LPT, @@ -623,27 +870,35 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, namespace testValues3 { const std::vector> shapesWithDynamicChannel = { - {PartialShape::dynamic(), PartialShape::dynamic()}}; + {PartialShape::dynamic(), PartialShape::dynamic()} +}; const std::vector testValuesWithDynamicChannel = { // depth-wise convolution, per-tensor quantization, with zero point - {LayerTransformation::createParamsU8I8(), - 3ul, - -1, - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), 
{0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{ngraph::element::f32}, {128.f}, {0.02f}}, - op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), - {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, - {}, - ngraph::element::f32, - {}}}, + { + LayerTransformation::createParamsU8I8(), + 3ul, + -1, + true, + // ActualValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + {{ngraph::element::f32}, {128.f}, {0.02f}}, + op::Constant::create(ngraph::element::f32, ngraph::Shape{}, std::vector{2.f}), + {255ul, Shape({1, 1, 1, 1}), {0.f}, {254.f}, {-1.27f}, {1.27f}}, + {}, + ngraph::element::f32, + {} + } + }, }; INSTANTIATE_TEST_SUITE_P(smoke_LPT, @@ -652,3 +907,4 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, ::testing::ValuesIn(testValuesWithDynamicChannel)), GroupConvolutionTransformation::getTestCaseName); } // namespace testValues3 +// clang-format on diff --git a/src/common/low_precision_transformations/tests/pull_reshape_through_dequantization_transformation.cpp b/src/common/low_precision_transformations/tests/pull_reshape_through_dequantization_transformation.cpp index 14e8f4361ceb22..b15a8f3b784c92 100644 --- a/src/common/low_precision_transformations/tests/pull_reshape_through_dequantization_transformation.cpp +++ b/src/common/low_precision_transformations/tests/pull_reshape_through_dequantization_transformation.cpp @@ -133,11 +133,17 @@ TEST_P(PullReshapeThroughDequantizationTransformation, CompareFunctions) { ASSERT_TRUE(res.first) << res.second; } -const std::vector inputShapes = {ngraph::Shape({1, 960, 7, 7}), ngraph::Shape({4, 960, 7, 7})}; +// clang-format off + +const std::vector inputShapes = { + ngraph::Shape({1, 960, 7, 7}), + ngraph::Shape({4, 960, 7, 7}) +}; const std::vector> dequantizationOnWeightElementwiseConstantShapes = { {ngraph::Shape({1, 960}), ngraph::Shape({960, 1, 1, 1})}, - {ngraph::Shape({9, 960}), ngraph::Shape({960, 1, 3, 3})}}; + {ngraph::Shape({9, 960}), ngraph::Shape({960, 1, 3, 3})} +}; const std::vector multiplyShapes = {ngraph::Shape({1, 1, 960, 1})}; @@ -193,37 +199,51 @@ const std::vector testValues = { // \ / // Multiply // - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {9, 960}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false}, - {{0.03f}, element::f32, {/* from parameter */}, false}}, - {{3, 3, 960, 1}}, - {{2}, element::f32, {/* from parameter: multiplyShapes */}, false}, - {{2, 3, 0, 1}}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {960, 1, 3, 3}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false}, - {{0.06f}, element::f32, {/* from parameter */}, false}}, - {}, - {}, - {}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}}, + { + 
LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8, {9, 960}}, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {/* from parameter */}, false }, + { {0.03f}, element::f32, {/* from parameter */}, false } + }, + { {3, 3, 960, 1} }, + { {2}, element::f32, {/* from parameter: multiplyShapes */}, false }, + { {2, 3, 0, 1} }, + { {960, 1, 1, 3, 3} }, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8, {960, 1, 3, 3}}, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {/* from parameter */}, false }, + { {0.06f}, element::f32, {/* from parameter */}, false } + }, + {}, + {}, + {}, + {{960, 1, 1, 3, 3}}, + ngraph::element::f32, + {} + } + }, // Subtract with Convert + Constant // Actual: @@ -276,37 +296,54 @@ const std::vector testValues = { // \ / // Multiply // - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {9, 960}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false, 1ul, element::i8, true}, - {{0.03f}, element::f32, {/* from parameter */}, false}}, - {{3, 3, 960, 1}}, - {{2}, element::f32, {/* from parameter: multiplyShapes */}, false}, - {{2, 3, 0, 1}}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {960, 1, 3, 3}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false, 1ul, element::i8, true}, - {{0.06f}, element::f32, {/* from parameter */}, false}}, - {}, - {}, - {}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}}}; + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8, {9, 960}}, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {/* from parameter */}, false, 1ul, element::i8, true }, + { {0.03f}, element::f32, {/* from parameter */}, false } + }, + { {3, 3, 960, 1} }, + { {2}, element::f32, {/* from parameter: multiplyShapes */}, false }, + { {2, 3, 0, 1} }, + { {960, 1, 1, 3, 3} }, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {}, false, 1ul, element::u8, true }, + { {0.02f}, element::f32, {}, false } + }, + { std::vector{ 2.f }, ngraph::element::i8, {960, 1, 3, 3}}, + { + { ngraph::element::f32, false }, + { {127.f}, element::f32, {/* from parameter */}, false, 1ul, element::i8, true }, + { {0.06f}, element::f32, {/* from parameter */}, false } 
+ }, + {}, + {}, + {}, + {{960, 1, 1, 3, 3}}, + ngraph::element::f32, + {} + } + } +}; + +// clang-format on INSTANTIATE_TEST_SUITE_P(smoke_LPT, PullReshapeThroughDequantizationTransformation, diff --git a/src/common/low_precision_transformations/tests/pull_transpose_through_dequantization_transformation.cpp b/src/common/low_precision_transformations/tests/pull_transpose_through_dequantization_transformation.cpp index 8b5d9a0b6447b8..6979bdcc36616d 100644 --- a/src/common/low_precision_transformations/tests/pull_transpose_through_dequantization_transformation.cpp +++ b/src/common/low_precision_transformations/tests/pull_transpose_through_dequantization_transformation.cpp @@ -126,7 +126,12 @@ TEST_P(PullTransposeThroughDequantizationTransformation, CompareFunctions) { ASSERT_TRUE(res.first) << res.second; } -const std::vector inputShapes = {ngraph::Shape({1, 960, 7, 7}), ngraph::Shape({4, 960, 7, 7})}; +// clang-format off + +const std::vector inputShapes = { + ngraph::Shape({1, 960, 7, 7}), + ngraph::Shape({4, 960, 7, 7}) +}; const std::vector> dequantizationOnWeightElementwiseConstantShapes = { {ngraph::Shape({}), ngraph::Shape({1, 1, 1, 1})}, @@ -178,37 +183,54 @@ const std::vector testValues = { // \ / // Multiply // - {LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), - // ActualValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {3, 3, 960, 1}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false}, - {{0.03f}, element::f32, {/* from parameter */}, false}}, - {}, // reshape1 - {}, // multiply - {{2, 3, 0, 1}}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}, - // ExpectedValues - {ngraph::element::u8, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, - {{0.02f}, element::f32, {}, false}}, - {std::vector{2.f}, ngraph::element::i8, {960, 1, 3, 3}}, - {{ngraph::element::f32, false}, - {{127.f}, element::f32, {/* from parameter */}, false}, - {{0.03f}, element::f32, {/* from parameter */}, false}}, - {}, - {}, - {}, - {{960, 1, 1, 3, 3}}, - ngraph::element::f32, - {}}}}; + { + LayerTransformation::createParamsU8I8().setSupportAsymmetricQuantization(true), + // ActualValues + { + ngraph::element::u8, + { + {ngraph::element::f32, false}, + {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, + {{0.02f}, element::f32, {}, false} + }, + {std::vector{2.f}, ngraph::element::i8, {3, 3, 960, 1}}, + { + {ngraph::element::f32, false}, + {{127.f}, element::f32, {/* from parameter */}, false}, + {{0.03f}, element::f32, {/* from parameter */}, false} + }, + {}, // reshape1 + {}, // multiply + {{2, 3, 0, 1}}, + {{960, 1, 1, 3, 3}}, + ngraph::element::f32, + {} + }, + // ExpectedValues + { + ngraph::element::u8, + { + {ngraph::element::f32, false}, + {{127.f}, element::f32, {}, false, 1ul, element::u8, true}, + {{0.02f}, element::f32, {}, false} + }, + {std::vector{2.f}, ngraph::element::i8, {960, 1, 3, 3}}, + { + {ngraph::element::f32, false}, + {{127.f}, element::f32, {/* from parameter */}, false}, + {{0.03f}, element::f32, {/* from parameter */}, false} + }, + {}, + {}, + {}, + {{960, 1, 1, 3, 3}}, + ngraph::element::f32, + {} + } + } +}; + +// clang-format on INSTANTIATE_TEST_SUITE_P(smoke_LPT, PullTransposeThroughDequantizationTransformation, diff --git 
a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp index c8d8473ad1468e..bc058ef87c7add 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp @@ -11,6 +11,8 @@ using namespace LayerTestsDefinitions; namespace { +// clang-format off + const std::vector netPrecisions = { ngraph::element::f32, // ngraph::element::f16 @@ -370,6 +372,66 @@ const std::vector true, }, + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert Constant Constant + // |U8 |U8 |I8 |I8 + // | | | | + // Convert Convert Convert Convert + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Subtract Constant Subtract Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + // + // Transformed: + // + // FQ Constant Constant + // \U8 /U8 / I8 + // \ / / + // Subtract Subtract + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector(4, 15.f), ngraph::element::i8, {2, 1, 2, 1, 1} }, + {}, + {}, + { + { ngraph::element::f32, false }, + { {126.f, 127.f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false, 1ul, ngraph::element::i8, true }, + { {0.1f, 0.2f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false } + }, + {}, + "output_original", + "FP32", + true, + }, + // Actual: // // FQ @@ -427,6 +489,63 @@ const std::vector false, }, + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert + // |U8 |U8 + // | | + // Convert Convert Constant + // \FP32 /FP32 \U8 + // \ / \ + // Subtract Constant Convert Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution + // + // Transformed: + // + // FQ Constant + // \U8 /U8 + // \ / + // Subtract + // \FP32 + // \ Constant + // \ /I8 + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector(4, 15.f), ngraph::element::i8, {2, 1, 2, 1, 1} }, + {}, + {}, + { + { ngraph::element::f32, false }, + {}, + { {0.1f, 0.2f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false } + }, + {}, + "output_original", + "U8", + false, + }, + // Actual: // // FQ @@ -500,4 +619,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, GroupConvolutionQDqTransformation, ::testing::ValuesIn(trasformationParamValues), ::testing::ValuesIn(params)), GroupConvolutionQDqTransformation::getTestCaseName); + +// clang-format on } // namespace diff --git 
a/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp index 0794065e13e5f7..946554d0f7f2ea 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/low_precision_transformations/groupconvolution_qdq_transformation.cpp @@ -11,6 +11,8 @@ using namespace LayerTestsDefinitions; namespace { +// clang-format off + const std::vector netPrecisions = { ngraph::element::f32, // ngraph::element::f16 @@ -370,6 +372,66 @@ const std::vector true, }, + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert Constant Constant + // |U8 |U8 |I8 |I8 + // | | | | + // Convert Convert Convert Convert + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Subtract Constant Subtract Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + // + // Transformed: + // + // FQ Constant Constant + // \U8 /U8 / I8 + // \ / / + // Subtract Subtract + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector(4, 15.f), ngraph::element::i8, {2, 1, 2, 1, 1} }, + {}, + {}, + { + { ngraph::element::f32, false }, + { {126.f, 127.f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false, 1ul, ngraph::element::i8, true }, + { {0.1f, 0.2f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false } + }, + {}, + "output_original", + "FP32", + true, + }, + // Actual: // // FQ @@ -427,6 +489,63 @@ const std::vector false, }, + // Actual: + // + // FQ + // |FP32 + // | + // Convert Convert + // |U8 |U8 + // | | + // Convert Convert Constant + // \FP32 /FP32 \U8 + // \ / \ + // Subtract Constant Convert Constant + // \FP32 /FP32 \FP32 /FP32 + // \ / \ / + // Multiply Multiply + // \FP32 /FP32 + // \ / + // \ / + // \ / + // GroupConvolution + // + // Transformed: + // + // FQ Constant + // \U8 /U8 + // \ / + // Subtract + // \FP32 + // \ Constant + // \ /I8 + // \ / + // GroupConvolution Constant + // \FP32 /FP32 + // \ / + // Multiply + { + { 256ul, {{ 1, 1, 1, 1 }}, { -12.8f }, { 12.7f }, { 0.f }, { 255.f }, ngraph::element::f32 }, + { ngraph::element::u8, false }, + { + { ngraph::element::f32, false }, + { {128.f}, ngraph::element::f32, {}, false, 1ul, ngraph::element::u8, true }, + { {0.1f}, ngraph::element::f32, {}, false } + }, + { std::vector(4, 15.f), ngraph::element::i8, {2, 1, 2, 1, 1} }, + {}, + {}, + { + { ngraph::element::f32, false }, + {}, + { {0.1f, 0.2f}, ngraph::element::f32, {2, 1, 1, 1, 1}, false } + }, + {}, + "output_original", + "U8", + false, + }, + // Actual: // // FQ @@ -500,4 +619,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, GroupConvolutionQDqTransformation, ::testing::ValuesIn(trasformationParamValues), ::testing::ValuesIn(params)), GroupConvolutionQDqTransformation::getTestCaseName); + +// clang-format on } // namespace diff --git 
a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp index e90c32eb00bf46..20101a88d57745 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/group_convolution_function.hpp @@ -49,7 +49,8 @@ class GroupConvolutionFunction { const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights, const ngraph::element::Type precisionAfterOperation, const ngraph::builder::subgraph::DequantizationOperations& dequantizationAfter, - const ngraph::element::Type precisionAfterDequantization); + const ngraph::element::Type precisionAfterDequantization, + const bool addReshape); }; } // namespace subgraph diff --git a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp index 6af36be45295cd..953e52326de7c7 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/group_convolution_function.cpp @@ -31,7 +31,8 @@ std::shared_ptr createWeightsOriginal( const size_t kernelSize, const std::vector& weightsValues, const FakeQuantizeOnWeights& fakeQuantizeOnWeights, - const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights) { + const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights, + const bool addReshape = true) { std::shared_ptr weights; if (fakeQuantizeOnWeights.empty() && dequantizationOnWeights.empty()) { weights = ngraph::opset1::Constant::create( @@ -46,9 +47,13 @@ std::shared_ptr createWeightsOriginal( const size_t inputChannelsPerGroup = inputChannelsCount / groupCount; weights = ngraph::opset1::Constant::create( precision, - rankLength == 3 ? - ngraph::Shape{ outputChannelsCount, inputChannelsPerGroup, kernelSize } : - ngraph::Shape{ outputChannelsCount, inputChannelsPerGroup, kernelSize, kernelSize }, + addReshape ? + (rankLength == 3 ? + ngraph::Shape{ outputChannelsCount, inputChannelsPerGroup, kernelSize } : + ngraph::Shape{ outputChannelsCount, inputChannelsPerGroup, kernelSize, kernelSize }) : + (rankLength == 3 ? + ngraph::Shape{ groupCount, outputChannelsCount / groupCount, inputChannelsPerGroup, kernelSize } : + ngraph::Shape{ groupCount, outputChannelsCount / groupCount, inputChannelsPerGroup, kernelSize, kernelSize }), weightsValues.size() == 1ul ? std::vector( rankLength == 3 ? @@ -75,24 +80,26 @@ std::shared_ptr createWeightsOriginal( weights = ngraph::builder::subgraph::makeDequantization(weights, dequantizationOnWeights); } - weights = std::make_shared( - weights, - ngraph::opset1::Constant::create( - element::i64, - Shape{ static_cast(rankLength) + 1ul }, - rankLength == 3 ? - std::vector { - calculatedDimention == 0 ? -1 : static_cast(groupCount), - calculatedDimention == 1 ? -1 : static_cast(outputChannelsCount / groupCount), - static_cast(inputChannelsPerGroup), - static_cast(kernelSize) } : - std::vector { - calculatedDimention == 0 ? -1 : static_cast(groupCount), - calculatedDimention == 1 ? 
-1 : static_cast(outputChannelsCount / groupCount), - static_cast(inputChannelsPerGroup), - static_cast(kernelSize), - static_cast(kernelSize) }), - true); + if (addReshape) { + weights = std::make_shared( + weights, + ngraph::opset1::Constant::create( + element::i64, + Shape{ static_cast(rankLength) + 1ul }, + rankLength == 3 ? + std::vector { + calculatedDimention == 0 ? -1 : static_cast(groupCount), + calculatedDimention == 1 ? -1 : static_cast(outputChannelsCount / groupCount), + static_cast(inputChannelsPerGroup), + static_cast(kernelSize) } : + std::vector { + calculatedDimention == 0 ? -1 : static_cast(groupCount), + calculatedDimention == 1 ? -1 : static_cast(outputChannelsCount / groupCount), + static_cast(inputChannelsPerGroup), + static_cast(kernelSize), + static_cast(kernelSize) }), + true); + } } return weights; @@ -253,7 +260,8 @@ std::shared_ptr GroupConvolutionFunction::get( const ngraph::builder::subgraph::DequantizationOperations& dequantizationOnWeights, const ngraph::element::Type precisionAfterOperation, const ngraph::builder::subgraph::DequantizationOperations& dequantizationAfter, - const ngraph::element::Type precisionAfterDequantization) { + const ngraph::element::Type precisionAfterDequantization, + const bool addReshape) { const auto rankLength = inputShape.rank().is_dynamic() ? 4 : inputShape.rank().get_length(); OPENVINO_ASSERT(rankLength == 3 || rankLength == 4, "not supported input shape rank: ", rankLength); @@ -269,9 +277,6 @@ std::shared_ptr GroupConvolutionFunction::get( const size_t outputChannelsInGroup = outputChannelsCount / groupCount; const size_t weightsSize = weightsConst->cast_vector().size(); - if ((weightsSize != 1ul) && (weightsSize != (inputChannelsCount * outputChannelsCount))) { - throw std::runtime_error("unexpected actual weights values size"); - } std::shared_ptr weights; if (fakeQuantizeOnWeights.empty() && dequantizationOnWeights.empty()) { @@ -293,7 +298,8 @@ std::shared_ptr GroupConvolutionFunction::get( kernelSize, weightsConst->cast_vector(), fakeQuantizeOnWeights, - dequantizationOnWeights); + dequantizationOnWeights, + addReshape); } auto convolutionOriginal = ngraph::opset1::GroupConvolution( From 44d6d97871b81bd4c36ec358f760adccecc25310 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Thu, 23 Mar 2023 14:47:54 +0100 Subject: [PATCH 062/296] DOCS shift to rst - OpenVINO 2.0 Deployment (#16509) --- .../migration_ov_2_0/deployment_migration.md | 212 ++++++++++-------- docs/OV_Runtime_UG/migration_ov_2_0/intro.md | 90 +++++--- docs/{img => _static/images}/tf_openvino.svg | 0 3 files changed, 168 insertions(+), 134 deletions(-) rename docs/{img => _static/images}/tf_openvino.svg (100%) diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/deployment_migration.md b/docs/OV_Runtime_UG/migration_ov_2_0/deployment_migration.md index f5d9a1c4213ca1..46d8a693094cc9 100644 --- a/docs/OV_Runtime_UG/migration_ov_2_0/deployment_migration.md +++ b/docs/OV_Runtime_UG/migration_ov_2_0/deployment_migration.md @@ -1,151 +1,161 @@ # Installation & Deployment {#openvino_2_0_deployment} +@sphinxdirective + One of the main concepts for OpenVINO™ API 2.0 is being "easy to use", which includes: + * Simplification of migration from different frameworks to OpenVINO. -* Organization of OpenVINO. +* Organization of OpenVINO. * Usage of development tools. * Development and deployment of OpenVINO-based applications. + To accomplish that, the 2022.1 release OpenVINO introduced significant changes to the installation and deployment processes. 
This guide will walk you through these changes.

-## The Installer Package Contains OpenVINO™ Runtime Only
+The Installer Package Contains OpenVINO™ Runtime Only
+#####################################################

-Since OpenVINO 2022.1, development tools have been distributed only via [PyPI](https://pypi.org/project/openvino-dev/), and are no longer included in the OpenVINO installer package. For a list of these components, refer to the [installation overview](../../install_guides/installing-openvino-overview.md) guide. Benefits of this approach include:
+Since OpenVINO 2022.1, development tools have been distributed only via `PyPI <https://pypi.org/project/openvino-dev/>`__, and are no longer included in the OpenVINO installer package. For a list of these components, refer to the :doc:`installation overview ` guide. Benefits of this approach include:

-* simplification of the user experience - in previous versions, installation and usage of OpenVINO Development Tools differed from one distribution type to another (the OpenVINO installer vs. PyPI),
+* simplification of the user experience - in previous versions, installation and usage of OpenVINO Development Tools differed from one distribution type to another (the OpenVINO installer vs. PyPI),
* ensuring that dependencies are handled properly via the PIP package manager, and supporting virtual environments for development tools.

The structure of the OpenVINO 2022.1 installer package has been organized as follows:

-- The `runtime` folder includes headers, libraries and CMake interfaces.
-- The `tools` folder contains [the compile tool](../../../tools/compile_tool/README.md), [deployment manager](../../OV_Runtime_UG/deployment/deployment-manager-tool.md), and a set of `requirements.txt` files with links to the corresponding versions of the `openvino-dev` package.
-- The `python` folder contains the Python version for OpenVINO Runtime.
+* The ``runtime`` folder includes headers, libraries and CMake interfaces.
+* The ``tools`` folder contains :doc:`the compile tool `, :doc:`deployment manager `, and a set of ``requirements.txt`` files with links to the corresponding versions of the ``openvino-dev`` package.
+* The ``python`` folder contains the Python version for OpenVINO Runtime.

-## Installing OpenVINO Development Tools via PyPI
+Installing OpenVINO Development Tools via PyPI
+##############################################

Since OpenVINO Development Tools is no longer in the installer package, the installation process has also changed. This section describes it through a comparison with previous versions.

-### For Versions Prior to 2022.1
+For Versions Prior to 2022.1
+++++++++++++++++++++++++++++

+In previous versions, OpenVINO Development Tools was a part of the main package. After the package was installed, to convert models (for example, TensorFlow), you needed to install additional dependencies by using the requirement files, such as ``requirements_tf.txt``, install Post-Training Optimization tool and Accuracy Checker tool via the ``setup.py`` scripts, and then use the ``setupvars`` scripts to make the tools available to the following command:

-In previous versions, OpenVINO Development Tools was a part of the main package.
After the package was installed, to convert models (for example, TensorFlow), you needed to install additional dependencies by using the requirement files, such as `requirements_tf.txt`, install Post-Training Optimization tool and Accuracy Checker tool via the `setup.py` scripts, and then use the `setupvars` scripts to make the tools available to the following command: +.. code-block:: sh -```sh -$ mo.py -h -``` + $ mo.py -h -### For 2022.1 and After -In OpenVINO 2022.1 and later, you can install the development tools only from a [PyPI](https://pypi.org/project/openvino-dev/) repository, using the following command (taking TensorFlow as an example): +For 2022.1 and After +++++++++++++++++++++ -```sh -$ python3 -m pip install -r /tools/requirements_tf.txt -``` +In OpenVINO 2022.1 and later, you can install the development tools only from a `PyPI `__ repository, using the following command (taking TensorFlow as an example): -This will install all the development tools and additional components necessary to work with TensorFlow via the `openvino-dev` package (see **Step 4. Install the Package** on the [PyPI page](https://pypi.org/project/openvino-dev/) for parameters of other frameworks). +.. code-block:: sh + + $ python3 -m pip install -r /tools/requirements_tf.txt + + +This will install all the development tools and additional components necessary to work with TensorFlow via the ``openvino-dev`` package (see **Step 4. Install the Package** on the `PyPI page `__ for parameters of other frameworks). Then, the tools can be used by commands like: -```sh -$ mo -h -$ pot -h -``` +.. code-block:: sh + + $ mo -h + $ pot -h -Installation of any other dependencies is not required. For more details on the installation steps, see the [Install OpenVINO Development Tools](../../install_guides/installing-model-dev-tools.md). -## Interface Changes for Building C/C++ Applications +Installation of any other dependencies is not required. For more details on the installation steps, see the :doc:`Install OpenVINO Development Tools `. + +Interface Changes for Building C/C++ Applications +################################################# The new OpenVINO Runtime with its API 2.0 has also brought some changes for building C/C++ applications. -### CMake Interface +CMake Interface +++++++++++++++++++++ The CMake interface has been changed as follows: **With Inference Engine of previous versions**: -```cmake -find_package(InferenceEngine REQUIRED) -find_package(ngraph REQUIRED) -add_executable(ie_ngraph_app main.cpp) -target_link_libraries(ie_ngraph_app PRIVATE ${InferenceEngine_LIBRARIES} ${NGRAPH_LIBRARIES}) -``` +.. code-block:: cmake + + find_package(InferenceEngine REQUIRED) + find_package(ngraph REQUIRED) + add_executable(ie_ngraph_app main.cpp) + target_link_libraries(ie_ngraph_app PRIVATE ${InferenceEngine_LIBRARIES} ${NGRAPH_LIBRARIES}) + **With OpenVINO Runtime 2022.1 (API 2.0)**: -```cmake -find_package(OpenVINO REQUIRED) -add_executable(ov_app main.cpp) -target_link_libraries(ov_app PRIVATE openvino::runtime) +.. 
code-block:: cmake + + find_package(OpenVINO REQUIRED) + add_executable(ov_app main.cpp) + target_link_libraries(ov_app PRIVATE openvino::runtime) -add_executable(ov_c_app main.c) -target_link_libraries(ov_c_app PRIVATE openvino::runtime::c) -``` + add_executable(ov_c_app main.c) + target_link_libraries(ov_c_app PRIVATE openvino::runtime::c) -### Native Interfaces + +Native Interfaces +++++++++++++++++++++ It is possible to build applications without the CMake interface by using: MSVC IDE, UNIX makefiles, and any other interface, which has been changed as shown here: **With Inference Engine of previous versions**: -@sphinxdirective - .. tab:: Include dirs - .. code-block:: sh - - /deployment_tools/inference_engine/include - /deployment_tools/ngraph/include + .. code-block:: sh + + /deployment_tools/inference_engine/include + /deployment_tools/ngraph/include .. tab:: Path to libs - .. code-block:: sh + .. code-block:: sh - /deployment_tools/inference_engine/lib/intel64/Release - /deployment_tools/ngraph/lib/ + /deployment_tools/inference_engine/lib/intel64/Release + /deployment_tools/ngraph/lib/ .. tab:: Shared libs - .. code-block:: sh + .. code-block:: sh - // UNIX systems - inference_engine.so ngraph.so + // UNIX systems + inference_engine.so ngraph.so - // Windows - inference_engine.dll ngraph.dll + // Windows + inference_engine.dll ngraph.dll .. tab:: (Windows) .lib files - .. code-block:: sh - - ngraph.lib - inference_engine.lib + .. code-block:: sh -@endsphinxdirective + ngraph.lib + inference_engine.lib **With OpenVINO Runtime 2022.1 (API 2.0)**: -@sphinxdirective - .. tab:: Include dirs - .. code-block:: sh + .. code-block:: sh - /runtime/include + /runtime/include .. tab:: Path to libs - .. code-block:: sh + .. code-block:: sh - /runtime/lib/intel64/Release + /runtime/lib/intel64/Release .. tab:: Shared libs - .. code-block:: sh + .. code-block:: sh - // UNIX systems - openvino.so + // UNIX systems + openvino.so - // Windows - openvino.dll + // Windows + openvino.dll .. tab:: (Windows) .lib files @@ -153,49 +163,55 @@ It is possible to build applications without the CMake interface by using: MSVC openvino.lib -@endsphinxdirective -## Clearer Library Structure for Deployment +Clearer Library Structure for Deployment +######################################## -OpenVINO 2022.1 introduced a reorganization of the libraries, to make deployment easier. In the previous versions, it was required to use several libraries to perform deployment steps. Now you can just use `openvino` or `openvino_c` based on your developing language, with the necessary plugins to complete your task. For example, `openvino_intel_cpu_plugin` and `openvino_ir_frontend` plugins will enable loading OpenVINO IRs and performing inference on the CPU device (for more details, see the [Local distribution with OpenVINO](../deployment/local-distribution.md)). +OpenVINO 2022.1 introduced a reorganization of the libraries, to make deployment easier. In the previous versions, it was required to use several libraries to perform deployment steps. Now you can just use ``openvino`` or ``openvino_c`` based on your developing language, with the necessary plugins to complete your task. For example, ``openvino_intel_cpu_plugin`` and ``openvino_ir_frontend`` plugins will enable loading OpenVINO IRs and performing inference on the CPU device (for more details, see the :doc:`Local distribution with OpenVINO `). 
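As a minimal sketch of what this reorganization means for an application (the model path and the ``CPU`` device below are placeholder choices, not text from the migrated guide), linking the single ``openvino::runtime`` target is enough; the frontend and plugin libraries are loaded on demand:

.. code-block:: cpp

   #include <openvino/openvino.hpp>

   int main() {
       ov::Core core;                                           // served by the single `openvino` library
       auto model = core.read_model("model.xml");               // delegated to openvino_ir_frontend
       auto compiled_model = core.compile_model(model, "CPU");  // dispatched to openvino_intel_cpu_plugin
       compiled_model.create_infer_request().infer();           // inference itself needs no extra libraries
       return 0;
   }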
Below are detailed comparisons of the library structure between OpenVINO 2022.1 and the previous versions: -* Starting with 2022.1 release, a single core library with all the functionalities (`openvino` for C++ Runtime, `openvino_c` for Inference Engine API C interface) is used, instead of the previous core libraries which contained `inference_engine`, `ngraph`, `inference_engine_transformations` and `inference_engine_lp_transformations`. -* The optional `inference_engine_preproc` preprocessing library (if `InferenceEngine::PreProcessInfo::setColorFormat` or `InferenceEngine::PreProcessInfo::setResizeAlgorithm` is used) has been renamed to `openvino_gapi_preproc` and deprecated in 2022.1. For more details, see the [Preprocessing capabilities of OpenVINO API 2.0](preprocessing.md). +* Starting with 2022.1 release, a single core library with all the functionalities (``openvino`` for C++ Runtime, ``openvino_c`` for Inference Engine API C interface) is used, instead of the previous core libraries which contained ``inference_engine``, ``ngraph``, ``inference_engine_transformations`` and ``inference_engine_lp_transformations``. +* The optional ``inference_engine_preproc`` preprocessing library (if `InferenceEngine::PreProcessInfo::setColorFormat `__ or `InferenceEngine::PreProcessInfo::setResizeAlgorithm `__ is used) has been renamed to ``openvino_gapi_preproc`` and deprecated in 2022.1. For more details, see the :doc:`Preprocessing capabilities of OpenVINO API 2.0 `. + * The libraries of plugins have been renamed as follows: - * `openvino_intel_cpu_plugin` is used for [CPU](../supported_plugins/CPU.md) device instead of `MKLDNNPlugin`. - * `openvino_intel_gpu_plugin` is used for [GPU](../supported_plugins/GPU.md) device instead of `clDNNPlugin`. - * `openvino_auto_plugin` is used for [Auto-Device Plugin](../auto_device_selection.md). + + * ``openvino_intel_cpu_plugin`` is used for :doc:`CPU ` device instead of ``MKLDNNPlugin``. + * ``openvino_intel_gpu_plugin`` is used for :doc:`GPU ` device instead of ``clDNNPlugin``. + * ``openvino_auto_plugin`` is used for :doc:`Auto-Device Plugin `. + * The plugins for reading and converting models have been changed as follows: - * `openvino_ir_frontend` is used to read IRs instead of `inference_engine_ir_reader`. - * `openvino_onnx_frontend` is used to read ONNX models instead of `inference_engine_onnx_reader` (with its dependencies). - * `openvino_paddle_frontend` is added in 2022.1 to read PaddlePaddle models. + + * ``openvino_ir_frontend`` is used to read IRs instead of ``inference_engine_ir_reader``. + * ``openvino_onnx_frontend`` is used to read ONNX models instead of ``inference_engine_onnx_reader`` (with its dependencies). + * ``openvino_paddle_frontend`` is added in 2022.1 to read PaddlePaddle models. + +@endsphinxdirective diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/intro.md b/docs/OV_Runtime_UG/migration_ov_2_0/intro.md index 290610326492a8..80438604a56eb1 100644 --- a/docs/OV_Runtime_UG/migration_ov_2_0/intro.md +++ b/docs/OV_Runtime_UG/migration_ov_2_0/intro.md @@ -12,83 +12,101 @@ openvino_2_0_preprocessing openvino_2_0_model_creation -@endsphinxdirective -This guide introduces the new OpenVINO™ API: API 2.0, as well as the new OpenVINO IR model format: IR v11. Here, you will find comparisons of their "old" and "new" versions. +This guide introduces the new OpenVINO™ API: API 2.0, as well as the new OpenVINO IR model format: IR v11. Here, you will find comparisons of their "old" and "new" versions. 
-### Introduction of API 2.0
+Introduction of API 2.0
+#######################

Versions of OpenVINO prior to 2022.1 required changes in the application logic when migrating an app from other frameworks, such as TensorFlow, ONNX Runtime, PyTorch, PaddlePaddle, etc. The changes were required because:

-- Model Optimizer changed input precisions for some inputs. For example, neural language processing models with `I64` inputs were changed to include `I32` ones.
-- Model Optimizer changed layouts for TensorFlow models (see the [Layouts in OpenVINO](../layout_overview.md)). It lead to unusual requirement of using the input data with a different layout than that of the framework:
-![tf_openvino]
-- Inference Engine API (`InferenceEngine::CNNNetwork`) applied some conversion rules for input and output precisions due to limitations in device plugins.
+- Model Optimizer changed input precisions for some inputs. For example, neural language processing models with ``I64`` inputs were changed to include ``I32`` ones.
+- Model Optimizer changed layouts for TensorFlow models (see the :doc:`Layouts in OpenVINO `). It led to an unusual requirement of using the input data with a different layout than that of the framework:
+
+.. image:: _static/images/tf_openvino.svg
+   :alt: tf_openvino
+
+- Inference Engine API (`InferenceEngine::CNNNetwork `__) applied some conversion rules for input and output precisions due to limitations in device plugins.
- Users needed to specify input shapes during model conversions in Model Optimizer, and work with static shapes in the application.

-OpenVINO™ 2022.1 has introduced API 2.0 (also called OpenVINO API v2) to align the logic of working with models as it is done in their origin frameworks - no layout and precision changes, operating with tensor names and indices to address inputs and outputs. OpenVINO Runtime has combined Inference Engine API used for inference and nGraph API targeted to work with models and operations. API 2.0 has a common structure, naming convention styles, namespaces, and removes duplicated structures. For more details, see the [Changes to Inference Pipeline in OpenVINO API v2](common_inference_pipeline.md).
+OpenVINO™ 2022.1 has introduced API 2.0 (also called OpenVINO API v2) to align the logic of working with models as it is done in their original frameworks - no layout and precision changes, operating with tensor names and indices to address inputs and outputs. OpenVINO Runtime has combined Inference Engine API used for inference and nGraph API targeted to work with models and operations. API 2.0 has a common structure, naming convention styles, namespaces, and removes duplicated structures. For more details, see the :doc:`Changes to Inference Pipeline in OpenVINO API v2 `.
+
+.. note::
+
+   Your existing applications will continue to work with OpenVINO Runtime 2022.1, as normal. However, migration to API 2.0 is strongly recommended. This will allow you to use additional features, such as :doc:`Preprocessing ` and :doc:`Dynamic shapes support `.

-> **NOTE**: Your existing applications will continue to work with OpenVINO Runtime 2022.1, as normal. Although, migration to API 2.0 is strongly recommended. This will allow you to use additional features, such as [Preprocessing](../preprocessing_overview.md) and [Dynamic shapes support](../ov_dynamic_shapes.md).
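As a rough sketch of the addressing change described above (the tensor name ``data`` is hypothetical, not taken from the guide), API 2.0 reaches inputs through tensor names or positional indices instead of operation names:

.. code-block:: cpp

   #include <openvino/openvino.hpp>

   void fill_input(ov::InferRequest& request) {
       // API 2.0: tensors are addressed by the names preserved from the
       // original framework model, or simply by positional index.
       ov::Tensor by_name  = request.get_tensor("data");   // hypothetical tensor name
       ov::Tensor by_index = request.get_input_tensor(0);  // equivalent positional access
   }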
-### The New OpenVINO IR v11 +The New OpenVINO IR v11 +####################### -To support these features, OpenVINO has introduced OpenVINO IR v11, which is now the default version for Model Optimizer. The model represented in OpenVINO IR v11 fully matches the original model in the original framework format in terms of inputs and outputs. It is also not required to specify input shapes during conversion, which results in OpenVINO IR v11 containing `-1` to denote undefined dimensions. For more details on how to fully utilize this feature, see [Working with dynamic shapes](../ov_dynamic_shapes.md). For information on how to reshape to static shapes in application, see [Changing input shapes](../ShapeInference.md). +To support these features, OpenVINO has introduced OpenVINO IR v11, which is now the default version for Model Optimizer. The model represented in OpenVINO IR v11 fully matches the original model in the original framework format in terms of inputs and outputs. It is also not required to specify input shapes during conversion, which results in OpenVINO IR v11 containing ``-1`` to denote undefined dimensions. For more details on how to fully utilize this feature, see :doc:`Working with dynamic shapes `. For information on how to reshape to static shapes in application, see :doc:`Changing input shapes `. OpenVINO IR v11 is fully compatible with applications written with the Inference Engine API used by older versions of OpenVINO. This backward compatibility is allowed thanks to additional runtime information included in OpenVINO IR v11. This means that when OpenVINO IR v11 is read by an application based on Inference Engine, it is internally converted to OpenVINO IR v10. OpenVINO IR v11 is supported by all OpenVINO Development tools including Post-Training Optimization Tool, Benchmark app, etc. -### Backward Compatibility for OpenVINO IR v10 +Backward Compatibility for OpenVINO IR v10 +########################################## -API 2.0 also supports backward compatibility for models of OpenVINO IR v10. If you have OpenVINO IR v10 files, they can also be fed to OpenVINO Runtime. For more details, see the [migration steps](common_inference_pipeline.md). +API 2.0 also supports backward compatibility for models of OpenVINO IR v10. If you have OpenVINO IR v10 files, they can also be fed to OpenVINO Runtime. For more details, see the :doc:`migration steps `. Some of the OpenVINO Development Tools also support both OpenVINO IR v10 and v11 as an input: -- Accuracy checker uses API 2.0 for model accuracy measurement by default. It also supports switching to the old API by using the `--use_new_api False` command-line parameter. Both launchers accept OpenVINO IR v10 and v11, but in some cases configuration files should be updated. For more details, see the [Accuracy Checker documentation](https://github.com/openvinotoolkit/open_model_zoo/blob/master/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/openvino_launcher_readme.md). -- [Compile tool](../../../tools/compile_tool/README.md) compiles the model to be used in API 2.0 by default. To use the resulting compiled blob under the Inference Engine API, the additional `ov_api_1_0` option should be passed. -However, Post-Training Optimization Tool of OpenVINO 2022.1 does not support OpenVINO IR v10. They require the latest version of Model Optimizer to generate OpenVINO IR v11 files. +- Accuracy checker uses API 2.0 for model accuracy measurement by default. 
It also supports switching to the old API by using the ``--use_new_api False`` command-line parameter. Both launchers accept OpenVINO IR v10 and v11, but in some cases configuration files should be updated. For more details, see the `Accuracy Checker documentation <https://github.com/openvinotoolkit/open_model_zoo/blob/master/tools/accuracy_checker/openvino/tools/accuracy_checker/launcher/openvino_launcher_readme.md>`__.
+- :doc:`Compile tool ` compiles the model to be used in API 2.0 by default. To use the resulting compiled blob under the Inference Engine API, the additional ``ov_api_1_0`` option should be passed.
+
+However, Post-Training Optimization Tool of OpenVINO 2022.1 does not support OpenVINO IR v10. It requires the latest version of Model Optimizer to generate OpenVINO IR v11 files.

-> **NOTE**: To quantize your OpenVINO IR v10 models to run with OpenVINO 2022.1, download and use Post-Training Optimization Tool of OpenVINO 2021.4.
-@sphinxdirective
+.. note::
-.. _differences_api20_ie:
+
+   To quantize your OpenVINO IR v10 models to run with OpenVINO 2022.1, download and use Post-Training Optimization Tool of OpenVINO 2021.4.
-@endsphinxdirective
+
+.. _differences_api20_ie:

-### Differences in API 2.0 and Inference Engine API Behaviors
+Differences in API 2.0 and Inference Engine API Behaviors
+#########################################################

Inference Engine and nGraph APIs do not become deprecated with the introduction of the new API, and they can still be used in applications. However, it is highly recommended to migrate to API 2.0, as it offers more features (further extended in future releases), such as:
+
-- [Working with dynamic shapes](../ov_dynamic_shapes.md), which increases performance when working with compatible models such as NLP (Neural Language Processing) and super-resolution models.
-- [Preprocessing of the model](../preprocessing_overview.md), which adds preprocessing operations to inference models and fully occupies the accelerator, freeing CPU resources.
+- :doc:`Working with dynamic shapes `, which increases performance when working with compatible models such as NLP (Neural Language Processing) and super-resolution models.
+- :doc:`Preprocessing of the model `, which adds preprocessing operations to inference models and fully occupies the accelerator, freeing CPU resources.

To understand the differences between Inference Engine API and API 2.0, see the definitions of two types of behaviors first:
+
- **Old behavior** of OpenVINO assumes that:
+
  - Model Optimizer can change input element types and order of dimensions (layouts) for the model from the original framework.
  - Inference Engine can override input and output element types.
-  - Inference Engine API uses operation names to address inputs and outputs (e.g. InferenceEngine::InferRequest::GetBlob).
+  - Inference Engine API uses operation names to address inputs and outputs (e.g. `InferenceEngine::InferRequest::GetBlob `__).
  - Inference Engine API does not support compiling of models with dynamic input shapes.
+
- **New behavior** implemented in 2022.1 assumes full model alignment with the framework:
+
  - Model Optimizer preserves input element types and order of dimensions (layouts), and stores tensor names from the original models.
-  - OpenVINO Runtime 2022.1 reads models in any format (OpenVINO IR v10, OpenVINO IR v11, TensorFlow (check [TensorFlow Frontend Capabilities and Limitations](../../resources/tensorflow_frontend.md)), ONNX, PaddlePaddle, etc.).
+  - OpenVINO Runtime 2022.1 reads models in any format (OpenVINO IR v10, OpenVINO IR v11, TensorFlow (check :doc:`TensorFlow Frontend Capabilities and Limitations `), ONNX, PaddlePaddle, etc.).
- API 2.0 uses tensor names for addressing, which is the standard approach among the compatible model frameworks.
- API 2.0 can also address input and output tensors by the index. Some model formats like ONNX are sensitive to the input and output order, which is preserved by OpenVINO 2022.1.

The table below demonstrates which behavior, **old** or **new**, is used for models based on the two APIs.

-| API                           | OpenVINO IR v10  | OpenVINO IR v11  | ONNX Files | Models Created in Code |
-|-------------------------------|------------------|------------------|------------|------------------------|
-|Inference Engine / nGraph APIs | Old              | Old              | Old        | Old                    |
-|API 2.0                        | Old              | New              | New        | New                    |
+--------------------------------+-----------------+-----------------+-----------------+------------------------+
| API                            | OpenVINO IR v10 | OpenVINO IR v11 | ONNX Files      | Models Created in Code |
+================================+=================+=================+=================+========================+
| Inference Engine / nGraph APIs | Old             | Old             | Old             | Old                    |
+--------------------------------+-----------------+-----------------+-----------------+------------------------+
| API 2.0                        | Old             | New             | New             | New                    |
+--------------------------------+-----------------+-----------------+-----------------+------------------------+

-### More Information
+More Information
+####################

See the following pages to understand how to migrate Inference Engine-based applications to API 2.0:
- - [Installation & Deployment](deployment_migration.md)
- - [OpenVINO™ Common Inference pipeline](common_inference_pipeline.md)
- - [Preprocess your model](./preprocessing.md)
- - [Configure device](./configure_devices.md)
- - [OpenVINO™ Model Creation](graph_construction.md)
-[tf_openvino]: ../../img/tf_openvino.svg
+- :doc:`Installation & Deployment `
+- :doc:`OpenVINO™ Common Inference pipeline `
+- :doc:`Preprocess your model `
+- :doc:`Configure device `
+- :doc:`OpenVINO™ Model Creation `
+
+@endsphinxdirective
diff --git a/docs/img/tf_openvino.svg b/docs/_static/images/tf_openvino.svg
similarity index 100%
rename from docs/img/tf_openvino.svg
rename to docs/_static/images/tf_openvino.svg

From de0a4e16fbb9baf88f93a72dbcedff348d27b695 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Do=C5=82bniak?=
Date: Thu, 23 Mar 2023 16:33:54 +0100
Subject: [PATCH 063/296] TopK 11 exposed to Python (#16501)

---
 .../compatibility/ngraph/opset11/__init__.py  |  2 +-
 .../src/compatibility/ngraph/opset11/ops.py   | 32 ++++++++++++++++++-
 .../src/openvino/runtime/opset11/__init__.py  |  2 +-
 .../src/openvino/runtime/opset11/ops.py       | 30 +++++++++++++++++
 .../python/tests/test_graph/test_create_op.py | 13 ++++++++
 .../test_ngraph/test_create_op.py             | 13 ++++++++
 6 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py b/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py
index 91f84b81f415cd..047c93e4cc03d3 100644
--- a/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py
+++ b/src/bindings/python/src/compatibility/ngraph/opset11/__init__.py
@@ -170,7 +170,7 @@ from ngraph.opset1.ops import tanh
 from ngraph.opset1.ops import tensor_iterator
 from ngraph.opset1.ops import tile
-from ngraph.opset3.ops import topk
+from ngraph.opset11.ops import topk
 from ngraph.opset1.ops import transpose
 from ngraph.opset10.ops import unique
 from ngraph.opset1.ops import unsqueeze
diff --git a/src/bindings/python/src/compatibility/ngraph/opset11/ops.py
b/src/bindings/python/src/compatibility/ngraph/opset11/ops.py index 434b778b246cf8..3a4b54059ca6fc 100644 --- a/src/bindings/python/src/compatibility/ngraph/opset11/ops.py +++ b/src/bindings/python/src/compatibility/ngraph/opset11/ops.py @@ -34,7 +34,7 @@ def interpolate( axes: Optional[NodeInput] = None, name: Optional[str] = None, ) -> Node: - """Perfors the interpolation of the input tensor. + """Performs the interpolation of the input tensor. :param image: The node providing input tensor with data for interpolation. :param scales_or_sizes: @@ -75,3 +75,33 @@ def interpolate( inputs = as_nodes(image, scales_or_sizes) if axes is None else as_nodes(image, scales_or_sizes, axes) return _get_node_factory_opset11().create("Interpolate", inputs, attrs) + + +@nameable_op +def topk( + data: NodeInput, + k: NodeInput, + axis: int, + mode: str, + sort: str, + index_element_type: str = "i32", + stable: bool = False, + name: Optional[str] = None, +) -> Node: + """Return a node which performs TopK. + + :param data: Input data. + :param k: K. + :param axis: TopK Axis. + :param mode: Compute TopK largest ('max') or smallest ('min') + :param sort: Order of output elements (sort by: 'none', 'index' or 'value') + :param index_element_type: Type of output tensor with indices. + :param stable: Specifies whether the equivalent elements should maintain + their relative order from the input tensor during sorting. + :return: The new node which performs TopK + """ + return _get_node_factory_opset11().create( + "TopK", + as_nodes(data, k), + {"axis": axis, "mode": mode, "sort": sort, "index_element_type": index_element_type, "stable": stable}, + ) diff --git a/src/bindings/python/src/openvino/runtime/opset11/__init__.py b/src/bindings/python/src/openvino/runtime/opset11/__init__.py index 79c7068bf83d87..3e867f548699b0 100644 --- a/src/bindings/python/src/openvino/runtime/opset11/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset11/__init__.py @@ -171,7 +171,7 @@ from openvino.runtime.opset1.ops import tanh from openvino.runtime.opset1.ops import tensor_iterator from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset3.ops import topk +from openvino.runtime.opset11.ops import topk from openvino.runtime.opset1.ops import transpose from openvino.runtime.opset10.ops import unique from openvino.runtime.opset1.ops import unsqueeze diff --git a/src/bindings/python/src/openvino/runtime/opset11/ops.py b/src/bindings/python/src/openvino/runtime/opset11/ops.py index 2a54db0069ebd1..235b0e0ef37af5 100644 --- a/src/bindings/python/src/openvino/runtime/opset11/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset11/ops.py @@ -75,3 +75,33 @@ def interpolate( inputs = as_nodes(image, scales_or_sizes) if axes is None else as_nodes(image, scales_or_sizes, axes) return _get_node_factory_opset11().create("Interpolate", inputs, attrs) + + +@nameable_op +def topk( + data: NodeInput, + k: NodeInput, + axis: int, + mode: str, + sort: str, + index_element_type: str = "i32", + stable: bool = False, + name: Optional[str] = None, +) -> Node: + """Return a node which performs TopK. + + :param data: Input data. + :param k: K. + :param axis: TopK Axis. + :param mode: Compute TopK largest ('max') or smallest ('min') + :param sort: Order of output elements (sort by: 'none', 'index' or 'value') + :param index_element_type: Type of output tensor with indices. + :param stable: Specifies whether the equivalent elements should maintain + their relative order from the input tensor during sorting. 
+ :return: The new node which performs TopK + """ + return _get_node_factory_opset11().create( + "TopK", + as_nodes(data, k), + {"axis": axis, "mode": mode, "sort": sort, "index_element_type": index_element_type, "stable": stable}, + ) diff --git a/src/bindings/python/tests/test_graph/test_create_op.py b/src/bindings/python/tests/test_graph/test_create_op.py index f76ed01641a6d5..da8cfca15c1c51 100644 --- a/src/bindings/python/tests/test_graph/test_create_op.py +++ b/src/bindings/python/tests/test_graph/test_create_op.py @@ -2300,3 +2300,16 @@ def test_unique_opset10(): assert node.get_output_element_type(1) == Type.i64 assert node.get_output_element_type(2) == Type.i64 assert node.get_output_element_type(3) == Type.i64 + + +def test_topk_opset11(): + data_shape = [1, 3, 256] + data = ov.parameter(data_shape, dtype=np.int32, name="Data") + k_val = np.int32(3) + axis = np.int32(-1) + node = ov.topk(data, k_val, axis, "min", "value", stable=True) + + assert node.get_type_name() == "TopK" + assert node.get_output_size() == 2 + assert list(node.get_output_shape(0)) == [1, 3, 3] + assert list(node.get_output_shape(1)) == [1, 3, 3] diff --git a/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py b/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py index 09fda90564bd01..7ec5a26109ab49 100644 --- a/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py +++ b/src/bindings/python/tests_compatibility/test_ngraph/test_create_op.py @@ -2412,3 +2412,16 @@ def test_unique_opset10(): assert node.get_output_element_type(1) == Type.i64 assert node.get_output_element_type(2) == Type.i64 assert node.get_output_element_type(3) == Type.i64 + + +def test_topk_opset11(): + data_shape = [1, 3, 256] + data = ng.parameter(data_shape, dtype=np.int32, name="Data") + k_val = np.int32(3) + axis = np.int32(-1) + node = ng_opset11.topk(data, k_val, axis, "min", "value", stable=True) + + assert node.get_type_name() == "TopK" + assert node.get_output_size() == 2 + assert list(node.get_output_shape(0)) == [1, 3, 3] + assert list(node.get_output_shape(1)) == [1, 3, 3] From 2755b32fb917696cdfd265dee4a616b0db4cff5e Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 23 Mar 2023 19:34:49 +0400 Subject: [PATCH 064/296] Changed Template plugin public property (#16496) * Changed template plugin public property * Add property documentation * Fixed comments * Fixed typo --- docs/IE_PLUGIN_DG/Intro.md | 7 +- docs/IE_PLUGIN_DG/Plugin.md | 1 + docs/IE_PLUGIN_DG/Properties.md | 10 + docs/IE_PLUGIN_DG/layout.xml | 1 + .../template/{config.hpp => properties.hpp} | 12 +- src/plugins/template/src/compiled_model.cpp | 7 +- src/plugins/template/src/config.cpp | 11 +- src/plugins/template/src/config.hpp | 1 + src/plugins/template/src/plugin.cpp | 7 +- .../behavior/plugin/configuration_tests.cpp | 10 +- .../disable_transformations_test.cpp | 51 +++++ .../transformations/preprocessing.cpp | 183 ------------------ .../template_transformations_test.cpp | 58 ------ 13 files changed, 95 insertions(+), 264 deletions(-) create mode 100644 docs/IE_PLUGIN_DG/Properties.md rename src/plugins/template/include/template/{config.hpp => properties.hpp} (58%) create mode 100644 src/plugins/template/tests/functional/transformations/disable_transformations_test.cpp delete mode 100644 src/plugins/template/tests/functional/transformations/preprocessing.cpp delete mode 100644 src/plugins/template/tests/functional/transformations/template_transformations_test.cpp diff --git a/docs/IE_PLUGIN_DG/Intro.md 
b/docs/IE_PLUGIN_DG/Intro.md index ed3d101ea4a6bc..8334f2db744714 100644 --- a/docs/IE_PLUGIN_DG/Intro.md +++ b/docs/IE_PLUGIN_DG/Intro.md @@ -11,6 +11,7 @@ Implement Compiled Model Functionality Implement Synchronous Inference Request Implement Asynchronous Inference Request + Provide Plugin Specific Properties Implement Remote Context Implement Remote Tensor openvino_docs_ov_plugin_dg_plugin_build @@ -45,9 +46,11 @@ OpenVINO plugin dynamic library consists of several main components: - Can extract performance counters for an inference pipeline execution profiling. 4. [Asynchronous Inference Request class](@ref openvino_docs_ov_plugin_dg_async_infer_request): - Wraps the [Inference Request](@ref openvino_docs_ov_plugin_dg_infer_request) class and runs pipeline stages in parallel on several task executors based on a device-specific pipeline structure. -5. [Remote Context](@ref openvino_docs_ov_plugin_dg_remote_context): +5. [Plugin specific properties](@ref openvino_docs_ov_plugin_dg_properties): + - Provides the plugin specific properties. +6. [Remote Context](@ref openvino_docs_ov_plugin_dg_remote_context): - Provides the device specific remote context. Context allows to create remote tensors. -6. [Remote Tensor](@ref openvino_docs_ov_plugin_dg_remote_tensor) +7. [Remote Tensor](@ref openvino_docs_ov_plugin_dg_remote_tensor) - Provides the device specific remote tensor API and implementation. > **NOTE**: This documentation is written based on the `Template` plugin, which demonstrates plugin diff --git a/docs/IE_PLUGIN_DG/Plugin.md b/docs/IE_PLUGIN_DG/Plugin.md index 96326fabcb574a..124852d6cf6d79 100644 --- a/docs/IE_PLUGIN_DG/Plugin.md +++ b/docs/IE_PLUGIN_DG/Plugin.md @@ -42,6 +42,7 @@ As an example, a plugin configuration has three value parameters: - `perf_counts` - boolean value to identify whether to collect performance counters during [Inference Request](@ref openvino_docs_ov_plugin_dg_infer_request) execution. - `streams_executor_config` - configuration of `ov::threading::IStreamsExecutor` to handle settings of multi-threaded context. - `performance_mode` - configuration of `ov::hint::PerformanceMode` to set the performance mode. +- `disable_transformations` - allows to disable transformations which are applied in the process of model compilation. ### Plugin Constructor diff --git a/docs/IE_PLUGIN_DG/Properties.md b/docs/IE_PLUGIN_DG/Properties.md new file mode 100644 index 00000000000000..a8459181e74c42 --- /dev/null +++ b/docs/IE_PLUGIN_DG/Properties.md @@ -0,0 +1,10 @@ +# Plugin Properties {#openvino_docs_ov_plugin_dg_properties} + +Plugin can provide own device specific properties. + +Property Class +------------------------ + +OpenVINO API provides the interface ov::Property which allows to define the property and access rights. 
Based on that, a declaration of plugin specific properties can look as follows: + +@snippet include/template/properties.hpp properties:public_header diff --git a/docs/IE_PLUGIN_DG/layout.xml b/docs/IE_PLUGIN_DG/layout.xml index 44137896ee794b..dbd424edc2c8dd 100644 --- a/docs/IE_PLUGIN_DG/layout.xml +++ b/docs/IE_PLUGIN_DG/layout.xml @@ -79,6 +79,7 @@ + diff --git a/src/plugins/template/include/template/config.hpp b/src/plugins/template/include/template/properties.hpp similarity index 58% rename from src/plugins/template/include/template/config.hpp rename to src/plugins/template/include/template/properties.hpp index 8b0267c13744ef..f00e6e1474fda4 100644 --- a/src/plugins/template/include/template/config.hpp +++ b/src/plugins/template/include/template/properties.hpp @@ -3,10 +3,10 @@ // /** - * @brief A header that defines advanced related properties for DLIA plugins. + * @brief A header that defines advanced related properties for Template plugin. * These properties should be used in set_property() and compile_model() methods of plugins * - * @file template/config.hpp + * @file template/properties.hpp */ #pragma once @@ -18,14 +18,14 @@ namespace ov { namespace template_plugin { -// ! [public_header:properties] +// ! [properties:public_header] /** - * @brief Defines the number of throutput streams used by TEMPLATE plugin. + * @brief Allows to disable all transformations for execution inside the TEMPLATE plugin. */ -static constexpr Property throughput_streams{"THROUGHPUT_STREAMS"}; +static constexpr Property disable_transformations{"DISABLE_TRANSFORMATIONS"}; -// ! [public_header:properties] +// ! [properties:public_header] } // namespace template_plugin } // namespace ov diff --git a/src/plugins/template/src/compiled_model.cpp b/src/plugins/template/src/compiled_model.cpp index 3bdd06163040de..5961280c12aa29 100644 --- a/src/plugins/template/src/compiled_model.cpp +++ b/src/plugins/template/src/compiled_model.cpp @@ -12,7 +12,6 @@ #include "itt.hpp" #include "openvino/runtime/properties.hpp" #include "plugin.hpp" -#include "template/config.hpp" #include "transformations/utils/utils.hpp" // ! 
[compiled_model:ctor] @@ -47,6 +46,8 @@ ov::template_plugin::CompiledModel::CompiledModel(const std::shared_ptr& model); void ov::template_plugin::CompiledModel::compile_model(const std::shared_ptr& model) { + if (m_cfg.disable_transformations) + return; // apply plugins transformations transform_model(model); // Perform any other steps like allocation and filling backend specific memory handles and so on @@ -107,9 +108,7 @@ ov::Any ov::template_plugin::CompiledModel::get_property(const std::string& name return ro_properties; }; const auto& default_rw_properties = []() { - std::vector rw_properties{ov::device::id, - ov::enable_profiling, - ov::template_plugin::throughput_streams}; + std::vector rw_properties{ov::device::id, ov::enable_profiling}; return rw_properties; }; const auto& to_string_vector = [](const std::vector& properties) { diff --git a/src/plugins/template/src/config.cpp b/src/plugins/template/src/config.cpp index 90842ead1dfa75..2e2075d39bd3c3 100644 --- a/src/plugins/template/src/config.cpp +++ b/src/plugins/template/src/config.cpp @@ -7,7 +7,8 @@ #include #include -#include "template/config.hpp" +#include "openvino/runtime/properties.hpp" +#include "template/properties.hpp" using namespace ov::template_plugin; @@ -22,8 +23,8 @@ Configuration::Configuration(const ov::AnyMap& config, const Configuration& defa const auto& key = c.first; const auto& value = c.second; - if (ov::template_plugin::throughput_streams == key) { - streams_executor_config.set_property(CONFIG_KEY(CPU_THROUGHPUT_STREAMS), value); + if (ov::template_plugin::disable_transformations == key) { + disable_transformations = value.as(); } else if (streamExecutorConfigKeys.end() != std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) { streams_executor_config.set_property(key, value); @@ -51,7 +52,9 @@ ov::Any Configuration::Get(const std::string& name) const { return {std::to_string(device_id)}; } else if (name == CONFIG_KEY(PERF_COUNT)) { return {perf_count}; - } else if (name == ov::template_plugin::throughput_streams || name == CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) { + } else if (name == ov::template_plugin::disable_transformations) { + return {disable_transformations}; + } else if (name == ov::num_streams) { return {std::to_string(streams_executor_config._streams)}; } else if (name == CONFIG_KEY(CPU_BIND_THREAD)) { return streams_executor_config.get_property(name); diff --git a/src/plugins/template/src/config.hpp b/src/plugins/template/src/config.hpp index c8066a91ebdc0e..5a9732d382d5fe 100644 --- a/src/plugins/template/src/config.hpp +++ b/src/plugins/template/src/config.hpp @@ -34,6 +34,7 @@ struct Configuration { bool perf_count = true; ov::threading::IStreamsExecutor::Config streams_executor_config; ov::hint::PerformanceMode performance_mode = ov::hint::PerformanceMode::UNDEFINED; + bool disable_transformations = false; }; // ! 
[configuration:header] diff --git a/src/plugins/template/src/plugin.cpp b/src/plugins/template/src/plugin.cpp index 25c44ed062d5d5..6747d88cb8eba5 100644 --- a/src/plugins/template/src/plugin.cpp +++ b/src/plugins/template/src/plugin.cpp @@ -12,7 +12,7 @@ #include "openvino/pass/manager.hpp" #include "openvino/runtime/properties.hpp" #include "remote_context.hpp" -#include "template/config.hpp" +#include "template/properties.hpp" #include "transformations/common_optimizations/common_optimizations.hpp" #include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" #include "transformations/control_flow/unroll_if.hpp" @@ -171,6 +171,9 @@ ov::SupportedOpsMap ov::template_plugin::Plugin::query_model(const std::shared_p auto supported = ov::get_supported_nodes( model, [&](std::shared_ptr& model) { + // skip transformations in case of user config + if (fullConfig.disable_transformations) + return; // 1. It is needed to apply all transformations as it is done in compile_model transform_model(model); }, @@ -228,7 +231,7 @@ ov::Any ov::template_plugin::Plugin::get_property(const std::string& name, const std::vector rw_properties{ov::device::id, ov::enable_profiling, ov::hint::performance_mode, - ov::template_plugin::throughput_streams}; + ov::template_plugin::disable_transformations}; return rw_properties; }; const auto& to_string_vector = [](const std::vector& properties) { diff --git a/src/plugins/template/tests/functional/shared_tests_instances/behavior/plugin/configuration_tests.cpp b/src/plugins/template/tests/functional/shared_tests_instances/behavior/plugin/configuration_tests.cpp index c0a9a5d63f0af3..15d04bf9cb3abd 100644 --- a/src/plugins/template/tests/functional/shared_tests_instances/behavior/plugin/configuration_tests.cpp +++ b/src/plugins/template/tests/functional/shared_tests_instances/behavior/plugin/configuration_tests.cpp @@ -4,7 +4,7 @@ #include "behavior/plugin/configuration_tests.hpp" -#include