From b04e9399b7897bf534d0fbf0bdb815fa4195c107 Mon Sep 17 00:00:00 2001 From: hyunback kim Date: Fri, 29 Mar 2024 13:47:16 +0900 Subject: [PATCH 1/5] [GPU] Remove heavy Reorder in target pattern(Conv - Reorder - Reshape -MVN) (#23647) Reorder is the main bottleneck (roughly 40%) in SD1.5 (vae_decoder) on MTL. In the target pattern, a Reorder is always added at Reshape primitive creation because the Reshape's input and output dimensions are different. We can get a huge improvement by removing the heavy Reorders through using planar format in Conv. The target pattern (Conv - Reorder - Reshape - MVN) repeats 14 times in vae_decoder. ### Tickets: - *134278* --------- Signed-off-by: hyunback --- .../intel_gpu/src/graph/layout_optimizer.cpp | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 8f2021d39a2b7c..b9b4a76f344188 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -701,9 +701,32 @@ bool layout_optimizer::should_select_b_fs_yx_fsv16_layout(convolution_node const auto current_conv_partially_supports_layout = convolution_b_fs_yx_fsv16_opt(input_layout, output_layout, weights_layout, prim, true); auto may_use_weak_restrictions = is_prev_conv_node_supports_layout || weak_restriction_cond; - return ((_optimization_attributes.b_fs_yx_fsv16_network) && + std::function need_heavy_reorder = [&](const program_node& node, size_t cur_depth, size_t max_depth) { + if (cur_depth > max_depth) return false; + if (node.is_type()) { + for (auto& reorder_user : node.get_users()) { + if (reorder_user->is_type()) { + for (auto& reshape_user : reorder_user->get_users()) { + // Meteor Lake showed planar format Convolution without Reorder is better than + // blocked format Convolution in case Reorder is larger than [1, 512, 128, 128]. 
+ if (reshape_user->is_type() && node.get_output_layout().get_linear_size() > 8300000) { + GPU_DEBUG_LOG << node.id() << ": " << node.get_output_layout().to_short_string() << " -> heavy reorder" << std::endl; + return true; + } + } + } + } + } + bool res = false; + for (const auto& usr : node.get_users()) { + res |= need_heavy_reorder(*usr, cur_depth + 1, max_depth); + } + return res; + }; + + return (((_optimization_attributes.b_fs_yx_fsv16_network) && (current_conv_supports_layout || (may_use_weak_restrictions && current_conv_partially_supports_layout))) || - input_layout.format == format::b_fs_yx_fsv16; + input_layout.format == format::b_fs_yx_fsv16) && !need_heavy_reorder(reinterpret_cast(node), 0, 3); } bool layout_optimizer::convolution_b_fs_zyx_fsv16_opt(const layout& input_layout, From ec256169a0e1d40f49a7873a21b3491c7c61cd46 Mon Sep 17 00:00:00 2001 From: Roman Lyamin Date: Fri, 29 Mar 2024 09:10:26 +0400 Subject: [PATCH 2/5] [GPU] Changed zero point pad size for dGPU (#23755) ### Details: - *The previous value led to performance drops on dGPU* ### Tickets: - *[136266](https://jira.devtools.intel.com/browse/CVS-136266)* --- src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 0af7db7216ca8a..2b55f9d93d4449 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -724,7 +724,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { if (!device_info.supports_immad) manager.register_pass(); - const size_t zp_pad_size = 32; + const size_t zp_pad_size = device_info.supports_immad ? 
16 : 32; manager.register_pass(zp_pad_size); // This is supposed to be the last pass to ensure that we don't have name collisions until From db365529209ad372c7e81c7667e9841a8dea754f Mon Sep 17 00:00:00 2001 From: Xuejun Zhai Date: Fri, 29 Mar 2024 13:48:53 +0800 Subject: [PATCH 3/5] [CPU] Remove redundant tensor compatibility check from init_tensor (#23564) ### Details: - Remove redundant tensor compatibility check from init_tensor - Add test case for parameter->result tensor sharing. ### Tickets: - *CVS-135890* --------- Signed-off-by: Zhai, Xuejun Co-authored-by: River.Li Co-authored-by: Chen Peter --- src/plugins/intel_cpu/src/infer_request.cpp | 30 ------------------- .../subgraph_tests/parameter_result.hpp | 20 +++++++++++++ 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index 260cf49538b513..93bfb0117f77f3 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -558,36 +558,6 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyn tensor = ov::make_tensor(model_prec, tensor_shape); } ov::ISyncInferRequest::set_tensor(port, tensor); - } else { - const auto& tensor_shape = tensor->get_shape(); - const bool isDynamic = port_shape.is_dynamic(); - // Static shape case is enough information that shapes are incompatible to throw exception - // but in dynamic shape case we also need to handle following corner case: - // on tensor initialization stage we create empty tensor with dimensions equal 0 - // so if we have tensor with all zero dimension we mustn't throw exception - if (!port_shape.compatible(ov::PartialShape(tensor_shape)) && - (!isDynamic || static_cast(tensor_shape.size()) != port_shape.rank().get_length() || - std::any_of(tensor_shape.begin(), tensor_shape.end(), [](const size_t& dims) { - return dims != 0; - }))) { - OPENVINO_THROW("ParameterMismatch: model input and 
output use the same index: ", - port_index, - ", but expect tensors with different shapes. Input shape: ", - ov::PartialShape(tensor_shape), - ", output shape: ", - port_shape); - } - - const auto netOutPrc = port.get_element_type(); - if (netOutPrc != tensor->get_element_type()) { - OPENVINO_THROW("ParameterMismatch: model input and output use the same index: ", - port_index, - " but expect tensor with different precision: ", - tensor->get_element_type(), - " for input and ", - netOutPrc, - " for output."); - } } m_outputs[port_index] = tensor; if (!port_shape.is_dynamic() && !m_output_external_ptr.count(port_index)) { diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/parameter_result.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/parameter_result.hpp index e1f693f4cf5a8e..435278d521be0a 100644 --- a/src/tests/functional/plugin/shared/include/subgraph_tests/parameter_result.hpp +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/parameter_result.hpp @@ -5,6 +5,7 @@ #pragma once #include "shared_test_classes/subgraph/parameter_result.hpp" +#include "openvino/runtime/tensor.hpp" namespace ov { namespace test { @@ -13,5 +14,24 @@ TEST_P(ParameterResultSubgraphTest, Inference) { run(); } +TEST_P(ParameterResultSubgraphTest, CheckSharedTensor) { + ov::test::InputShape input_shape; + std::tie(input_shape, targetDevice) = this->GetParam(); + + ov::Shape shape = input_shape.second[0]; + auto input = ov::Tensor(ov::element::f32, shape); + + // Load model + ov::Core core; + auto compiled_model = core.compile_model(function, targetDevice); + + // Infer + auto infer_req = compiled_model.create_infer_request(); + infer_req.set_input_tensor(input); + infer_req.infer(); + + ASSERT_EQ(infer_req.get_input_tensor().data(), infer_req.get_output_tensor().data()); +} + } // namespace test } // namespace ov From e5da9f9f8f2a8fbbcb14376a73786149e57528c8 Mon Sep 17 00:00:00 2001 From: barnasm1 Date: Fri, 29 Mar 2024 07:12:51 +0100 
Subject: [PATCH 4/5] [CORE] Move i8, u8 Multiply and Divide ops into template (#23717) ### Details: - *Move i8, u8 Multiply and Divide ops into template* ### Tickets: - *[CVS-136165](https://jira.devtools.intel.com/browse/CVS-136165)* --- src/core/src/op/divide.cpp | 2 +- src/core/src/op/multiply.cpp | 2 +- src/plugins/template/backend/ops/divide.cpp | 42 +++++++ src/plugins/template/backend/ops/multiply.cpp | 9 +- .../template/backend/ops/ops_evaluates.hpp | 4 + .../template/backend/opset_int_tbl.hpp | 1 + .../tests/functional/op_reference/divide.cpp | 4 +- .../functional/op_reference/multiply.cpp | 113 +++++++++++++----- 8 files changed, 142 insertions(+), 35 deletions(-) create mode 100644 src/plugins/template/backend/ops/divide.cpp diff --git a/src/core/src/op/divide.cpp b/src/core/src/op/divide.cpp index d903c00c681dce..b00b731b296351 100644 --- a/src/core/src/op/divide.cpp +++ b/src/core/src/op/divide.cpp @@ -246,7 +246,7 @@ bool Divide::evaluate(TensorVector& outputs, const TensorVector& inputs) const { this, outputs, inputs, - OV_PP_ET_LIST(f32, i8, i32, i64, u8, u32, u64), + OV_PP_ET_LIST(f32, i32, i64, u32, u64), divide::Evaluate, inputs[0].get_element_type(), inputs[0], diff --git a/src/core/src/op/multiply.cpp b/src/core/src/op/multiply.cpp index 88dbd347d46edf..fa3ef518c03202 100644 --- a/src/core/src/op/multiply.cpp +++ b/src/core/src/op/multiply.cpp @@ -51,7 +51,7 @@ bool Multiply::evaluate(TensorVector& outputs, const TensorVector& inputs) const this, outputs, inputs, - OV_PP_ET_LIST(f32, f64, i8, i32, i64, u8, u32, u64), + OV_PP_ET_LIST(f32, f64, i32, i64, u32, u64), multiply::Evaluate, inputs[0].get_element_type(), inputs[0], diff --git a/src/plugins/template/backend/ops/divide.cpp b/src/plugins/template/backend/ops/divide.cpp new file mode 100644 index 00000000000000..a7115d6507f88a --- /dev/null +++ b/src/plugins/template/backend/ops/divide.cpp @@ -0,0 +1,42 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 
+// + +#include "openvino/reference/divide.hpp" + +#include "evaluate_node.hpp" + +template +bool evaluate(const std::shared_ptr& op, + ov::TensorVector& outputs, + const ov::TensorVector& inputs) { + using T = ov::fundamental_type_for; + ov::reference::divide(inputs[0].data(), + inputs[1].data(), + outputs[0].data(), + inputs[0].get_shape(), + inputs[1].get_shape(), + op->get_autob(), + op->is_pythondiv()); + return true; +} + +template <> +bool evaluate_node(std::shared_ptr node, + ov::TensorVector& outputs, + const ov::TensorVector& inputs) { + const auto& element_type = node->get_output_element_type(0); + + switch (element_type) { + case ov::element::i8: + return evaluate(ov::as_type_ptr(node), outputs, inputs); + case ov::element::i16: + return evaluate(ov::as_type_ptr(node), outputs, inputs); + case ov::element::u8: + return evaluate(ov::as_type_ptr(node), outputs, inputs); + case ov::element::u16: + return evaluate(ov::as_type_ptr(node), outputs, inputs); + default: + OPENVINO_THROW("Unhandled data type ", element_type, " in evaluate_node()"); + } +} diff --git a/src/plugins/template/backend/ops/multiply.cpp b/src/plugins/template/backend/ops/multiply.cpp index 39c13d4ad93019..0476e405c80848 100644 --- a/src/plugins/template/backend/ops/multiply.cpp +++ b/src/plugins/template/backend/ops/multiply.cpp @@ -10,11 +10,14 @@ template bool evaluate(const std::shared_ptr& op, ov::TensorVector& outputs, const ov::TensorVector& inputs) { - using T = typename ov::element_type_traits::value_type; + using T = ov::fundamental_type_for; ov::reference::multiply(inputs[0].data(), inputs[1].data(), outputs[0].data(), - ov::shape_size(inputs[0].get_shape())); + inputs[0].get_shape(), + inputs[1].get_shape(), + op->get_autob()); + return true; } @@ -34,6 +37,6 @@ bool evaluate_node(std::shared_ptr node, case ov::element::u16: return evaluate(ov::as_type_ptr(node), outputs, inputs); default: - OPENVINO_THROW("Unhandled data type ", node->get_element_type().get_type_name(), " in 
evaluate_node()"); + OPENVINO_THROW("Unhandled data type ", element_type, " in evaluate_node()"); } } diff --git a/src/plugins/template/backend/ops/ops_evaluates.hpp b/src/plugins/template/backend/ops/ops_evaluates.hpp index 38f3e31c354ab5..b621e466d04262 100644 --- a/src/plugins/template/backend/ops/ops_evaluates.hpp +++ b/src/plugins/template/backend/ops/ops_evaluates.hpp @@ -155,6 +155,10 @@ extern template bool evaluate_node(std::shar ov::TensorVector& outputs, const ov::TensorVector& inputs); +extern template bool evaluate_node(std::shared_ptr node, + ov::TensorVector& outputs, + const ov::TensorVector& inputs); + extern template bool evaluate_node(std::shared_ptr node, ov::TensorVector& outputs, const ov::TensorVector& inputs); diff --git a/src/plugins/template/backend/opset_int_tbl.hpp b/src/plugins/template/backend/opset_int_tbl.hpp index d6afa313b77b78..a9c9aa8737bec3 100644 --- a/src/plugins/template/backend/opset_int_tbl.hpp +++ b/src/plugins/template/backend/opset_int_tbl.hpp @@ -45,6 +45,7 @@ _OPENVINO_OP_REG(ConvertLike, op::v1) _OPENVINO_OP_REG(Convolution, op::v1) _OPENVINO_OP_REG(ConvolutionBackpropData, op::v1) _OPENVINO_OP_REG(DeformablePSROIPooling, op::v1) +_OPENVINO_OP_REG(Divide, op::v1) _OPENVINO_OP_REG(Equal, op::v1) _OPENVINO_OP_REG(Greater, op::v1) _OPENVINO_OP_REG(GroupConvolution, op::v1) diff --git a/src/plugins/template/tests/functional/op_reference/divide.cpp b/src/plugins/template/tests/functional/op_reference/divide.cpp index 27d0c1af8771d1..85c19a19703aaa 100644 --- a/src/plugins/template/tests/functional/op_reference/divide.cpp +++ b/src/plugins/template/tests/functional/op_reference/divide.cpp @@ -222,7 +222,9 @@ std::vector generateParamsForDivideCppRoundingInt32() { } std::vector generateCombinedParamsForDivide() { - const std::vector> allTypeParams{generateParamsForDivide(), + const std::vector> allTypeParams{generateParamsForDivide(), + generateParamsForDivide(), + generateParamsForDivide(), generateParamsForDivide(), 
generateParamsForDivide(), generateParamsForDivide(), diff --git a/src/plugins/template/tests/functional/op_reference/multiply.cpp b/src/plugins/template/tests/functional/op_reference/multiply.cpp index a72a32dfed8c77..7d44072876f353 100644 --- a/src/plugins/template/tests/functional/op_reference/multiply.cpp +++ b/src/plugins/template/tests/functional/op_reference/multiply.cpp @@ -78,27 +78,18 @@ template std::vector generateParamsForMultiply() { using T = typename element_type_traits::value_type; - std::vector params{ - MultiplyParams(ov::PartialShape{2, 2}, - ov::PartialShape{2, 2}, - IN_ET, - std::vector{1, 2, 3, 4}, - std::vector{5, 6, 7, 8}, - std::vector{5, 12, 21, 32}), - MultiplyParams( - ov::PartialShape{3, 2, 1}, - ov::PartialShape{1, 6}, - IN_ET, - std::vector{12, 24, 36, 48, 60, 72}, - std::vector{1, 2, 3, 4, 6, 1}, - std::vector{12, 24, 36, 48, 72, 12, 24, 48, 72, 96, 144, 24, 36, 72, 108, 144, 216, 36, - 48, 96, 144, 192, 288, 48, 60, 120, 180, 240, 360, 60, 72, 144, 216, 288, 432, 72}), - MultiplyParams(ov::PartialShape{1}, - ov::PartialShape{1}, - IN_ET, - std::vector{2}, - std::vector{8}, - std::vector{16})}; + std::vector params{MultiplyParams(ov::PartialShape{2, 2}, + ov::PartialShape{2, 2}, + IN_ET, + std::vector{1, 2, 3, 4}, + std::vector{5, 6, 7, 8}, + std::vector{5, 12, 21, 32}), + MultiplyParams(ov::PartialShape{1}, + ov::PartialShape{1}, + IN_ET, + std::vector{2}, + std::vector{8}, + std::vector{16})}; return params; } @@ -115,15 +106,79 @@ std::vector generateParamsForMultiplyFloat() { return params; } +template +std::vector generateParamsForMultiplyWithBroadcast() { + using T = typename element_type_traits::value_type; + + std::vector params{MultiplyParams( + ov::PartialShape{3, 2, 1}, + ov::PartialShape{1, 6}, + IN_ET, + std::vector{12, 24, 36, 48, 60, 72}, + std::vector{1, 2, 3, 4, 6, 1}, + std::vector{12, 24, 36, 48, 72, 12, 24, 48, 72, 96, 144, 24, 36, 72, 108, 144, 216, 36, + 48, 96, 144, 192, 288, 48, 60, 120, 180, 240, 360, 60, 
72, 144, 216, 288, 432, 72})}; + + return params; +} + +template <> +std::vector generateParamsForMultiplyWithBroadcast() { + constexpr auto IN_ET = element::Type_t::i8; + using T = typename element_type_traits::value_type; + + std::vector params{MultiplyParams( + ov::PartialShape{3, 2, 1}, + ov::PartialShape{1, 6}, + IN_ET, + std::vector{-12, -6, 4, 14, 18, 20}, + std::vector{1, 2, 3, 4, 6, 1}, + std::vector{-12, -24, -36, -48, -72, -12, -6, -12, -18, -24, -36, -6, 4, 8, 12, 16, 24, 4, + 14, 28, 42, 56, 84, 14, 18, 36, 54, 72, 108, 18, 20, 40, 60, 80, 120, 20})}; + + return params; +} + +template <> +std::vector generateParamsForMultiplyWithBroadcast() { + constexpr auto IN_ET = element::Type_t::u8; + using T = typename element_type_traits::value_type; + + std::vector params{MultiplyParams( + ov::PartialShape{3, 2, 1}, + ov::PartialShape{1, 6}, + IN_ET, + std::vector{12, 24, 36, 38, 40, 42}, + std::vector{1, 2, 3, 4, 6, 1}, + std::vector{12, 24, 36, 48, 72, 12, 24, 48, 72, 96, 144, 24, 36, 72, 108, 144, 216, 36, + 38, 76, 114, 152, 228, 38, 40, 80, 120, 160, 240, 40, 42, 84, 126, 168, 252, 42})}; + + return params; +} + std::vector generateCombinedParamsForMultiply() { - const std::vector> allTypeParams{generateParamsForMultiply(), - generateParamsForMultiply(), - generateParamsForMultiply(), - generateParamsForMultiply(), - generateParamsForMultiply(), - generateParamsForMultiply(), - generateParamsForMultiply(), - generateParamsForMultiply()}; + const std::vector> allTypeParams{ + generateParamsForMultiplyWithBroadcast(), + generateParamsForMultiplyWithBroadcast(), + generateParamsForMultiplyWithBroadcast(), + generateParamsForMultiplyWithBroadcast(), + generateParamsForMultiplyWithBroadcast(), + generateParamsForMultiplyWithBroadcast(), + generateParamsForMultiplyWithBroadcast(), + generateParamsForMultiplyWithBroadcast(), + generateParamsForMultiplyWithBroadcast(), + generateParamsForMultiplyWithBroadcast(), + generateParamsForMultiply(), + 
generateParamsForMultiply(), + generateParamsForMultiply(), + generateParamsForMultiply(), + generateParamsForMultiply(), + generateParamsForMultiply(), + generateParamsForMultiply(), + generateParamsForMultiply(), + generateParamsForMultiply(), + generateParamsForMultiply(), + }; std::vector combinedParams; From 5f205254e07eac190ea8d17d3514f83762086a83 Mon Sep 17 00:00:00 2001 From: Vishniakov Nikolai Date: Fri, 29 Mar 2024 07:24:46 +0100 Subject: [PATCH 5/5] [OV JS] Extend smart CI for JS API (#23666) ### Details: - Add more validation dependencies ### Tickets: - 129334 --- .github/components.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/components.yml b/.github/components.yml index 71cb0721ac2974..328666dde27d15 100644 --- a/.github/components.yml +++ b/.github/components.yml @@ -27,7 +27,6 @@ CPU: revalidate: - C_API - Python_API - - JS_API - samples - ONNX_RT - PyTorch_FE @@ -106,7 +105,6 @@ IR_FE: revalidate: - C_API - Python_API - - JS_API - samples build: - CPU @@ -176,8 +174,6 @@ Python_API: - PyTorch_FE JS_API: - revalidate: - - samples build: - CPU - IR_FE