diff --git a/docs/articles_en/about-openvino/performance-benchmarks.rst b/docs/articles_en/about-openvino/performance-benchmarks.rst index 78a364c18ca4e6..5d9abfe891584f 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks.rst @@ -163,7 +163,7 @@ For a listing of all platforms and configurations used for testing, refer to the 2024.5, as of November 20, 2024. * OpenVINO Model Server performance results are based on release - 2024.4, as of Sept. 30, 2024. + 2024.5, as of November 20, 2024. The results may not reflect all publicly available updates. Intel technologies' features and benefits depend on system configuration and may require enabled hardware, software, or service diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst index 7b135fa7ff0b14..436d383ebf787e 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst @@ -146,6 +146,8 @@ offer a limited set of supported OpenVINO features. ov::intel_npu::turbo ov::intel_npu::tiles ov::intel_npu::max_tiles + ov::intel_npu::bypass_umd_caching + ov::intel_npu::defer_weights_load .. tab-item:: Read-only properties @@ -168,7 +170,6 @@ offer a limited set of supported OpenVINO features. ov::intel_npu::device_alloc_mem_size ov::intel_npu::device_total_mem_size ov::intel_npu::driver_version - ov::intel_npu::bypass_umd_caching .. note:: diff --git a/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ovms-genai.json b/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ovms-genai.json index 0d53c3813542d2..0de8f188e7de34 100644 --- a/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ovms-genai.json +++ b/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ovms-genai.json @@ -6,7 +6,7 @@ "whats_new_model": false, "PlatformType": "Server Platforms (Intel® Xeon®)", "Parameters": { - "OpenVINO Model Server": { + "Ovms": { "Precisions": [ { "Throughput": { @@ -30,7 +30,7 @@ } ] }, - "vLLM with OpenVINO backend": { + "Vllm": { "Precisions": [ { "Throughput": { @@ -63,7 +63,7 @@ "whats_new_model": false, "PlatformType": "Server Platforms (Intel® Xeon®)", "Parameters": { - "OpenVINO Model Server": { + "Ovms": { "Precisions": [ { "Throughput": { @@ -87,7 +87,7 @@ } ] }, - "vLLM with OpenVINO backend": { + "Vllm": { "Precisions": [ { "Throughput": { @@ -120,7 +120,7 @@ "whats_new_model": false, "PlatformType": "Server Platforms (Intel® Xeon®)", "Parameters": { - "OpenVINO Model Server": { + "Ovms": { "Precisions": [ { "Throughput": { @@ -144,7 +144,7 @@ } ] }, - "vLLM with OpenVINO backend": { + "Vllm": { "Precisions": [ { "Throughput": { @@ -177,7 +177,7 @@ "whats_new_model": true, "PlatformType": "Server Platforms (Intel® Xeon®)", "Parameters": { - "OpenVINO Model Server": { + "Ovms": { "Precisions": [ { "Throughput": { @@ -201,7 +201,7 @@ } ] }, - "vLLM with OpenVINO backend": { + "Vllm": { "Precisions": [ { "Throughput": { @@ -228,7 +228,7 @@ "whats_new_model": true, "PlatformType": "Server Platforms (Intel® Xeon®)", "Parameters": { - "OpenVINO Model Server": { + "Ovms": { "Precisions": [ { "Throughput": { @@ -252,7 +252,7 @@ } ] }, - "vLLM with OpenVINO backend": { + "Vllm": { "Precisions": [ { "Throughput": { @@ -283,7 +283,7 @@ 
"whats_new_model": true, "PlatformType": "Server Platforms (Intel® Xeon®)", "Parameters": { - "OpenVINO Model Server": { + "Ovms": { "Precisions": [ { "Throughput": { @@ -307,7 +307,7 @@ } ] }, - "vLLM with OpenVINO backend": { + "Vllm": { "Precisions": [ { "Throughput": { @@ -338,7 +338,7 @@ "whats_new_model": false, "PlatformType": "Server Platforms (Intel® Xeon®)", "Parameters": { - "OpenVINO Model Server": { + "Ovms": { "Precisions": [ { "Throughput": { @@ -362,7 +362,7 @@ } ] }, - "vLLM with OpenVINO backend": { + "Vllm": { "Precisions": [ { "Throughput": { @@ -393,7 +393,7 @@ "whats_new_model": false, "PlatformType": "Server Platforms (Intel® Xeon®)", "Parameters": { - "OpenVINO Model Server": { + "Ovms": { "Precisions": [ { "Throughput": { @@ -417,7 +417,7 @@ } ] }, - "vLLM with OpenVINO backend": { + "Vllm": { "Precisions": [ { "Throughput": { @@ -450,7 +450,7 @@ "whats_new_model": false, "PlatformType": "Server Platforms (Intel® Xeon®)", "Parameters": { - "OpenVINO Model Server": { + "Ovms": { "Precisions": [ { "Throughput": { @@ -474,7 +474,7 @@ } ] }, - "vLLM with OpenVINO backend": { + "Vllm": { "Precisions": [ { "Throughput": { diff --git a/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_pooling.hpp b/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_pooling.hpp index b74a0ff538e011..662660b926aa52 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_pooling.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_pooling.hpp @@ -72,7 +72,7 @@ ov::matcher_pass_callback ConvertReduceBase::convert_reduce_to_pooling() { return [&](ov::pass::pattern::Matcher& m) { auto reduce = std::dynamic_pointer_cast(m.get_match_root()); - if (!reduce || transformation_callback(reduce)) { + if (!reduce || transformation_callback(reduce) || ov::shape_size(reduce->input_value(0).get_shape()) == 0) { return false; } diff --git a/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp b/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp index d86b4b71f102c7..fed6eaf9710420 100644 --- a/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp @@ -11,10 +11,30 @@ #include "openvino/op/tensor_iterator.hpp" #include "openvino/op/util/multi_subgraph_base.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" +#include "openvino/util/common_util.hpp" #include "transformations/utils/utils.hpp" using namespace ov::op::util; +namespace { +/** @brief Value to mark that input idx has been removed (at least one removed so last idx will be always available) */ +constexpr auto mark_removed = std::numeric_limits::max(); + +constexpr bool is_not_removed_idx(const decltype(mark_removed) idx) { + return mark_removed != idx; +} + +uint64_t get_updated_idx(uint64_t idx, uint64_t removed_idx) { + if (idx == removed_idx) { + return mark_removed; + } else if (is_not_removed_idx(idx) && idx > removed_idx) { + return idx - 1; + } else { + return idx; + } +}; +} // namespace + bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const std::shared_ptr& m) { RUN_ON_MODEL_SCOPE(RemoveMultiSubGraphOpDanglingParamsResults); bool is_changed = false; @@ -117,7 +137,6 @@ bool 
ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st // Remove inputs bool pass_required = false; std::set required_inputs_indices; - auto op_inputs = multi_subgraph_op->input_values(); std::vector> to_remove_descriptors_indexes; to_remove_descriptors_indexes.resize(subgraphs_size); for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) { @@ -142,64 +161,57 @@ bool ov::pass::RemoveMultiSubGraphOpDanglingParamsResults::run_on_model(const st using DescType = op::util::MultiSubGraphOp::MultiSubgraphInputDescriptionVector; auto update_body_param_desc = [](DescType& descriptors, uint64_t removed_body_idx) { for (auto& desc : descriptors) { - if (desc->m_body_parameter_index > removed_body_idx) { - desc->m_body_parameter_index--; - } + desc->m_body_parameter_index = get_updated_idx(desc->m_body_parameter_index, removed_body_idx); } }; auto update_op_inputs_desc = [&subgraphs_size](const std::shared_ptr& op, - std::set& required_inputs_indices, uint64_t removed_loop_idx) { - std::set new_required_inputs_indices; for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) { auto& descriptors = op->get_input_descriptions(static_cast(body_idx)); for (auto& desc : descriptors) { - if (desc->m_input_index > removed_loop_idx) { - desc->m_input_index--; - } + desc->m_input_index = get_updated_idx(desc->m_input_index, removed_loop_idx); } } - for (auto input_index : required_inputs_indices) { - if (input_index > removed_loop_idx) { - new_required_inputs_indices.insert(input_index - 1); - } else { - new_required_inputs_indices.insert(input_index); - } + }; + + const auto update_required_input_indicies = [](std::set& required_inputs_indices, + uint64_t removed_input_idx) { + std::set new_required_inputs_indices; + for (const auto& input_index : required_inputs_indices) { + new_required_inputs_indices.insert(input_index > removed_input_idx ? 
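/* (sketch) the plain shift here: removing input 1 turns required indices
   {0, 2, 3} into {0, 1, 2}. Descriptor indexes go through get_updated_idx()
   instead, which also tombstones the removed entry:
     get_updated_idx(1, 1) == mark_removed
     get_updated_idx(3, 1) == 2
     get_updated_idx(mark_removed, 0) == mark_removed  // never shifted again
*/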
input_index - 1 : input_index); } - required_inputs_indices = new_required_inputs_indices; + required_inputs_indices = std::move(new_required_inputs_indices); }; // Remove dangling body params and input and update input descriptors + auto op_inputs = multi_subgraph_op->input_values(); for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) { auto& body_in_descriptors = multi_subgraph_op->get_input_descriptions(static_cast(body_idx)); - auto& body_func = multi_subgraph_op->get_function(static_cast(body_idx)); - auto& body_params = body_func->get_parameters(); op::util::MultiSubGraphOp::MultiSubgraphInputDescriptionVector updated_body_in_descriptors; + for (size_t desc_idx = 0; desc_idx < body_in_descriptors.size(); ++desc_idx) { - if (std::count(std::begin(to_remove_descriptors_indexes[body_idx]), - std::end(to_remove_descriptors_indexes[body_idx]), - desc_idx) > 0) { - if (body_in_descriptors[desc_idx]->m_body_parameter_index < body_params.size()) { - auto& body_param = body_params[body_in_descriptors[desc_idx]->m_body_parameter_index]; - body_func->remove_parameter(body_param); - // Move all body indexes which are after these indicated by to_remove_descriptors_indexes - update_body_param_desc(body_in_descriptors, - body_in_descriptors[desc_idx]->m_body_parameter_index); - } - // remove dangling input of MultiSubGraphOp which was not removed earlier - auto current_input_idx = body_in_descriptors[desc_idx]->m_input_index; - // the same input tensor can go to different input ports - if (current_input_idx < op_inputs.size() && - std::count(std::begin(required_inputs_indices), - std::end(required_inputs_indices), - current_input_idx) == 0 && - std::count(std::begin(op_inputs), std::end(op_inputs), op_inputs[current_input_idx]) > 0) { - op_inputs.erase(std::next(op_inputs.begin(), current_input_idx)); - // Move all input indexes (in all bodies) which are after these indicated by - // to_remove_descriptors_indexes and are not used in any body - update_op_inputs_desc(multi_subgraph_op, required_inputs_indices, current_input_idx); - } - } else { - updated_body_in_descriptors.emplace_back(body_in_descriptors[desc_idx]); + auto& current_body_desc = body_in_descriptors[desc_idx]; + const auto current_body_parameter_idx = current_body_desc->m_body_parameter_index; + if (!util::contains(to_remove_descriptors_indexes[body_idx], desc_idx)) { + updated_body_in_descriptors.emplace_back(current_body_desc); + } else if (is_not_removed_idx(current_body_parameter_idx)) { + auto& body_func = multi_subgraph_op->get_function(body_idx); + const auto& body_params = body_func->get_parameters(); + + body_func->remove_parameter(body_params[current_body_parameter_idx]); + // Move all body indexes which are after these indicated by to_remove_descriptors_indexes + update_body_param_desc(body_in_descriptors, current_body_parameter_idx); + } + + const auto current_input_idx = current_body_desc->m_input_index; + // remove dangling input of MultiSubGraphOp which was not removed earlier + // the same input tensor can go to different input ports + if (!util::contains(required_inputs_indices, current_input_idx) && + is_not_removed_idx(current_input_idx)) { + op_inputs.erase(op_inputs.begin() + current_input_idx); + // Move all input indexes (in all bodies) which are after these indicated by + // to_remove_descriptors_indexes and are not used in any body + update_op_inputs_desc(multi_subgraph_op, current_input_idx); + update_required_input_indicies(required_inputs_indices, current_input_idx); } } 
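// Worked sketch of the loop above: with op_inputs [A, B, C] where only B
// feeds a dangling Parameter, its descriptor index is tombstoned with
// mark_removed, B is erased from op_inputs, and the surviving
// m_input_index values are shifted (C: 2 -> 1) before the remaining
// descriptors are written back below.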
multi_subgraph_op->set_input_descriptions(static_cast(body_idx), updated_body_in_descriptors); diff --git a/src/common/transformations/tests/common_optimizations/remove_multi_subgraph_op_dangling_params_tests.cpp b/src/common/transformations/tests/common_optimizations/remove_multi_subgraph_op_dangling_params_tests.cpp index 89f332bffebff8..4e8ad1765bd20f 100644 --- a/src/common/transformations/tests/common_optimizations/remove_multi_subgraph_op_dangling_params_tests.cpp +++ b/src/common/transformations/tests/common_optimizations/remove_multi_subgraph_op_dangling_params_tests.cpp @@ -175,7 +175,7 @@ TEST_F(TransformationTestsF, RemoveLoopDanglingParametersIfConcatEmptyTensor) { } } -TEST_F(TransformationTestsF, RemoveIfDanglingParametersFromBodiesAndInputs) { +TEST_F(TransformationTestsF, RemoveIfDanglingParametersFromBodiesAndInputsConsecutive) { auto X = std::make_shared(element::f32, Shape{2, 4, 1}); auto Y = std::make_shared(element::f32, Shape{3, 4, 1}); auto cond = std::make_shared(element::boolean, Shape{1}, true); @@ -196,6 +196,8 @@ TEST_F(TransformationTestsF, RemoveIfDanglingParametersFromBodiesAndInputs) { if_op->set_else_body(else_body); if_op->set_input(X, Xte, Xte); if_op->set_input(Y, Yte, Yte); + // if_op descriptors are [desc_0, desc_1, desc_2, desc_3] + // desc_0, desc_2 are dangling, Parameters Y, Yte should be removed auto res = if_op->set_output(then_op_res, else_op_res); model = std::make_shared(OutputVector{res}, ParameterVector{X, Y}); @@ -213,6 +215,46 @@ TEST_F(TransformationTestsF, RemoveIfDanglingParametersFromBodiesAndInputs) { } } +TEST_F(TransformationTestsF, RemoveIfDanglingParametersFromBodiesAndInputsNotConsecutive) { + auto X = std::make_shared(element::f32, Shape{2, 4, 1}); + auto Y = std::make_shared(element::f32, Shape{3, 4, 1}); + auto cond = std::make_shared(element::boolean, Shape{1}, false); + + auto Xte = std::make_shared(element::f32, PartialShape::dynamic()); + auto Yte = std::make_shared(element::f32, PartialShape::dynamic()); + + auto then_op = std::make_shared(Yte, Yte); + auto then_op_res = std::make_shared(then_op); + + auto else_op = std::make_shared(Yte, Yte); + auto else_op_res = std::make_shared(else_op); + { + auto then_body = std::make_shared(OutputVector{then_op_res}, ParameterVector{Xte, Yte}); + auto else_body = std::make_shared(OutputVector{else_op_res}, ParameterVector{Xte, Yte}); + auto if_op = std::make_shared(cond); + if_op->set_then_body(then_body); + if_op->set_else_body(else_body); + if_op->set_input(X, Xte, Yte); + if_op->set_input(Y, Xte, Xte); + // if_op descriptors are [desc_0, desc_1, desc_2, desc_3] + // desc_0, desc_2, desc_3 are dangling, Parameters Y, Xte should be removed + auto res = if_op->set_output(then_op_res, else_op_res); + model = std::make_shared(OutputVector{res}, ParameterVector{X, Y}); + + manager.register_pass(); + } + { + auto then_body = std::make_shared(OutputVector{then_op_res}, ParameterVector{Yte}); + auto else_body = std::make_shared(OutputVector{else_op_res}, ParameterVector{Yte}); + auto if_op = std::make_shared(cond); + if_op->set_then_body(then_body); + if_op->set_else_body(else_body); + if_op->set_input(X, Yte, Yte); + auto res = if_op->set_output(then_op_res, else_op_res); + model_ref = std::make_shared(OutputVector{res}, ParameterVector{X, Y}); + } +} + TEST_F(TransformationTestsF, RemoveIfDanglingParametersOnlyFromBodies) { auto X = std::make_shared(element::f32, Shape{2, 4, 1}); auto Y = std::make_shared(element::f32, Shape{3, 4, 1}); @@ -518,23 +560,28 @@ TEST_F(TransformationTestsF, 
RemoveLoopDanglingParamsAndResults) { auto ai = std::make_shared(element::f32, Shape{2, 2}); auto b = std::make_shared(element::f32, Shape{2, 2}); auto bi = std::make_shared(element::f32, Shape{2, 2}); + auto c = std::make_shared(element::f32, Shape{2, 2}); + auto ci = std::make_shared(element::f32, Shape{2, 2}); + auto d = std::make_shared(element::f32, Shape{2, 2}); auto mul = std::make_shared(ai, ai); auto abs1 = std::make_shared(mul); auto add = std::make_shared(bi, bi); auto abs2 = std::make_shared(add); { - auto body = std::make_shared(OutputVector{condition, abs1, abs2}, ParameterVector{ai, bi}); + auto body = std::make_shared(OutputVector{condition, abs1, abs2}, ParameterVector{ai, bi, ci}); auto loop = std::make_shared(trip_count, condition); loop->set_special_body_ports({-1, 0}); loop->set_function(body); loop->set_invariant_input(ai, a); + loop->set_invariant_input(ci, d); loop->set_invariant_input(bi, b); + loop->set_invariant_input(ci, c); auto loop_res = std::make_shared(loop->get_iter_value(abs1)); loop->get_iter_value(abs2); // abs2 result is unused - model = std::make_shared(OutputVector{loop_res}, ParameterVector{a, b}); + model = std::make_shared(OutputVector{loop_res}, ParameterVector{a, b, c, d}); manager.register_pass(); } diff --git a/src/common/util/include/openvino/util/common_util.hpp b/src/common/util/include/openvino/util/common_util.hpp index 8a0b68a22ceb83..312aab17419af4 100644 --- a/src/common/util/include/openvino/util/common_util.hpp +++ b/src/common/util/include/openvino/util/common_util.hpp @@ -131,11 +131,16 @@ T ceil_div(const T& x, const T& y) { return (x == 0 ? 0 : (1 + (x - 1) / y)); } -template -bool contains(const std::vector& vec, const V& v) { - return std::any_of(vec.begin(), vec.end(), [&](const T& x) { - return x == v; - }); +/** + * @brief Checks if container contains the specific value. + * + * @param container The container of elements to examine. + * @param value Value to compare the elements to. + * @return True if value found in the container, false otherwise. 
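+ *
+ * A minimal usage sketch:
+ * @code
+ * std::set<uint64_t> required{0, 2, 5};
+ * bool has_two = ov::util::contains(required, uint64_t{2});  // true
+ * bool has_one = ov::util::contains(required, uint64_t{1});  // false
+ * @endcode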
+ */ +template +bool contains(const R& container, const V& value) { + return std::find(std::begin(container), std::end(container), value) != std::end(container); } /** diff --git a/src/core/reference/include/openvino/reference/reduce_mean.hpp b/src/core/reference/include/openvino/reference/reduce_mean.hpp index 4c46d4ca786d09..f046f4f96197bb 100644 --- a/src/core/reference/include/openvino/reference/reduce_mean.hpp +++ b/src/core/reference/include/openvino/reference/reduce_mean.hpp @@ -26,6 +26,10 @@ void reduce_mean(const T* in, T* out, const Shape& in_shape, const AxisSet& redu reduce_sum(in, out, in_shape, reduction_axes); const auto out_shape = util::reduce(in_shape, reduction_axes); + if (shape_size(in_shape) == 0) { + return; + } + const auto out_size = shape_size(out_shape); const auto count = static_cast(shape_size(in_shape) / out_size); std::transform(out, std::next(out, out_size), out, [count](const T value) { diff --git a/src/inference/include/openvino/runtime/intel_npu/properties.hpp b/src/inference/include/openvino/runtime/intel_npu/properties.hpp index 49416f61b8b43b..8734757da1d53d 100644 --- a/src/inference/include/openvino/runtime/intel_npu/properties.hpp +++ b/src/inference/include/openvino/runtime/intel_npu/properties.hpp @@ -95,5 +95,12 @@ static constexpr ov::Property max_tiles{"NPU_MAX_TILES"}; */ static constexpr ov::Property bypass_umd_caching{"NPU_BYPASS_UMD_CACHING"}; +/** + * @brief [Only for NPU Plugin] + * Type: boolean, default is false + * This option allows to delay loading the weights until inference is created + */ +static constexpr ov::Property defer_weights_load{"NPU_DEFER_WEIGHTS_LOAD"}; + } // namespace intel_npu } // namespace ov diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 83cdd252f9bc6f..4aec56d98873fa 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -516,6 +516,51 @@ std::set> jit_floor_emitter::get_supported_precisions return {{element::f32}}; } +/// FLOOR_MOD /// +jit_floor_mod_emitter::jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node) + : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) { +} + +jit_floor_mod_emitter::jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc): jit_emitter(host, host_isa, exec_prc) { +} + +size_t jit_floor_mod_emitter::get_inputs_count() const { return 2; } + +size_t jit_floor_mod_emitter::get_aux_vecs_count() const { return 1; } + +void jit_floor_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel"); + } +} + +template +void jit_floor_mod_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string()); + + using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; + + TReg dividend = TReg(in_vec_idxs[0]); + TReg divisor = TReg(in_vec_idxs[1]); + TReg r = TReg(out_vec_idxs[0]); + TReg aux = 
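/* Scalar sketch of the four instructions below (f32 FloorMod):
     aux = dividend / divisor;   // fdiv
     aux = std::floor(aux);      // frintm rounds toward minus infinity
     aux = aux * divisor;        // fmul
     r   = dividend - aux;       // fsub
   i.e. r = dividend - floor(dividend / divisor) * divisor */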
TReg(aux_vec_idxs[0]); + + h->fdiv(aux.s, dividend.s, divisor.s); + h->frintm(aux.s, aux.s); + h->fmul(aux.s, aux.s, divisor.s); + h->fsub(r.s, dividend.s, aux.s); +} + +std::set> jit_floor_mod_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + /// CEILING /// //Initialization of the emitter, taking node as input jit_ceiling_emitter::jit_ceiling_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index fa4f4141c388e4..2cb7e6928ade3e 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -213,7 +213,28 @@ class jit_floor_emitter : public jit_emitter { template void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; +class jit_floor_mod_emitter : public jit_emitter { +public: + jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc = ov::element::f32); + + jit_floor_mod_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node); + + size_t get_inputs_count() const override; + + size_t get_aux_vecs_count() const override; + + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); +private: + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; + + template + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; +}; class jit_ceiling_emitter : public jit_emitter { public: // Constructor with explicit precision diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp index 660db85cd61529..5f63904fbb9342 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp @@ -26,6 +26,7 @@ bool JitEltwiseExecutor::isSupported( Algorithm::EltwiseEqual, Algorithm::EltwiseExp, Algorithm::EltwiseFloor, + Algorithm::EltwiseFloorMod, Algorithm::EltwiseCeiling, Algorithm::EltwiseGeluErf, Algorithm::EltwiseGeluTanh, diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp index 7ac3b603353541..9a1662e2c5dab5 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp @@ -648,6 +648,7 @@ std::shared_ptr jit_uni_eltwise_generic::create_eltwise_emitte OV_CASE(Algorithm::EltwiseEqual, ov::intel_cpu::aarch64::jit_equal_emitter), OV_CASE(Algorithm::EltwiseExp, ov::intel_cpu::aarch64::jit_exp_emitter), OV_CASE(Algorithm::EltwiseFloor, ov::intel_cpu::aarch64::jit_floor_emitter), + OV_CASE(Algorithm::EltwiseFloorMod, ov::intel_cpu::aarch64::jit_floor_mod_emitter), OV_CASE(Algorithm::EltwiseCeiling, ov::intel_cpu::aarch64::jit_ceiling_emitter), OV_CASE(Algorithm::EltwiseHswish, ov::intel_cpu::aarch64::jit_hswish_emitter), OV_CASE(Algorithm::EltwiseIsFinite, ov::intel_cpu::aarch64::jit_is_finite_emitter), @@ -830,6 +831,7 @@ std::set> eltwise_precision_helper::get_supported_pre 
OV_CASE(Algorithm::EltwiseEqual, jit_equal_emitter), OV_CASE(Algorithm::EltwiseExp, jit_exp_emitter), OV_CASE(Algorithm::EltwiseFloor, jit_floor_emitter), + OV_CASE(Algorithm::EltwiseFloorMod, jit_floor_mod_emitter), OV_CASE(Algorithm::EltwiseCeiling, jit_ceiling_emitter), OV_CASE(Algorithm::EltwiseGeluErf, jit_gelu_erf_emitter), OV_CASE(Algorithm::EltwiseGeluTanh, jit_gelu_tanh_emitter), diff --git a/src/plugins/intel_cpu/src/nodes/reduce.cpp b/src/plugins/intel_cpu/src/nodes/reduce.cpp index 7c18421ad3d832..1bc0209e0d9c69 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/reduce.cpp @@ -2020,6 +2020,7 @@ void Reduce::initSupportedPrimitiveDescriptors() { config.outConfs[0].setMemDesc(creatorsMap.at(outFormat)->createSharedDesc(outPrecision, getOutputShapeAtPort(0))); if (useAclExecutor) { +#if defined (OV_CPU_WITH_ACL) std::vector srcMemoryDescs; for (size_t i = 0; i < config.inConfs.size(); i++) { srcMemoryDescs.push_back(config.inConfs[i].getMemDesc()); @@ -2034,22 +2035,29 @@ void Reduce::initSupportedPrimitiveDescriptors() { if (!factory->isEmpty()) { supportedPrimitiveDescriptors.push_back({config, impl_type, factory}); } +#endif } else { supportedPrimitiveDescriptors.push_back({config, impl_type}); } }; #if defined (OV_CPU_WITH_ACL) - reduceAttrs.operation = algorithm; - reduceAttrs.keepDims = keep_dims; - reduceAttrs.axes = raw_axes; - for (auto &axis : reduceAttrs.axes) { - if (axis < 0) - axis += static_cast(getInputShapeAtPort(REDUCE_DATA).getRank()); + // acl doesn't support empty input + if (!isDynamicNode() && shape_size(getInputShapeAtPort(REDUCE_DATA).getStaticDims()) == 0) { + canUseAclExecutor = false; + } else { + reduceAttrs.operation = algorithm; + reduceAttrs.keepDims = keep_dims; + reduceAttrs.axes = raw_axes; + for (auto &axis : reduceAttrs.axes) { + if (axis < 0) + axis += static_cast(getInputShapeAtPort(REDUCE_DATA).getRank()); + } + pushDesc(LayoutType::nspc, LayoutType::nspc, input_prec, output_prec, impl_desc_type::undef, true); + pushDesc(LayoutType::ncsp, LayoutType::ncsp, input_prec, output_prec, impl_desc_type::undef, true); + canUseAclExecutor = !supportedPrimitiveDescriptors.empty(); } - pushDesc(LayoutType::nspc, LayoutType::nspc, input_prec, output_prec, impl_desc_type::undef, true); - pushDesc(LayoutType::ncsp, LayoutType::ncsp, input_prec, output_prec, impl_desc_type::undef, true); - canUseAclExecutor = !supportedPrimitiveDescriptors.empty(); + if (canUseAclExecutor) return; #endif @@ -2089,10 +2097,16 @@ void Reduce::initSupportedPrimitiveDescriptors() { } bool Reduce::isExecutable() const { - return !isInputTensorAtPortEmpty(REDUCE_DATA); + return !isOutputTensorAtPortEmpty(0); } void Reduce::prepareParams() { + auto srcMemPtr = getSrcMemoryAtPort(REDUCE_DATA); + auto dstMemPtr = getDstMemoryAtPort(0); + const auto& src_shape = srcMemPtr->getStaticDims(); + dst_size = dstMemPtr->getSize(); + empty_input = shape_size(src_shape) == 0; +#if defined (OV_CPU_WITH_ACL) if (canUseAclExecutor) { std::vector srcMemoryDescs; for (size_t i = 0; i < getParentEdges().size(); i++) { @@ -2102,11 +2116,15 @@ void Reduce::prepareParams() { dstMemoryDescs.push_back(getDstMemoryAtPort(0)->getDescPtr()); auto selectedPD = getSelectedPrimitiveDescriptor(); - aclExecPtr = selectedPD->getExecutorFactoryAs()->makeExecutor(reduceAttrs, srcMemoryDescs, dstMemoryDescs, {}); - selectedPD->setImplementationType(aclExecPtr->getImplType()); - + if (!empty_input) { + aclExecPtr = 
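/* (sketch) this branch runs only for non-empty inputs; ACL cannot reduce
   zero-sized tensors, so the else-branch below just records the acl impl
   type and execute() fills the output through init_dst_data() instead. */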
selectedPD->getExecutorFactoryAs()->makeExecutor(reduceAttrs, srcMemoryDescs, dstMemoryDescs, {}); + selectedPD->setImplementationType(aclExecPtr->getImplType()); + } else { + selectedPD->setImplementationType(acl); + } return; } +#endif src_dims = getParentEdgeAt(REDUCE_DATA)->getMemory().getDesc().getShape().getDims(); std::vector reduce_axes; @@ -2116,9 +2134,7 @@ void Reduce::prepareParams() { reduce_axes = raw_axes; } - auto dstMemPtr = getDstMemoryAtPort(0); const VectorDims &dst_dims = dstMemPtr->getDesc().getShape().getDims(); - dst_size = dstMemPtr->getSize(); calc_process_dst_dims(reduce_axes, dst_dims); if (jit_mode) { set_reduce_dim_flags(); @@ -2274,11 +2290,26 @@ void Reduce::execute(dnnl::stream strm) { const uint8_t *src_data = srcMemPtr->getDataAs(); uint8_t *dst_data = dstMemPtr->getDataAs(); + if (empty_input && dst_size > 0) { +#if defined(OPENVINO_ARCH_X86_64) + output_info_reassign(&dst_data); + init_dst_data(dst_data, dst_size); + output_info_restore(&dst_data); + if (attr.get()->post_ops_.len() != 0) { + reduce_kernel_post_process(dst_data); + } +#else + init_dst_data(dst_data, dst_size); +#endif + return; + } + if (jit_mode) { if (is_hybrid_layout) { dst_data = reinterpret_cast(prc_mem.get_data_handle()); } reduce_type(src_data, dst_data); +#if defined (OV_CPU_WITH_ACL) } else if (aclExecPtr) { std::vector srcMemory; for (size_t i = 0; i < getParentEdges().size(); i++) { @@ -2288,6 +2319,7 @@ void Reduce::execute(dnnl::stream strm) { dstMemory.push_back(getDstMemoryAtPort(0)); aclExecPtr->exec(srcMemory, dstMemory, postOpsDataPtrs.data()); +#endif } else { if (layout == ReduceLayoutType::reduce_ncsp) { auto in_ptr = reinterpret_cast(src_data); @@ -2725,7 +2757,7 @@ inline void Reduce::reduce_kernel_process(const uint8_t *in_p, uint8_t *out_p, s inline void Reduce::reduce_kernel_post_process(uint8_t *out_ptr) { const uint8_t *in_ptr = fuse_low_precision ? static_cast(&intermediate_buf[0]) : nullptr; - const size_t integerDivisor = IB * IC * ID * IH * IW / (OB * OC * OD * OH * OW); + const size_t integerDivisor = empty_input ? 
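/* (sketch) with an empty input IB*IC*ID*IH*IW is 0, so the divisor is
   pinned to 1: the output already holds identity values from
   init_dst_data(), and this only keeps the fused post-ops path from
   dividing by zero. */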
1 : IB * IC * ID * IH * IW / (OB * OC * OD * OH * OW); const float divisor = static_cast(integerDivisor); if (layout == ReduceLayoutType::reduce_ncsp) { parallel_for2d(OB, OC, [&](size_t ob, size_t oc) { diff --git a/src/plugins/intel_cpu/src/nodes/reduce.h b/src/plugins/intel_cpu/src/nodes/reduce.h index 2464686edb1ee4..c790cefb0583b0 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.h +++ b/src/plugins/intel_cpu/src/nodes/reduce.h @@ -152,6 +152,7 @@ class Reduce : public Node { bool ReduceCDW_opt = false; bool use_aux_kernel = false; bool set_use_aux_kernel = false; + bool empty_input = false; bool ReduceN, ReduceC, ReduceD, ReduceH, ReduceW; size_t IB, IC, ID, IH, IW; size_t OB, OC, OD, OH, OW; @@ -188,9 +189,11 @@ class Reduce : public Node { std::string errorPrefix; +#if defined (OV_CPU_WITH_ACL) ReduceAttrs reduceAttrs; bool canUseAclExecutor = false; std::shared_ptr aclExecPtr = nullptr; +#endif }; } // namespace node diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.cpp index 22692e644e6e56..ff5632cb0a5e8f 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/arm/pass/convert_reduce_multi_axis.cpp @@ -23,6 +23,9 @@ ov::matcher_pass_callback ov::intel_cpu::ConvertReduceMultiAxisBase::convert_red if (!reduction_axes) { return false; } + if (!reduce->is_dynamic() && ov::shape_size(input0.get_shape()) == 0) { + return false; + } if (ov::shape_size(input1.get_shape()) <= 1) { return false; } diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp index d7cfe80d22f617..1696f35fc1bc4a 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/eltwise.cpp @@ -259,6 +259,7 @@ std::string EltwiseLayerCPUTest::getPrimitiveType(const utils::EltwiseTypes& elt (eltwise_type == utils::EltwiseTypes::MULTIPLY) || (eltwise_type == utils::EltwiseTypes::SUBTRACT) || (eltwise_type == utils::EltwiseTypes::DIVIDE) || + (eltwise_type == utils::EltwiseTypes::FLOOR_MOD) || (eltwise_type == utils::EltwiseTypes::MOD)) { return "jit"; } @@ -317,6 +318,8 @@ const std::vector& eltwiseOpTypesBinInp() { utils::EltwiseTypes::SUBTRACT, // TODO: Fix CVS-105430 utils::EltwiseTypes::DIVIDE, // TODO: Fix CVS-105430 utils::EltwiseTypes::FLOOR_MOD, // TODO: Fix CVS-111875 +#elif defined(OPENVINO_ARCH_ARM64) + utils::EltwiseTypes::FLOOR_MOD, #endif utils::EltwiseTypes::SQUARED_DIFF, utils::EltwiseTypes::MOD, diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/reduce.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/reduce.cpp index 66b1d60932b262..b379655338aaf9 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/reduce.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/reduce.cpp @@ -243,6 +243,25 @@ const std::vector& reductionTypes() { return reductionTypes; } +const std::vector& reductionTypesArithmetic() { + static const std::vector reductionTypesArithmetic = { + ov::test::utils::ReductionType::Mean, + ov::test::utils::ReductionType::Sum, + ov::test::utils::ReductionType::Prod, + 
ov::test::utils::ReductionType::L1, + ov::test::utils::ReductionType::L2, + }; + return reductionTypesArithmetic; +} + +const std::vector& reductionTypesCompare() { + static const std::vector reductionTypesCompare = { + ov::test::utils::ReductionType::Max, + ov::test::utils::ReductionType::Min, + }; + return reductionTypesCompare; +} + const std::vector& inpOutPrc() { static const std::vector inpOutPrc = {ElementType::f32}; return inpOutPrc; diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/reduce.hpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/reduce.hpp index b3e5fff2ba0b7b..d45cce4b3f913e 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/reduce.hpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/reduce.hpp @@ -52,6 +52,8 @@ const std::vector>& axes(); const std::vector>& axesND(); const std::vector& opTypes(); const std::vector& reductionTypes(); +const std::vector& reductionTypesArithmetic(); +const std::vector& reductionTypesCompare(); const std::vector& inpOutPrc(); const std::vector> additionalConfig(); const std::vector> additionalConfigFP32(); diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/reduce.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/reduce.cpp index 6d1aa855c31865..45ecc774b5dbf9 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/reduce.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/reduce.cpp @@ -20,6 +20,11 @@ std::vector> inputShapes_5D = { {{{}, {{2, 19, 2, 2, 9}}}}, }; +std::vector> inputShapes_5D_ZeroDim = { + {{{}, {{2, 19, 0, 2, 9}}}}, + {{{}, {{2, 19, 0, 2, 0}}}}, +}; + const std::vector> axes5D = { {2, 4}, {1, 2, 4}, @@ -70,6 +75,20 @@ const auto params_MultiAxis_5D_ref = testing::Combine( testing::Values(emptyFusingSpec), testing::ValuesIn(config_infer_prec_f32)); +const auto params_MultiAxis_5D_ZeroDim_ref = testing::Combine( + testing::Combine( + testing::ValuesIn(axes5D), + testing::Values(ov::test::utils::OpType::VECTOR), + testing::Values(true), + testing::ValuesIn(reductionTypes()), + testing::ValuesIn(inpOutPrc()), + testing::Values(ElementType::undefined), + testing::Values(ElementType::undefined), + testing::ValuesIn(inputShapes_5D_ZeroDim)), + testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D_ref)), + testing::Values(emptyFusingSpec), + testing::ValuesIn(additionalConfigFP32())); + //There are dedicated instences of smoke_Reduce_MultiAxis_5D_CPU test in arm and x64 folders //because ACL does not support 0 as reduction axis INSTANTIATE_TEST_SUITE_P( @@ -87,6 +106,13 @@ INSTANTIATE_TEST_SUITE_P( ReduceCPULayerTest::getTestCaseName ); +INSTANTIATE_TEST_SUITE_P( + smoke_Reduce_MultiAxis_5D_ZeroDim_CPU_ref, + ReduceCPULayerTest, + params_MultiAxis_5D_ZeroDim_ref, + ReduceCPULayerTest::getTestCaseName +); + } // namespace } // namespace Reduce } // namespace test diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/reduce.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/reduce.cpp index 302e47fd45aa84..af4319d66a6efe 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/reduce.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/reduce.cpp @@ -53,6 +53,12 @@ std::vector> inputShapes_SingleBatch_dyn = { {{{{1, 
5}, 19, {1, 5}, {1, 10}}, {{1, 19, 2, 2}, {1, 19, 2, 9}}}}, }; +std::vector> inputShapes_Dynmic_ZeroDim = { + {{{-1, -1, -1, -1}, {{2, 0, 3, 9}}}}, + {{{2, 0, -1, -1}, {{2, 0, 3, 9}}}}, + {{{2, 0, -1, -1}, {{2, 0, 3, 0}}}} +}; + std::vector cpuParams_3D = { CPUSpecificParams({ncw}, {ncw}, {}, {}), }; @@ -99,6 +105,10 @@ const std::vector> axesGather = { {3} }; +const std::vector> axesZeroDimFusing = { + {1, 3}, +}; + std::vector cpuParams_5D = { CPUSpecificParams({nCdhw16c}, {nCdhw16c}, {}, {}), CPUSpecificParams({ndhwc}, {ndhwc}, {}, {}), @@ -144,6 +154,17 @@ const auto fusingFakeQuantizeTranspose = fusingSpecificParams{std::make_shared

fusingParamsFullSet { + emptyFusingSpec, + /* activations */ + fusingSwish, + /* FQ */ + fusingFakeQuantizePerChannelRelu, + fusingFakeQuantizePerTensorRelu, + /* another patterns */ + fusingScaleShift +}; + const std::vector fusingParamsSet { /* activations */ fusingSwish, @@ -600,6 +621,34 @@ const auto params_LowPrecision_fusing = testing::Combine( testing::ValuesIn(fusingParamsSet_LowPrecision), testing::ValuesIn(additionalConfig())); +const auto params_DimZero_Arithmetic_fusing = testing::Combine( + testing::Combine( + testing::ValuesIn(axesZeroDimFusing), + testing::Values(ov::test::utils::OpType::VECTOR), + testing::Values(true), + testing::ValuesIn(reductionTypesArithmetic()), + testing::ValuesIn(inpOutPrc()), + testing::Values(ElementType::undefined), + testing::Values(ElementType::undefined), + testing::ValuesIn(inputShapes_Dynmic_ZeroDim)), + testing::Values(emptyCPUSpec), + testing::ValuesIn(fusingParamsFullSet), + testing::ValuesIn(additionalConfig())); + +const auto params_DimZero_Compare_fusing = testing::Combine( + testing::Combine( + testing::ValuesIn(axesZeroDimFusing), + testing::Values(ov::test::utils::OpType::VECTOR), + testing::Values(true), + testing::ValuesIn(reductionTypesCompare()), + testing::ValuesIn(inpOutPrc()), + testing::Values(ElementType::undefined), + testing::Values(ElementType::undefined), + testing::ValuesIn(inputShapes_Dynmic_ZeroDim)), + testing::Values(emptyCPUSpec), + testing::ValuesIn(fusingParamsFullSet), + testing::ValuesIn(additionalConfigFP32())); + INSTANTIATE_TEST_SUITE_P( smoke_Reduce_OneAxis_fusing_CPU, ReduceCPULayerTest, @@ -635,6 +684,20 @@ INSTANTIATE_TEST_SUITE_P( ReduceCPULayerTest::getTestCaseName ); +INSTANTIATE_TEST_SUITE_P( + smoke_Reduce_DimZero_Arithmetic_fusing_CPU, + ReduceCPULayerTest, + params_DimZero_Arithmetic_fusing, + ReduceCPULayerTest::getTestCaseName +); + +INSTANTIATE_TEST_SUITE_P( + smoke_Reduce_DimZero_Compare_fusing_CPU, + ReduceCPULayerTest, + params_DimZero_Compare_fusing, + ReduceCPULayerTest::getTestCaseName +); + /* ================================ 2.2 Fusion - KeepNoDims ================================ */ const auto params_OneAxis_fusing_KeepNoDims = testing::Combine( testing::Combine( @@ -702,4 +765,4 @@ INSTANTIATE_TEST_SUITE_P( } // namespace } // namespace Reduce } // namespace test -} // namespace ov \ No newline at end of file +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp index 297d973a796dd0..9b7ae687e9c81d 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp @@ -29,6 +29,12 @@ const std::vector> input_shapes = { std::vector{3, 5, 7, 9}, }; +const std::vector> input_shapes_0_dim = { + std::vector{2, 0, 4, 1}, + std::vector{8, 0, 4, 0}, + std::vector{0, 0, 0, 0}, +}; + const std::vector> input_shapes_one_axis = { std::vector{10, 20, 30, 40}, std::vector{3, 5, 7, 9}, @@ -167,6 +173,16 @@ const auto params_reduction_types = testing::Combine( testing::Values(ov::test::utils::DEVICE_CPU) ); +const auto params_empty_input = testing::Combine( + testing::ValuesIn(axes), + testing::Values(op_types[1]), + testing::ValuesIn(keep_dims), + testing::ValuesIn(reduction_types), + testing::Values(model_types[0]), + testing::ValuesIn(input_shapes_0_dim), + 
testing::Values(ov::test::utils::DEVICE_CPU) +); + const auto params_reduction_types_logical = testing::Combine( testing::Values(std::vector{0, 1, 3}), testing::Values(op_types[1]), @@ -250,6 +266,13 @@ INSTANTIATE_TEST_SUITE_P( ReduceOpsLayerTest::getTestCaseName ); +INSTANTIATE_TEST_SUITE_P( + smoke_Reduce_ReductionTypes_EmptyTensor, + ReduceOpsLayerTest, + params_empty_input, + ReduceOpsLayerTest::getTestCaseName +); + INSTANTIATE_TEST_SUITE_P( smoke_ReduceLogical_ReductionTypes, ReduceOpsLayerTest, diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_runtime_skippable_nodes.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_runtime_skippable_nodes.cpp index 924503e26e0546..6fc98f5023d761 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_runtime_skippable_nodes.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/mark_runtime_skippable_nodes.cpp @@ -19,12 +19,9 @@ #include "scatter_nd_update_inst.h" #include "program_helpers.h" -#include - using namespace cldnn; void mark_runtime_skippable_nodes::run(program& p) { - std::unordered_map runtime_skippable_depth; auto itr = p.get_processing_order().begin(); while (itr != p.get_processing_order().end()) { @@ -55,24 +52,6 @@ void mark_runtime_skippable_nodes::run(program& p) { continue; } - // Check whether consecutive runtime skippable nodes is lower than max count. - // Too long consecutive runtime skippable nodes causes huge time consumption in add_memory_dependency() of basic_memory_dependencies pass. - // max count 7 is experimentally selected in specific model. - const uint8_t max_runtime_skippable_depth = 7; - uint8_t dep_runtime_skippable_depth = 0; - for (const auto& dep : node->get_dependencies()) { - if (dep.first->is_runtime_skippable() && - (runtime_skippable_depth.find(dep.first) != runtime_skippable_depth.end())) { - dep_runtime_skippable_depth = std::max(runtime_skippable_depth[dep.first], dep_runtime_skippable_depth); - } - } - if (!node->is_runtime_skippable() && (dep_runtime_skippable_depth >= max_runtime_skippable_depth)) { - GPU_DEBUG_TRACE_DETAIL << "[mark_runtime_skippable_nodes] : " << node->id() - << " doesn't have runtime skippable due to max_runtime_skippable_depth(" - << static_cast(max_runtime_skippable_depth) << ")." 
<< std::endl; - continue; - } - program_helpers::do_for_types(*node, [](gather_node& node) { // Check pattern auto impl_params = node.get_kernel_impl_params(); @@ -276,9 +255,5 @@ void mark_runtime_skippable_nodes::run(program& p) { GPU_DEBUG_TRACE_DETAIL << "[mark_runtime_skippable_nodes] : " << node.id() << " can_be_optimized" << std::endl; } }); - - if (node->is_runtime_skippable()) { - runtime_skippable_depth[node] = dep_runtime_skippable_depth + 1; - } } } diff --git a/src/plugins/intel_gpu/src/graph/strided_slice.cpp b/src/plugins/intel_gpu/src/graph/strided_slice.cpp index 9c33ec22b0b7c1..47248cd2a4d773 100644 --- a/src/plugins/intel_gpu/src/graph/strided_slice.cpp +++ b/src/plugins/intel_gpu/src/graph/strided_slice.cpp @@ -193,6 +193,10 @@ void strided_slice_inst::on_execute() { void strided_slice_inst::update_output_memory() { if (!can_be_optimized()) return; + + if (node->get_program().is_new_shape_infer() && input_memory_ptr() == nullptr) + return; + if (static_cast(_outputs[0]) && _network.get_engine().is_the_same_buffer(output_memory(), input_memory())) return; diff --git a/src/plugins/intel_npu/README.md b/src/plugins/intel_npu/README.md index 980faa71a15937..89aa29de9f1d15 100644 --- a/src/plugins/intel_npu/README.md +++ b/src/plugins/intel_npu/README.md @@ -176,6 +176,7 @@ The following properties are supported: | `ov::intel_npu::tiles`/
`NPU_TILES` | RW | Sets the number of NPU tiles to compile the model for | `[0-]` | `-1` | | `ov::intel_npu::max_tiles`/<br>
`NPU_MAX_TILES` | RW | Maximum number of tiles supported by the device we compile for. Can be set for offline compilation. If not set, it will be populated by the driver. | `[0-]` | `[1-6] depends on NPU platform` | | `ov::intel_npu::bypass_umd_caching`/<br>
`NPU_BYPASS_UMD_CACHING` | RW | Bypass the caching of compiled models in UMD. | `YES`/ `NO` | `NO` | +| `ov::intel_npu::defer_weights_load`/<br>
`NPU_DEFER_WEIGHTS_LOAD` | RW | Delay loading the weights until inference is created. | `YES`/ `NO`| `NO` |   ### Performance Hint: Default Number of DPU Groups / DMA Engines diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp index 8aabd132e9431a..15bbd69483bb57 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp @@ -305,13 +305,6 @@ static constexpr ov::Property batch_mode{"NPU_BATCH_MODE"}; */ static constexpr ov::Property create_executor{"NPU_CREATE_EXECUTOR"}; -/** - * @brief [Only for NPU Plugin] - * Type: boolean, default is false - * This option allows to omit loading the weights until inference is created - */ -static constexpr ov::Property defer_weights_load{"NPU_DEFER_WEIGHTS_LOAD"}; - /** * @brief Read-only property to get the name of used backend */ diff --git a/src/plugins/intel_npu/src/plugin/include/compiled_model.hpp b/src/plugins/intel_npu/src/plugin/include/compiled_model.hpp index caaec951a22929..142bc15f606e68 100644 --- a/src/plugins/intel_npu/src/plugin/include/compiled_model.hpp +++ b/src/plugins/intel_npu/src/plugin/include/compiled_model.hpp @@ -61,7 +61,6 @@ class CompiledModel final : public ICompiledModel { void configure_stream_executors(); - const std::shared_ptr _model; Config _config; Logger _logger; const std::shared_ptr _device; diff --git a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp index 4e86d32d2f72b1..506502c819d72e 100644 --- a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp @@ -30,7 +30,6 @@ CompiledModel::CompiledModel(const std::shared_ptr& model, const std::shared_ptr& graph, const Config& config) : ICompiledModel(model, plugin), - _model(model), _config(config), _logger("CompiledModel", config.get()), _device(device), @@ -77,7 +76,7 @@ void CompiledModel::export_model(std::ostream& stream) const { } std::shared_ptr CompiledModel::get_runtime_model() const { - return _model; + OPENVINO_NOT_IMPLEMENTED; } void CompiledModel::set_property(const ov::AnyMap& properties) { diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 18a96bff02fb80..a57628c2e45510 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -489,6 +489,12 @@ Plugin::Plugin() [](const Config& config) { return config.get(); }}}, + {ov::intel_npu::defer_weights_load.name(), + {true, + ov::PropertyMutability::RW, + [](const Config& config) { + return config.get(); + }}}, // NPU Private // ========= {ov::intel_npu::dma_engines.name(), @@ -544,12 +550,6 @@ Plugin::Plugin() [](const Config& config) { return config.get(); }}}, - {ov::intel_npu::defer_weights_load.name(), - {false, - ov::PropertyMutability::RW, - [](const Config& config) { - return config.get(); - }}}, {ov::intel_npu::dynamic_shape_to_static.name(), {false, ov::PropertyMutability::RW, diff --git a/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 7c7da4a9e23658..d62747be75c32d 100644 --- a/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ 
b/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -760,6 +760,11 @@ std::vector disabledTestPatterns() { ".*OVExecGraphSerializationTest.ExecutionGraph.*" }); + // get_runtime_model method is not supported on NPU + _skipRegistry.addPatterns("get_runtime_model method is not supported on NPU", { + ".*OVClassModelOptionalTestP.CompileModelCreateDefaultExecGraphResult.*", + }); + return _skipRegistry; }(); // clang-format on diff --git a/src/plugins/intel_npu/tools/protopipe/README.md b/src/plugins/intel_npu/tools/protopipe/README.md index 807bd744851b70..00849ad8bddc9a 100644 --- a/src/plugins/intel_npu/tools/protopipe/README.md +++ b/src/plugins/intel_npu/tools/protopipe/README.md @@ -60,7 +60,6 @@ log_level: INFO - `ol` - **Optional**. Output layer layout. - `iml` - **Optional**. Input model layout. - `oml` - **Optional**. Output model layout. -- `reshape` - **Optional**. Set shape for input layers. For example, "input1: [1,3,224,224], input2: [1,4]" or "[1,3,224,224]" in case of one input layer. Examples: ``` diff --git a/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp b/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp index b9f03a97ba3f69..c2a1bd6415d595 100644 --- a/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp +++ b/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp @@ -345,10 +345,6 @@ struct convert { params.output_model_layout = node["oml"].as>(); } - if (node["reshape"]) { - params.reshape = node["reshape"].as>> (); - } - if (node["config"]) { params.config = node["config"].as>(); } diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp index f9c8877b05c53e..e4568c671438bc 100644 --- a/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp @@ -91,7 +91,6 @@ struct OpenVINOParams { LayerVariantAttr output_layout; LayerVariantAttr input_model_layout; LayerVariantAttr output_model_layout; - LayerVariantAttr> reshape; std::map config; size_t nireq = 1u; }; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp index 33e01e36404570..57527cef0cc4aa 100644 --- a/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp @@ -128,15 +128,6 @@ static void cfgOutputPostproc(ov::preprocess::PrePostProcessor& ppp, const std:: } } -static void cfgReshape(const std::shared_ptr& model, - const AttrMap> reshape_map) { - std::map partial_shapes; - for (const auto& [layer_name, shape] : reshape_map) { - partial_shapes.emplace(layer_name, shape); - } - model->reshape(partial_shapes); -} - static std::vector extractLayerNames(const std::vector>& nodes) { std::vector names; std::transform(nodes.begin(), nodes.end(), std::back_inserter(names), [](const auto& node) { @@ -157,9 +148,6 @@ InOutLayers OpenVINOLayersReader::Impl::readFromModel(const std::string& model_p const auto iml_map = unpackLayerAttr(params.input_model_layout, input_names, "input model layout"); cfgInputPreproc(ppp, model, ip_map, il_map, iml_map); - const auto reshape_map = unpackLayerAttr(params.reshape, input_names, "reshape"); - cfgReshape(model, reshape_map); - const auto& output_names = extractLayerNames(model->outputs()); const auto op_map = 
unpackLayerAttr(params.output_precision, output_names, "output precision"); const auto ol_map = unpackLayerAttr(params.output_layout, output_names, "output layout"); diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp index 11336c77a477e9..5b1743651b6ef1 100644 --- a/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp @@ -22,11 +22,6 @@ static cv::gapi::GNetPackage getNetPackage(const std::string& tag, const OpenVIN const auto& blob_path = std::get(params.path); network = std::make_unique

(tag, blob_path.blob, params.device); } - if (std::holds_alternative>>(params.reshape)) { - network->cfgReshape(std::get>>(params.reshape)); - } else { - network->cfgReshape(std::get>(params.reshape)); - } network->cfgPluginConfig(params.config); network->cfgNumRequests(params.nireq); diff --git a/src/plugins/template/tests/functional/op_reference/irdft.cpp b/src/plugins/template/tests/functional/op_reference/irdft.cpp index 36826ade32fb59..4bcf9fbbe27c18 100644 --- a/src/plugins/template/tests/functional/op_reference/irdft.cpp +++ b/src/plugins/template/tests/functional/op_reference/irdft.cpp @@ -29,8 +29,8 @@ struct IRDFTParams { m_expected_shape = expected_shape; m_input_type = input_type; m_expected_type = expected_type; - m_input_value = CreateTensor(input_type, input_value); - m_expected_value = CreateTensor(expected_type, expected_value); + m_input_value = CreateTensor(m_input_shape, input_type, input_value); + m_expected_value = CreateTensor(m_expected_shape, expected_type, expected_value); m_axes = axes; m_signal = signal; } @@ -48,8 +48,7 @@ struct IRDFTParams { class ReferenceIRDFTLayerTest : public testing::TestWithParam, public CommonReferenceTest { public: void SetUp() override { - legacy_compare = true; - auto params = GetParam(); + const auto& params = GetParam(); if (params.m_signal != NULL) { function = CreateFunctionWithSignal(params); } else { @@ -58,10 +57,12 @@ class ReferenceIRDFTLayerTest : public testing::TestWithParam, publ inputData = {params.m_input_value}; refOutData = {params.m_expected_value}; + + abs_threshold = 1e-6f; } static std::string getTestCaseName(const testing::TestParamInfo& obj) { - const auto param = obj.param; + const auto& param = obj.param; std::ostringstream result; result << "input_shape1=" << param.m_input_shape << "; "; @@ -74,14 +75,14 @@ class ReferenceIRDFTLayerTest : public testing::TestWithParam, publ } private: - static std::shared_ptr CreateFunction(IRDFTParams& p) { + static std::shared_ptr CreateFunction(const IRDFTParams& p) { auto in = std::make_shared(p.m_input_type, p.m_input_shape); auto irdft = std::make_shared(in, p.m_axes); return std::make_shared(irdft, ParameterVector{in}); } - static std::shared_ptr CreateFunctionWithSignal(IRDFTParams& p) { + static std::shared_ptr CreateFunctionWithSignal(const IRDFTParams& p) { auto in = std::make_shared(p.m_input_type, p.m_input_shape); auto irdft = std::make_shared(in, p.m_axes, p.m_signal); diff --git a/src/plugins/template/tests/functional/op_reference/reduce_l1.cpp b/src/plugins/template/tests/functional/op_reference/reduce_l1.cpp index 6e0c2fe2aa24e0..e47295f247b35f 100644 --- a/src/plugins/template/tests/functional/op_reference/reduce_l1.cpp +++ b/src/plugins/template/tests/functional/op_reference/reduce_l1.cpp @@ -25,6 +25,28 @@ std::vector generateReductionParams(const bool keep_dims) { reference_tests::Tensor(reduce(Shape{3, 2, 2}, AxisSet{2}, keep_dims), element::Type(IN_ET), std::vector{3, 7, 11, 15, 19, 23}))}; + auto out_shape_from_empty = Shape{2, 1, 1}; + if (keep_dims == false) { + out_shape_from_empty = Shape{2}; + } + params.push_back( + ReductionParams(ReductionType::L1, + keep_dims, + std::vector{1, 2}, + reference_tests::Tensor({2, 0, 4}, element::Type(IN_ET), std::vector{}), + reference_tests::Tensor(out_shape_from_empty, element::Type(IN_ET), std::vector{0, 0}))); + + out_shape_from_empty = Shape{2, 0, 1}; + if (keep_dims == false) { + out_shape_from_empty = Shape{2, 0}; + } + params.push_back( + ReductionParams(ReductionType::L1, + keep_dims, + 
diff --git a/src/plugins/template/tests/functional/op_reference/reduce_l1.cpp b/src/plugins/template/tests/functional/op_reference/reduce_l1.cpp
index 6e0c2fe2aa24e0..e47295f247b35f 100644
--- a/src/plugins/template/tests/functional/op_reference/reduce_l1.cpp
+++ b/src/plugins/template/tests/functional/op_reference/reduce_l1.cpp
@@ -25,6 +25,28 @@ std::vector<ReductionParams> generateReductionParams(const bool keep_dims) {
                              reference_tests::Tensor(reduce(Shape{3, 2, 2}, AxisSet{2}, keep_dims),
                                                      element::Type(IN_ET),
                                                      std::vector<T>{3, 7, 11, 15, 19, 23}))};
+    auto out_shape_from_empty = Shape{2, 1, 1};
+    if (keep_dims == false) {
+        out_shape_from_empty = Shape{2};
+    }
+    params.push_back(
+        ReductionParams(ReductionType::L1,
+                        keep_dims,
+                        std::vector<int64_t>{1, 2},
+                        reference_tests::Tensor({2, 0, 4}, element::Type(IN_ET), std::vector<T>{}),
+                        reference_tests::Tensor(out_shape_from_empty, element::Type(IN_ET), std::vector<T>{0, 0})));
+
+    out_shape_from_empty = Shape{2, 0, 1};
+    if (keep_dims == false) {
+        out_shape_from_empty = Shape{2, 0};
+    }
+    params.push_back(
+        ReductionParams(ReductionType::L1,
+                        keep_dims,
+                        std::vector<int64_t>{2},
+                        reference_tests::Tensor({2, 0, 4}, element::Type(IN_ET), std::vector<T>{}),
+                        reference_tests::Tensor(out_shape_from_empty, element::Type(IN_ET), std::vector<T>{0, 0})));
+
     return params;
 }
diff --git a/src/plugins/template/tests/functional/op_reference/reduce_l2.cpp b/src/plugins/template/tests/functional/op_reference/reduce_l2.cpp
index 565f89d58f7238..b5820f6970ae5b 100644
--- a/src/plugins/template/tests/functional/op_reference/reduce_l2.cpp
+++ b/src/plugins/template/tests/functional/op_reference/reduce_l2.cpp
@@ -29,6 +29,27 @@ std::vector<ReductionParams> generateReductionParams(const bool keep_dims) {
                              reference_tests::Tensor(reduce(Shape{3, 2, 2}, AxisSet{2}, keep_dims),
                                                      element::Type(IN_ET),
                                                      std::vector<T>{2.23606798, 5.0, 7.81024968, 10.63014581, 13.45362405, 16.2788206}))};
+    auto out_shape_from_empty = Shape{2, 1, 1};
+    if (keep_dims == false) {
+        out_shape_from_empty = Shape{2};
+    }
+    params.push_back(
+        ReductionParams(ReductionType::L2,
+                        keep_dims,
+                        std::vector<int64_t>{1, 2},
+                        reference_tests::Tensor({2, 0, 4}, element::Type(IN_ET), std::vector<T>{}),
+                        reference_tests::Tensor(out_shape_from_empty, element::Type(IN_ET), std::vector<T>{0, 0})));
+
+    out_shape_from_empty = Shape{2, 0, 1};
+    if (keep_dims == false) {
+        out_shape_from_empty = Shape{2, 0};
+    }
+    params.push_back(
+        ReductionParams(ReductionType::L2,
+                        keep_dims,
+                        std::vector<int64_t>{2},
+                        reference_tests::Tensor({2, 0, 4}, element::Type(IN_ET), std::vector<T>{}),
+                        reference_tests::Tensor(out_shape_from_empty, element::Type(IN_ET), std::vector<T>{})));
     return params;
 }
@@ -45,6 +66,7 @@ std::vector<ReductionParams> generateReductionParams(const bool keep_dims) {
                              reference_tests::Tensor(reduce(Shape{3, 2, 2}, AxisSet{2}, keep_dims),
                                                      element::Type(IN_ET),
                                                      std::vector<T>{2, 5, 8, 11, 13, 16}))};
+
     return params;
 }
diff --git a/src/plugins/template/tests/functional/op_reference/reduce_max.cpp b/src/plugins/template/tests/functional/op_reference/reduce_max.cpp
index 0674595de4ec43..7ab89fc16d1900 100644
--- a/src/plugins/template/tests/functional/op_reference/reduce_max.cpp
+++ b/src/plugins/template/tests/functional/op_reference/reduce_max.cpp
@@ -79,6 +79,29 @@ std::vector<ReductionParams> generateReductionParams(const bool keep_dims) {
                              reference_tests::Tensor(reduce(Shape{3, 3, 3}, AxisSet{0, 1, 2}, keep_dims),
                                                      element::Type(IN_ET),
                                                      std::vector<T>{27}))};
+    auto out_shape_from_empty = Shape{2, 1, 1};
+    if (keep_dims == false) {
+        out_shape_from_empty = Shape{2};
+    }
+    const auto default_val = std::numeric_limits<T>::lowest();
+    params.push_back(ReductionParams(
+        ReductionType::Max,
+        keep_dims,
+        std::vector<int64_t>{1, 2},
+        reference_tests::Tensor({2, 0, 4}, element::Type(IN_ET), std::vector<T>{}),
+        reference_tests::Tensor(out_shape_from_empty, element::Type(IN_ET), std::vector<T>{default_val, default_val})));
+
+    out_shape_from_empty = Shape{2, 0, 1};
+    if (keep_dims == false) {
+        out_shape_from_empty = Shape{2, 0};
+    }
+    params.push_back(
+        ReductionParams(ReductionType::Max,
+                        keep_dims,
+                        std::vector<int64_t>{2},
+                        reference_tests::Tensor({2, 0, 4}, element::Type(IN_ET), std::vector<T>{}),
+                        reference_tests::Tensor(out_shape_from_empty, element::Type(IN_ET), std::vector<T>{})));
+
     return params;
 }
diff --git a/src/plugins/template/tests/functional/op_reference/reduce_mean.cpp b/src/plugins/template/tests/functional/op_reference/reduce_mean.cpp
index def9d837b46df6..07159de9704a30 100644
--- a/src/plugins/template/tests/functional/op_reference/reduce_mean.cpp
+++ b/src/plugins/template/tests/functional/op_reference/reduce_mean.cpp
@@ -41,6 +41,28 @@ std::vector<ReductionParams> generateReductionParams(const bool keep_dims) {
                              reference_tests::Tensor(reduce(Shape{3, 2}, AxisSet{1}, keep_dims),
                                                      element::Type(IN_ET),
                                                      std::vector<T>{1.5, 3.5, 5.5}))};
+    auto out_shape_from_empty = Shape{2, 1, 1};
+    if (keep_dims == false) {
+        out_shape_from_empty = Shape{2};
+    }
+    params.push_back(
+        ReductionParams(ReductionType::Mean,
+                        keep_dims,
+                        std::vector<int64_t>{1, 2},
+                        reference_tests::Tensor({2, 0, 4}, element::Type(IN_ET), std::vector<T>{}),
+                        reference_tests::Tensor(out_shape_from_empty, element::Type(IN_ET), std::vector<T>{0, 0})));
+
+    out_shape_from_empty = Shape{2, 0, 1};
+    if (keep_dims == false) {
+        out_shape_from_empty = Shape{2, 0};
+    }
+    params.push_back(
+        ReductionParams(ReductionType::Mean,
+                        keep_dims,
+                        std::vector<int64_t>{2},
+                        reference_tests::Tensor({2, 0, 4}, element::Type(IN_ET), std::vector<T>{}),
+                        reference_tests::Tensor(out_shape_from_empty, element::Type(IN_ET), std::vector<T>{})));
+
     return params;
 }
diff --git a/src/plugins/template/tests/functional/op_reference/reduce_min.cpp b/src/plugins/template/tests/functional/op_reference/reduce_min.cpp
index abc9dca157684b..f982af07ab12a5 100644
--- a/src/plugins/template/tests/functional/op_reference/reduce_min.cpp
+++ b/src/plugins/template/tests/functional/op_reference/reduce_min.cpp
@@ -79,6 +79,29 @@ std::vector<ReductionParams> generateReductionParams(const bool keep_dims) {
                              reference_tests::Tensor(reduce(Shape{3, 3, 3}, AxisSet{0, 1, 2}, keep_dims),
                                                      element::Type(IN_ET),
                                                      std::vector<T>{1}))};
+    auto out_shape_from_empty = Shape{2, 1, 1};
+    if (keep_dims == false) {
+        out_shape_from_empty = Shape{2};
+    }
+    constexpr auto max_value =
+        std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity() : std::numeric_limits<T>::max();
+    params.push_back(ReductionParams(
+        ReductionType::Min,
+        keep_dims,
+        std::vector<int64_t>{1, 2},
+        reference_tests::Tensor({2, 0, 4}, element::Type(IN_ET), std::vector<T>{}),
+        reference_tests::Tensor(out_shape_from_empty, element::Type(IN_ET), std::vector<T>{max_value, max_value})));
+
+    out_shape_from_empty = Shape{2, 0, 1};
+    if (keep_dims == false) {
+        out_shape_from_empty = Shape{2, 0};
+    }
+    params.push_back(
+        ReductionParams(ReductionType::Min,
+                        keep_dims,
+                        std::vector<int64_t>{2},
+                        reference_tests::Tensor({2, 0, 4}, element::Type(IN_ET), std::vector<T>{}),
+                        reference_tests::Tensor(out_shape_from_empty, element::Type(IN_ET), std::vector<T>{})));
     return params;
 }
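All of the reduce_* additions above share one pattern: the input {2, 0, 4} has a zero-sized dimension, so reducing over it yields each operation's identity element — 0 for Sum, L1, L2, and Mean (as these tests expect), 1 for Prod, numeric_limits<T>::lowest() for Max, and infinity (or max(), for types without an infinity) for Min. A minimal self-contained sketch, assuming a reduction is a left fold seeded with the identity so an empty slice returns the seed unchanged:

    // Sketch: folding an empty range leaves the identity seed untouched,
    // which is exactly the value the new reduce_* test cases expect.
    #include <cassert>
    #include <functional>
    #include <limits>
    #include <numeric>
    #include <vector>

    template <typename T, typename Fold>
    T reduce_slice(const std::vector<T>& slice, T identity, Fold fold) {
        return std::accumulate(slice.begin(), slice.end(), identity, fold);
    }

    int main() {
        const std::vector<float> empty{};  // models one zero-sized reduction slice of {2, 0, 4}
        assert(reduce_slice(empty, 0.0f, std::plus<float>{}) == 0.0f);        // Sum / L1
        assert(reduce_slice(empty, 1.0f, std::multiplies<float>{}) == 1.0f);  // Prod
        const float max_seed = std::numeric_limits<float>::lowest();          // Max
        assert(reduce_slice(empty, max_seed, [](float a, float b) { return a > b ? a : b; }) == max_seed);
        const float min_seed = std::numeric_limits<float>::infinity();        // Min (float has infinity)
        assert(reduce_slice(empty, min_seed, [](float a, float b) { return a < b ? a : b; }) == min_seed);
        return 0;
    }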
diff --git a/src/plugins/template/tests/functional/op_reference/reduce_prod.cpp b/src/plugins/template/tests/functional/op_reference/reduce_prod.cpp
index d030633932fd73..54e39dad68826f 100644
--- a/src/plugins/template/tests/functional/op_reference/reduce_prod.cpp
+++ b/src/plugins/template/tests/functional/op_reference/reduce_prod.cpp
@@ -74,6 +74,28 @@ std::vector<ReductionParams> generateReductionParams(const bool keep_dims) {
                                                                     19 * 20 * 21,
                                                                     22 * 23 * 24,
                                                                     25 * 26 * 27}))};
+    auto out_shape_from_empty = Shape{2, 1, 1};
+    if (keep_dims == false) {
+        out_shape_from_empty = Shape{2};
+    }
+    const T default_val = T{1};
+    params.push_back(ReductionParams(
+        ReductionType::Prod,
+        keep_dims,
+        std::vector<int64_t>{1, 2},
+        reference_tests::Tensor({2, 0, 4}, element::Type(IN_ET), std::vector<T>{}),
+        reference_tests::Tensor(out_shape_from_empty, element::Type(IN_ET), std::vector<T>{default_val, default_val})));
+
+    out_shape_from_empty = Shape{2, 0, 1};
+    if (keep_dims == false) {
+        out_shape_from_empty = Shape{2, 0};
+    }
+    params.push_back(
+        ReductionParams(ReductionType::Prod,
+                        keep_dims,
+                        std::vector<int64_t>{2},
+                        reference_tests::Tensor({2, 0, 4}, element::Type(IN_ET), std::vector<T>{}),
+                        reference_tests::Tensor(out_shape_from_empty, element::Type(IN_ET), std::vector<T>{})));
     return params;
 }
diff --git a/src/plugins/template/tests/functional/op_reference/reduce_sum.cpp b/src/plugins/template/tests/functional/op_reference/reduce_sum.cpp
index ab77acc3cf696a..dd8dcd38635c79 100644
--- a/src/plugins/template/tests/functional/op_reference/reduce_sum.cpp
+++ b/src/plugins/template/tests/functional/op_reference/reduce_sum.cpp
@@ -121,6 +121,28 @@ std::vector<ReductionParams> generateReductionParams(const bool keep_dims) {
                              reference_tests::Tensor(reduce(Shape{3, 3, 3, 3, 3}, AxisSet{0, 1, 2, 3, 4}, keep_dims),
                                                      element::Type(IN_ET),
                                                      std::vector<T>{243}))};
+    auto out_shape_from_empty = Shape{2, 1, 1};
+    if (keep_dims == false) {
+        out_shape_from_empty = Shape{2};
+    }
+    params.push_back(
+        ReductionParams(ReductionType::Sum,
+                        keep_dims,
+                        std::vector<int64_t>{1, 2},
+                        reference_tests::Tensor({2, 0, 4}, element::Type(IN_ET), std::vector<T>{}),
+                        reference_tests::Tensor(out_shape_from_empty, element::Type(IN_ET), std::vector<T>{0, 0})));
+
+    out_shape_from_empty = Shape{2, 0, 1};
+    if (keep_dims == false) {
+        out_shape_from_empty = Shape{2, 0};
+    }
+    params.push_back(
+        ReductionParams(ReductionType::Sum,
+                        keep_dims,
+                        std::vector<int64_t>{2},
+                        reference_tests::Tensor({2, 0, 4}, element::Type(IN_ET), std::vector<T>{}),
+                        reference_tests::Tensor(out_shape_from_empty, element::Type(IN_ET), std::vector<T>{})));
+
     return params;
 }
diff --git a/tests/requirements_onnx b/tests/requirements_onnx
index 1dfc0077b5d075..9772bbf1ea0d52 100644
--- a/tests/requirements_onnx
+++ b/tests/requirements_onnx
@@ -1,3 +1,3 @@
-numpy>=1.16.6,<1.27
+numpy>=1.16.6,<2.2.0
 onnx>=1.8.1,<=1.17.0
 protobuf>=3.18.1,<4.0.0
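For reference, every out_shape_from_empty in the hunks above follows the standard reduce output-shape rule: a reduced axis becomes 1 when keep_dims is set and is dropped otherwise, so {2, 0, 4} reduced over axes {1, 2} gives {2, 1, 1} or {2}, while reducing only axis {2} keeps the zero-sized dimension and gives {2, 0, 1} or {2, 0}. A hypothetical helper (not code from this patch) that reproduces the rule:

    // Hypothetical helper, for illustration only: derive a reduction's
    // output shape from the input shape, the reduced axes, and keep_dims.
    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    std::vector<std::size_t> reduced_shape(const std::vector<std::size_t>& shape,
                                           const std::vector<std::int64_t>& axes,
                                           bool keep_dims) {
        std::vector<std::size_t> out;
        for (std::size_t i = 0; i < shape.size(); ++i) {
            const bool is_reduced =
                std::find(axes.begin(), axes.end(), static_cast<std::int64_t>(i)) != axes.end();
            if (!is_reduced)
                out.push_back(shape[i]);  // untouched dimension is kept as-is
            else if (keep_dims)
                out.push_back(1);         // reduced dimension collapses to 1
            // else: reduced dimension is dropped entirely
        }
        return out;
    }
    // reduced_shape({2, 0, 4}, {1, 2}, true)  -> {2, 1, 1}
    // reduced_shape({2, 0, 4}, {1, 2}, false) -> {2}
    // reduced_shape({2, 0, 4}, {2},    true)  -> {2, 0, 1}
    // reduced_shape({2, 0, 4}, {2},    false) -> {2, 0}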