From 2a07f32d5614f5b763d4a7ae60f3198b40d7aac3 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Wed, 23 Nov 2022 17:51:36 +0400 Subject: [PATCH] [Snippets] Added Select support [Snippets] Added Broadcast support --- .../snippets/pass/insert_movebroadcast.hpp | 12 ++ .../snippets/include/snippets/utils.hpp | 10 ++ src/common/snippets/src/op/subgraph.cpp | 20 +-- .../snippets/src/pass/align_element_type.cpp | 14 ++- .../snippets/src/pass/collapse_subgraph.cpp | 44 +++++-- .../src/pass/common_optimizations.cpp | 3 +- .../src/pass/insert_movebroadcast.cpp | 23 ++++ .../include/pass/insert_movebroadcast.hpp | 14 +++ .../tests/src/pass/insert_movebroadcast.cpp | 44 +++++++ .../intel_cpu/src/emitters/cpu_generator.cpp | 3 + .../src/emitters/jit_eltwise_emitters.cpp | 52 ++++++++ .../src/emitters/jit_eltwise_emitters.hpp | 19 +++ .../snippets_mark_skipped.cpp | 3 +- src/plugins/intel_cpu/src/plugin.cpp | 4 +- .../snippets/select.cpp | 42 +++++++ .../plugin/shared/include/snippets/select.hpp | 59 +++++++++ .../plugin/shared/src/snippets/select.cpp | 114 ++++++++++++++++++ .../include/subgraph_lowered.hpp | 9 ++ .../include/subgraph_simple.hpp | 51 ++++++++ .../src/subgraph_lowered.cpp | 35 ++++++ .../src/subgraph_simple.cpp | 41 +++++++ 21 files changed, 586 insertions(+), 30 deletions(-) create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/select.cpp create mode 100644 src/tests/functional/plugin/shared/include/snippets/select.hpp create mode 100644 src/tests/functional/plugin/shared/src/snippets/select.cpp diff --git a/src/common/snippets/include/snippets/pass/insert_movebroadcast.hpp b/src/common/snippets/include/snippets/pass/insert_movebroadcast.hpp index 87c69469735740..04d371d90f6e42 100644 --- a/src/common/snippets/include/snippets/pass/insert_movebroadcast.hpp +++ b/src/common/snippets/include/snippets/pass/insert_movebroadcast.hpp @@ -22,6 +22,18 @@ class InsertMoveBroadcast: public ngraph::pass::MatcherPass { 
InsertMoveBroadcast(); }; +/** + * @interface BroadcastToMoveBroadcast + * @brief Inserts explicit MoveBroadcast instruction if broadcasting by most varying dimension is needed instead of Broadcast, + * otherwise pass removes Brodcast operation. + * The pass is used to convert model to a canonical form for code generation + * @ingroup snippets + */ +class BroadcastToMoveBroadcast: public ngraph::pass::MatcherPass { +public: + BroadcastToMoveBroadcast(); +}; + } // namespace pass } // namespace snippets } // namespace ngraph \ No newline at end of file diff --git a/src/common/snippets/include/snippets/utils.hpp b/src/common/snippets/include/snippets/utils.hpp index 770722501eb674..58f95e9e0f4281 100644 --- a/src/common/snippets/include/snippets/utils.hpp +++ b/src/common/snippets/include/snippets/utils.hpp @@ -32,6 +32,16 @@ std::vector get_node_output_layout(const Node* node); inline ov::Dimension get_inner_dim(const ov::PartialShape &shape) { return *(shape.rbegin()); } inline ov::Dimension get_outer_dim(const ov::PartialShape &shape) { return *(shape.rbegin() + 1); } +// Non-scalar Constants are tokenized as Parameters inside Subgraph body but some of the operations which Constant inputs +// should have explicit Constants even if they're non-scalar (Reshape, Transpose, Broadcast) +// This check returns True if Constant op of this op should be inside Subgraph body +inline auto constant_input_should_be_inside_body(const std::shared_ptr& node) -> bool { + return ov::is_type(node) || + ov::is_type(node) || + ov::is_type(node) || + ov::is_type(node); +} + } // namespace utils } // namespace snippets } // namespace ngraph \ No newline at end of file diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index 82f73836b6c2c5..1f12478b0b6443 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -156,9 +156,7 @@ auto snippets::op::Subgraph::wrap_node_as_subgraph(const 
std::shared_ptrinput_values()) { if (ov::is_type(input.get_node_shared_ptr()) && - (ngraph::shape_size(input.get_shape()) == 1 || - ov::is_type(node) || - ov::is_type(node))) { + (ngraph::shape_size(input.get_shape()) == 1 || utils::constant_input_should_be_inside_body(node))) { body_inputs.push_back(input); } else { auto parameter = std::make_shared(input.get_element_type(), input.get_partial_shape()); @@ -382,11 +380,13 @@ void snippets::op::Subgraph::align_element_types(const BlockedShapeVector& outpu // - Insert Convert before operations that doesn't support original element type for execution // - Insert reverse Convert before operations that support original element type // but have inputs that doesn't support it (because before them will be inserted Convert with exec_type - first point) - // Then we should use ConstantFolding pass to convert element type of Scalars before inference. + // - Then we should use ConstantFolding pass to convert element type of Scalars before inference. 
+ // - Eliminate redundant Converts which can be inserted in AlignElementType() pass ngraph::pass::Manager manager; if (config.m_is_needed_to_align_precision) { manager.register_pass(execution_element_type); manager.register_pass(); + manager.register_pass(); } manager.run_passes(m_body); } @@ -415,6 +415,7 @@ void snippets::op::Subgraph::convert_to_snippet_dialect() { manager.register_pass(tileRank); manager.register_pass(count, tileRank); manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(count); @@ -505,13 +506,14 @@ snippets::Schedule snippets::op::Subgraph::generate(ngraph::pass::Manager& opt, // check that body doesn't have constants for scheduling std::vector> constants; for (auto op : m_body->get_ordered_ops()) { - if (auto constant = ov::as_type_ptr(op)) { - if (ngraph::shape_size(constant->get_shape()) != 1 && constant->get_shape() != Shape()) { - constants.push_back(constant); - } + if ((ov::is_type(op) && ov::shape_size(op->get_shape()) != 1 && op->get_shape() != Shape()) || + ov::is_type(op) || + ov::is_type(op) || + ov::is_type(op) || + ov::is_type(op)) { + throw ngraph::ngraph_error("External op detected: " + std::string(op->get_type_name()) + ". Snippet is illigal for scheduling"); } } - NGRAPH_CHECK(!constants.size(), "External constants detected. 
Snippet is illigal for scheduling"); return {master_shape, false /*canBeLinearized*/, ptr}; } diff --git a/src/common/snippets/src/pass/align_element_type.cpp b/src/common/snippets/src/pass/align_element_type.cpp index fa45c0b5754eba..ce657cc411158f 100644 --- a/src/common/snippets/src/pass/align_element_type.cpp +++ b/src/common/snippets/src/pass/align_element_type.cpp @@ -20,15 +20,16 @@ inline auto is_in_op(const std::shared_ptr& n) -> bool { || ov::is_type(n); } -// At the moment Subgraph supports only Eltwise, Convert, FQ (which is decomposed into Eltwises and Convert) and -// Softmax (which is decompsed into Eltwises as well) -// And only Eltwises supports execution only in "exec_type". So we can check op type from the opposite +// At the moment Subgraph supports only Eltwise, Select, Convert, Broadcast and FQ (which is decomposed into Eltwises and Convert) with +// Softmax (which is decomposed into Eltwises as well) +// And only Eltwise and Select ops supports execution only in "exec_type". 
So we can check op type from the opposite // NOTE: This check is only for executable which isn't Parameter/Constant/Result inline auto op_supports_only_exec_type(const std::shared_ptr& n) -> bool { return !is_in_op(n) && !ov::is_type(n) && !ov::is_type(n) && - !ov::is_type(n); + !ov::is_type(n) && + !ov::is_type(n); } } // namespace @@ -60,7 +61,8 @@ bool ngraph::snippets::pass::AlignElementType::run_on_model(const std::shared_pt // - Input is Convert with unsupported destination type // - Input is Op which support any element type // We couldn't unite these conditions and just check that element type isn't supported exec type - // because we don't call validate_and_infer_types() so we don't know new precisions + // because we don't call validate_and_infer_types() so we don't know new precisions after setting of original + // input and output element types if ((existing_convert && existing_convert->get_destination_type() != exec_type) || (!op_supports_only_exec_type(shared_input))) { insertConvert(op, i, exec_type); @@ -91,6 +93,6 @@ bool ngraph::snippets::pass::AlignElementType::run_on_model(const std::shared_pt } bool ngraph::snippets::pass::AlignElementType::opNeedsAlignElementType(const std::shared_ptr& op, const ov::element::Type exec_type) { - // At the moment Snippets support only Eltwise/Convert/FQ which one output so we can just call get_element_type() + // At the moment Snippets support only Eltwise/Convert/FQ/Select/Softmax/Broadcast which one output so we can just call get_element_type() return op_supports_only_exec_type(op) && op->get_element_type() != exec_type; } diff --git a/src/common/snippets/src/pass/collapse_subgraph.cpp b/src/common/snippets/src/pass/collapse_subgraph.cpp index 5924729efec99f..26eabae55f5661 100644 --- a/src/common/snippets/src/pass/collapse_subgraph.cpp +++ b/src/common/snippets/src/pass/collapse_subgraph.cpp @@ -77,6 +77,10 @@ auto is_supported_op(const std::shared_ptr &n) -> bool { is_type(n->get_input_node_shared_ptr(4)); 
}; + auto is_supported_ternary_eltwise_op = [](const std::shared_ptr &n) -> bool { + return ov::is_type(n); + }; + auto is_supported_binary_eltwise_op = [](const std::shared_ptr &n) -> bool { return ov::is_type(n) || ov::is_type(n) @@ -138,12 +142,28 @@ auto is_supported_op(const std::shared_ptr &n) -> bool { return axis >= 0 && axis == (rank.get_length() - 1); }; - return is_supported_fq_op(n) - || is_supported_unary_eltwise_op(n) - || is_supported_binary_eltwise_op(n) - || is_supported_transpose(n) - || is_supported_softmax(n) - || is_supported_matmul(n); + auto is_supported_broadcast_op = [](const std::shared_ptr &n) -> bool { + // TODO: Add check for broadcastable input shapes of Broadcast children + // Codogen removes Broadcast op, insert BroadcastMove if needed and save just last dim. + // But if Broadcast child output shape depends on Broadcast we can loss needed output shape + // Example: + // in0 [1, 1, 1] in0 [1, 1, 1] in0 [1, 1, 1] in0 [1, 1, 1] + // Broadcast [1, 10, 1] / \ / + // \ / --->>> Add + // Add | + // Result [1, 10, 1] Result [1, 1, 1] + auto broadcast = ov::as_type_ptr(n); + return broadcast && broadcast->get_broadcast_spec().m_type == ov::op::AutoBroadcastType::NUMPY; + }; + + return is_supported_fq_op(n) || + is_supported_unary_eltwise_op(n) || + is_supported_binary_eltwise_op(n) || + is_supported_ternary_eltwise_op(n) || + is_supported_transpose(n) || + is_supported_softmax(n) || + is_supported_matmul(n) || + is_supported_broadcast_op(n); } auto has_supported_in_out(const std::shared_ptr &n) -> bool { @@ -151,10 +171,12 @@ auto has_supported_in_out(const std::shared_ptr &n) -> bool { static const std::set supported_data_types = { ngraph::element::f32, ngraph::element::bf16, ngraph::element::i8, ngraph::element::u8 }; // Todo: int32 isn't supported in general because i32 emitters are required for bit-exact i32 calculations in some cases - // So i32 is supported exclusively for transposes + // So i32 is supported exclusively for transposes 
and broadcast return t.get_partial_shape().is_static() && - (supported_data_types.count(t.get_element_type()) != 0 || - (ov::is_type(n) && t.get_element_type() == ngraph::element::i32)); + (supported_data_types.count(t.get_element_type()) != 0 || + (t.get_element_type() == ngraph::element::i32 && + (ov::is_type(n) || + ov::is_type(n)))); }; const auto & inputs = n->inputs(); const auto & outputs = n->outputs(); @@ -491,9 +513,7 @@ TokenizeSnippets::TokenizeSnippets() { // [*] We support Transpose with second Constant input (represents order). This Constant will not be scheduled // and will only be used to decompose Transpose into a proper Load, Store and Loop combination. if (ov::is_type(input_node) && - (ngraph::shape_size(input_value.get_shape()) == 1 || - ov::is_type(node) || - ov::is_type(node))) { + (ngraph::shape_size(input_value.get_shape()) == 1 || utils::constant_input_should_be_inside_body(node))) { internal_inputs.push_back(input_node->output(0)); } else { external_inputs.push_back(input_value); diff --git a/src/common/snippets/src/pass/common_optimizations.cpp b/src/common/snippets/src/pass/common_optimizations.cpp index be78a136cc71ae..23cf188473938e 100644 --- a/src/common/snippets/src/pass/common_optimizations.cpp +++ b/src/common/snippets/src/pass/common_optimizations.cpp @@ -12,6 +12,7 @@ #include "snippets/pass/fq_decomposition.hpp" #include "snippets/pass/softmax_reshape_elimination.hpp" #include "snippets/op/subgraph.hpp" +#include "snippets/utils.hpp" #include "snippets/itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::snippets::pass::CommonOptimizations, "Snippets::CommonOptimizations", 0); @@ -35,7 +36,7 @@ void ConvertConstantsToParameters(const std::shared_ptrget_output_target_inputs(0).begin()->get_node()->shared_from_this(); - if (ov::is_type(child) || ov::is_type(child)) + if (utils::constant_input_should_be_inside_body(child) && !ov::is_type(child)) continue; auto parameter = std::make_shared(constant->get_element_type(), 
constant->output(0).get_partial_shape()); diff --git a/src/common/snippets/src/pass/insert_movebroadcast.cpp b/src/common/snippets/src/pass/insert_movebroadcast.cpp index f42bc06844262d..82a9d6392002de 100644 --- a/src/common/snippets/src/pass/insert_movebroadcast.cpp +++ b/src/common/snippets/src/pass/insert_movebroadcast.cpp @@ -8,6 +8,7 @@ #include "snippets/pass/insert_movebroadcast.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/utils.hpp" +#include #include #include @@ -121,3 +122,25 @@ ngraph::snippets::pass::InsertMoveBroadcast::InsertMoveBroadcast() { register_matcher(std::make_shared(any, matcher_name), callback); } + +ngraph::snippets::pass::BroadcastToMoveBroadcast::BroadcastToMoveBroadcast() { + MATCHER_SCOPE(BroadcastToMoveBroadcast); + + register_matcher(std::make_shared(ngraph::pattern::wrap_type(), matcher_name), + [this](ngraph::pattern::Matcher &m) { + OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::BroadcastToMoveBroadcast") + auto root = m.get_match_root(); + auto broadcast = ov::as_type_ptr(root); + if (broadcast->get_broadcast_spec() != ngraph::op::AutoBroadcastType::NUMPY) { + return false; + } + + auto broadcast_move = broadcast_node_last_dim(broadcast->input_value(0), broadcast->get_output_shape(0), broadcast->get_input_shape(0)); + auto target_output = ov::is_type(broadcast_move) ? 
broadcast_move->output(0) : + broadcast->input_value(0); + replace_output_update_name(broadcast->output(0), target_output); + ngraph::copy_runtime_info(root, broadcast_move); + + return true; + }); +} diff --git a/src/common/snippets/tests/include/pass/insert_movebroadcast.hpp b/src/common/snippets/tests/include/pass/insert_movebroadcast.hpp index 98c890b9dc2f8b..1e2872cc084171 100644 --- a/src/common/snippets/tests/include/pass/insert_movebroadcast.hpp +++ b/src/common/snippets/tests/include/pass/insert_movebroadcast.hpp @@ -22,6 +22,12 @@ typedef std::tuple< Shape // Broadcast shape 1 > insertMoveBroadcastParams; +typedef std::tuple< + Shape, // Input shape 0 + Shape, // Input shape 1 + Shape // Broadcast shape +> BroadcastParams; + using ngraph::snippets::op::Subgraph; class InsertMoveBroadcastTests : public LoweringTests, public testing::WithParamInterface { public: @@ -31,6 +37,14 @@ class InsertMoveBroadcastTests : public LoweringTests, public testing::WithParam std::shared_ptr snippets_function; }; +class BroadcastToMoveBroadcastTests : public LoweringTests, public testing::WithParamInterface { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); +protected: + void SetUp() override; + std::shared_ptr snippets_function; +}; + } // namespace snippets } // namespace test } // namespace ov diff --git a/src/common/snippets/tests/src/pass/insert_movebroadcast.cpp b/src/common/snippets/tests/src/pass/insert_movebroadcast.cpp index f4f3250530865c..3fe54d3065f6c5 100644 --- a/src/common/snippets/tests/src/pass/insert_movebroadcast.cpp +++ b/src/common/snippets/tests/src/pass/insert_movebroadcast.cpp @@ -36,6 +36,28 @@ void InsertMoveBroadcastTests::SetUp() { master_shape.push_back(static_cast(std::max(inputShapes[0][i], inputShapes[1][i]))); } +std::string BroadcastToMoveBroadcastTests::getTestCaseName(testing::TestParamInfo obj) { + std::vector inputShapes(2); + Shape broadcast_shape; + std::tie(inputShapes[0], inputShapes[1], broadcast_shape) 
= obj.param; + std::ostringstream result; + for (size_t i = 0; i < inputShapes.size(); i++) + result << "IS[" << i << "]=" << CommonTestUtils::vec2str(inputShapes[i]) << "_"; + result << "BS=" << CommonTestUtils::vec2str(broadcast_shape) << "_"; + return result.str(); +} + +void BroadcastToMoveBroadcastTests::SetUp() { + TransformationTestsF::SetUp(); + std::vector inputShapes(2); + PartialShape broadcast_shape; + std::tie(inputShapes[0], inputShapes[1], broadcast_shape) = this->GetParam(); + snippets_function = std::make_shared(inputShapes, broadcast_shape); + master_shape = {}; + for (int i = 0; i < inputShapes[0].size(); i++) + master_shape.push_back(static_cast(std::max(inputShapes[0].get_shape()[i], inputShapes[1].get_shape()[i]))); +} + TEST_P(InsertMoveBroadcastTests, AddBroadcast) { PartialShape scheduler_shape({master_shape[master_shape.size() - 2], master_shape[master_shape.size() - 1]}); @@ -44,6 +66,14 @@ TEST_P(InsertMoveBroadcastTests, AddBroadcast) { function_ref = snippets_function->getLowered(); } +TEST_P(BroadcastToMoveBroadcastTests, BroadcastSelect) { + PartialShape scheduler_shape({master_shape[master_shape.size() - 2], + master_shape[master_shape.size() - 1]}); + auto subgraph = getLoweredSubgraph(snippets_function->getOriginal(), scheduler_shape); + function = subgraph->get_body(); + function_ref = snippets_function->getLowered(); +} + namespace InsertMoveBroadcastTestsInstantiation { using ov::Shape; std::vector inputShapes0 {{1, 8, 2, 1}}; @@ -85,6 +115,20 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_NoBroadcast, InsertMoveBroadcastTests, ::testing::ValuesIn(paramsNo), InsertMoveBroadcastTests::getTestCaseName); } // namespace InsertMoveBroadcastTestsInstantiation + + +namespace BroadcastToMoveBroadcastTestsInstantiation { +using ov::Shape; +std::vector inputShapes0 {{1, 8, 2, 10}, {1, 8, 2, 1}, {1, 1, 1, 1}}; +std::vector inputShapes1 {{1, 8, 2, 10}, {1, 8, 2, 1}, {1, 1, 1, 1}}; +Shape broadcastShape {1, 8, 2, 10}; 
+INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Broadcast, BroadcastToMoveBroadcastTests, + ::testing::Combine( + ::testing::ValuesIn(inputShapes0), + ::testing::ValuesIn(inputShapes1), + ::testing::Values(broadcastShape)), + BroadcastToMoveBroadcastTests::getTestCaseName); +} // namespace BroadcastToMoveBroadcastTestsInstantiation } // namespace snippets } // namespace test } // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp index 13178aed44fcb9..901231734b52a9 100644 --- a/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/cpu_generator.cpp @@ -69,6 +69,9 @@ ov::intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_ jitters[ngraph::snippets::op::ConvertSaturation::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_convert_saturation_emitter); // jitters[ngraph::opset1::FakeQuantize::get_type_info_static()] = CREATE_EMITTER(); // not supported + // ternary + jitters[ngraph::opset1::Select::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_select_emitter); + // binary jitters[ngraph::opset1::Add::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_add_emitter); jitters[ngraph::opset1::Divide::get_type_info_static()] = CREATE_EMITTER(ov::intel_cpu::jit_divide_emitter); diff --git a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp index 20d48e3ac18bde..6813dae1c9e905 100644 --- a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.cpp @@ -1961,5 +1961,57 @@ void jit_soft_sign_emitter::register_table_entries() { push_arg_entry_of("one", 0x3f800000, true); push_arg_entry_of("positive_mask", 0x7fffffff, true); } + +/// SELECT /// +jit_select_emitter::jit_select_emitter(jit_generator *host, cpu_isa_t host_isa, const std::shared_ptr& 
node, Precision exec_prc) +: jit_emitter(host, host_isa, node, exec_prc) {} +jit_select_emitter::jit_select_emitter(jit_generator *host, cpu_isa_t host_isa, Precision exec_prc) +: jit_emitter(host, host_isa, exec_prc) {} + +size_t jit_select_emitter::get_inputs_num() const { return 3; } + +size_t jit_select_emitter::aux_vecs_count() const { + // mask should be xmm0 on sse41 + return host_isa_ == cpu::x64::sse41 ? 1 : 0; +} + +void jit_select_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) const { + if (host_isa_ == cpu::x64::sse41) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else if (host_isa_ == cpu::x64::avx2) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else if (host_isa_ == cpu::x64::avx512_core) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + assert(!"unsupported isa"); + } +} + +template +void jit_select_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + using Vmm = typename conditional3::type; + Vmm vmm_cond = Vmm(in_vec_idxs[0]); + Vmm vmm_src0 = Vmm(in_vec_idxs[1]); + Vmm vmm_src1 = Vmm(in_vec_idxs[2]); + Vmm vmm_dst = Vmm(out_vec_idxs[0]); + + if (isa == cpu::x64::sse41) { + Vmm vmm_aux = Vmm(aux_vec_idxs[0]); + if (vmm_aux.getIdx() != vmm_cond.getIdx()) { + h->uni_vmovups(vmm_aux, vmm_cond); + } + if (vmm_src1.getIdx() != vmm_dst.getIdx()) { + h->uni_vmovups(vmm_dst, vmm_src1); + } + h->uni_vblendvps(vmm_dst, vmm_dst, vmm_src0, vmm_aux); + } else if (isa == cpu::x64::avx2) { + h->uni_vblendvps(vmm_dst, vmm_src1, vmm_src0, vmm_cond); + } else { + h->vptestmd(k_mask, vmm_cond, vmm_cond); + h->vblendmps(vmm_dst | k_mask, vmm_src1, vmm_src0); + } +} } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp index 0a374a418f8c28..66cd5ea85035ad 100644 
--- a/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/jit_eltwise_emitters.hpp @@ -611,5 +611,24 @@ class jit_soft_sign_emitter : public jit_emitter { void register_table_entries() override; }; +class jit_select_emitter : public jit_emitter { +public: + jit_select_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, + InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); + jit_select_emitter(dnnl::impl::cpu::x64::jit_generator *host, dnnl::impl::cpu::x64::cpu_isa_t host_isa, const std::shared_ptr& n, + InferenceEngine::Precision exec_prc = InferenceEngine::Precision::FP32); + + size_t get_inputs_num() const override; + size_t aux_vecs_count() const override; + +private: + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs, + const std::vector &pool_vec_idxs, const std::vector &pool_gpr_idxs, + const emitter_context *emit_context) const override; + + template + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; +}; + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/ngraph_transformations/snippets_mark_skipped.cpp b/src/plugins/intel_cpu/src/ngraph_transformations/snippets_mark_skipped.cpp index 33d0e2e61fec60..01463652986bee 100644 --- a/src/plugins/intel_cpu/src/ngraph_transformations/snippets_mark_skipped.cpp +++ b/src/plugins/intel_cpu/src/ngraph_transformations/snippets_mark_skipped.cpp @@ -429,7 +429,8 @@ bool SnippetsMarkSkipped::run_on_model(const std::shared_ptr &m) { RUN_ON_MODEL_SCOPE(SnippetsMarkSkipped); int channelAxis = DEFAULT_AXIS; for (auto &node : m->get_ordered_ops()) { - if (ngraph::op::is_constant(node) || ov::is_type(node)) + if (ov::is_type(node) || + ov::is_type(node)) continue; if (ngraph::op::is_parameter(node)) { diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 
fc33ea556e66ae..22e060e10d98d9 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -656,7 +656,9 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr const bool is_disabled_tokenization = (ov::is_type(n) || ov::is_type(n) || ov::is_type(n) || - ov::is_type(n)); + ov::is_type(n) || + ov::is_type(n) || + ov::is_type(n)); const auto& inputs = n->inputs(); // todo: clarify whether we can evaluate snippets on const paths const bool has_only_const_inputs = std::all_of(inputs.begin(), inputs.end(), diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/select.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/select.cpp new file mode 100644 index 00000000000000..f6339f2fedd224 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/select.cpp @@ -0,0 +1,42 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/select.hpp" +#include "common_test_utils/test_constants.hpp" + +namespace ov { +namespace test { +namespace snippets { + + +namespace { + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Select, Select, + ::testing::Combine( + ::testing::ValuesIn({ov::Shape{1, 5, 5, 35}, ov::Shape{1}}), + ::testing::ValuesIn({ov::Shape{1, 5, 5, 35}, ov::Shape{1}}), + ::testing::ValuesIn({ov::Shape{1, 5, 5, 35}, ov::Shape{1}}), + ::testing::ValuesIn({ov::element::f32, ov::element::i8}), + ::testing::Values(4), // 1 Subgraph + 2 Sinh + 1 Roll + ::testing::Values(1), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + Select::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_BroadcastSelect, BroadcastSelect, + ::testing::Combine( + ::testing::ValuesIn({Shape{1, 8, 2, 1}, Shape{1, 1, 1, 1}}), + ::testing::ValuesIn({Shape{1, 8, 2, 10}, Shape{1, 8, 2, 1}}), + ::testing::ValuesIn({Shape{1, 8, 2, 10}, Shape{1, 1, 1, 1}}), + ::testing::ValuesIn({Shape{1, 8, 2, 1}, Shape{1, 8, 2, 10}}), + 
::testing::ValuesIn({ov::element::f32, ov::element::i8}), + ::testing::Values(4), // 1 Subgraph + 2 Sinh + 1 Roll + ::testing::Values(1), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + BroadcastSelect::getTestCaseName); + + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/tests/functional/plugin/shared/include/snippets/select.hpp b/src/tests/functional/plugin/shared/include/snippets/select.hpp new file mode 100644 index 00000000000000..e8e15ab97e4132 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/snippets/select.hpp @@ -0,0 +1,59 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/snippets_test_utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +typedef std::tuple< + ov::Shape, // Input 0 Shape + ov::Shape, // Input 1 Shape + ov::Shape, // Input 2 Shape + ov::element::Type, // Element type + size_t, // Expected num nodes + size_t, // Expected num subgraphs + std::string // Target Device +> SelectParams; + +typedef std::tuple< + ov::Shape, // Input 0 Shape + ov::Shape, // Input 1 Shape + ov::Shape, // Input 2 Shape + ov::Shape, // Input 3 Shape + ov::element::Type, // Element type + size_t, // Expected num nodes + size_t, // Expected num subgraphs + std::string // Target Device +> BroadcastSelectParams; + +class Select : public testing::WithParamInterface, + virtual public ov::test::SnippetsTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; + + void generate_inputs(const std::vector& targetInputStaticShapes) override; +}; + +class BroadcastSelect : public testing::WithParamInterface, + virtual public ov::test::SnippetsTestsCommon { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + +protected: + void SetUp() override; + + void generate_inputs(const 
std::vector& targetInputStaticShapes) override; +}; + + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/src/snippets/select.cpp b/src/tests/functional/plugin/shared/src/snippets/select.cpp new file mode 100644 index 00000000000000..a2814a578907c5 --- /dev/null +++ b/src/tests/functional/plugin/shared/src/snippets/select.cpp @@ -0,0 +1,114 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/common_utils.hpp" +#include +#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp" +#include "snippets/select.hpp" +#include "subgraph_simple.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { +void generate_data(std::map, ov::Tensor>& data_inputs, const std::vector>& model_inputs) { + data_inputs.clear(); + auto tensor_bool = ov::test::utils::create_and_fill_tensor(model_inputs[0].get_element_type(), model_inputs[0].get_shape(), 3, -1, 2); + auto tensor0 = ov::test::utils::create_and_fill_tensor(model_inputs[1].get_element_type(), model_inputs[1].get_shape(), 10, -10, 2); + auto tensor1 = ov::test::utils::create_and_fill_tensor(model_inputs[2].get_element_type(), model_inputs[2].get_shape(), 10, 0, 2); + data_inputs.insert({model_inputs[0].get_node_shared_ptr(), tensor_bool}); + data_inputs.insert({model_inputs[1].get_node_shared_ptr(), tensor0}); + data_inputs.insert({model_inputs[2].get_node_shared_ptr(), tensor1}); +} +} // namespace + +std::string Select::getTestCaseName(testing::TestParamInfo obj) { + ov::Shape inputShapes0, inputShapes1, inputShapes2; + ov::element::Type type; + std::string targetDevice; + size_t num_nodes, num_subgraphs; + std::tie(inputShapes0, inputShapes1, inputShapes2, type, num_nodes, num_subgraphs, targetDevice) = obj.param; + + std::ostringstream result; + result << "IS[0]=" << CommonTestUtils::vec2str(inputShapes0) << "_"; + result << "IS[1]=" << 
CommonTestUtils::vec2str(inputShapes1) << "_"; + result << "IS[2]=" << CommonTestUtils::vec2str(inputShapes2) << "_"; + result << "T=" << type << "_"; + result << "#N=" << num_nodes << "_"; + result << "#S=" << num_subgraphs << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); +} + +void Select::SetUp() { + ov::Shape inputShape0, inputShape1, inputShape2; + ov::element::Type type; + std::tie(inputShape0, inputShape1, inputShape2, type, ref_num_nodes, ref_num_subgraphs, targetDevice) = this->GetParam(); + init_input_shapes(static_shapes_to_test_representation({inputShape0, inputShape1, inputShape2})); + + auto f = ov::test::snippets::SelectFunction({inputShape0, inputShape1, inputShape2}); + function = f.getOriginal(); + + if (!configuration.count(InferenceEngine::PluginConfigInternalParams::KEY_SNIPPETS_MODE)) { + configuration.insert({InferenceEngine::PluginConfigInternalParams::KEY_SNIPPETS_MODE, + InferenceEngine::PluginConfigInternalParams::IGNORE_CALLBACK}); + } +} + +void Select::generate_inputs(const std::vector& targetInputStaticShapes) { + generate_data(inputs, function->inputs()); +} + +std::string BroadcastSelect::getTestCaseName(testing::TestParamInfo obj) { + ov::Shape inputShapes0, inputShapes1, inputShapes2, broadcastShape; + ov::element::Type type; + std::string targetDevice; + size_t num_nodes, num_subgraphs; + std::tie(inputShapes0, inputShapes1, inputShapes2, broadcastShape, type, num_nodes, num_subgraphs, targetDevice) = obj.param; + + std::ostringstream result; + result << "IS[0]=" << CommonTestUtils::vec2str(inputShapes0) << "_"; + result << "IS[1]=" << CommonTestUtils::vec2str(inputShapes1) << "_"; + result << "IS[2]=" << CommonTestUtils::vec2str(inputShapes2) << "_"; + result << "BS=" << CommonTestUtils::vec2str(broadcastShape) << "_"; + result << "T=" << type << "_"; + result << "#N=" << num_nodes << "_"; + result << "#S=" << num_subgraphs << "_"; + result << "targetDevice=" << targetDevice; + return result.str(); +} 
+ +void BroadcastSelect::SetUp() { + ov::Shape inputShape0, inputShape1, inputShape2, broadcastShape; + ov::element::Type type; + std::tie(inputShape0, inputShape1, inputShape2, broadcastShape, type, ref_num_nodes, ref_num_subgraphs, targetDevice) = this->GetParam(); + init_input_shapes(static_shapes_to_test_representation({inputShape0, inputShape1, inputShape2})); + + auto f = ov::test::snippets::BroadcastSelectFunction({inputShape0, inputShape1, inputShape2}, broadcastShape); + function = f.getOriginal(); + + if (!configuration.count(InferenceEngine::PluginConfigInternalParams::KEY_SNIPPETS_MODE)) { + configuration.insert({InferenceEngine::PluginConfigInternalParams::KEY_SNIPPETS_MODE, + InferenceEngine::PluginConfigInternalParams::IGNORE_CALLBACK}); + } +} + +void BroadcastSelect::generate_inputs(const std::vector& targetInputStaticShapes) { + generate_data(inputs, function->inputs()); +} + +TEST_P(Select, CompareWithRefImpl) { + run(); + validateNumSubgraphs(); +} + +TEST_P(BroadcastSelect, CompareWithRefImpl) { + run(); + validateNumSubgraphs(); +} + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/include/subgraph_lowered.hpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/subgraph_lowered.hpp index 57756d8c734bfe..eb3b7f2790cb05 100644 --- a/src/tests/ngraph_helpers/snippets_ngraph_functions/include/subgraph_lowered.hpp +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/subgraph_lowered.hpp @@ -78,6 +78,15 @@ class AddSoftmaxLoweredFunction : public AddSoftmaxFunction { std::shared_ptr initLowered() const override; }; +class BroadcastAddLoweredFunction : public BroadcastAddFunction { +public: + explicit BroadcastAddLoweredFunction(const std::vector& inputShapes, const PartialShape& targetShape) : + BroadcastAddFunction(inputShapes, targetShape) {} + +protected: + std::shared_ptr initLowered() const override; +}; + } // namespace snippets } // namespace 
test } // namespace ov diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/include/subgraph_simple.hpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/subgraph_simple.hpp index a1254dfaa80521..2f6be364dc7d3d 100644 --- a/src/tests/ngraph_helpers/snippets_ngraph_functions/include/subgraph_simple.hpp +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/include/subgraph_simple.hpp @@ -215,6 +215,57 @@ class TwoInputsAndOutputsFunction : public SnippetsFunctionBase { protected: std::shared_ptr initOriginal() const override; }; +/// Verify Select +/// Need to remove Roll and Sin when we can create Subgraphs on inputs +// in0 in1 in2 +// Roll Sin Sin +// \ | / +// Select +// Result +class SelectFunction : public SnippetsFunctionBase { +public: + explicit SelectFunction(const std::vector& inputShapes) : SnippetsFunctionBase(inputShapes) { + NGRAPH_CHECK(input_shapes.size() == 3, "Got invalid number of input shapes"); + } +protected: + std::shared_ptr initOriginal() const override; +}; +/// Verify Broadcast in passes +// in0 in1 +// Broadcast | +// \ / +// Add +// Result +class BroadcastAddFunction : public SnippetsFunctionBase { +public: + explicit BroadcastAddFunction(const std::vector& inputShapes, const PartialShape& targetShape) + : SnippetsFunctionBase(inputShapes), m_target_shape(targetShape) { + NGRAPH_CHECK(input_shapes.size() == 2, "Got invalid number of input shapes"); + } +protected: + std::shared_ptr initOriginal() const override; + + PartialShape m_target_shape; +}; + +/// Verify Select + Broadcast +/// Just for inference test (because in0 is boolean element type which isn't explicitly supported in snippets) +/// Need to remove Roll and Sin when we can create Subgraphs on inputs +// in0 in1 in2 +// Roll | | +// Broadcast Sin Sin +// \ | / +// Select +// Result +class BroadcastSelectFunction : public SelectFunction { +public: + explicit BroadcastSelectFunction(const std::vector& inputShapes, const PartialShape& targetShape) + : 
SelectFunction(inputShapes), m_target_shape(targetShape) {} +protected: + std::shared_ptr initOriginal() const override; + + PartialShape m_target_shape; +}; } // namespace snippets } // namespace test } // namespace ov diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp index 6de4c8b0d5f32d..1a5dd2d80a3213 100644 --- a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_lowered.cpp @@ -398,6 +398,41 @@ std::shared_ptr AddSoftmaxLoweredFunction::initLowered() const { return std::make_shared(ResultVector{result}, input_params); } +std::shared_ptr BroadcastAddLoweredFunction::initLowered() const { + auto data0 = std::make_shared(precision, input_shapes[0]); + auto data1 = std::make_shared(precision, input_shapes[1]); + std::vector> datas = {data0, data1}; + auto last_dim = std::max(input_shapes[0].get_shape().back(), std::max(input_shapes[1].get_shape().back(), m_target_shape.get_shape().back())); + std::vector> loads(datas.size(), nullptr); + for (auto i = 0; i < datas.size(); i++) { + if (input_shapes[i].get_shape().back() != last_dim) { + auto new_shape = input_shapes[i]; + new_shape[new_shape.size() - 1] = last_dim; + loads[i] = std::make_shared(datas[i], new_shape); + } else { + loads[i] = std::make_shared(datas[i]); + } + } + auto add = std::make_shared(loads[0], loads[1]); + auto store = std::make_shared(add); + auto model = std::make_shared(NodeVector{store}, ParameterVector{data0, data1}); + + // Create dummy scheduler to pass graph comparison tests + // Note that if there is more than one result, they should be reverted + ResultVector results({model->get_results()[0]}); + const auto& inner_loop_begin = ngraph::snippets::op::insertLoopBegin(datas); + std::vector apply_increments(datas.size() + results.size(), true); + insertLoopEnd(results, 
inner_loop_begin, 1, 1, apply_increments); + auto outer_WA = std::accumulate(input_shapes.begin(), input_shapes.end(), 0, + [](int64_t max_val, const PartialShape& ps) { + return std::max(ps[ps.size() - 2].get_length(), max_val); + }); + if (outer_WA > 1) { + const auto& outer_loop_begin = ngraph::snippets::op::insertLoopBegin(datas); + insertLoopEnd(results, outer_loop_begin, 1, 1, apply_increments); + } + return model; +} } // namespace snippets } // namespace test } // namespace ov diff --git a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_simple.cpp b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_simple.cpp index d58660a6714eef..a5b3de1120ea44 100644 --- a/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_simple.cpp +++ b/src/tests/ngraph_helpers/snippets_ngraph_functions/src/subgraph_simple.cpp @@ -299,6 +299,47 @@ std::shared_ptr TwoInputsAndOutputsFunction::initOriginal() const { return std::make_shared(NodeVector{hswish, sin3}, ParameterVector{data0, data1}); } +std::shared_ptr SelectFunction::initOriginal() const { + auto data0 = std::make_shared(ov::element::boolean, input_shapes[0]); + auto data1 = std::make_shared(precision, input_shapes[1]); + auto data2 = std::make_shared(precision, input_shapes[2]); + auto shift = std::make_shared(ov::element::i32, ov::Shape{1}, std::vector{1}); + auto axes = std::make_shared(ov::element::i32, ov::Shape{1}, std::vector{0}); + auto roll0 = std::make_shared(data0, shift, axes); + auto sin1 = std::make_shared(data1); + auto sin2 = std::make_shared(data2); + auto select = std::make_shared(roll0, sin1, sin2); + + return std::make_shared(NodeVector{select}, ParameterVector{data0, data1, data2}); +} + +std::shared_ptr BroadcastAddFunction::initOriginal() const { + auto data0 = std::make_shared(precision, input_shapes[0]); + auto data1 = std::make_shared(precision, input_shapes[1]); + auto target_shape = std::make_shared(ov::element::i32, ov::Shape{m_target_shape.size()}, 
m_target_shape.get_shape()); + auto broadcast = std::make_shared(data0, target_shape); + auto add = std::make_shared(broadcast, data1); + + return std::make_shared(NodeVector{add}, ParameterVector{data0, data1}); +} + + +std::shared_ptr BroadcastSelectFunction::initOriginal() const { + auto data0 = std::make_shared(ov::element::boolean, input_shapes[0]); + auto data1 = std::make_shared(precision, input_shapes[1]); + auto data2 = std::make_shared(precision, input_shapes[2]); + auto shift = std::make_shared(ov::element::i32, ov::Shape{1}, std::vector{1}); + auto axes = std::make_shared(ov::element::i32, ov::Shape{1}, std::vector{0}); + auto roll0 = std::make_shared(data0, shift, axes); + auto target_shape = std::make_shared(ov::element::i32, ov::Shape{m_target_shape.size()}, m_target_shape.get_shape()); + auto broadcast = std::make_shared(roll0, target_shape); + auto sin1 = std::make_shared(data1); + auto sin2 = std::make_shared(data2); + auto select = std::make_shared(broadcast, sin1, sin2); + + return std::make_shared(NodeVector{select}, ParameterVector{data0, data1, data2}); +} + } // namespace snippets } // namespace test } // namespace ov \ No newline at end of file