From d469e55433916121478664b2457c7dcc858103db Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Fri, 26 Jan 2024 11:11:23 +0100 Subject: [PATCH] Alexandra's comments applied: 2nd part --- .../snippets/include/snippets/op/reduce.hpp | 14 ++++--------- .../include/snippets/target_machine.hpp | 6 ++++++ .../snippets/src/lowered/target_machine.cpp | 4 ++++ src/common/snippets/src/op/reduce.cpp | 4 ++-- .../src/pass/reduce_to_snippets_reduce.cpp | 4 ++-- .../src/pass/softmax_decomposition.cpp | 4 ++-- .../src/lowered/pass/buffer_allocation.cpp | 4 ++-- .../emitters/snippets/x64/cpu_generator.cpp | 20 ++++++++++++++----- .../emitters/snippets/x64/cpu_generator.hpp | 1 + .../lowered/buffer_allocation.cpp | 4 ++-- 10 files changed, 40 insertions(+), 25 deletions(-) diff --git a/src/common/snippets/include/snippets/op/reduce.hpp b/src/common/snippets/include/snippets/op/reduce.hpp index 96598906e1909c..f1b2e739bcf44b 100644 --- a/src/common/snippets/include/snippets/op/reduce.hpp +++ b/src/common/snippets/include/snippets/op/reduce.hpp @@ -38,15 +38,12 @@ class ReduceSum : public ReduceBase { ReduceSum(const Output& x, size_t axis) : ReduceBase(x, axis) {} ReduceSum() = default; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - static std::set get_supported_precisions(const std::shared_ptr& node) { - return {{ov::element::f32}}; - } /** - * @brief Creates ReduceSum operation, computes and sets input/output subtensors + * @brief Creates ReduceSum operation, computes and sets subtensors to input/output PortDescriptors * @param x Reduce input * @param axis Reduce axis */ - static std::shared_ptr make_reduce_sum(const Output& x, size_t axis); + static std::shared_ptr make(const Output& x, size_t axis); }; class ReduceMax : public ReduceBase { @@ -55,15 +52,12 @@ class ReduceMax : public ReduceBase { ReduceMax(const Output& x, size_t axis) : ReduceBase(x, axis) {} ReduceMax() = default; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - static std::set get_supported_precisions(const std::shared_ptr& node) { - return {{ov::element::f32}}; - } /** - * @brief Creates ReduceMax operation, computes and sets input/output subtensors + * @brief Creates ReduceMax operation, computes and sets subtensors to input/output PortDescriptors * @param x Reduce input * @param axis Reduce axis */ - static std::shared_ptr make_reduce_max(const Output& x, size_t axis); + static std::shared_ptr make(const Output& x, size_t axis); }; } // namespace op diff --git a/src/common/snippets/include/snippets/target_machine.hpp b/src/common/snippets/include/snippets/target_machine.hpp index d42779bcd7153c..edd066be95f7c1 100644 --- a/src/common/snippets/include/snippets/target_machine.hpp +++ b/src/common/snippets/include/snippets/target_machine.hpp @@ -50,6 +50,12 @@ class TargetMachine { */ virtual size_t get_lanes() const = 0; + /** + * @brief reports supported precisions set for nodes which don't have emitters + * @param type node type for which the supported precisions set is requested + * @return set of supported precisions for the provided node type + */ + virtual std::set supported_precisions_for_emitterless_node(const ov::DiscreteTypeInfo& type) const; /** * @brief called by generator to all the emitter for a target machine diff --git a/src/common/snippets/src/lowered/target_machine.cpp b/src/common/snippets/src/lowered/target_machine.cpp index bf84244f1889b9..e85829e820785f 100644 --- a/src/common/snippets/src/lowered/target_machine.cpp +++ b/src/common/snippets/src/lowered/target_machine.cpp @@ -5,6 +5,10 @@ #include "snippets/target_machine.hpp" using namespace ov::snippets; +std::set TargetMachine::supported_precisions_for_emitterless_node(const ov::DiscreteTypeInfo& type) const { + OPENVINO_THROW("supported_precisions_for_emitterless_node for this class is not implemented"); +} + std::function(const lowered::ExpressionPtr&)> TargetMachine::get(const ov::DiscreteTypeInfo& type) const { auto jitter = jitters.find(type); if (jitter == jitters.end()) { diff --git a/src/common/snippets/src/op/reduce.cpp b/src/common/snippets/src/op/reduce.cpp index 1204ad9c374456..20b01ad3c281a0 100644 --- a/src/common/snippets/src/op/reduce.cpp +++ b/src/common/snippets/src/op/reduce.cpp @@ -46,7 +46,7 @@ std::shared_ptr ReduceSum::clone_with_new_inputs(const OutputVector& new_a return std::make_shared(new_args.at(0), m_axis); } -std::shared_ptr ReduceSum::make_reduce_sum(const Output& x, size_t axis) { +std::shared_ptr ReduceSum::make(const Output& x, size_t axis) { const auto reduce = std::make_shared(x, axis); compute_and_set_reduce_subtensors(reduce); return reduce; @@ -58,7 +58,7 @@ std::shared_ptr ReduceMax::clone_with_new_inputs(const OutputVector& new_a return std::make_shared(new_args.at(0), m_axis); } -std::shared_ptr ReduceMax::make_reduce_max(const Output& x, size_t axis) { +std::shared_ptr ReduceMax::make(const Output& x, size_t axis) { const auto reduce = std::make_shared(x, axis); compute_and_set_reduce_subtensors(reduce); return reduce; diff --git a/src/common/snippets/src/pass/reduce_to_snippets_reduce.cpp b/src/common/snippets/src/pass/reduce_to_snippets_reduce.cpp index b0d0f9c6fd6d60..99184424676455 100644 --- a/src/common/snippets/src/pass/reduce_to_snippets_reduce.cpp +++ b/src/common/snippets/src/pass/reduce_to_snippets_reduce.cpp @@ -37,9 +37,9 @@ snippets::pass::ReduceToSnippetsReduce::ReduceToSnippetsReduce() { std::shared_ptr snippets_reduce = nullptr; if (ov::is_type(reduce)) - snippets_reduce = ov::snippets::op::ReduceSum::make_reduce_sum(data_input, axis); + snippets_reduce = ov::snippets::op::ReduceSum::make(data_input, axis); else if (ov::is_type(reduce)) - snippets_reduce = ov::snippets::op::ReduceMax::make_reduce_max(data_input, axis); + snippets_reduce = ov::snippets::op::ReduceMax::make(data_input, axis); else OPENVINO_THROW("Reduce ", reduce, " can't be converted to snippets opset."); diff --git a/src/common/snippets/src/pass/softmax_decomposition.cpp b/src/common/snippets/src/pass/softmax_decomposition.cpp index fce4422e1582ab..de7451d45e3d24 100644 --- a/src/common/snippets/src/pass/softmax_decomposition.cpp +++ b/src/common/snippets/src/pass/softmax_decomposition.cpp @@ -42,11 +42,11 @@ SoftmaxDecomposition::SoftmaxDecomposition() { } const auto& softmax_input = softmax->input_value(0); - const auto reduce_max = ov::snippets::op::ReduceMax::make_reduce_max(softmax_input, axis); + const auto reduce_max = ov::snippets::op::ReduceMax::make(softmax_input, axis); const auto subtract = std::make_shared(softmax_input, reduce_max); const auto exp = std::make_shared(subtract); - const auto reduce_sum = ov::snippets::op::ReduceSum::make_reduce_sum(exp, axis); + const auto reduce_sum = ov::snippets::op::ReduceSum::make(exp, axis); const auto power = std::make_shared(reduce_sum, -1.f); const auto multiply = std::make_shared(exp, power); diff --git a/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp b/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp index 1fc194868fef4d..d4631c26084947 100644 --- a/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp +++ b/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp @@ -142,11 +142,11 @@ std::shared_ptr MHABufferAllocationTest::GetModel() const { const auto relu1 = std::make_shared(matmul0); // Decomposed Softmax - const auto reduce_max = ov::snippets::op::ReduceMax::make_reduce_max(relu1, 3); + const auto reduce_max = ov::snippets::op::ReduceMax::make(relu1, 3); const auto subtract = std::make_shared(relu1, reduce_max); const auto exp = std::make_shared(subtract); - const auto reduce_sum = ov::snippets::op::ReduceSum::make_reduce_sum(exp, 3); + const auto reduce_sum = ov::snippets::op::ReduceSum::make(exp, 3); const auto power = std::make_shared(reduce_sum, -1.f); const auto multiply = std::make_shared(exp, power); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp index 84f8d80dd7b3bd..bbd721deb5bf69 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp @@ -109,12 +109,12 @@ static bool is_segfault_detector_emitter(const intel_cpu::jit_emitter *emitter) } \ } -#define CREATE_UNDEFINED_EMITTER(node_type) { \ +#define CREATE_UNDEFINED_EMITTER() { \ [](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr { \ return nullptr; \ }, \ - [](const std::shared_ptr& n) -> std::set> { \ - return node_type::get_supported_precisions(n); \ + [this](const std::shared_ptr& n) -> std::set> { \ + return supported_precisions_for_emitterless_node(n->get_type_info()); \ } \ } @@ -213,8 +213,8 @@ intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t ho jitters[snippets::op::LoopEnd::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_loop_end_emitter); jitters[intel_cpu::BrgemmCPU::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_brgemm_emitter); jitters[intel_cpu::BrgemmCopyB::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_brgemm_copy_b_emitter); - jitters[snippets::op::ReduceMax::get_type_info_static()] = CREATE_UNDEFINED_EMITTER(snippets::op::ReduceMax); - jitters[snippets::op::ReduceSum::get_type_info_static()] = CREATE_UNDEFINED_EMITTER(snippets::op::ReduceSum); + jitters[snippets::op::ReduceMax::get_type_info_static()] = CREATE_UNDEFINED_EMITTER(); + jitters[snippets::op::ReduceSum::get_type_info_static()] = CREATE_UNDEFINED_EMITTER(); #ifdef SNIPPETS_DEBUG_CAPS jitters[snippets::op::PerfCountBegin::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_perf_count_chrono_start_emitter); @@ -241,6 +241,16 @@ bool intel_cpu::CPUTargetMachine::is_supported() const { return dnnl::impl::cpu::x64::mayiuse(isa); } +std::set intel_cpu::CPUTargetMachine::supported_precisions_for_emitterless_node(const ov::DiscreteTypeInfo& type) const { + static const std::map> supported_precisions_map{ + {snippets::op::ReduceMax::get_type_info_static(), {{ov::element::f32}}}, + {snippets::op::ReduceSum::get_type_info_static(), {{ov::element::f32}}}, + }; + auto it = supported_precisions_map.find(type); + OPENVINO_ASSERT(it != supported_precisions_map.end(), "supported precisions set for node without emitter is not set. Type info: ", type); + return it->second; +} + snippets::CompiledSnippetPtr intel_cpu::CPUTargetMachine::get_snippet() { if (h->create_kernel() != dnnl::impl::status::success) { OPENVINO_THROW("Failed to create jit_kernel in get_snippet()"); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.hpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.hpp index 6eafd3cb04771c..a0c1dbb4544175 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.hpp @@ -33,6 +33,7 @@ class CPUTargetMachine : public snippets::TargetMachine { snippets::CompiledSnippetPtr get_snippet() override; size_t get_lanes() const override; dnnl::impl::cpu::x64::cpu_isa_t get_isa() const; + std::set supported_precisions_for_emitterless_node(const ov::DiscreteTypeInfo& type) const override; #ifdef SNIPPETS_DEBUG_CAPS SnippetsDebugCapsConfig debug_config; #endif diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/lowered/buffer_allocation.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/lowered/buffer_allocation.cpp index e2561f72822bfd..746ecfb3fb762b 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/lowered/buffer_allocation.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/lowered/buffer_allocation.cpp @@ -156,11 +156,11 @@ class MHABF16AMXBufferAllocationTest : public BufferAllocationCPUTest { const auto relu1 = std::make_shared(brgemm_cpu0); // Decomposed Softmax - const auto reduce_max = ov::snippets::op::ReduceMax::make_reduce_max(relu1, 3); + const auto reduce_max = ov::snippets::op::ReduceMax::make(relu1, 3); const auto subtract = std::make_shared(relu1, reduce_max); const auto exp = std::make_shared(subtract); - const auto reduce_sum = ov::snippets::op::ReduceSum::make_reduce_sum(exp, 3); + const auto reduce_sum = ov::snippets::op::ReduceSum::make(exp, 3); const auto power = std::make_shared(reduce_sum, -1.f); const auto multiply = std::make_shared(exp, power);