Alexandra's comments applied: 2nd part

v-Golubev · Jan 29, 2024 · d469e55
1 parent ccba54e
commit d469e55
Show file tree

Hide file tree

Showing 10 changed files with 40 additions and 25 deletions.
diff --git a/src/common/snippets/include/snippets/op/reduce.hpp b/src/common/snippets/include/snippets/op/reduce.hpp
@@ -38,15 +38,12 @@ class ReduceSum : public ReduceBase {
     ReduceSum(const Output<Node>& x, size_t axis) : ReduceBase(x, axis) {}
     ReduceSum() = default;
     std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
-    static std::set<ov::element::TypeVector> get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
-        return {{ov::element::f32}};
-    }
     /**
-     * @brief Creates ReduceSum operation, computes and sets input/output subtensors
+     * @brief Creates ReduceSum operation, computes and sets subtensors to input/output PortDescriptors
      * @param x Reduce input
      * @param axis Reduce axis
      */
-    static std::shared_ptr<ReduceSum> make_reduce_sum(const Output<Node>& x, size_t axis);
+    static std::shared_ptr<ReduceSum> make(const Output<Node>& x, size_t axis);
 };
 
 class ReduceMax : public ReduceBase {
@@ -55,15 +52,12 @@ class ReduceMax : public ReduceBase {
     ReduceMax(const Output<Node>& x, size_t axis) : ReduceBase(x, axis) {}
     ReduceMax() = default;
     std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
-    static std::set<ov::element::TypeVector> get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
-        return {{ov::element::f32}};
-    }
     /**
-     * @brief Creates ReduceMax operation, computes and sets input/output subtensors
+     * @brief Creates ReduceMax operation, computes and sets subtensors to input/output PortDescriptors
      * @param x Reduce input
      * @param axis Reduce axis
      */
-    static std::shared_ptr<ReduceMax> make_reduce_max(const Output<Node>& x, size_t axis);
+    static std::shared_ptr<ReduceMax> make(const Output<Node>& x, size_t axis);
 };
 
 } // namespace op

diff --git a/src/common/snippets/include/snippets/target_machine.hpp b/src/common/snippets/include/snippets/target_machine.hpp
@@ -50,6 +50,12 @@ class TargetMachine {
      */
     virtual size_t get_lanes() const = 0;
 
+    /**
+     * @brief Reports the set of supported precisions for nodes that don't have emitters
+     * @param type Node type for which the set of supported precisions is requested
+     * @return Set of supported precisions for the provided node type
+     */
+    virtual std::set<ov::element::TypeVector> supported_precisions_for_emitterless_node(const ov::DiscreteTypeInfo& type) const;
 
     /**
      * @brief called by generator to all the emitter for a target machine

diff --git a/src/common/snippets/src/lowered/target_machine.cpp b/src/common/snippets/src/lowered/target_machine.cpp
@@ -5,6 +5,10 @@
 #include "snippets/target_machine.hpp"
 
 using namespace ov::snippets;
+std::set<ov::element::TypeVector> TargetMachine::supported_precisions_for_emitterless_node(const ov::DiscreteTypeInfo& type) const {
+    OPENVINO_THROW("supported_precisions_for_emitterless_node for this class is not implemented");
+}
+
 std::function<std::shared_ptr<Emitter>(const lowered::ExpressionPtr&)> TargetMachine::get(const ov::DiscreteTypeInfo& type) const {
     auto jitter = jitters.find(type);
     if (jitter == jitters.end()) {

diff --git a/src/common/snippets/src/op/reduce.cpp b/src/common/snippets/src/op/reduce.cpp
@@ -46,7 +46,7 @@ std::shared_ptr<Node> ReduceSum::clone_with_new_inputs(const OutputVector& new_a
     return std::make_shared<ReduceSum>(new_args.at(0), m_axis);
 }
 
-std::shared_ptr<ReduceSum> ReduceSum::make_reduce_sum(const Output<Node>& x, size_t axis) {
+std::shared_ptr<ReduceSum> ReduceSum::make(const Output<Node>& x, size_t axis) {
     const auto reduce = std::make_shared<ReduceSum>(x, axis);
     compute_and_set_reduce_subtensors(reduce);
     return reduce;
@@ -58,7 +58,7 @@ std::shared_ptr<Node> ReduceMax::clone_with_new_inputs(const OutputVector& new_a
     return std::make_shared<ReduceMax>(new_args.at(0), m_axis);
 }
 
-std::shared_ptr<ReduceMax> ReduceMax::make_reduce_max(const Output<Node>& x, size_t axis) {
+std::shared_ptr<ReduceMax> ReduceMax::make(const Output<Node>& x, size_t axis) {
     const auto reduce = std::make_shared<ReduceMax>(x, axis);
     compute_and_set_reduce_subtensors(reduce);
     return reduce;

diff --git a/src/common/snippets/src/pass/reduce_to_snippets_reduce.cpp b/src/common/snippets/src/pass/reduce_to_snippets_reduce.cpp
@@ -37,9 +37,9 @@ snippets::pass::ReduceToSnippetsReduce::ReduceToSnippetsReduce() {
 
         std::shared_ptr<snippets::op::ReduceBase> snippets_reduce = nullptr;
         if (ov::is_type<ov::op::v1::ReduceSum>(reduce))
-            snippets_reduce = ov::snippets::op::ReduceSum::make_reduce_sum(data_input, axis);
+            snippets_reduce = ov::snippets::op::ReduceSum::make(data_input, axis);
         else if (ov::is_type<ov::op::v1::ReduceMax>(reduce))
-            snippets_reduce = ov::snippets::op::ReduceMax::make_reduce_max(data_input, axis);
+            snippets_reduce = ov::snippets::op::ReduceMax::make(data_input, axis);
         else
             OPENVINO_THROW("Reduce ", reduce, " can't be converted to snippets opset.");
 

diff --git a/src/common/snippets/src/pass/softmax_decomposition.cpp b/src/common/snippets/src/pass/softmax_decomposition.cpp
@@ -42,11 +42,11 @@ SoftmaxDecomposition::SoftmaxDecomposition() {
         }
 
         const auto& softmax_input = softmax->input_value(0);
-        const auto reduce_max = ov::snippets::op::ReduceMax::make_reduce_max(softmax_input, axis);
+        const auto reduce_max = ov::snippets::op::ReduceMax::make(softmax_input, axis);
         const auto subtract = std::make_shared<ov::op::v1::Subtract>(softmax_input, reduce_max);
         const auto exp = std::make_shared<ov::op::v0::Exp>(subtract);
 
-        const auto reduce_sum = ov::snippets::op::ReduceSum::make_reduce_sum(exp, axis);
+        const auto reduce_sum = ov::snippets::op::ReduceSum::make(exp, axis);
         const auto power = std::make_shared<ov::snippets::op::PowerStatic>(reduce_sum, -1.f);
         const auto multiply = std::make_shared<ov::op::v1::Multiply>(exp, power);
 

diff --git a/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp b/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp
@@ -142,11 +142,11 @@ std::shared_ptr<ov::Model> MHABufferAllocationTest::GetModel() const {
     const auto relu1 = std::make_shared<ov::op::v0::Relu>(matmul0);
 
     // Decomposed Softmax
-    const auto reduce_max = ov::snippets::op::ReduceMax::make_reduce_max(relu1, 3);
+    const auto reduce_max = ov::snippets::op::ReduceMax::make(relu1, 3);
     const auto subtract = std::make_shared<ov::op::v1::Subtract>(relu1, reduce_max);
     const auto exp = std::make_shared<ov::op::v0::Exp>(subtract);
 
-    const auto reduce_sum = ov::snippets::op::ReduceSum::make_reduce_sum(exp, 3);
+    const auto reduce_sum = ov::snippets::op::ReduceSum::make(exp, 3);
     const auto power = std::make_shared<ov::snippets::op::PowerStatic>(reduce_sum, -1.f);
     const auto multiply = std::make_shared<ov::op::v1::Multiply>(exp, power);
 

diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp
@@ -109,12 +109,12 @@ static bool is_segfault_detector_emitter(const intel_cpu::jit_emitter *emitter)
     } \
 }
 
-#define CREATE_UNDEFINED_EMITTER(node_type) { \
+#define CREATE_UNDEFINED_EMITTER() { \
     [](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr<snippets::Emitter> { \
         return nullptr; \
     }, \
-    [](const std::shared_ptr<ov::Node>& n) -> std::set<std::vector<element::Type>> { \
-        return node_type::get_supported_precisions(n); \
+    [this](const std::shared_ptr<ov::Node>& n) -> std::set<std::vector<element::Type>> { \
+        return supported_precisions_for_emitterless_node(n->get_type_info()); \
     } \
 }
 
@@ -213,8 +213,8 @@ intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t ho
     jitters[snippets::op::LoopEnd::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_loop_end_emitter);
     jitters[intel_cpu::BrgemmCPU::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_brgemm_emitter);
     jitters[intel_cpu::BrgemmCopyB::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_brgemm_copy_b_emitter);
-    jitters[snippets::op::ReduceMax::get_type_info_static()] = CREATE_UNDEFINED_EMITTER(snippets::op::ReduceMax);
-    jitters[snippets::op::ReduceSum::get_type_info_static()] = CREATE_UNDEFINED_EMITTER(snippets::op::ReduceSum);
+    jitters[snippets::op::ReduceMax::get_type_info_static()] = CREATE_UNDEFINED_EMITTER();
+    jitters[snippets::op::ReduceSum::get_type_info_static()] = CREATE_UNDEFINED_EMITTER();
 
 #ifdef SNIPPETS_DEBUG_CAPS
     jitters[snippets::op::PerfCountBegin::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_perf_count_chrono_start_emitter);
@@ -241,6 +241,16 @@ bool intel_cpu::CPUTargetMachine::is_supported() const {
     return dnnl::impl::cpu::x64::mayiuse(isa);
 }
 
+std::set<ov::element::TypeVector> intel_cpu::CPUTargetMachine::supported_precisions_for_emitterless_node(const ov::DiscreteTypeInfo& type) const {
+    static const std::map<ov::DiscreteTypeInfo, std::set<ov::element::TypeVector>> supported_precisions_map{
+        {snippets::op::ReduceMax::get_type_info_static(), {{ov::element::f32}}},
+        {snippets::op::ReduceSum::get_type_info_static(), {{ov::element::f32}}},
+    };
+    auto it = supported_precisions_map.find(type);
+    OPENVINO_ASSERT(it != supported_precisions_map.end(), "supported precisions set for node without emitter is not set. Type info: ", type);
+    return it->second;
+}
+
 snippets::CompiledSnippetPtr intel_cpu::CPUTargetMachine::get_snippet() {
     if (h->create_kernel() != dnnl::impl::status::success) {
         OPENVINO_THROW("Failed to create jit_kernel in get_snippet()");

diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.hpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.hpp
@@ -33,6 +33,7 @@ class CPUTargetMachine : public snippets::TargetMachine {
     snippets::CompiledSnippetPtr get_snippet() override;
     size_t get_lanes() const override;
     dnnl::impl::cpu::x64::cpu_isa_t get_isa() const;
+    std::set<ov::element::TypeVector> supported_precisions_for_emitterless_node(const ov::DiscreteTypeInfo& type) const override;
 #ifdef SNIPPETS_DEBUG_CAPS
     SnippetsDebugCapsConfig debug_config;
 #endif

diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/lowered/buffer_allocation.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/lowered/buffer_allocation.cpp
@@ -156,11 +156,11 @@ class MHABF16AMXBufferAllocationTest : public BufferAllocationCPUTest {
         const auto relu1 = std::make_shared<ov::op::v0::Relu>(brgemm_cpu0);
 
         // Decomposed Softmax
-        const auto reduce_max = ov::snippets::op::ReduceMax::make_reduce_max(relu1, 3);
+        const auto reduce_max = ov::snippets::op::ReduceMax::make(relu1, 3);
         const auto subtract = std::make_shared<ov::op::v1::Subtract>(relu1, reduce_max);
         const auto exp = std::make_shared<ov::op::v0::Exp>(subtract);
 
-        const auto reduce_sum = ov::snippets::op::ReduceSum::make_reduce_sum(exp, 3);
+        const auto reduce_sum = ov::snippets::op::ReduceSum::make(exp, 3);
         const auto power = std::make_shared<ov::snippets::op::PowerStatic>(reduce_sum, -1.f);
         const auto multiply = std::make_shared<ov::op::v1::Multiply>(exp, power);