Skip to content

Commit

Permalink
Alexandra's comments applied: 1st part
Browse files Browse the repository at this point in the history
  • Loading branch information
v-Golubev committed Jan 26, 2024
1 parent a23670a commit 5961ea2
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 31 deletions.
14 changes: 13 additions & 1 deletion src/common/snippets/include/snippets/op/reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ namespace op {
/**
* @interface ReduceBase
* @brief Base class for reduce operations.
* @arg m_axis reduce axis.
* @param m_axis reduce axis.
* @ingroup snippets
*/
class ReduceBase : public ov::op::Op {
Expand All @@ -41,6 +41,12 @@ class ReduceSum : public ReduceBase {
static std::set<ov::element::TypeVector> get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
return {{ov::element::f32}};
}
/**
* @brief Creates ReduceSum operation, computes and sets input/output subtensors
* @param x Reduce input
* @param axis Reduce axis
*/
static std::shared_ptr<ReduceSum> make_reduce_sum(const Output<Node>& x, size_t axis);
};

class ReduceMax : public ReduceBase {
Expand All @@ -52,6 +58,12 @@ class ReduceMax : public ReduceBase {
static std::set<ov::element::TypeVector> get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
return {{ov::element::f32}};
}
/**
* @brief Creates ReduceMax operation, computes and sets input/output subtensors
* @param x Reduce input
* @param axis Reduce axis
*/
static std::shared_ptr<ReduceMax> make_reduce_max(const Output<Node>& x, size_t axis);
};

} // namespace op
Expand Down
30 changes: 28 additions & 2 deletions src/common/snippets/src/op/reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,28 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "snippets/itt.hpp"

#include "snippets/op/reduce.hpp"

#include "snippets/itt.hpp"
#include "snippets/lowered/port_descriptor.hpp"

namespace ov {
namespace snippets {
namespace op {
namespace {
// Computes the subtensor for a Reduce op and attaches it to the op's input and
// output port descriptors: dimensions before the reduce axis get size 1, while
// the axis itself and every trailing dimension get FULL_DIM.
// The input rank must be static; asserts otherwise.
void compute_and_set_reduce_subtensors(const std::shared_ptr<ReduceBase>& reduce) {
OPENVINO_ASSERT(reduce->get_input_partial_shape(0).rank().is_static(),
"Subtensors can be automatically calculated only for reduce with static rank.");
const auto reduce_rank = reduce->get_input_partial_shape(0).size();
const auto axis = reduce->get_axis();

// Start with an all-ones subtensor, then mark the axis and all dims after it as full.
std::vector<size_t> subtensor(reduce_rank, 1);
for (size_t i = axis; i < reduce_rank; ++i)
subtensor[i] = lowered::PortDescriptor::ServiceDimensions::FULL_DIM;
// Register the same subtensor on both the input and the output port of the op.
lowered::PortDescriptorUtils::set_port_descriptor_ptr(reduce->input(0), std::make_shared<lowered::PortDescriptor>(reduce->input(0), subtensor));
lowered::PortDescriptorUtils::set_port_descriptor_ptr(reduce->output(0), std::make_shared<lowered::PortDescriptor>(reduce->output(0), subtensor));
}
} // namespace

ReduceBase::ReduceBase(const Output<Node>& x, size_t axis) : Op({x}), m_axis(axis) {
constructor_validate_and_infer_types();
Expand All @@ -32,12 +46,24 @@ std::shared_ptr<Node> ReduceSum::clone_with_new_inputs(const OutputVector& new_a
return std::make_shared<ReduceSum>(new_args.at(0), m_axis);
}

// Factory helper: constructs a ReduceSum over `x` along `axis` and immediately
// computes and sets its input/output subtensors, so callers do not have to
// configure port descriptors manually.
std::shared_ptr<ReduceSum> ReduceSum::make_reduce_sum(const Output<Node>& x, size_t axis) {
const auto reduce = std::make_shared<ReduceSum>(x, axis);
compute_and_set_reduce_subtensors(reduce);
return reduce;
}

// Clones this op onto new inputs, preserving the stored reduce axis (m_axis).
std::shared_ptr<Node> ReduceMax::clone_with_new_inputs(const OutputVector& new_args) const {
INTERNAL_OP_SCOPE(ReduceMax);
check_new_args_count(this, new_args);
return std::make_shared<ReduceMax>(new_args.at(0), m_axis);
}

// Factory helper: constructs a ReduceMax over `x` along `axis` and immediately
// computes and sets its input/output subtensors, so callers do not have to
// configure port descriptors manually.
std::shared_ptr<ReduceMax> ReduceMax::make_reduce_max(const Output<Node>& x, size_t axis) {
const auto reduce = std::make_shared<ReduceMax>(x, axis);
compute_and_set_reduce_subtensors(reduce);
return reduce;
}

} // namespace op
} // namespace snippets
} // namespace ov
4 changes: 2 additions & 2 deletions src/common/snippets/src/pass/collapse_subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,10 @@ auto is_supported_op(const std::shared_ptr<const Node> &n) -> bool {
if (ov::is_type<const ov::op::v1::ReduceMax>(n) || ov::is_type<const ov::op::v1::ReduceSum>(n)) {
const auto& reduce_base = ov::as_type_ptr<const ov::op::util::ArithmeticReductionKeepDims>(n);
const auto& axis_constant = ov::as_type_ptr<const ov::op::v0::Constant>(n->get_input_node_shared_ptr(1));
if (!reduce_base->get_keep_dims() || !axis_constant || shape_size(axis_constant->get_shape()) != 1)
const auto rank = n->get_input_partial_shape(0).rank();
if (rank.is_dynamic() || !reduce_base->get_keep_dims() || !axis_constant || shape_size(axis_constant->get_shape()) != 1)
return false;

const auto rank = n->get_input_partial_shape(0).rank();
const auto axis_value = axis_constant->cast_vector<int32_t>(1)[0];
const auto normalized_axis = ov::util::normalize_axis(n->get_friendly_name(), axis_value, rank);
// Note: Reduction only over the last dimension is currently supported
Expand Down
12 changes: 4 additions & 8 deletions src/common/snippets/src/pass/reduce_to_snippets_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,24 +29,20 @@ snippets::pass::ReduceToSnippetsReduce::ReduceToSnippetsReduce() {
const auto& axis_constant = ov::as_type_ptr<ov::op::v0::Constant>(reduce->get_input_node_shared_ptr(1));
// Note: we do not check the Constant value here. If the Reduce was tokenized, then we assume that it is supported
OPENVINO_ASSERT(reduce_base->get_keep_dims() && axis_constant, "Unsupported Reduce was tokenized by Snippets");

const auto& data_input = reduce->get_input_source_output(0);
const auto reduce_rank = reduce->get_input_partial_shape(0).rank();
OPENVINO_ASSERT(reduce_rank.is_static(), "ReduceToSnippetsReduce doesn't support dynamic ranks.");
const auto axis = ov::util::normalize_axis(reduce->get_friendly_name(), axis_constant->cast_vector<int32_t>(1)[0], reduce_rank);

std::shared_ptr<snippets::op::ReduceBase> snippets_reduce = nullptr;
if (ov::is_type<ov::op::v1::ReduceSum>(reduce))
snippets_reduce = std::make_shared<snippets::op::ReduceSum>(data_input, axis);
snippets_reduce = ov::snippets::op::ReduceSum::make_reduce_sum(data_input, axis);
else if (ov::is_type<ov::op::v1::ReduceMax>(reduce))
snippets_reduce = std::make_shared<snippets::op::ReduceMax>(data_input, axis);
snippets_reduce = ov::snippets::op::ReduceMax::make_reduce_max(data_input, axis);
else
OPENVINO_THROW("Reduce ", reduce, " can't be converted to snippets opset.");

std::vector<size_t> subtensor(reduce_rank.get_length(), 1);
for (auto i = axis; i < reduce_rank.get_length(); ++i)
subtensor[i] = PortDescriptor::ServiceDimensions::FULL_DIM;
PortDescriptorUtils::set_port_descriptor_ptr(snippets_reduce->input(0), std::make_shared<PortDescriptor>(snippets_reduce->input(0), subtensor));
PortDescriptorUtils::set_port_descriptor_ptr(snippets_reduce->output(0), std::make_shared<PortDescriptor>(snippets_reduce->output(0), subtensor));

ov::replace_node(reduce, snippets_reduce);
snippets_reduce->set_friendly_name(reduce->get_friendly_name());
ov::copy_runtime_info(reduce, snippets_reduce);
Expand Down
8 changes: 2 additions & 6 deletions src/common/snippets/src/pass/softmax_decomposition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ SoftmaxDecomposition::SoftmaxDecomposition() {
}

const auto& softmax_input = softmax->input_value(0);
const auto reduce_max = std::make_shared<ov::snippets::op::ReduceMax>(softmax_input, axis);
const auto reduce_max = ov::snippets::op::ReduceMax::make_reduce_max(softmax_input, axis);
const auto subtract = std::make_shared<ov::op::v1::Subtract>(softmax_input, reduce_max);
const auto exp = std::make_shared<ov::op::v0::Exp>(subtract);

const auto reduce_sum = std::make_shared<ov::snippets::op::ReduceSum>(exp, axis);
const auto reduce_sum = ov::snippets::op::ReduceSum::make_reduce_sum(exp, axis);
const auto power = std::make_shared<ov::snippets::op::PowerStatic>(reduce_sum, -1.f);
const auto multiply = std::make_shared<ov::op::v1::Multiply>(exp, power);

Expand All @@ -55,10 +55,6 @@ SoftmaxDecomposition::SoftmaxDecomposition() {
for (size_t i = axis; i < rank; ++i)
subtensor[i] = PortDescriptor::ServiceDimensions::FULL_DIM;

PortDescriptorUtils::set_port_descriptor_ptr(reduce_max->input(0), std::make_shared<PortDescriptor>(reduce_max->input(0), subtensor));
PortDescriptorUtils::set_port_descriptor_ptr(reduce_max->output(0), std::make_shared<PortDescriptor>(reduce_max->output(0), subtensor));
PortDescriptorUtils::set_port_descriptor_ptr(reduce_sum->input(0), std::make_shared<PortDescriptor>(reduce_sum->input(0), subtensor));
PortDescriptorUtils::set_port_descriptor_ptr(reduce_sum->output(0), std::make_shared<PortDescriptor>(reduce_sum->output(0), subtensor));
PortDescriptorUtils::set_port_descriptor_ptr(power->input(0), std::make_shared<PortDescriptor>(power->input(0), subtensor));
PortDescriptorUtils::set_port_descriptor_ptr(power->output(0), std::make_shared<PortDescriptor>(power->output(0), subtensor));

Expand Down
10 changes: 4 additions & 6 deletions src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ std::shared_ptr<ov::Model> MHABufferAllocationTest::GetModel() const {
const auto subtensor_scalar = std::vector<size_t>{1};
const auto subtensor_eltwise = std::vector<size_t>{1, m_vector_size};
const auto subtensor_brgemm = std::vector<size_t>{32, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM};
const auto subtensor_softmax = std::vector<size_t>{1, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM};
const auto subtensor_power = std::vector<size_t>{1, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM};

const auto parameter0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape({1, 12, 128, 64}));
const auto parameter1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape({1, 128, 12, 64}));
Expand All @@ -142,11 +142,11 @@ std::shared_ptr<ov::Model> MHABufferAllocationTest::GetModel() const {
const auto relu1 = std::make_shared<ov::op::v0::Relu>(matmul0);

// Decomposed Softmax
const auto reduce_max = std::make_shared<ov::snippets::op::ReduceMax>(relu1, 3);
const auto reduce_max = ov::snippets::op::ReduceMax::make_reduce_max(relu1, 3);
const auto subtract = std::make_shared<ov::op::v1::Subtract>(relu1, reduce_max);
const auto exp = std::make_shared<ov::op::v0::Exp>(subtract);

const auto reduce_sum = std::make_shared<ov::snippets::op::ReduceSum>(exp, 3);
const auto reduce_sum = ov::snippets::op::ReduceSum::make_reduce_sum(exp, 3);
const auto power = std::make_shared<ov::snippets::op::PowerStatic>(reduce_sum, -1.f);
const auto multiply = std::make_shared<ov::op::v1::Multiply>(exp, power);

Expand All @@ -157,9 +157,7 @@ std::shared_ptr<ov::Model> MHABufferAllocationTest::GetModel() const {

MarkOp(load_reshape, subtensor_scalar);
MarkOp(store, subtensor_scalar);
MarkOp(reduce_max, subtensor_softmax);
MarkOp(reduce_sum, subtensor_softmax);
MarkOp(power, subtensor_softmax);
MarkOp(power, subtensor_power);

MarkBrgemm(matmul0, subtensor_brgemm);
MarkBrgemm(matmul1, subtensor_brgemm);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ class MHABF16AMXBufferAllocationTest : public BufferAllocationCPUTest {
protected:
std::shared_ptr<ov::Model> GetModel() const override {
const auto subtensor_scalar = std::vector<size_t>{1};
const auto subtensor_softmax = std::vector<size_t>{1, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM};
const auto subtensor_power = std::vector<size_t>{1, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM};
const auto subtensor_full = std::vector<size_t>(2, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM);

const auto parameter0 = std::make_shared<ov::op::v0::Parameter>(ov::element::bf16, ov::PartialShape({1, 12, 128, 64}));
Expand All @@ -156,11 +156,11 @@ class MHABF16AMXBufferAllocationTest : public BufferAllocationCPUTest {
const auto relu1 = std::make_shared<ov::op::v0::Relu>(brgemm_cpu0);

// Decomposed Softmax
const auto reduce_max = std::make_shared<ov::snippets::op::ReduceMax>(relu1, 3);
const auto reduce_max = ov::snippets::op::ReduceMax::make_reduce_max(relu1, 3);
const auto subtract = std::make_shared<ov::op::v1::Subtract>(relu1, reduce_max);
const auto exp = std::make_shared<ov::op::v0::Exp>(subtract);

const auto reduce_sum = std::make_shared<ov::snippets::op::ReduceSum>(exp, 3);
const auto reduce_sum = ov::snippets::op::ReduceSum::make_reduce_sum(exp, 3);
const auto power = std::make_shared<ov::snippets::op::PowerStatic>(reduce_sum, -1.f);
const auto multiply = std::make_shared<ov::op::v1::Multiply>(exp, power);

Expand All @@ -181,9 +181,7 @@ class MHABF16AMXBufferAllocationTest : public BufferAllocationCPUTest {

MarkOp(load_reshape, subtensor_scalar);
MarkOp(store, subtensor_scalar);
MarkOp(reduce_max, subtensor_softmax);
MarkOp(reduce_sum, subtensor_softmax);
MarkOp(power, subtensor_softmax);
MarkOp(power, subtensor_power);

MarkOp(brgemm_cpu0, subtensor_full);
MarkOp(brgemm_cpu1, subtensor_full);
Expand Down

0 comments on commit 5961ea2

Please sign in to comment.