Skip to content

Commit

Permalink
Merge branch 'master' into river/capi_add_status_for_wait_for
Browse files Browse the repository at this point in the history
  • Loading branch information
riverlijunjie authored Oct 27, 2023
2 parents 8ea781d + fd88a6b commit 98fb56d
Show file tree
Hide file tree
Showing 81 changed files with 1,979 additions and 832 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -565,9 +565,7 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None
),
(
xfail_issue_99955,
"OnnxBackendNodeModelTest.test_group_normalization_epsilon_cpu",
"OnnxBackendNodeModelTest.test_group_normalization_epsilon_expanded_cpu",
"OnnxBackendNodeModelTest.test_group_normalization_example_cpu",
"OnnxBackendNodeModelTest.test_group_normalization_example_expanded_cpu",
),
(
Expand Down
10 changes: 10 additions & 0 deletions src/common/snippets/include/snippets/lowered/port_descriptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,16 @@ class PortDescriptor {
VectorDims m_subtensor_shape{};
/// \brief The corresponding abstract/physical register
size_t m_reg = 0;

/// Notes:
///   - `m_tensor_shape` is the dense shape which is controlled by expression outputs.
///     It means that the result of data writing of expression outputs should be read using this shape by the next expression inputs.
///   - `m_layout` is the order of data reading or writing by MemoryAccess ops. Note that only MemoryAccess ops may have `m_layout`.
///     For other expressions this order parameter is simply ignored for now.
///     If it's an input port of a MemoryAccess expression:
///       - `m_layout` shows how the data should be read (by which strides) using `m_tensor_shape`.
///     If it's an output port of a MemoryAccess expression:
///       - `m_layout` shows how the data should be written (by which strides) to get `m_tensor_shape`.
};

class PortDescriptorUtils {
Expand Down
2 changes: 1 addition & 1 deletion src/common/snippets/include/snippets/op/subgraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include <openvino/op/util/sub_graph_base.hpp>
#include "openvino/op/op.hpp"
#include "openvino/core/rt_info.hpp"
#include "snippets/pass_manager.hpp"
#include "snippets/pass/manager.hpp"
#include "snippets/shape_inference/shape_inference.hpp"
#include "snippets/lowered/pass/pass.hpp"

Expand Down
20 changes: 6 additions & 14 deletions src/common/snippets/include/snippets/pass/common_optimizations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,22 @@
#pragma once

#include "openvino/pass/graph_rewrite.hpp"
#include "snippets/op/subgraph.hpp"
#include "snippets/pass/tokenization.hpp"

namespace ov {
namespace snippets {
namespace pass {

class CommonOptimizations : public ov::pass::MatcherPass {
class SubgraphPass;
class SubgraphManager;
friend class ExtractConstants;
friend class ExtractUnsupportedTransposes;
friend class SplitDimensionM;

public:
OPENVINO_RTTI("CommonOptimizations", "0");
CommonOptimizations(const SnippetsTokenization::Config& config = {});

// Returns True if parallelism work amount can be increased using SplitDimensionM optimization
static bool CanOptimizeParallelWA(const std::shared_ptr<const ov::Node>& node, size_t concurrency);

private:
// Move up Constants which aren't scalars from body to Subgraph and replace them with Parameters inside body
void ExtractConstants(const std::shared_ptr<op::Subgraph>& subgraph);
// Move up unsupported Transposes on Parameter outputs from body
void ExtractUnsupportedTransposes(const std::shared_ptr<op::Subgraph>& subgraph);
// Insert Reshape nodes after and before Parameters and Results in Subgraphs with MatMul inside
// to split dimension M for MatMuls to increase work amount for parallelism
// Note: works only with 3D MHA patterns
void SplitDimensionM(const std::shared_ptr<op::Subgraph>& subgraph, size_t concurrency);
};

} // namespace pass
Expand Down
29 changes: 29 additions & 0 deletions src/common/snippets/include/snippets/pass/extract_constants.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "subgraph_pass.hpp"

namespace ov {
namespace snippets {
namespace pass {

/**
 * @interface ExtractConstants
 * @brief Moves up Constants which aren't scalars outside of the Subgraph's body and replaces them with Parameters inside body
 * @ingroup snippets
 */
class ExtractConstants: public CommonOptimizations::SubgraphPass {
public:
    OPENVINO_RTTI("ExtractConstants", "0");
    ExtractConstants() = default;

    /// @brief Applies the transformation to the given Subgraph op
    /// @param subgraph the Subgraph op whose body is inspected for non-scalar Constants
    /// @return true if the Subgraph was changed, false otherwise
    bool run_on_subgraph(const std::shared_ptr<op::Subgraph>& subgraph) override;
};


} // namespace pass
} // namespace snippets
} // namespace ov
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "subgraph_pass.hpp"

namespace ov {
namespace snippets {
namespace pass {

/**
 * @interface ExtractUnsupportedTransposes
 * @brief Moves up unsupported Transposes on Parameter outputs from body
 * @ingroup snippets
 */
class ExtractUnsupportedTransposes: public CommonOptimizations::SubgraphPass {
public:
    OPENVINO_RTTI("ExtractUnsupportedTransposes", "0");
    ExtractUnsupportedTransposes() = default;

    /// @brief Applies the transformation to the given Subgraph op
    /// @param subgraph the Subgraph op whose Parameter outputs are checked for unsupported Transposes
    /// @return true if the Subgraph was changed, false otherwise
    bool run_on_subgraph(const std::shared_ptr<op::Subgraph>& subgraph) override;
};


} // namespace pass
} // namespace snippets
} // namespace ov
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,17 @@ namespace pass {
/**
* @interface FuseTransposeBrgemm
* @brief Fuses Transpose with Brgemm node, fusing on both Brgemm inputs and output is supported. Applicable to
* Transposes that don't change the position of the last dimension (since Brgemm supports strided rows i/o),
* but only 0213 Transpose is currently supported.
* Transposes that don't change the position of the last dimension (since Brgemm supports strided rows i/o).
* Supported any Transpose order where last index is equal to [rank - 1] - it means that last dimension isn't moved.
* @ingroup snippets
*/
class FuseTransposeBrgemm: public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("FuseTransposeBrgemm", "0");
FuseTransposeBrgemm();
static const std::set<std::vector<int>> supported_cases;

private:
static bool is_supported_transpose(const Output<Node>& transpose_port);
static bool is_supported_transpose(const Output<Node>& transpose_out);
static bool is_supported_transpose_order(const std::vector<int32_t>& order);
};

} // namespace pass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,18 @@
//

#pragma once

#include "openvino/pass/manager.hpp"
#include "openvino/pass/pass.hpp"
#include "openvino/pass/validate.hpp"

#include <typeinfo>


namespace ov {
namespace snippets {
namespace pass {

/**
* @brief Manager is like ov::pass::Manager, but allows to insert new passes at arbitrary places in the pipeline
* @ingroup snippets
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class TokenizeMHASnippets: public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("TokenizeMHASnippets", "0");
TokenizeMHASnippets(const SnippetsTokenization::Config& config = {});

static std::vector<int32_t> get_fusion_transpose_order(size_t rank);
static std::vector<int32_t> get_decomposed_transpose_order(size_t rank);
static bool is_matmul0_supported(const std::shared_ptr<ov::opset1::MatMul>& matmul);
};

Expand Down
44 changes: 44 additions & 0 deletions src/common/snippets/include/snippets/pass/split_dimension_m.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "subgraph_pass.hpp"

namespace ov {
namespace snippets {
namespace pass {

/**
* @interface SplitDimensionM
* @brief Inserts Reshape nodes before inputs and after outputs of Subgraphs with MatMul inside
* to split dimension M for MatMuls. It allows to increase work amount for parallelism
* @ingroup snippets
*/
class SplitDimensionM: public CommonOptimizations::SubgraphPass {
public:
OPENVINO_RTTI("SplitDimensionM", "0");
SplitDimensionM(size_t concurrency) : m_concurrency(concurrency) {}

bool run_on_subgraph(const std::shared_ptr<op::Subgraph>& subgraph) override;

// Return True if the MatMul node is supported by this optimization
static bool is_supported_matmul(const std::shared_ptr<const ov::Node>& node);
// Returns True if parallelism work amount (concurrency) can be increased by this optimization
static bool can_be_optimized(const std::shared_ptr<const ov::Node>& node, size_t concurrency);

private:
static std::shared_ptr<ov::op::v0::MatMul> get_matmul(const std::shared_ptr<op::Subgraph>& subgraph);
static std::pair<size_t, size_t> get_splited_dimensions(size_t batch_dim, size_t m_dim, size_t optimal_parallelism_work_amount);
static bool split(const ov::Shape& shape, size_t optimal_parallelism_work_amount, size_t& batch_m_dim, size_t& new_m_dim);

void reshape_subgraph(const std::shared_ptr<op::Subgraph>& subgraph, const ov::Shape& shape, size_t batch_m_dim, size_t new_m_dim);

size_t m_concurrency;
};


} // namespace pass
} // namespace snippets
} // namespace ov
49 changes: 49 additions & 0 deletions src/common/snippets/include/snippets/pass/subgraph_manager.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <typeinfo>
#include <vector>

#include "snippets/pass/common_optimizations.hpp"

#include "snippets/pass/subgraph_pass.hpp"
#include "snippets/op/subgraph.hpp"

namespace ov {
namespace snippets {
namespace pass {
/**
 * @brief Manager class allows to manage transformation passes (SubgraphPasses) on Subgraph ops.
 *        See the SubgraphPass description for more details.
 *        It is a light version of the ov::pass::Manager implementation, the purpose of which is to change
 *        only the Subgraph as a separate node in the model.
 * @ingroup snippets
 */
class CommonOptimizations::SubgraphManager {
public:
    SubgraphManager() = default;

    /// @brief Register given transformation class type to execution list
    /// @tparam T    the pass type; must derive from SubgraphPass
    /// @tparam Args constructor argument types forwarded to T's constructor
    /// @return shared_ptr to the transformation instance
    template <typename T, class... Args>
    std::shared_ptr<T> register_pass(Args&&... args) {
        static_assert(std::is_base_of<SubgraphPass, T>::value, "pass not derived from SubgraphPass base");
        auto pass = std::make_shared<T>(std::forward<Args>(args)...);
        // Stored upcast to the base interface; the caller keeps the derived-typed handle
        m_pass_list.push_back(std::static_pointer_cast<SubgraphPass>(pass));
        return pass;
    }

    /// @brief Runs registered transformations on a given Subgraph op
    /// @param subgraph the Subgraph op to transform
    /// @return Returns true if the subgraph was changed by transformations, false otherwise.
    bool run_passes(std::shared_ptr<ov::snippets::op::Subgraph> subgraph);

protected:
    // Registered passes in registration order
    std::vector<std::shared_ptr<SubgraphPass>> m_pass_list;
};
} // namespace pass
} // namespace snippets
} // namespace ov
45 changes: 45 additions & 0 deletions src/common/snippets/include/snippets/pass/subgraph_pass.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <typeinfo>

#include "snippets/pass/common_optimizations.hpp"


namespace ov {
namespace snippets {
namespace pass {

/**
 * @brief Base class for Subgraph passes.
 * The pass runs on `Subgraph` op that allows users to transform
 * `Subgraph` as node and `body` of this `Subgraph` as model at the same time.
 * These passes may change `Subgraph` as node, its `body` and other ops around `Subgraph` in model.
 * To avoid unsafe changes of other ops in model, SubgraphPass is not derived from ov::Pass to avoid
 * registration to ov::Model
 * @ingroup snippets
 */
class CommonOptimizations::SubgraphPass {
public:
    SubgraphPass() = default;
    virtual ~SubgraphPass() = default;

    /// @brief Applies the transformation to the given Subgraph op
    /// @param subgraph the Subgraph op to transform
    /// @return true if the Subgraph (or the model around it) was changed, false otherwise
    virtual bool run_on_subgraph(const std::shared_ptr<op::Subgraph>& subgraph) = 0;

    // Sink parameter: taken by value and moved, so rvalue callers pay a move instead of a copy
    void set_name(std::string name) { m_name = std::move(name); }
    std::string get_name() const { return m_name; }

    using type_info_t = DiscreteTypeInfo;
    /// @brief RTTI hook implemented by derived passes (typically via OPENVINO_RTTI)
    virtual const type_info_t& get_type_info() const = 0;

private:
    std::string m_name;
};


} // namespace pass
} // namespace snippets
} // namespace ov
13 changes: 11 additions & 2 deletions src/common/snippets/include/snippets/pass/tokenization.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ class EnumerateNodes : public ov::pass::ModelPass {
* 2. MHA tokenization
* 3. Common tokenization
* 4. Some common transformations for Subgraphs. For example, FakeQuantize decomposition
* Naming policy:
*  - During tokenization, a new Subgraph op takes the name of the last tokenized op.
*    This is needed to preserve the output names of the model in cases when the tokenized op was right before a model Result.
*  - If some transformation (for example, SplitDimensionM) inserts a new op after the Subgraph,
*    that op should take the name of this Subgraph to preserve the output name. The Subgraph name is updated using the suffix "_original".
* @ingroup snippets
*/
class SnippetsTokenization : public ov::pass::ModelPass {
Expand All @@ -61,9 +66,9 @@ class SnippetsTokenization : public ov::pass::ModelPass {
* @ingroup snippets
*/
struct Config {
Config(size_t concurrency = 1, bool split_m_dimension = true, bool enable_transpose_on_output = true)
Config(size_t concurrency = 1, bool split_m_dimension = true, bool enable_transpose_on_output = true, std::set<size_t> mha_transpose_ranks = {3, 4})
: concurrency(concurrency), split_m_dimension(split_m_dimension),
mha_token_enable_transpose_on_output(enable_transpose_on_output) {}
mha_token_enable_transpose_on_output(enable_transpose_on_output), mha_supported_transpose_ranks(std::move(mha_transpose_ranks)) {}

size_t concurrency = 1;
// True if "SplitDimensionM" optimization is enabled. Otherwise, it's disabled.
Expand All @@ -72,6 +77,10 @@ class SnippetsTokenization : public ov::pass::ModelPass {
// Otherwise, it may be fused into Subgraph if possible
// TODO [111813]: Remove please when the ticket 111813 is implemented
bool mha_token_enable_transpose_on_output = true;
// Set of supported Transpose shape ranks for tokenization in MHATokenization pass.
// Note that in general Snippets support Transpose of any ranks.
// But at the moment Transpose is used only in MHA pattern where 3D and 4D tensors are supported.
std::set<size_t> mha_supported_transpose_ranks = { 3, 4 };
};

OPENVINO_RTTI("SnippetsTokenization", "0");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ class TransposeDecomposition: public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("TransposeDecomposition", "0");
TransposeDecomposition();
static const std::set<std::vector<int>> supported_cases;

static bool is_supported_transpose(const Output<Node>& transpose_out);
static bool is_supported_transpose_order(const std::vector<int32_t>& order);
};

} // namespace pass
Expand Down
Loading

0 comments on commit 98fb56d

Please sign in to comment.