Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[INTEGRATION - TEST] [Snippets] BF16 enforce in snippets extended #16825

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/common/snippets/include/snippets/op/brgemm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class Brgemm : public MemoryAccess {
size_t get_offset_b() const { return get_input_offset(1); }
size_t get_offset_c() const { return get_output_offset(0); }

static ov::element::Type get_output_type(const ov::element::Type& in_type0, const ov::element::Type& in_type1);

void validate_and_infer_types() override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

Expand Down
15 changes: 11 additions & 4 deletions src/common/snippets/include/snippets/op/buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,18 @@ namespace op {
* - All buffers in a graph have the same memory pointer. So if we have a few buffers,
* each the corresponding MemoryAccess op for Buffer should have offset for common memory pointer of this Buffer
* - Buffer should be a single consumer for operation output port
* @param m_type - type of Buffer: IntermediateMemory/NewMemory
* @param m_shape - output allocation shape for Buffer with type NewMemory
* @param m_id - Buffer ID in common Buffer system
* @ingroup snippets
*/
class Buffer : public ngraph::op::Op {
public:
OPENVINO_OP("Buffer", "SnippetsOpset");
Buffer() = default;
Buffer(const ov::Shape& shape);
Buffer(const ov::Output<ov::Node>& arg, const ov::Shape& shape);
Buffer(const ov::Output<ov::Node>& arg, int32_t allocation_rank = -1);
Buffer(const ov::Shape& shape, size_t id = 0);
Buffer(const ov::Output<ov::Node>& arg, const ov::Shape& shape, size_t id = 0);
Buffer(const ov::Output<ov::Node>& arg, int32_t allocation_rank = -1, size_t id = 0);

bool visit_attributes(AttributeVisitor& visitor) override;
void validate_and_infer_types() override;
Expand All @@ -38,16 +41,20 @@ class Buffer : public ngraph::op::Op {
IntermediateMemory
};

void set_id(size_t id) { m_id = id; }

size_t get_id() const { return m_id; }
size_t get_byte_size() const;
Type get_type() const { return m_type; }
ov::Shape get_allocation_shape() const { return m_shape; }
size_t get_byte_size() const;

bool is_intermediate_memory() const { return m_type == Type::IntermediateMemory; }
bool is_new_memory() const { return m_type == Type::NewMemory; }

private:
Type m_type = Type::IntermediateMemory;
ov::Shape m_shape = {};
size_t m_id = 0;
};

} // namespace op
Expand Down
1 change: 1 addition & 0 deletions src/common/snippets/include/snippets/op/loop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ class LoopEnd : public LoopBase {
std::vector<int64_t> ptr_increments, std::vector<int64_t> finalization_offsets);
LoopEnd() = default;
std::shared_ptr<LoopBegin> get_loop_begin();
bool visit_attributes(AttributeVisitor& visitor) override;
void validate_and_infer_types() override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override;
const std::vector<int64_t>& get_finalization_offsets() const;
Expand Down
12 changes: 3 additions & 9 deletions src/common/snippets/include/snippets/op/subgraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ class Subgraph : public ov::op::util::SubGraphOp {

size_t get_buffer_scratchpad_size() const { return m_buffer_scratchpad; }
size_t get_virtual_port_count() const { return m_virtual_port_count; }
bool is_buffer_needed() const { return m_buffer_needed; }
bool is_quantized() const { return config.m_is_quantized; }
bool has_type_relaxed_ops() const { return config.m_has_type_relaxed_ops; }
bool has_domain_sensitive_ops() const { return config.m_has_domain_sensitive_ops; }
Expand All @@ -122,7 +121,6 @@ class Subgraph : public ov::op::util::SubGraphOp {
void set_generator(std::shared_ptr<ngraph::snippets::Generator> generator);
void set_tile_rank(size_t newRank) {tileRank = newRank;}
void set_virtual_port_count(const size_t count);
void set_buffer_needed(const bool need);

void print() const;
void print_statistics(bool verbose);
Expand All @@ -137,8 +135,10 @@ class Subgraph : public ov::op::util::SubGraphOp {
// should have explicit Constants even if they're non-scalar (Reshape, Transpose, Broadcast)
// This check returns True if Constant op which is input of this op should be inside Subgraph body
static auto constant_input_should_be_inside_body(const std::shared_ptr<ov::Node>& node) -> bool;

static bool check_broadcast(const std::shared_ptr<const ov::Node>& node) noexcept;
// Return the estimated unique buffer count (an upper bound). It's needed for tokenization
static auto get_estimated_buffer_count(const ov::NodeVector& ops) -> size_t;
static auto is_domain_sensitive_op(const std::shared_ptr<ov::Node>& op) -> bool;

private:
void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes);
Expand All @@ -147,19 +147,13 @@ class Subgraph : public ov::op::util::SubGraphOp {
void initialize_buffer_scratchpad_size();
// Count of Subgraph virtual ports:
// - Potential non-scalar Constants that will be created after some transformations (At the moment it's relevant only for FakeQuantize decomposition)
// Need Buffer op or not
// - Buffers. All Buffers are considered as one common additional virtual port. So we cannot summarize them as potential non-scalar Constants
// NOTE: To avoid overheads in each calculation of this count (for example, in validate_and_type_infer()),
// we should MANUALLY calculate it where it needed.
size_t m_virtual_port_count = 0;
bool m_buffer_needed = false;
size_t m_buffer_scratchpad = 0lu;
Shape exec_domain = {};
std::shared_ptr<ngraph::snippets::Generator> m_generator = nullptr;

// TODO: Change logic of insert Converts. This exec element type can be different for plugins
const ov::element::Type execution_element_type = ov::element::f32;

ov::PartialShape master_shape;
size_t tileRank = 0; // set by plugin to specify the number of dimensions processed in a single kernel call

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/pass/graph_rewrite.hpp>
#include <ngraph/pattern/matcher.hpp>

#include "snippets/snippets_isa.hpp"

namespace ngraph {
namespace snippets {
namespace pass {

/**
 * @interface BufferIdentification
 * @brief The pass assigns identifiers to the Buffers in the common Buffer system.
 *        Buffers with the same identifier share the same data register.
 *        The pass uses a greedy graph-coloring algorithm over an adjacency matrix:
 *         - Buffers are the vertices of the graph
 *         - Loops, Brgemm (and similar ops) act as "edges" between Buffers (hubs of edges).
 *           Buffers connected to the same Loop are adjacent in the graph sense.
 *         - Two vertices (buffers) are adjacent if they are connected to the same Loop and
 *           their data pointers cannot be incremented proportionally inside the Loop:
 *           i.e. they have different ptr increments or different data sizes.
 *         - First, the adjacency matrix is built using the definition above
 *         - Second, the graph vertices (buffers) are colored using the adjacency matrix
 *        Note: should be called before the ResetBuffer() pass to have correct offsets
 * @ingroup snippets
 */
class BufferIdentification: public ngraph::pass::FunctionPass {
public:
    OPENVINO_RTTI("BufferIdentification", "0");
    BufferIdentification() = default;

    // Assigns a color (identifier) to every Buffer in the model; returns true if the model was changed
    bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;

private:
    using BufferSet = std::vector<std::shared_ptr<snippets::op::Buffer>>;

    // Builds the (flattened, buffers.size() x buffers.size()) adjacency matrix described above
    std::vector<bool> create_adjacency_matrix(const BufferSet& buffers);
    // Greedy coloring: maps each color (Buffer ID) to the set of Buffers that may share a register
    std::map<size_t, BufferSet> coloring(BufferSet& buffers, std::vector<bool>& adj);
};

} // namespace pass
} // namespace snippets
} // namespace ngraph
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ class PropagatePrecision: public ngraph::pass::FunctionPass {
const element::Type& actual,
const element::Type& required) noexcept;

static bool validate_and_infer_types_and_restore_outputs(const std::shared_ptr<ngraph::Node>& op);

private:
const std::shared_ptr<const TargetMachine> target_machine;
};
Expand Down
9 changes: 9 additions & 0 deletions src/common/snippets/include/snippets/pass/tokenization.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "snippets/pass/mha_tokenization.hpp"
#include "snippets/pass/collapse_subgraph.hpp"
#include "snippets/op/subgraph.hpp"

namespace ngraph {
namespace snippets {
Expand All @@ -19,8 +20,16 @@ namespace pass {
SkippedByPlugin - indicate that snippets can't include this node in subgraph. Can be set by Plugin via SetSnippetsNodeType(...).
*/
enum class SnippetsNodeType : int64_t {NotSet, SkippedByPlugin};
/*
NotSet - default value returned if the subgraph wasn't marked and snippets can include nodes in this subgraph
Completed - indicates that snippets can't include any more nodes in this subgraph.
It's used in separate tokenization pass, for example, tokenization by matcher (MHA Tokenization).
*/
enum class SnippetsSubgraphType : int64_t {NotSet, Completed};
void SetSnippetsNodeType(const std::shared_ptr<Node>&, SnippetsNodeType);
void SetSnippetsSubgraphType(const std::shared_ptr<op::Subgraph>&, SnippetsSubgraphType);
SnippetsNodeType GetSnippetsNodeType(const std::shared_ptr<const Node>&);
SnippetsSubgraphType GetSnippetsSubgraphType(const std::shared_ptr<const op::Subgraph>&);
void SetTopologicalOrder(const std::shared_ptr<Node>&, int64_t);
int64_t GetTopologicalOrder(const std::shared_ptr<const Node>&);

Expand Down
29 changes: 18 additions & 11 deletions src/common/snippets/src/op/brgemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Brgemm::Brgemm(const Output<Node>& A, const Output<Node>& B,
set_output_size(1);
set_input_offset(offset_a, 0);
set_input_offset(offset_b, 1);
set_output_offset(offset_a, 0);
set_output_offset(offset_c, 0);
constructor_validate_and_infer_types();
}

Expand Down Expand Up @@ -45,22 +45,29 @@ std::shared_ptr<Node> Brgemm::clone_with_new_inputs(const OutputVector& new_args
return std::make_shared<Brgemm>(new_args.at(0), new_args.at(1), get_offset_a(), get_offset_b(), get_offset_c());
}

ov::element::Type Brgemm::get_output_type() const {
const auto element_type_a = get_input_element_type(0);
const auto element_type_b = get_input_element_type(1);
const bool is_f32 = utils::everyone_is(element::f32, element_type_a, element_type_b);
const bool is_int8 = utils::one_of(element_type_a, element::i8, element::u8) && element_type_b == element::i8;
const bool is_bf16 = utils::everyone_is(element::bf16, element_type_a, element_type_b);
// Computes the Brgemm output precision for a pair of input precisions:
//  - f32 x f32  -> f32
//  - bf16 x bf16 -> f32 (accumulation in f32)
//  - i8/u8 x i8 -> i32
// Returns element::undefined for any unsupported combination (callers must check).
ov::element::Type Brgemm::get_output_type(const ov::element::Type& in_type0, const ov::element::Type& in_type1) {
    const bool is_f32 = utils::everyone_is(element::f32, in_type0, in_type1);
    const bool is_int8 = utils::one_of(in_type0, element::i8, element::u8) && in_type1 == element::i8;
    const bool is_bf16 = utils::everyone_is(element::bf16, in_type0, in_type1);
    if (is_f32 || is_bf16) {
        // Fixed the duplicated `return element::f32;` statement (dead code left by a merge)
        return element::f32;
    } else if (is_int8) {
        return element::i32;
    } else {
        return element::undefined;
    }
}

// Member overload: derives the output precision from this node's two inputs.
// Throws ngraph_error if the input precision combination is unsupported.
ov::element::Type Brgemm::get_output_type() const {
    const auto& type_a = get_input_element_type(0);
    const auto& type_b = get_input_element_type(1);
    const auto out_type = get_output_type(type_a, type_b);
    if (out_type == element::undefined) {
        throw ngraph_error("BrgemmCPU node has incompatible input element types: " +
                           type_a.get_type_name() +
                           " and " +
                           type_b.get_type_name());
    }
    return out_type;
}

ov::PartialShape Brgemm::get_output_partial_shape(const std::vector<ov::PartialShape>& input_shapes) const {
Expand Down
19 changes: 8 additions & 11 deletions src/common/snippets/src/op/buffer.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (C) 2018-2022 Intel Corporation
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

Expand All @@ -12,22 +12,18 @@
using namespace std;
using namespace ngraph;

// Normalizes a (possibly negative, Python-style) allocation rank against the shape rank:
// negative values count back from shape_rank, non-negative values are returned as-is.
auto normalize_rank(int32_t allocation_rank, const size_t shape_rank) -> int32_t {
    if (allocation_rank < 0) {
        return allocation_rank + static_cast<int32_t>(shape_rank);
    }
    return allocation_rank;
}

snippets::op::Buffer::Buffer(const ov::Shape& shape)
: Op(), m_type(Type::NewMemory), m_shape(shape) {
snippets::op::Buffer::Buffer(const ov::Shape& shape, size_t id)
: Op(), m_type(Type::NewMemory), m_shape(shape), m_id(id) {
constructor_validate_and_infer_types();
}

snippets::op::Buffer::Buffer(const ov::Output<ov::Node>& arg, const ov::Shape& shape)
: Op({arg}), m_type(Type::IntermediateMemory), m_shape(shape) {
snippets::op::Buffer::Buffer(const ov::Output<ov::Node>& arg, const ov::Shape& shape, size_t id)
: Op({arg}), m_type(Type::IntermediateMemory), m_shape(shape), m_id(id) {
constructor_validate_and_infer_types();
}

snippets::op::Buffer::Buffer(const ov::Output<ov::Node>& arg, int32_t allocation_rank)
: Op({arg}), m_type(Type::IntermediateMemory) {
snippets::op::Buffer::Buffer(const ov::Output<ov::Node>& arg, int32_t allocation_rank, size_t id)
: Op({arg}), m_type(Type::IntermediateMemory), m_id(id) {
const auto pshape = arg.get_partial_shape();
OPENVINO_ASSERT(pshape.is_static(), "Buffer supports only static input shape");
const auto shape = pshape.get_shape();
Expand All @@ -40,6 +36,7 @@ snippets::op::Buffer::Buffer(const ov::Output<ov::Node>& arg, int32_t allocation
bool snippets::op::Buffer::visit_attributes(AttributeVisitor& visitor) {
INTERNAL_OP_SCOPE(Buffer_visit_attributes);
visitor.on_attribute("allocation_shape", m_shape);
visitor.on_attribute("id", m_id);
return true;
}

Expand Down
9 changes: 9 additions & 0 deletions src/common/snippets/src/op/loop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,15 @@ void LoopEnd::validate_and_infer_types() {
get_output_descriptor(i).set_tensor_ptr(get_input_descriptor(i).get_output().get_tensor_ptr());
}

// Serializes base-class attributes plus the per-port ptr increments and
// finalization offsets (one named attribute per port index).
bool LoopEnd::visit_attributes(AttributeVisitor& visitor) {
    LoopBase::visit_attributes(visitor);
    const auto port_count = ptr_increments.size();
    for (size_t idx = 0; idx < port_count; ++idx) {
        const auto suffix = std::to_string(idx);
        visitor.on_attribute("ptr_increment_" + suffix, ptr_increments[idx]);
        visitor.on_attribute("finalization_offsets_" + suffix, finalization_offsets[idx]);
    }
    return true;
}

} // namespace op
} // namespace snippets
} // namespace ngraph
Loading