Skip to content

Commit

Permalink
[Snippets] Refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
a-sidorova committed Apr 17, 2023
1 parent be72f40 commit c22b2f6
Show file tree
Hide file tree
Showing 68 changed files with 1,604 additions and 1,442 deletions.
10 changes: 4 additions & 6 deletions src/common/snippets/include/snippets/emitter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,9 @@ class Emitter {
/**
* @brief Default constructor
*/
Emitter(const std::shared_ptr<ngraph::Node>& n) {
}
Emitter(const std::shared_ptr<ngraph::Node>& n) {}

Emitter(std::vector<std::pair<std::shared_ptr<Emitter>, RegInfo>>& region) {
}
Emitter(std::vector<std::pair<std::shared_ptr<Emitter>, RegInfo>>& region) {}

/**
* @brief called by generator to generate code to produce target code for a specific operation
Expand All @@ -44,8 +42,8 @@ class Emitter {
* @brief called by generator to generate data section, if needed for a specific operation
* @return void
*/
virtual void emit_data() const {
}
virtual void emit_data() const {}

virtual ~Emitter() = default;
};

Expand Down
15 changes: 7 additions & 8 deletions src/common/snippets/include/snippets/generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@
#pragma once

#include "snippets_isa.hpp"
#include "emitter.hpp"
#include "target_machine.hpp"
#include "lowered_expr.hpp"
#include "pass/lowered/linear_IR_transformation.hpp"

#include "snippets/lowered/linear_ir.hpp"
#include "snippets/lowered/pass/transformation.hpp"

namespace ngraph {
namespace snippets {
Expand Down Expand Up @@ -46,7 +45,7 @@ class Schedule {
bool is_flat {false};
code ptr {nullptr};
};
class LoweredExprIR;

/**
* @interface Generator
* @brief Target independent code generator interface
Expand Down Expand Up @@ -78,7 +77,7 @@ class Generator {
code binary_code = nullptr;
size_t buffer_scratchpad_size = 0;
};
LoweringResult generate(std::shared_ptr<ov::Model>& m, const LoweringConfig& config, const void* compile_params = nullptr);
LoweringResult generate(std::shared_ptr<ov::Model>& m, const lowered::Config& config, const void* compile_params = nullptr);

/**
* @brief gets target machine
Expand Down Expand Up @@ -111,12 +110,12 @@ class Generator {
/**
* @brief gets target specific transformations for code generation
*/
virtual pass::lowered::LinearIRTransformationPipeline target_specific_transformations() const;
virtual lowered::pass::TransformationPipeline target_specific_transformations() const;

std::shared_ptr<TargetMachine> target;
// todo: we need to save lowered code to access compiled brgemm kernels on execution time (normally lowered is destructed by then).
// This is temporary solution, remove this when kernel caching is implemented. Don't forget to make generate const method.
LoweredExprIR lowered_saved;
lowered::LinearIR lowered_saved;
};

} // namespace snippets
Expand Down
116 changes: 116 additions & 0 deletions src/common/snippets/include/snippets/lowered/expression.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <list>

#include <openvino/core/node.hpp>
#include <openvino/opsets/opset1.hpp>

#include "snippets/tensor_descriptor.hpp"
#include "snippets/emitter.hpp"
#include "snippets/target_machine.hpp"


namespace ngraph {
namespace snippets {
namespace lowered {

class LinearIR;
class Expression;
using ExpressionPtr = std::shared_ptr<Expression>;

/**
 * @interface ExpressionPort
 * @brief Identifies a single input or output port of an Expression:
 *        an (expression, port index) pair tagged with the port direction.
 */
class ExpressionPort {
// Only Expression may create direction-tagged ports (private ctor below)
friend class Expression;

public:
// Direction of the port relative to the owning expression
enum Type {
Input,
Output
};

// Default-constructed port: null expression, port 0, direction Input
ExpressionPort() = default;

Type get_type() const { return m_type; }

ExpressionPtr expr = nullptr;  // expression this port belongs to (null for a default-constructed port)
size_t port = 0;               // port index within the expression's inputs or outputs

private:
// Tagged construction — presumably used by Expression::input_port/output_port; confirm in expression.cpp
ExpressionPort(const ExpressionPtr& expr, size_t port, Type type);

Type m_type = Type::Input;
};

/**
 * @interface Expression
 * @brief Node of the linear IR: wraps an ngraph Node together with its emitter,
 *        register info, input/output tensor descriptors and the ids of the
 *        loops the expression belongs to.
 */
class Expression : public std::enable_shared_from_this<Expression> {
    friend class LinearIR;

public:
    // Sentinel id meaning "expression is not assigned to a loop" (defined in the .cpp).
    // NOTE(review): mutable public static — consider making it constexpr.
    static size_t LOOP_NULL_ID;

    Expression() = default;
    explicit Expression(const std::shared_ptr<Node>& n);
    // The ctor fills outputs automatically from rt_info and/or tensor shapes
    explicit Expression(const std::shared_ptr<Node>& n, std::vector<TensorDescriptorPtr> inputs);
    explicit Expression(const std::shared_ptr<Node>& n, std::vector<TensorDescriptorPtr> inputs, std::vector<TensorDescriptorPtr> outputs);

    virtual ~Expression() = default;

    std::shared_ptr<Node> get_node() const;
    std::shared_ptr<Emitter> get_emitter() const;

    RegInfo get_reg_info() const { return m_reg_info; }
    void set_reg_info(RegInfo rinfo) { m_reg_info = std::move(rinfo); }

    // const-qualified (fix): these read-only accessors were non-const, which made
    // them uncallable on a const Expression even though they don't modify it
    const std::vector<TensorDescriptorPtr>& get_inputs() const { return m_inputs; }
    const std::vector<TensorDescriptorPtr>& get_outputs() const { return m_outputs; }

    std::vector<size_t> get_loop_ids() const { return m_loop_ids; }
    void set_loop_ids(const std::vector<size_t>& loops) { m_loop_ids = loops; }
    // Presumably writes loop "id" at nesting position "idx" (outer -> inner, see m_loop_ids) — confirm in .cpp
    void set_loop_id(size_t id, size_t idx);
    void remove_loop_id(size_t id);
    bool is_outside_loop() const { return m_is_outside_loop; }

    // Initializes m_emitter for the wrapped node using the given target machine
    void init_emitter(const std::shared_ptr<const TargetMachine>& target);

    ExpressionPort input_port(size_t i);
    ExpressionPort output_port(size_t i);

protected:
    void replace_input(size_t port, TensorDescriptorPtr to);
    void replace_output(size_t port, TensorDescriptorPtr to);

    std::shared_ptr<Node> m_source_node{nullptr};
    std::shared_ptr<Emitter> m_emitter{nullptr};
    std::vector<TensorDescriptorPtr> m_inputs;
    std::vector<TensorDescriptorPtr> m_outputs;
    RegInfo m_reg_info{{}, {}};
    // The order Loops identifies: Outer ---> Inner
    std::vector<size_t> m_loop_ids;
    bool m_is_outside_loop = false;
};

/**
 * @brief Expression corresponding to a model boundary op: a Parameter (input)
 *        or a Result (output). Stores the index of the corresponding model port.
 */
class IOExpression : public Expression {
public:
// Direction of the IO expression relative to the model
enum class io_type {INPUT, OUTPUT, UNDEFINED};

// Parameter produces values only, so no input descriptors are taken
IOExpression(const std::shared_ptr<ov::opset1::Parameter>& n, int64_t index);
// Result consumes values only
IOExpression(const std::shared_ptr<ov::opset1::Result>& n, int64_t index, std::vector<TensorDescriptorPtr> inputs);

int64_t get_index() const { return m_index; }
io_type get_type() const { return m_type; }

private:
int64_t m_index = -1;                 // model port index; -1 until set by a ctor
io_type m_type = io_type::UNDEFINED;  // set by the ctors; UNDEFINED only when default state survives
};

// ExpressionPort comparisons (defined in the .cpp).
// NOTE(review): presumably compare the (expr, port, type) identity — confirm in expression.cpp.
// operator< supplies the strict weak ordering required to keep ports in ordered
// containers such as std::set<ExpressionPort>.
bool operator==(const ExpressionPort& lhs, const ExpressionPort& rhs);
bool operator!=(const ExpressionPort& lhs, const ExpressionPort& rhs);
bool operator<(const ExpressionPort& lhs, const ExpressionPort& rhs);

} // namespace lowered
} // namespace snippets
} // namespace ngraph
118 changes: 118 additions & 0 deletions src/common/snippets/include/snippets/lowered/linear_ir.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <list>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>

#include "expression.hpp"

namespace ngraph {
namespace snippets {
namespace lowered {

/**
 * @brief Lowering configuration that controls how a LinearIR is built and emitted.
 */
class Config {
public:
// True if the lowered Emitters need to be accessed during runtime. Normally they're destroyed after code emission.
bool m_save_lowered_code = false;
// True if we should check runtime info for nodes to call specific needed transformations
bool m_need_fill_tail_register = false;
// NOTE(review): presumably toggles explicit Loop op insertion during lowering — confirm with the passes that read it
bool m_explicit_loop_insertion = false;
// Shape the snippet is executed over — TODO confirm exact semantics (broadcasted "master" shape?)
ov::PartialShape m_master_shape{};
// Number of nested loops to generate — TODO confirm; defaults to a single (innermost) loop
size_t m_loop_depth = 1;
};

/**
 * @interface LinearIR
 * @brief Linear intermediate representation of a snippet: an ordered list of
 *        Expressions plus lookup maps from nodes and tensor descriptors to the
 *        expressions that produce or consume them.
 */
class LinearIR {
public:
    using container = std::list<ExpressionPtr>;
    using io_container = std::list<std::shared_ptr<IOExpression>>;
    using exprIt = container::iterator;
    using constExprIt = container::const_iterator;

    LinearIR() = default;
    // Builds the IR from a model (one expression per op; see get_ordered_ops)
    explicit LinearIR(const std::shared_ptr<ov::Model>& m, Config config = {});

    // Deep copy of the whole IR — see the .cpp for the exact copy semantics
    LinearIR deep_copy() const;
    static LinearIR::container deep_copy_range(LinearIR::container::const_iterator begin, LinearIR::container::const_iterator end);

    const container& get_ops() const { return m_lowered_ops; }
    const io_container& get_IO_ops() const { return m_io_lowered_ops; }
    // const-qualified (fix): pure accessor was non-const, so it couldn't be called on a const LinearIR
    Config get_config() const { return m_config; }

    ExpressionPtr get_expr_by_node(const std::shared_ptr<Node>& n) const;
    // An output uniquely identifies one producing expression (see m_output2expression_map)...
    ExpressionPort get_expr_by_output(const TensorDescriptorPtr& n) const;
    // ...while several expressions may consume the same input (see m_input2expression_map)
    const std::set<ExpressionPort>& get_exprs_by_input(const TensorDescriptorPtr& n) const;

    void replace_input(const ExpressionPort& expr_port, const TensorDescriptorPtr& to);
    void replace_input(const ExpressionPtr& expr, size_t port, const TensorDescriptorPtr& to);
    void replace_output(const ExpressionPort& expr_port, const TensorDescriptorPtr& to);
    void replace_output(const ExpressionPtr& expr, size_t port, const TensorDescriptorPtr& to);

    /**
     * @brief Move an expression from the position "from" to the position immediately before "to".
     * Note: this method does NOT take care about data dependencies and no relevant checks are performed.
     * and doesn't touch internal maps.
     */
    void move(constExprIt from, constExprIt to);

    bool empty() const noexcept { return m_lowered_ops.empty(); }
    void debug_print(bool tds_as_pointers = false) const;

    container::reference back() noexcept { return m_lowered_ops.back(); }
    container::const_reference back() const noexcept { return m_lowered_ops.back(); }
    container::reference front() noexcept { return m_lowered_ops.front(); }
    container::const_reference front() const noexcept { return m_lowered_ops.front(); }

    exprIt begin() noexcept { return m_lowered_ops.begin(); }
    exprIt end() noexcept { return m_lowered_ops.end(); }
    constExprIt begin() const noexcept { return cbegin(); }
    constExprIt end() const noexcept { return cend(); }
    constExprIt cbegin() const noexcept { return m_lowered_ops.cbegin(); }
    constExprIt cend() const noexcept { return m_lowered_ops.cend(); }
    container::reverse_iterator rbegin() noexcept { return m_lowered_ops.rbegin(); }
    container::reverse_iterator rend() noexcept { return m_lowered_ops.rend(); }
    container::const_reverse_iterator crbegin() const noexcept { return m_lowered_ops.crbegin(); }
    container::const_reverse_iterator crend() const noexcept { return m_lowered_ops.crend(); }

    // Insertion presumably keeps the lookup maps in sync (via register_expression) — confirm in .cpp
    exprIt insert(constExprIt pos, const ov::NodeVector& nodes);
    exprIt insert(constExprIt pos, const std::shared_ptr<Node>& n);
    exprIt insert(constExprIt pos, container::value_type&& value);
    exprIt insert(constExprIt pos, const container::value_type& value);
    exprIt insert(constExprIt pos, exprIt begin, exprIt end);
    exprIt insert(constExprIt pos, constExprIt begin, constExprIt end);

    exprIt erase(exprIt pos);
    exprIt erase(constExprIt pos);

    void init_emitters(const std::shared_ptr<TargetMachine>& target);
    void serialize(const std::string& xml, const std::string& bin);

    static ov::NodeVector get_ordered_ops(const std::shared_ptr<ov::Model>& model);

    class LoopManager;
    using LoopManagerPtr = std::shared_ptr<LoopManager>;

    const LoopManagerPtr& get_loop_manager() const { return m_loop_manager; }

private:
    void register_expression(const ExpressionPtr& expr);
    // Like register_expression, but doesn't allow Parameter or Result registration. You can do it only through ctor
    void register_regular_expression(const ExpressionPtr& expr);
    void unregister_expression(const ExpressionPtr& expr);

    container m_lowered_ops{};
    std::unordered_map<std::shared_ptr<Node>, std::shared_ptr<Expression>> m_node2expression_map;
    // Expression must be uniquely identified by an output, so there can't be expressions that have the same output
    std::unordered_map<TensorDescriptorPtr, ExpressionPort> m_output2expression_map;
    // At the same time, several expressions can have the same input if they are connected to the same parent
    // E.g. LoopEnd will always have the same input as a Load inside the loop (since it has to increment the same reg)
    std::unordered_map<TensorDescriptorPtr, std::set<ExpressionPort>> m_input2expression_map;
    io_container m_io_lowered_ops;
    Config m_config{};
    LoopManagerPtr m_loop_manager = nullptr;
};

} // namespace lowered
} // namespace snippets
} // namespace ngraph
89 changes: 89 additions & 0 deletions src/common/snippets/include/snippets/lowered/loop_manager.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "linear_ir.hpp"

#include <openvino/core/node.hpp>
#include <openvino/opsets/opset1.hpp>

#include "snippets/tensor_descriptor.hpp"

namespace ngraph {
namespace snippets {
namespace lowered {

/**
 * @interface LinearIR::LoopManager
 * @brief Registry of the loops marked in a LinearIR: hands out numeric loop ids
 *        and stores per-loop metadata (work amount, increment, entry/exit ports).
 */
class LinearIR::LoopManager {
public:
LoopManager() = default;

// Metadata describing a single marked loop
class LoopInfo {
public:
LoopInfo() = default;
LoopInfo(size_t work_amount, size_t increment,
const std::vector<ExpressionPort>& entries,
const std::vector<ExpressionPort>& exits)
: work_amount(work_amount), increment(increment), entry_exprs(entries), exit_exprs(exits) {}
// Total amount of work the loop covers — TODO confirm units (elements vs iterations)
size_t work_amount = 0;
// Step the loop advances by per iteration — TODO confirm
size_t increment = 0;
// The order of entry and exit expressions is important:
// - The position before first entry expr is Loop Begin position
// - The position after last exit expr is Loop End position
// Note: Scalars aren't entry expressions but can be before first entry expr in Linear IR
std::vector<ExpressionPort> entry_exprs = {};
std::vector<ExpressionPort> exit_exprs = {};
};
using LoopInfoPtr = std::shared_ptr<LoopInfo>;

// Registers the loop and returns its id (ids presumably handed out sequentially via next_id)
size_t add_loop_info(const LoopInfoPtr& loop);
void remove_loop_info(size_t index);
LoopInfoPtr get_loop_info(size_t index) const;
size_t get_loop_count() const { return m_map.size(); }
const std::map<size_t, LoopInfoPtr>& get_map() const;

// NOTE(review): presumably marks [loop_begin_pos, loop_end_pos) as skipped for loop_depth nestings — confirm in .cpp
static void skipped_mark(LinearIR::constExprIt loop_begin_pos,
LinearIR::constExprIt loop_end_pos,
size_t loop_depth);
// Marks a new loop over the given range; entry/exit ports presumably deduced (see get_io_loop_ports)
void mark_loop(LinearIR& linear_ir,
LinearIR::constExprIt loop_begin_pos,
LinearIR::constExprIt loop_end_pos,
size_t loop_depth, size_t vector_size);
// Marks a new loop with explicitly provided work amount, increment and entry/exit ports
void mark_loop(LinearIR& linear_ir,
LinearIR::constExprIt loop_begin_pos,
LinearIR::constExprIt loop_end_pos,
size_t idx,
size_t work_amount,
size_t work_amount_increment,
const std::vector<ExpressionPort>& entries,
const std::vector<ExpressionPort>& exits);

// Computes iterator bounds of the registered loop with the given id
void get_loop_bounds(const LinearIR& linear_ir,
size_t loop_id,
LinearIR::constExprIt& loop_begin_pos,
LinearIR::constExprIt& loop_end_pos) const;
// Computes bounds directly from entry/exit ports; role of the optional loop_id — TODO confirm in .cpp
static void get_loop_bounds(const LinearIR& linear_ir,
const std::vector<ExpressionPort>& entries,
const std::vector<ExpressionPort>& exits,
LinearIR::constExprIt& loop_begin_pos,
LinearIR::constExprIt& loop_end_pos,
size_t loop_id = Expression::LOOP_NULL_ID);

private:
// Presumably stamps loop_id at nesting position idx on every expression in the range — confirm in .cpp
static void exprs_marking(LinearIR::constExprIt loop_begin_pos,
LinearIR::constExprIt loop_end_pos,
size_t loop_id, size_t idx);
// Collects the ports that cross the loop boundary into entries/exits
static void get_io_loop_ports(LinearIR& linear_ir,
LinearIR::constExprIt loop_begin_pos,
LinearIR::constExprIt loop_end_pos,
std::vector<ExpressionPort>& entries,
std::vector<ExpressionPort>& exits);

// loop id -> metadata; ordered so iteration follows id order.
// NOTE(review): uses std::map/std::vector — ensure <map> and <vector> are included by this header
std::map<size_t, LoopInfoPtr> m_map = {};
size_t next_id = 0;   // next loop id to hand out
};

} // namespace lowered
} // namespace snippets
} // namespace ngraph
Loading

0 comments on commit c22b2f6

Please sign in to comment.