[Snippets] Added Softmax support

openvinotoolkit · Nov 4, 2022 · 250aa6f · 250aa6f
1 parent 5b89a1a
commit 250aa6f
Show file tree

Hide file tree

Showing 55 changed files with 2,542 additions and 208 deletions.
diff --git a/src/common/snippets/include/snippets/op/buffer.hpp b/src/common/snippets/include/snippets/op/buffer.hpp
@@ -0,0 +1,42 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/op/op.hpp>
+
+namespace ngraph {
+namespace snippets {
+namespace op {
+
+/**
+ * @interface Buffer
+ * @brief Single-input snippets op that carries an `offset` attribute, which propogateOffset() hands to adjacent Load/Store ops (units of `offset` are not stated here - TODO document)
+ * @ingroup snippets
+ */
+class Buffer : public ngraph::op::Op {
+public:
+    OPENVINO_OP("Buffer", "SnippetsOpset");
+
+    Buffer(const Output<Node>& x, const size_t offset = 0);
+    Buffer() = default;
+
+    size_t get_offset() const;
+    void set_offset(const size_t offset);
+
+    // If the Buffer has an offset, this method sets that offset on the neighbouring Load and Store ops
+    // so that they read and write data at the correct location
+    void propogateOffset();
+
+    bool visit_attributes(AttributeVisitor& visitor) override { return true; }  // NOTE(review): `offset` is not exposed to the visitor
+    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
+    void validate_and_infer_types() override;
+
+private:
+    size_t offset = 0;  // zero-initialised so that a default-constructed Buffer never exposes an indeterminate offset
+};
+
+} // namespace op
+} // namespace snippets
+} // namespace ngraph
diff --git a/src/common/snippets/include/snippets/op/fill.hpp b/src/common/snippets/include/snippets/op/fill.hpp
@@ -0,0 +1,44 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/op/op.hpp>
+
+namespace ngraph {
+namespace snippets {
+namespace op {
+
+/**
+ * @interface Fill
+ * @brief Op taking one input plus an integer `offset` and a string `fill_value` (default "zero"); how they are applied is defined in the implementation - TODO document
+ * @ingroup snippets
+ */
+class Fill : public ngraph::op::Op {
+public:
+    OPENVINO_OP("Fill", "SnippetsOpset");
+
+    Fill(const Output<Node>& x, const int64_t offset, const std::string fill_value = "zero");
+    Fill() = default;
+
+    int64_t get_offset() const { return m_offset; }
+    std::string get_fill_value() const { return m_fill_value; }  // returns a copy of the stored string
+
+    void set_offset(const size_t offset) { m_offset = offset; }  // NOTE(review): takes size_t although m_offset and get_offset() are int64_t
+    void set_fill_value(const std::string fill_value) { m_fill_value = fill_value; }
+
+    bool visit_attributes(AttributeVisitor& visitor) override;
+
+    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
+
+    void validate_and_infer_types() override;
+
+protected:
+    int64_t m_offset = 0lu;  // NOTE(review): unsigned-long literal initialising a signed member; the value is still 0
+    std::string m_fill_value = "zero";
+};
+
+} // namespace op
+} // namespace snippets
+} // namespace ngraph
diff --git a/src/common/snippets/include/snippets/op/horizon_max.hpp b/src/common/snippets/include/snippets/op/horizon_max.hpp
@@ -0,0 +1,33 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/op/op.hpp"
+
+namespace ngraph {
+namespace snippets {
+namespace op {
+
+/**
+ * @interface HorizonMax
+ * @brief Attribute-less single-input op; judging by the name, presumably a horizontal (within-vector) max reduction - TODO confirm and document
+ * @ingroup snippets
+ */
+class HorizonMax : public ngraph::op::Op {
+public:
+    OPENVINO_OP("HorizonMax", "SnippetsOpset");
+
+    HorizonMax(const Output<Node>& x);
+    HorizonMax() = default;
+
+    bool visit_attributes(AttributeVisitor& visitor) override { return true;}  // no attributes are passed to the visitor
+
+    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
+    void validate_and_infer_types() override;
+};
+
+} // namespace op
+} // namespace snippets
+} // namespace ngraph
diff --git a/src/common/snippets/include/snippets/op/horizon_sum.hpp b/src/common/snippets/include/snippets/op/horizon_sum.hpp
@@ -0,0 +1,32 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ngraph/op/op.hpp"
+
+namespace ngraph {
+namespace snippets {
+namespace op {
+
+/**
+ * @interface HorizonSum
+ * @brief Attribute-less single-input op; judging by the name, presumably a horizontal (within-vector) sum reduction - TODO confirm and document
+ * @ingroup snippets
+ */
+class HorizonSum : public ngraph::op::Op {
+public:
+    OPENVINO_OP("HorizonSum", "SnippetsOpset");
+
+    HorizonSum(const Output<Node>& x);
+    HorizonSum() = default;
+
+    bool visit_attributes(AttributeVisitor& visitor) override { return true;}  // no attributes are passed to the visitor
+    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
+    void validate_and_infer_types() override;
+};
+
+} // namespace op
+} // namespace snippets
+} // namespace ngraph
diff --git a/src/common/snippets/include/snippets/op/load.hpp b/src/common/snippets/include/snippets/op/load.hpp
@@ -15,18 +15,21 @@ namespace op {
  * @brief Generated by Canonicalization step where explicit instructions should be emitted for data loading
  *        where number of elements to load is determined by "count"
  *        Default value is "1" - to load one element
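+ *        Params: "count" - number of elements to load; "offset" - presumably the memory offset the load starts from (default "0") - TODO: confirm units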
  * @ingroup snippets
  */
 class Load : public ngraph::op::Op {
 public:
     OPENVINO_OP("Load", "SnippetsOpset");
 
-    Load(const Output<Node>& x, const size_t count = 1lu);
+    Load(const Output<Node>& x, const size_t count = 1lu, const size_t offset = 0lu);
     Load() = default;
 
     size_t get_count() const { return m_count; }
+    size_t get_offset() const { return m_offset; }
 
     void set_count(const size_t count) { m_count = count; }
+    void set_offset(const size_t offset) { m_offset = offset; }
 
     bool visit_attributes(AttributeVisitor& visitor) override;
 
@@ -40,6 +43,7 @@ class Load : public ngraph::op::Op {
 
 protected:
     size_t m_count = 0lu;
+    size_t m_offset = 0lu;
 };
 
 } // namespace op

diff --git a/src/common/snippets/include/snippets/op/loop.hpp b/src/common/snippets/include/snippets/op/loop.hpp
@@ -64,25 +64,28 @@ class LoopEnd : public LoopBase {
 public:
     OPENVINO_OP("LoopEnd", "SnippetsOpset");
     LoopEnd(const std::vector<Output<Node>>& args, size_t dimension, size_t work_amount, size_t increment,
-              std::vector<bool> apply_increment, std::vector<int64_t> finalization_offsets);
+              std::vector<bool> apply_increment, std::vector<int64_t> finalization_offsets, std::vector<bool> forse_finalizations);
     LoopEnd() = delete;
     std::shared_ptr<LoopBegin> get_loop_begin();
     void validate_and_infer_types() override;
     std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs)  const override;
     const std::vector<int64_t>& get_finalization_offsets() const;
     const std::vector<bool>& get_apply_increment() const;
+    const std::vector<bool>& get_forse_finalization_offsets() const;
     void set_finalization_offsets(std::vector<int64_t> offsets);
     void set_apply_increment(std::vector<bool> apply_increment);
+    void set_forse_finalization_offsets(std::vector<bool> forse_finalizations_offsets);
     void set_work_amount(size_t new_work_amount);
     void set_increment(size_t new_increment);
     void set_evaluate_once(bool once);
-    // Used to propagate information about Loop structure, needed to simplify some optimizations. For example,
-    // to skip pointer increments when outer Loop is empty, and work_amount == vector_size (one inner vector Loop)
-    // true by default, the optimizations enabled if it's false;
-    bool has_outer_loop;
 
 private:
     std::vector<bool> apply_increment;
+    // Used to propagate information about Loop structure, needed to simplify some optimizations. For example,
+    // to skip pointer increments when the outer Loop is empty and work_amount == vector_size (one inner vector Loop),
+    // or to mark pointers that must always be reverted (for example, for buffers).
+    // True by default; the optimizations are enabled only if it's false.
+    std::vector<bool> forse_finalization_offsets;
     std::vector<int64_t> finalization_offsets;
     size_t loop_io_size;
 };

diff --git a/src/common/snippets/include/snippets/op/loop_helpers.hpp b/src/common/snippets/include/snippets/op/loop_helpers.hpp
@@ -18,7 +18,8 @@ std::shared_ptr<LoopEnd> insertLoopEndBeforeInputs(const std::vector<Input<Node>
                                                   const std::shared_ptr<LoopBegin>& tileBegin,
                                                   size_t dimension, size_t work_amount, size_t increment,
                                                   std::vector<bool> apply_increment = {},
-                                                  std::vector<int64_t> finalization_offsets = {});
+                                                  std::vector<int64_t> finalization_offsets = {},
+                                                  std::vector<bool> forse_finalization_offsets = {});
 template<typename T>
 std::shared_ptr<LoopBegin> insertLoopBegin(const T& afterTheseNodes) {
     static_assert(std::is_same<T, ParameterVector>() || std::is_same<T, NodeVector>(),

diff --git a/src/common/snippets/include/snippets/op/store.hpp b/src/common/snippets/include/snippets/op/store.hpp
@@ -21,12 +21,14 @@ class Store : public ngraph::op::Op {
 public:
     OPENVINO_OP("Store", "SnippetsOpset");
 
-    Store(const Output<Node>& x, const size_t count = 1lu);
+    Store(const Output<Node>& x, const size_t count = 1lu, const size_t offset = 0lu);
     Store() = default;
 
     size_t get_count() const { return m_count; }
+    size_t get_offset() const { return m_offset; }
 
     void set_count(const size_t count) { m_count = count; }
+    void set_offset(const size_t offset) { m_offset = offset; }
 
     bool visit_attributes(AttributeVisitor& visitor) override;
 
@@ -40,6 +42,7 @@ class Store : public ngraph::op::Op {
 
 protected:
     size_t m_count = 0lu;
+    size_t m_offset = 0lu;
 };
 
 } // namespace op

diff --git a/src/common/snippets/include/snippets/op/subgraph.hpp b/src/common/snippets/include/snippets/op/subgraph.hpp
@@ -89,17 +89,10 @@ class Subgraph : public ngraph::op::Op {
         return m_generator;
     }
 
-    size_t get_non_scalar_constants_count() const {
-        return m_non_scalar_constants_count;
-    }
-
-    bool is_quantized() const {
-        return config.m_is_quantized;
-    }
-
-    bool has_type_relaxed_ops() const {
-        return config.m_has_type_relaxed_ops;
-    }
+    size_t get_buffer_scratchpad_size() const;
+    size_t get_additional_data_count() const { return m_additional_data_count; }
+    bool is_quantized() const { return config.m_is_quantized; }
+    bool has_type_relaxed_ops() const { return config.m_has_type_relaxed_ops; }
 
     snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, ngraph::pass::Manager& opt,
                                 const void* compile_params = nullptr);
@@ -114,7 +107,7 @@ class Subgraph : public ngraph::op::Op {
     // plugin sets generator for a snippet to some specific generator.
     // it's going to be replaced with Jitters table later
     void set_generator(std::shared_ptr<ngraph::snippets::Generator> generator);
-    void set_non_scalar_constants_count(const size_t count);
+    void set_additional_data_count(const size_t count);
 
     void print() const;
     void print_statistics(bool verbose);
@@ -128,11 +121,12 @@ class Subgraph : public ngraph::op::Op {
 private:
     void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes);
     void convert_to_snippet_dialect();
+    void init_config();
     // Count of potentional non-scalar Consants that will be created after some tranformations
     // At the moment it's relevant only for FakeQuantize decomposition
     // NOTE: To avoid overheads in each calcution of this count (for example, in validate_and_type_infer()),
     //       we should MANUALLY calculate it where it needed.
-    size_t m_non_scalar_constants_count = 0;
+    size_t m_additional_data_count = 0;
     Shape exec_domain = {};
     std::shared_ptr<ov::Model> m_body = nullptr;
     std::shared_ptr<ngraph::snippets::Generator> m_generator = nullptr;
@@ -150,6 +144,11 @@ class Subgraph : public ngraph::op::Op {
         // True if Subgraph contains TypeRelaxed nodes -> for several streams in tp mode we should copy body using mutexes
         // because TypeRelaxed::copy_with_new_inputs() isn't save-thread method
         bool m_has_type_relaxed_ops = false;
+        // True if we should check runtime info for nodes to call specific needed transformations
+        bool m_check_rt_info = false;
+        // True if we should go through the whole body to find where Loops must be explicitly inserted.
+        // Otherwise we insert Loops on Parameters and Results - for example, this is better for subgraphs with only eltwise ops
+        bool m_need_exact_pass_for_loop_insertion = false;
     } config;
 
     ov::PartialShape master_shape;

diff --git a/src/common/snippets/include/snippets/op/vector_buffer.hpp b/src/common/snippets/include/snippets/op/vector_buffer.hpp
@@ -0,0 +1,31 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/op/op.hpp>
+
+namespace ngraph {
+namespace snippets {
+namespace op {
+
+/**
+ * @interface VectorBuffer
+ * @brief Attribute-less op whose only declared constructor takes no arguments; purpose is not documented here - presumably a vector-sized scratch value, TODO confirm
+ * @ingroup snippets
+ */
+class VectorBuffer : public ngraph::op::Op {
+public:
+    OPENVINO_OP("VectorBuffer", "SnippetsOpset");
+
+    VectorBuffer();
+
+    bool visit_attributes(AttributeVisitor& visitor) override { return true;}  // no attributes are passed to the visitor
+    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
+    void validate_and_infer_types() override;
+};
+
+} // namespace op
+} // namespace snippets
+} // namespace ngraph
diff --git a/src/common/snippets/include/snippets/pass/insert_buffer.hpp b/src/common/snippets/include/snippets/pass/insert_buffer.hpp
@@ -0,0 +1,27 @@
+// Copyright (C) 2018-2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ngraph/pass/graph_rewrite.hpp>
+#include <ngraph/pattern/matcher.hpp>
+
+namespace ngraph {
+namespace snippets {
+namespace pass {
+
+/**
+ * @interface InsertBuffer
+ * @brief MatcherPass that, judging by its name, presumably inserts snippets Buffer ops into the body; the matched pattern lives in the implementation - TODO document
+ * @ingroup snippets
+ */
+class InsertBuffer: public ngraph::pass::MatcherPass {
+public:
+    InsertBuffer();
+};
+
+
+}  // namespace pass
+}  // namespace snippets
+}  // namespace ngraph
diff --git a/src/common/snippets/include/snippets/pass/insert_loops.hpp b/src/common/snippets/include/snippets/pass/insert_loops.hpp
@@ -19,12 +19,13 @@ namespace pass {
 class InsertLoops: public ngraph::pass::FunctionPass {
 public:
     OPENVINO_RTTI("InsertLoops", "0");
-    InsertLoops(ov::PartialShape master_shape, size_t vector_size);
+    InsertLoops(ov::PartialShape master_shape, size_t vector_size, bool is_optimized = true);
     bool run_on_model(const std::shared_ptr<ngraph::Function>& m) override;
 
 private:
     ov::PartialShape master_shape;
     size_t vector_size;
+    bool is_optimized;
 };
 
 }  // namespace pass