Merge branch 'master' into an/general_fix_fp16
allnes authored Oct 25, 2023
2 parents 2acb311 + daa2c9d commit 0e175c5
Showing 80 changed files with 2,652 additions and 298 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/assign_issue.yml
@@ -0,0 +1,23 @@
name: Take Issue

on:
issue_comment:
types:
- created
- edited

jobs:
take-issue:
name: Take issue
runs-on: ubuntu-latest
permissions:
issues: write
timeout-minutes: 10
steps:
- name: take an issue
uses: bdougie/take-action@main
with:
message: Thank you for looking into this issue! Please let us know if you have any questions or require any help.
issueCurrentlyAssignedMessage: Thanks for being interested in this issue. It looks like this ticket is already assigned to a contributor. Please communicate with the assigned contributor to confirm the status of the issue.
trigger: .take
token: ${{ secrets.GITHUB_TOKEN }}
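With this workflow in place, a contributor can comment `.take` on an open issue to have it assigned to them; if the issue is already assigned, the action replies with the issueCurrentlyAssignedMessage instead. Note that the job only needs the `issues: write` permission granted above.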
2 changes: 2 additions & 0 deletions README.md
@@ -177,6 +177,8 @@ See [How to build OpenVINO](./docs/dev/build.md) to get more information about t
See [Contributions Welcome](https://github.com/openvinotoolkit/openvino/issues/17502) for good first issues.

See [CONTRIBUTING](./CONTRIBUTING.md) for contribution details. Thank you!
## Take the issue
If you wish to be assigned to an issue, please add a comment with the `.take` command.

## Get support

@@ -8,6 +8,7 @@
#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp"
#include "transformations/rt_info/decompression.hpp"
#include "transformations/rt_info/dequantization_node.hpp"
#include "transformations/rt_info/keep_const_precision.hpp"

#include "common_test_utils/ov_test_utils.hpp"

@@ -38,7 +39,8 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformation) {
//
// After MarkDequantizationSubgraph all Subtract and Multiply nodes from above graph
// are marked with 'DequantizationNode' attribute.
// All 'Convert(DCF)' nodes from above graph are marked with 'DisableConstantFolding' attribute
// Weights and zero points are marked with 'KeepConstPrecision' attribute

{
auto parameter = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 16, 14, 14});
@@ -107,10 +109,12 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformation) {
}

std::shared_ptr<Node> weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-2});
enable_keep_const_precision(weights);
{
auto convert = std::make_shared<opset10::Convert>(weights, element::f32);
pass::disable_constant_folding(convert);
auto zero_point = opset10::Constant::create(element::i8, Shape{}, {127});
enable_keep_const_precision(zero_point);
auto convert_on_zero_point = std::make_shared<opset10::Convert>(zero_point, element::f32);
pass::disable_constant_folding(convert_on_zero_point);
auto subtract = std::make_shared<opset10::Subtract>(convert, convert_on_zero_point);
@@ -157,6 +161,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint
// After MarkDequantizationSubgraph all Multiply nodes from above graph
// are marked with 'DequantizationNode' attribute.
// Also 'Convert(DCF)' node from above graph is marked with 'DisableConstantFolding' attribute
// Weights node is marked with 'KeepConstPrecision' attribute

{
auto parameter = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 16, 14, 14});
@@ -214,6 +219,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint
}

std::shared_ptr<Node> weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-2});
enable_keep_const_precision(weights);
{
auto convert = std::make_shared<opset10::Convert>(weights, element::f32);
pass::disable_constant_folding(convert);
@@ -259,6 +265,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint
// After MarkDequantizationSubgraph all Multiply nodes from above graph
// are marked with 'DequantizationNode' attribute.
// Also 'Convert(DCF)' node from above graph is marked with 'DisableConstantFolding' attribute
// Weights node is marked with 'KeepConstPrecision' attribute

{
auto parameter = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 16, 14, 14});
@@ -323,6 +330,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint
}

std::shared_ptr<Node> weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-2});
enable_keep_const_precision(weights);
{
auto convert = std::make_shared<opset10::Convert>(weights, element::f32);
pass::disable_constant_folding(convert);
@@ -373,6 +381,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNotConstant
// After MarkDequantizationSubgraph all Subtract and Multiply nodes from above graph
// are marked with 'DequantizationNode' attribute.
// Also all 'Convert(DCF)' nodes from above graph are marked with 'DisableConstantFolding' attribute
// Weights and zero point nodes are marked with 'KeepConstPrecision' attribute

{
auto parameter = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 16, 14, 14});
@@ -395,10 +404,12 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNotConstant
}

std::shared_ptr<Node> weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-3});
enable_keep_const_precision(weights);
{
auto clamp = std::make_shared<opset10::Clamp>(weights, -2, 2);
auto convert = std::make_shared<opset10::Convert>(clamp, element::f32);
auto zero_point = opset10::Constant::create(element::i8, Shape{}, {127});
enable_keep_const_precision(zero_point);
auto convert_on_zero_point = std::make_shared<opset10::Convert>(zero_point, element::f32);
auto subtract = std::make_shared<opset10::Subtract>(convert, convert_on_zero_point);
auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2});
@@ -488,6 +499,7 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationFoldSubCons
// are marked with 'DequantizationNode' attribute.
// Also all 'Convert(DCF)' node before weights is marked with 'DisableConstantFolding' attribute
// but Convert before Dequantization Sub const isn't because fold_subtract_const is set to true
// Weights node is marked with 'KeepConstPrecision' attribute

{
auto weights = opset10::Constant::create(element::u8, Shape{4, 16, 1, 1}, {3});
@@ -505,6 +517,7 @@

{
auto weights = opset10::Constant::create(element::u8, Shape{4, 16, 1, 1}, {3});
enable_keep_const_precision(weights);
auto convert = std::make_shared<opset10::Convert>(weights, element::f32);
pass::disable_constant_folding(convert);
auto zero_point = opset10::Constant::create(element::f32, Shape{}, {127});
@@ -0,0 +1,66 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/pass/graph_rewrite.hpp"
#include "transformations_visibility.hpp"

namespace ov {
namespace pass {
class TRANSFORMATIONS_API DeReshapeMatMul;
} // namespace pass
} // namespace ov

/**
* @ingroup ie_transformation_common_api
* @brief Transformation uses symbol / label information to optimize out Reshape operations surrounding MatMul.
* It checks that the surrounding Reshapes only manipulate the batch dimensions of the tensor in a do-undo kind of way.
*
* Example:
* Before:
* [A,B,C,D] -> Reshape -> [A*B,C,D]
* MatMul [A*B,C,E] -> Reshape -> [A,B,C,E]
* [A,B,D,E] -> Reshape -> [A*B,D,E]
*
* After:
* [A,B,C,D] ->
* MatMul -> [A,B,C,E]
* [A,B,D,E] ->
*
* Transformation allows slightly different variations of the pattern on inputs of MatMul.
* - Simplest pattern contains only Reshape operation on MatMul input:
* Reshape -> MatMul
*
* - The next acceptable variation is Concat of two inputs on MatMul input:
* Reshape -[-> Concat -]-> MatMul
* This variation is transformed by realigning the other input of Concat and the other outputs of Concat
* with the help of Reshape operations
*
* - The most complex variation on the MatMul input pattern is with Binary Elementwise Operation with scalar second
* input: Reshape -[-> Concat -]-[-> BEA (scalar) -]-> MatMul
*
* Additionally, the transformation supports a variation of the pattern on the output of MatMul. It allows a
* Binary Elementwise Arithmetic operation without the scalar restriction on its second input.
* MatMul -[-> BEA -]-> Reshape
* This pattern variation is only applicable when the input reshapes are 4D -> 3D and the output reshape is 3D ->
* 4D. Additionally, the shape labels on the output of MatMul should be equal to the input shape labels of the last
* Reshape, meaning that this Binary Elementwise Arithmetic doesn't perform any broadcasting of the input coming from
* MatMul -- only the other input may be broadcasted to the MatMul input of this BEA. This effect (equality of MatMul
* output shape labels and BEA output shape labels) is handled by the LabelResolvingThroughSelect transformation in
* the particular models that this variation targets.
*
* Full pattern this transformation searches for:
* -> Reshape -[-> Concat -]-[-> BEA (scalar) -]->
* MatMul -[-> BEA -]-> Reshape ->
* -> Reshape -[-> Concat -]-[-> BEA (scalar) -]->
*
* NOTE: input branches could be (and in observed model cases are) asymmetrical, meaning that the presence of Concat
* on one input of MatMul doesn't require the other input to also have Concat
*/
class ov::pass::DeReshapeMatMul : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("DeReshapeMatMul", "0");
DeReshapeMatMul();
};
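
To make the pattern concrete, here is a minimal sketch of a model matching the "Before" shape above and of registering the pass. The shape values and the header path are illustrative assumptions; `ov::pass::Manager` and the opset10 builders are standard OpenVINO APIs, and in practice the pass relies on label information produced by symbolic propagation (see SymbolicOptimizations below):

#include "openvino/core/model.hpp"
#include "openvino/opsets/opset10.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/symbolic_transformations/dereshape_matmul.hpp"  // assumed header location

// Builds: Reshape(lhs to [6,4,5]) and Reshape(rhs to [6,5,6]) feed MatMul,
// whose output is reshaped back to [2,3,4,6] -- the do-undo batch folding.
std::shared_ptr<ov::Model> make_dereshape_example() {
    using namespace ov::opset10;
    auto lhs = std::make_shared<Parameter>(ov::element::f32, ov::PartialShape{2, 3, 4, 5});
    auto rhs = std::make_shared<Parameter>(ov::element::f32, ov::PartialShape{2, 3, 5, 6});
    // "do" Reshapes: fold the two batch dimensions into one
    auto lhs_3d = std::make_shared<Reshape>(lhs, Constant::create(ov::element::i64, ov::Shape{3}, {6, 4, 5}), false);
    auto rhs_3d = std::make_shared<Reshape>(rhs, Constant::create(ov::element::i64, ov::Shape{3}, {6, 5, 6}), false);
    auto matmul = std::make_shared<MatMul>(lhs_3d, rhs_3d);
    // "undo" Reshape: restore the batch dimensions after MatMul
    auto out = std::make_shared<Reshape>(matmul, Constant::create(ov::element::i64, ov::Shape{4}, {2, 3, 4, 6}), false);
    return std::make_shared<ov::Model>(ov::OutputVector{out}, ov::ParameterVector{lhs, rhs});
}

void run_dereshape_matmul(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::DeReshapeMatMul>();
    manager.run_passes(model);
}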
@@ -4,16 +4,17 @@

#pragma once

#include "openvino/pass/graph_rewrite.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/pass/pass.hpp"
#include "openvino/pass/pattern/matcher.hpp"
#include "transformations_visibility.hpp"

namespace ov {
namespace pass {
class TRANSFORMATIONS_API SymbolicOptimizations;
class TRANSFORMATIONS_API SymbolicPropagation;
class TRANSFORMATIONS_API LabelResolvingThroughSelect;
} // namespace pass
} // namespace ov

@@ -48,3 +49,19 @@ class ov::pass::SymbolicPropagation : public ov::pass::ModelPass {
private:
std::shared_ptr<ov::TableOfEquivalence> m_te;
};

/**
* @ingroup ie_transformation_common_api
* @brief Transformation requires equal labels on one input of Add and output of last Reshape in the pattern:
* -> Add -> Reshape -[then or else input]-> Select -> Softmax -> Reshape ->
*
* If the shape labels on the mentioned tensors are equal, we have proved that no broadcasting of this input was done
* for Add and for Select. Therefore, we can put the same labels on the outputs of Add and Select. This transformation
* helps propagate labels and would not be needed if we used information about the equality of products of the input
* and output dimensions of Reshape operations
*/
class ov::pass::LabelResolvingThroughSelect : public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("LabelResolvingThroughSelect", "0");
LabelResolvingThroughSelect();
};
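
A minimal sketch of driving these passes, assuming SymbolicOptimizations wires SymbolicPropagation and the label-dependent rewrites (including LabelResolvingThroughSelect) together internally; the header path is an assumption:

#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/symbolic_transformations/symbolic_optimizations.hpp"  // assumed header location

void run_symbolic_optimizations(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    // SymbolicPropagation assigns labels to tensor dimensions; label-based
    // rewrites such as LabelResolvingThroughSelect then reason over them.
    manager.register_pass<ov::pass::SymbolicOptimizations>();
    manager.run_passes(model);
}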
@@ -4,12 +4,11 @@

#pragma once

#include "openvino/core/descriptor/tensor.hpp"
#include "openvino/core/dimension.hpp"
#include "openvino/core/partial_shape.hpp"
#include "openvino/core/type/element_type.hpp"
#include "transformations_visibility.hpp"

namespace ov {
namespace symbol {
@@ -38,6 +37,16 @@ TRANSFORMATIONS_API bool get_labels(const ov::Output<ov::Node>& output, ov::Tens
///
/// \return true if labels are unique and equal between lhs and rhs else false
TRANSFORMATIONS_API bool are_unique_and_equal_labels(const ov::TensorLabel& lhs, const ov::TensorLabel& rhs);

/// \brief Compares dimensions: if both dimensions are static, compares their values; if the dimensions are dynamic,
/// compares their respective labels using the TableOfEquivalence
///
/// \param lhs Dimension object to compare
/// \param rhs Dimension object to compare
///
/// \return true if static dimensions are equal by value or dynamic dimensions have equal labels, otherwise false
TRANSFORMATIONS_API bool dims_are_equal(const ov::Dimension& lhs, const ov::Dimension& rhs);

} // namespace util
} // namespace symbol
} // namespace ov
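
A small usage sketch of `dims_are_equal`; here labels are set manually via `ov::DimensionTracker` for illustration, whereas in practice they are assigned by SymbolicPropagation (the utils header path is an assumption):

#include "openvino/core/dimension.hpp"
#include "openvino/core/dimension_tracker.hpp"
#include "transformations/symbolic_transformations/utils.hpp"  // assumed header location

void dims_are_equal_sketch() {
    ov::Dimension a(8), b(8);  // static dimensions: compared by value
    bool static_equal = ov::symbol::util::dims_are_equal(a, b);  // true

    ov::Dimension x, y;        // dynamic dimensions: compared by label
    ov::DimensionTracker::set_label(x, 42);
    ov::DimensionTracker::set_label(y, 42);
    bool dynamic_equal = ov::symbol::util::dims_are_equal(x, y);  // true, labels match
    (void)static_equal;
    (void)dynamic_equal;
}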
@@ -9,6 +9,7 @@
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "transformations/rt_info/dequantization_node.hpp"
#include "transformations/rt_info/disable_constant_folding.hpp"
#include "transformations/rt_info/keep_const_precision.hpp"
#include "transformations/utils/utils.hpp"

ov::pass::MarkDequantizationSubgraph::MarkDequantizationSubgraph(const element::TypeVector& precisions,
@@ -62,6 +63,16 @@ ov::pass::MarkDequantizationSubgraph::MarkDequantizationSubgraph(const element::
if (ov::op::util::is_on_constant_path(input)) {
// disable ConstantFolding if dequantization subgraph is on constant data
ov::disable_constant_folding(convert);
// It is also necessary to avoid precision conversion for constant nodes whose element type is one of the requested precisions
auto keep_const_precision = [&](Node* node) {
if (auto constant = ov::as_type<ov::opset10::Constant>(node)) {
const auto& const_et = constant->get_element_type();
if (std::find(precisions.begin(), precisions.end(), const_et) != precisions.end())
ov::enable_keep_const_precision(convert->get_input_node_shared_ptr(0));
}
};
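// walk the constant subgraph feeding this input and apply the marking to each node on the path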
std::unordered_set<Node*> visited;
ov::op::util::visit_shape_path(input.get_node(), visited, keep_const_precision);
}

if (subtract_it != pattern_map.end()) {
@@ -75,6 +86,7 @@ ov::pass::MarkDequantizationSubgraph::MarkDequantizationSubgraph(const element::
// so we don't have to constantfold it and then convert it back to
// low precision in LP transformations
ov::disable_constant_folding(zero_point);
ov::enable_keep_const_precision(zero_point->get_input_node_shared_ptr(0));
}
}
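
For reference, a minimal sketch of how this pass is typically registered; the precision list and header path are illustrative:

#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/common_optimizations/mark_dequantization_subgraph.hpp"  // assumed header location

void mark_dequantization(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    // Constants of the listed precisions are marked with 'KeepConstPrecision',
    // and their Converts are excluded from ConstantFolding, per the logic above.
    manager.register_pass<ov::pass::MarkDequantizationSubgraph>(
        ov::element::TypeVector{ov::element::i8, ov::element::u8});
    manager.run_passes(model);
}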
