From 64ecc673e917a326bb287807abebe4e07b940aac Mon Sep 17 00:00:00 2001
From: Pavel Esir <pavel.esir@intel.com>
Date: Wed, 4 Oct 2023 11:38:53 +0200
Subject: [PATCH] wip: add UpcastToFP32ByName pass to keep selected nodes in FP32

---
 .../src/openvino/runtime/passes/__init__.py   |  2 +-
 .../graph/passes/transformations.cpp          | 18 ++++++
 .../src/transformations/convert_precision.cpp |  2 +-
 ...k_subgraphs_to_keep_in_mixed_precision.cpp | 62 +++++++++++++++++++
 .../openvino/pass/upcast_to_fp32_by_name.hpp  | 39 ++++++++++++
 src/core/src/pass/upcast_to_fp32_by_name.cpp  | 44 +++++++++++++
 src/core/src/pass/visualize_tree.cpp          |  4 ++
 .../src/plugin/transformations_pipeline.cpp   |  7 +++
 .../ovc/partially_upcast_nodes_to_fp32.py     | 17 ++---
 9 files changed, 186 insertions(+), 9 deletions(-)
 create mode 100644 src/core/include/openvino/pass/upcast_to_fp32_by_name.hpp
 create mode 100644 src/core/src/pass/upcast_to_fp32_by_name.cpp
diff --git a/src/bindings/python/src/openvino/runtime/passes/__init__.py b/src/bindings/python/src/openvino/runtime/passes/__init__.py
index 281299a71560d4..71441be3f9b5b7 100644
--- a/src/bindings/python/src/openvino/runtime/passes/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/passes/__init__.py
@@ -14,6 +14,6 @@
     type_matches,
     type_matches_any,
 )
-from openvino._pyopenvino.passes import Serialize, ConstantFolding, VisualizeTree, MakeStateful, LowLatency2, ConvertFP32ToFP16, Version, PartiallyConvertToFP16
+from openvino._pyopenvino.passes import Serialize, ConstantFolding, VisualizeTree, MakeStateful, LowLatency2, ConvertFP32ToFP16, Version, UpcastToFP32ByName, PartiallyConvertToFP16
 from openvino.runtime.passes.manager import Manager
 from openvino.runtime.passes.graph_rewrite import GraphRewrite, BackwardGraphRewrite
diff --git a/src/bindings/python/src/pyopenvino/graph/passes/transformations.cpp b/src/bindings/python/src/pyopenvino/graph/passes/transformations.cpp
index a767ce7718a08b..a35f7a4b185405 100644
--- a/src/bindings/python/src/pyopenvino/graph/passes/transformations.cpp
+++ b/src/bindings/python/src/pyopenvino/graph/passes/transformations.cpp
@@ -20,6 +20,7 @@
 
 #include "pyopenvino/core/common.hpp"
 #include "pyopenvino/utils/utils.hpp"
+#include "openvino/pass/upcast_to_fp32_by_name.hpp"
 
 namespace py = pybind11;
 using Version = ov::pass::Serialize::Version;
@@ -107,6 +108,23 @@ void regclass_transformations(py::module m) {
         return Common::get_simple_repr(self);
     });
 
+    py::class_<ov::pass::UpcastToFP32ByName,
+            std::shared_ptr<ov::pass::UpcastToFP32ByName>,
+            ov::pass::ModelPass,
+            ov::pass::PassBase>
+            calibrate_fp16(m, "UpcastToFP32ByName");
+    calibrate_fp16.doc() = "openvino.runtime.passes.UpcastToFP32ByName transformation";
+
+    calibrate_fp16.def(py::init<const std::vector<std::string>>(),
+                  py::arg("node_names_list"),
+                  R"(
+                  Cast selected nodes to FP32 and keep in that precision.
+    )");
+    calibrate_fp16.def("__repr__", [](const ov::pass::UpcastToFP32ByName& self) {
+        return Common::get_simple_repr(self);
+    });
+
+
     py::class_<ov::pass::ConvertPrecision, std::shared_ptr<ov::pass::ConvertPrecision>, ov::pass::ModelPass, ov::pass::PassBase>
             part_convert_to_fp16(m, "PartiallyConvertToFP16");
     part_convert_to_fp16.doc() = "openvino.runtime.passes.ConvertPrecision transformation";
diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp
index 65387086310830..c836ecceef30e9 100644
--- a/src/common/transformations/src/transformations/convert_precision.cpp
+++ b/src/common/transformations/src/transformations/convert_precision.cpp
@@ -367,7 +367,7 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr<ov::Model>&
         return false;
 
     bool has_fp16_compression = m_precisions.count(element::f32) > 0 && m_precisions[element::f32] == element::f16;
-
+    // todo: comment/uncomment here
     if (m_keep_precision_sensitive_in_fp32 && has_fp16_compression) {
         pass::Manager manager(get_pass_config());
         // Mark subgraphs with disable_fp16_compression to keep them in FP32
diff --git a/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp
index ff873eba035d0e..c6e04e64238a51 100644
--- a/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp
+++ b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp
@@ -18,7 +18,9 @@
 #include "openvino/op/maximum.hpp"
 #include "openvino/op/multiply.hpp"
 #include "openvino/op/mvn.hpp"
+#include "openvino/op/tanh.hpp"
 #include "openvino/op/normalize_l2.hpp"
+#include "openvino/pass/serialize.hpp"
 #include "openvino/op/power.hpp"
 #include "openvino/op/reduce_max.hpp"
 #include "openvino/op/reduce_mean.hpp"
@@ -82,6 +84,7 @@ void erase_fq_path(const std::shared_ptr<Node>& node) {
 const std::shared_ptr<Node> propagate_through_ops =
     pattern::wrap_type<ov::op::v0::Squeeze,
                        ov::op::v0::Unsqueeze,
+                       ov::op::v1::Transpose,
                        ov::op::v1::Reshape,
                        op::util::BroadcastBase,
                        op::util::BinaryElementwiseArithmetic,
@@ -92,6 +95,7 @@ const std::shared_ptr<Node> propagate_through_ops =
                        ov::op::v0::Sqrt,
                        ov::op::v1::StridedSlice,
                        ov::op::v1::ReduceSum,
+//                       ov::op::v0::Tanh,
                        ov::op::v1::ReduceMean,
                        ov::op::v8::Slice,
                        ov::op::v1::VariadicSplit,
@@ -364,6 +368,57 @@ class MarkDivWithEps : public MatcherPass {
     }
 };
 
+/*
+ * MarkNormalizationOps marks MVN (v0 and v6) nodes to be kept in f32 precision.
+ * NOTE(review): NormalizeL2 is not matched by the pattern below - add it if it must stay in f32 too.
+ */
+class MarkNormalizationOps : public MatcherPass {
+public:
+    OPENVINO_RTTI("MarkNormalizationOps", "0");
+
+    MarkNormalizationOps() {
+        MATCHER_SCOPE(MarkNormalizationOps);
+        auto ops_to_be_kept_fp32 = pattern::wrap_type<ov::op::v0::MVN, ov::op::v6::MVN>();
+
+        matcher_pass_callback callback = [](pattern::Matcher& m) {
+            const auto node = m.get_match_root();
+            if (!node)
+                return false;
+            disable_fp16_compression(node);
+            return true;
+        };
+        auto m = make_shared<pattern::Matcher>(ops_to_be_kept_fp32, matcher_name);
+        register_matcher(m, callback);
+    }
+};
+
+// MarkReduceWithPow keeps in f32 a Power with a constant exponent and the ReduceMean that consumes it.
+class MarkReduceWithPow : public MatcherPass {
+public:
+    OPENVINO_RTTI("MarkReduceWithPow", "0");
+    MarkReduceWithPow() {
+        MATCHER_SCOPE(MarkReduceWithPow);
+
+        // Pattern: ReduceMean(Power(any_input, Constant), Constant); the exponent value is not checked.
+        auto base = pattern::any_input();
+        auto exponent = pattern::wrap_type<ov::op::v0::Constant>();  // value_is_equal_to<float>({2.0}) is disabled
+        auto power = pattern::wrap_type<ov::op::v1::Power>({base, exponent});
+        auto reduce_axes = pattern::wrap_type<ov::op::v0::Constant>();
+        auto reduce_mean = pattern::wrap_type<ov::op::v1::ReduceMean>({power, reduce_axes});
+
+        matcher_pass_callback callback = [=](pattern::Matcher& m) {
+            const auto reduce_node = m.get_match_root();
+            if (!reduce_node)
+                return false;
+            // Both ends of the matched pair are marked: the Power and the ReduceMean root.
+            disable_fp16_compression(m.get_pattern_map().at(power));
+            disable_fp16_compression(reduce_node);
+            return true;
+        };
+        register_matcher(make_shared<pattern::Matcher>(reduce_mean, matcher_name), callback);
+    }
+};
+
 class PropagateDownDisableSensitivityForQuantized : public pass::MatcherPass {
 public:
     OPENVINO_RTTI("DisableMarkingForQuantizedNodes", "0");
@@ -432,6 +487,8 @@ bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model
     Manager manager(get_pass_config());
     // Mark root of Division with eps pattern to keep in FP32
     REGISTER_PASS(manager, MarkDivWithEps)
+//    REGISTER_PASS(manager, MarkReduceWithPow)
+//    REGISTER_PASS(manager, MarkNormalizationOps)
     REGISTER_PASS(manager, MarkExpInReduceOpPath)
     REGISTER_PASS(manager, PropagateDownDisableSensitivityForQuantized)
 
@@ -449,6 +506,11 @@ bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model
     for (auto& node : m->get_ops()) {
         erase_reduceop_path(node);
         erase_fq_path(node);
+        auto& rt_info = node->get_rt_info();
+        bool is_disabled = rt_info.count(DisableFP16Compression::get_type_info_static());
+        if (is_disabled) {
+            rt_info[DisableFP16Compression::get_type_info_static()] = DisableFP16Compression{};
+        }
     }
 
     return false;  // no need to revalidate
diff --git a/src/core/include/openvino/pass/upcast_to_fp32_by_name.hpp b/src/core/include/openvino/pass/upcast_to_fp32_by_name.hpp
new file mode 100644
index 00000000000000..29e105fb5cb1e5
--- /dev/null
+++ b/src/core/include/openvino/pass/upcast_to_fp32_by_name.hpp
@@ -0,0 +1,39 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <functional>
+#include <set>
+#include <sstream>
+#include <string>
+#include <typeindex>
+#include <typeinfo>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "openvino/pass/pass.hpp"
+
+namespace ov {
+namespace pass {
+/**
+ * @brief UpcastToFP32ByName calls disable_fp16_compression on every op whose friendly name is in the given list
+ * @ingroup ov_pass_cpp_api
+ */
+class OPENVINO_API UpcastToFP32ByName : public ModelPass {
+public:
+    OPENVINO_RTTI("ov::pass::UpcastToFP32ByName");
+
+    /// @param node_names_list friendly names of the ops to mark; taken by value and moved into the pass
+    explicit UpcastToFP32ByName(std::vector<std::string> node_names_list)
+        : nodes_to_keep_in_fp32(std::move(node_names_list)) {}
+
+    /// @return true if at least one op of the model was marked
+    bool run_on_model(const std::shared_ptr<ov::Model>&) override;
+private:
+    std::vector<std::string> nodes_to_keep_in_fp32;
+};
+}  // namespace pass
+}  // namespace ov
diff --git a/src/core/src/pass/upcast_to_fp32_by_name.cpp b/src/core/src/pass/upcast_to_fp32_by_name.cpp
new file mode 100644
index 00000000000000..d3aea09d36e6a0
--- /dev/null
+++ b/src/core/src/pass/upcast_to_fp32_by_name.cpp
@@ -0,0 +1,44 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "openvino/pass/upcast_to_fp32_by_name.hpp"
+#include "itt.hpp"
+
+#include <cmath>
+#include <fstream>
+#include <unordered_set>
+#include <vector>
+#include "openvino/cc/pass/itt.hpp"
+#include "openvino/core/type.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/parameter.hpp"
+#include "openvino/op/util/op_types.hpp"
+#include "openvino/util/common_util.hpp"
+
+#include "openvino/pass/manager.hpp"
+#include "openvino/pass/graph_rewrite.hpp"
+#include "transformations/rt_info/disable_fp16_compression.hpp"
+
+namespace ov {
+namespace pass {
+
+// Calls disable_fp16_compression on every op of the model whose friendly name is listed in
+// nodes_to_keep_in_fp32. Returns true if at least one op was marked, false otherwise.
+bool UpcastToFP32ByName::run_on_model(const std::shared_ptr<ov::Model>& f) {
+    RUN_ON_MODEL_SCOPE(UpcastToFP32ByName);
+
+    // Index the requested names once so that each op costs one lookup instead of a scan of the whole list.
+    const std::unordered_set<std::string> names(nodes_to_keep_in_fp32.begin(), nodes_to_keep_in_fp32.end());
+    bool is_changed = false;
+    for (const auto& node : f->get_ops()) {
+        if (names.count(node->get_friendly_name()) != 0) {
+            disable_fp16_compression(node);
+            is_changed = true;
+        }
+    }
+    return is_changed;
+}
+
+}  // namespace pass
+}  // namespace ov
diff --git a/src/core/src/pass/visualize_tree.cpp b/src/core/src/pass/visualize_tree.cpp
index 15f1bc410b2189..152ac326bbe6ef 100644
--- a/src/core/src/pass/visualize_tree.cpp
+++ b/src/core/src/pass/visualize_tree.cpp
@@ -148,6 +148,10 @@ static std::string get_attribute_values(const std::map<std::string, ov::Any>& at
     std::stringstream ss;
     bool first = true;
     for (const auto& item : attributes) {
+        if (item.first == "fused_names_0") {
+            continue;
+        }
+
         ss << (first ? " " : delimiter) << item.first;
         if (item.second.is<ov::RuntimeAttribute>()) {
             ss << "{" << item.second.as<ov::RuntimeAttribute>().to_string() << "}";
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index 6aff07cfdd4b16..7704d201c93766 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -236,6 +236,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
                                                           keep_precision_sensitive_in_fp32_1,
                                                           convert_input_output_precision);
 
+//        manager.register_pass<ov::pass::VisualizeTree>("after_conver_prec.svg");
         manager.register_pass<ov::pass::CommonOptimizations>();
 
         manager.register_pass<ov::pass::WrapInterpolateIntoTransposes>();
@@ -628,8 +629,14 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
                 return num_iter >= 16;
             });
         manager.register_pass<ov::pass::ResolveNameCollisions>(true);
+        manager.register_pass<ov::pass::VisualizeTree>("at_the_end_of_GPU.svg");
 
         manager.run_passes(func);
+//        for (auto& node : func->get_ops()) {
+//            if (fp16_compression_is_disabled(node) && as_type_ptr<ov::op::v0::Constant>(node) == nullptr) {
+//                std::cout << node->get_friendly_name() << std::endl;
+//            }
+//        }
     }
 
     {
diff --git a/tools/ovc/openvino/tools/ovc/partially_upcast_nodes_to_fp32.py b/tools/ovc/openvino/tools/ovc/partially_upcast_nodes_to_fp32.py
index e897d1db407ade..39eb9c181a11eb 100644
--- a/tools/ovc/openvino/tools/ovc/partially_upcast_nodes_to_fp32.py
+++ b/tools/ovc/openvino/tools/ovc/partially_upcast_nodes_to_fp32.py
@@ -21,7 +21,7 @@
 
 thresholds_per_op = {
     'Convolution': (0.1, 0.05),
-    # 'MatMul': (0.1, 0.05),
+    'MatMul': (0.1, 0.05),
 }
 
 
@@ -138,14 +138,17 @@ def mark_nodes_to_upcast_to_fp32(model: Model, nodes: List[Node], fp16_infer_val
     for node, fp16_val, fp32_val in zip(nodes, fp16_infer_vals, fp32_infer_vals):
         if compare_tensors(node, fp16_val[0], fp32_val):
             nodes_with_errors.append(node.get_friendly_name())
-    
-    for node in model.get_ordered_ops():
-        if node.get_friendly_name() in nodes_with_errors:
-            node.get_rt_info()['disable_fp16_compression_0'] = ''
 
-    from openvino.runtime.passes import VisualizeTree, Manager
+    # todo: uncomment when xxx-122082 is fixed
+    # for node in model.get_ordered_ops():
+    #     if node.get_friendly_name() in nodes_with_errors:
+    #         node.get_rt_info()['disable_fp16_compression_0'] = ''
+
+    # todo: a dirty workaround until xxx-122082 is fixed
+    from openvino.runtime.passes import VisualizeTree, Manager, UpcastToFP32ByName, PartiallyConvertToFP16
     manager = Manager()
-    manager.register_pass(VisualizeTree("upcasted.svg"))
+    manager.register_pass(UpcastToFP32ByName(nodes_with_errors))
+    manager.register_pass(PartiallyConvertToFP16)
     manager.run_passes(model)