From 64ecc673e917a326bb287807abebe4e07b940aac Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 4 Oct 2023 11:38:53 +0200 Subject: [PATCH] wip --- .../src/openvino/runtime/passes/__init__.py | 2 +- .../graph/passes/transformations.cpp | 18 ++++++ .../src/transformations/convert_precision.cpp | 2 +- ...k_subgraphs_to_keep_in_mixed_precision.cpp | 62 +++++++++++++++++++ .../openvino/pass/upcast_to_fp32_by_name.hpp | 39 ++++++++++++ src/core/src/pass/upcast_to_fp32_by_name.cpp | 44 +++++++++++++ src/core/src/pass/visualize_tree.cpp | 4 ++ .../src/plugin/transformations_pipeline.cpp | 7 +++ .../ovc/partially_upcast_nodes_to_fp32.py | 17 ++--- 9 files changed, 186 insertions(+), 9 deletions(-) create mode 100644 src/core/include/openvino/pass/upcast_to_fp32_by_name.hpp create mode 100644 src/core/src/pass/upcast_to_fp32_by_name.cpp diff --git a/src/bindings/python/src/openvino/runtime/passes/__init__.py b/src/bindings/python/src/openvino/runtime/passes/__init__.py index 281299a71560d4..71441be3f9b5b7 100644 --- a/src/bindings/python/src/openvino/runtime/passes/__init__.py +++ b/src/bindings/python/src/openvino/runtime/passes/__init__.py @@ -14,6 +14,6 @@ type_matches, type_matches_any, ) -from openvino._pyopenvino.passes import Serialize, ConstantFolding, VisualizeTree, MakeStateful, LowLatency2, ConvertFP32ToFP16, Version, PartiallyConvertToFP16 +from openvino._pyopenvino.passes import Serialize, ConstantFolding, VisualizeTree, MakeStateful, LowLatency2, ConvertFP32ToFP16, Version, UpcastToFP32ByName, PartiallyConvertToFP16 from openvino.runtime.passes.manager import Manager from openvino.runtime.passes.graph_rewrite import GraphRewrite, BackwardGraphRewrite diff --git a/src/bindings/python/src/pyopenvino/graph/passes/transformations.cpp b/src/bindings/python/src/pyopenvino/graph/passes/transformations.cpp index a767ce7718a08b..a35f7a4b185405 100644 --- a/src/bindings/python/src/pyopenvino/graph/passes/transformations.cpp +++ b/src/bindings/python/src/pyopenvino/graph/passes/transformations.cpp @@ -20,6 +20,7 @@ #include "pyopenvino/core/common.hpp" #include "pyopenvino/utils/utils.hpp" +#include "openvino/pass/upcast_to_fp32_by_name.hpp" namespace py = pybind11; using Version = ov::pass::Serialize::Version; @@ -107,6 +108,23 @@ void regclass_transformations(py::module m) { return Common::get_simple_repr(self); }); + py::class_, + ov::pass::ModelPass, + ov::pass::PassBase> + calibrate_fp16(m, "UpcastToFP32ByName"); + calibrate_fp16.doc() = "openvino.runtime.passes.UpcastToFP32ByName transformation"; + + calibrate_fp16.def(py::init>(), + py::arg("node_names_list"), + R"( + Cast selected nodes to FP32 and keep in that precision. + )"); + calibrate_fp16.def("__repr__", [](const ov::pass::UpcastToFP32ByName& self) { + return Common::get_simple_repr(self); + }); + + py::class_, ov::pass::ModelPass, ov::pass::PassBase> part_convert_to_fp16(m, "PartiallyConvertToFP16"); part_convert_to_fp16.doc() = "openvino.runtime.passes.ConvertPrecision transformation"; diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp index 65387086310830..c836ecceef30e9 100644 --- a/src/common/transformations/src/transformations/convert_precision.cpp +++ b/src/common/transformations/src/transformations/convert_precision.cpp @@ -367,7 +367,7 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr& return false; bool has_fp16_compression = m_precisions.count(element::f32) > 0 && m_precisions[element::f32] == element::f16; - + // todo: comment/uncomment here if (m_keep_precision_sensitive_in_fp32 && has_fp16_compression) { pass::Manager manager(get_pass_config()); // Mark subgraphs with disable_fp16_compression to keep them in FP32 diff --git a/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp index ff873eba035d0e..c6e04e64238a51 100644 --- a/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp +++ b/src/common/transformations/src/transformations/fp16_compression/mark_subgraphs_to_keep_in_mixed_precision.cpp @@ -18,7 +18,9 @@ #include "openvino/op/maximum.hpp" #include "openvino/op/multiply.hpp" #include "openvino/op/mvn.hpp" +#include "openvino/op/tanh.hpp" #include "openvino/op/normalize_l2.hpp" +#include "openvino/pass/serialize.hpp" #include "openvino/op/power.hpp" #include "openvino/op/reduce_max.hpp" #include "openvino/op/reduce_mean.hpp" @@ -82,6 +84,7 @@ void erase_fq_path(const std::shared_ptr& node) { const std::shared_ptr propagate_through_ops = pattern::wrap_type propagate_through_ops = ov::op::v0::Sqrt, ov::op::v1::StridedSlice, ov::op::v1::ReduceSum, +// ov::op::v0::Tanh, ov::op::v1::ReduceMean, ov::op::v8::Slice, ov::op::v1::VariadicSplit, @@ -364,6 +368,57 @@ class MarkDivWithEps : public MatcherPass { } }; +/* + * MarkNormalizationOps marks MVN and NormalizeL2 to be kept in f32 precision. + */ +class MarkNormalizationOps : public MatcherPass { +public: + OPENVINO_RTTI("MarkNormalizationOps", "0"); + + MarkNormalizationOps() { + MATCHER_SCOPE(MarkNormalizationOps); + auto ops_to_be_kept_fp32 = pattern::wrap_type(); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto& node = m.get_match_root(); + if (!node) + return false; + + disable_fp16_compression(node); + return true; + }; + auto m = make_shared(ops_to_be_kept_fp32, matcher_name); + register_matcher(m, callback); + } +}; + +class MarkReduceWithPow : public MatcherPass { +public: + OPENVINO_RTTI("MarkReduceWithPow", "0"); + MarkReduceWithPow() { + MATCHER_SCOPE(MarkReduceWithPow); + + auto input_1 = pattern::any_input(); + auto pow_const = pattern::wrap_type(); // value_is_equal_to({2.0})); + auto pow_pattern = pattern::wrap_type({input_1, pow_const}); + auto mean3_axes = pattern::wrap_type(); + auto reduce_pattern = pattern::wrap_type({pow_pattern, mean3_axes}); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto& pattern_to_output = m.get_pattern_map(); + if (!m.get_match_root()) + return false; + auto pow_node = pattern_to_output.at(pow_pattern); + disable_fp16_compression(pow_node); + disable_fp16_compression(m.get_match_root()); + return true; + }; + + auto m = make_shared(reduce_pattern, matcher_name); + register_matcher(m, callback); + } +}; + class PropagateDownDisableSensitivityForQuantized : public pass::MatcherPass { public: OPENVINO_RTTI("DisableMarkingForQuantizedNodes", "0"); @@ -432,6 +487,8 @@ bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptrget_ops()) { erase_reduceop_path(node); erase_fq_path(node); + auto& rt_info = node->get_rt_info(); + bool is_disabled = rt_info.count(DisableFP16Compression::get_type_info_static()); + if (is_disabled) { + rt_info[DisableFP16Compression::get_type_info_static()] = DisableFP16Compression{}; + } } return false; // no need to revalidate diff --git a/src/core/include/openvino/pass/upcast_to_fp32_by_name.hpp b/src/core/include/openvino/pass/upcast_to_fp32_by_name.hpp new file mode 100644 index 00000000000000..29e105fb5cb1e5 --- /dev/null +++ b/src/core/include/openvino/pass/upcast_to_fp32_by_name.hpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "openvino/pass/pass.hpp" + +class HeightMap; + + +namespace ov { +namespace pass { +/** + * @brief UpcastToFP32ByName sets nodes to fp32 + * @ingroup ov_pass_cpp_api + */ +class OPENVINO_API UpcastToFP32ByName : public ModelPass { +public: + OPENVINO_RTTI("ov::pass::UpcastToFP32ByName"); + + explicit UpcastToFP32ByName(std::vector node_names_list): nodes_to_keep_in_fp32(node_names_list) {} + + bool run_on_model(const std::shared_ptr&) override; +private: + std::vector nodes_to_keep_in_fp32; + +}; +} // namespace pass +} // namespace ov diff --git a/src/core/src/pass/upcast_to_fp32_by_name.cpp b/src/core/src/pass/upcast_to_fp32_by_name.cpp new file mode 100644 index 00000000000000..d3aea09d36e6a0 --- /dev/null +++ b/src/core/src/pass/upcast_to_fp32_by_name.cpp @@ -0,0 +1,44 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/pass/upcast_to_fp32_by_name.hpp" +#include "itt.hpp" + +#include +#include +#include "vector" +#include "openvino/cc/pass/itt.hpp" +#include "openvino/core/type.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/util/op_types.hpp" +#include "openvino/util/common_util.hpp" + +#include "openvino/pass/manager.hpp" +#include "openvino/pass/graph_rewrite.hpp" +#include "transformations/rt_info/disable_fp16_compression.hpp" + +using namespace std; + +namespace ov { + namespace pass { + + +bool ov::pass::UpcastToFP32ByName::run_on_model(const std::shared_ptr &f) { + RUN_ON_MODEL_SCOPE(UpcastToFP32ByName); + + bool is_changed = false; + for (auto &node: f->get_ops()) { + if (std::count(nodes_to_keep_in_fp32.begin(), nodes_to_keep_in_fp32.end(), node->get_friendly_name())) { + disable_fp16_compression(node); + is_changed = true; + } + + } + + return is_changed; +} + +} +} diff --git a/src/core/src/pass/visualize_tree.cpp b/src/core/src/pass/visualize_tree.cpp index 15f1bc410b2189..152ac326bbe6ef 100644 --- a/src/core/src/pass/visualize_tree.cpp +++ b/src/core/src/pass/visualize_tree.cpp @@ -148,6 +148,10 @@ static std::string get_attribute_values(const std::map& at std::stringstream ss; bool first = true; for (const auto& item : attributes) { + if (item.first == "fused_names_0") { + continue; + } + ss << (first ? " " : delimiter) << item.first; if (item.second.is()) { ss << "{" << item.second.as().to_string() << "}"; diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 6aff07cfdd4b16..7704d201c93766 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -236,6 +236,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { keep_precision_sensitive_in_fp32_1, convert_input_output_precision); +// manager.register_pass("after_conver_prec.svg"); manager.register_pass(); manager.register_pass(); @@ -628,8 +629,14 @@ void TransformationsPipeline::apply(std::shared_ptr func) { return num_iter >= 16; }); manager.register_pass(true); + manager.register_pass("at_the_end_of_GPU.svg"); manager.run_passes(func); +// for (auto& node : func->get_ops()) { +// if (fp16_compression_is_disabled(node) && as_type_ptr(node) == nullptr) { +// std::cout << node->get_friendly_name() << std::endl; +// } +// } } { diff --git a/tools/ovc/openvino/tools/ovc/partially_upcast_nodes_to_fp32.py b/tools/ovc/openvino/tools/ovc/partially_upcast_nodes_to_fp32.py index e897d1db407ade..39eb9c181a11eb 100644 --- a/tools/ovc/openvino/tools/ovc/partially_upcast_nodes_to_fp32.py +++ b/tools/ovc/openvino/tools/ovc/partially_upcast_nodes_to_fp32.py @@ -21,7 +21,7 @@ thresholds_per_op = { 'Convolution': (0.1, 0.05), - # 'MatMul': (0.1, 0.05), + 'MatMul': (0.1, 0.05), } @@ -138,14 +138,17 @@ def mark_nodes_to_upcast_to_fp32(model: Model, nodes: List[Node], fp16_infer_val for node, fp16_val, fp32_val in zip(nodes, fp16_infer_vals, fp32_infer_vals): if compare_tensors(node, fp16_val[0], fp32_val): nodes_with_errors.append(node.get_friendly_name()) - - for node in model.get_ordered_ops(): - if node.get_friendly_name() in nodes_with_errors: - node.get_rt_info()['disable_fp16_compression_0'] = '' - from openvino.runtime.passes import VisualizeTree, Manager + # todo: uncomment when xxx-122082 is fixed + # for node in model.get_ordered_ops(): + # if node.get_friendly_name() in nodes_with_errors: + # node.get_rt_info()['disable_fp16_compression_0'] = '' + + # todo: a dirty workaround until xxx-122082 is fixed + from openvino.runtime.passes import VisualizeTree, Manager, UpcastToFP32ByName, PartiallyConvertToFP16 manager = Manager() - manager.register_pass(VisualizeTree("upcasted.svg")) + manager.register_pass(UpcastToFP32ByName(nodes_with_errors)) + manager.register_pass(PartiallyConvertToFP16) manager.run_passes(model)