Commit 64ecc67

wip

pavel-esir committed Oct 4, 2023
1 parent 4909305

Showing 9 changed files with 186 additions and 9 deletions.
@@ -14,6 +14,6 @@
type_matches,
type_matches_any,
)
-from openvino._pyopenvino.passes import Serialize, ConstantFolding, VisualizeTree, MakeStateful, LowLatency2, ConvertFP32ToFP16, Version, PartiallyConvertToFP16
+from openvino._pyopenvino.passes import Serialize, ConstantFolding, VisualizeTree, MakeStateful, LowLatency2, ConvertFP32ToFP16, Version, UpcastToFP32ByName, PartiallyConvertToFP16
from openvino.runtime.passes.manager import Manager
from openvino.runtime.passes.graph_rewrite import GraphRewrite, BackwardGraphRewrite
@@ -20,6 +20,7 @@

#include "pyopenvino/core/common.hpp"
#include "pyopenvino/utils/utils.hpp"
#include "openvino/pass/upcast_to_fp32_by_name.hpp"

namespace py = pybind11;
using Version = ov::pass::Serialize::Version;
@@ -107,6 +108,23 @@ void regclass_transformations(py::module m) {
return Common::get_simple_repr(self);
});

py::class_<ov::pass::UpcastToFP32ByName,
std::shared_ptr<ov::pass::UpcastToFP32ByName>,
ov::pass::ModelPass,
ov::pass::PassBase>
calibrate_fp16(m, "UpcastToFP32ByName");
calibrate_fp16.doc() = "openvino.runtime.passes.UpcastToFP32ByName transformation";

calibrate_fp16.def(py::init<const std::vector<std::string>&>(),
py::arg("node_names_list"),
R"(
Cast selected nodes to FP32 and keep them in that precision.
)");
calibrate_fp16.def("__repr__", [](const ov::pass::UpcastToFP32ByName& self) {
return Common::get_simple_repr(self);
});


py::class_<ov::pass::ConvertPrecision, std::shared_ptr<ov::pass::ConvertPrecision>, ov::pass::ModelPass, ov::pass::PassBase>
part_convert_to_fp16(m, "PartiallyConvertToFP16");
part_convert_to_fp16.doc() = "openvino.runtime.passes.ConvertPrecision transformation";
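For orientation, a minimal sketch of how the freshly bound pass is meant to be driven from Python once this commit is built; my_model stands in for any ov.Model and the friendly names are placeholders:

from openvino.runtime.passes import Manager, UpcastToFP32ByName

# Keep the listed nodes in FP32 when the model is later compressed to FP16.
manager = Manager()
manager.register_pass(UpcastToFP32ByName(["node_a", "node_b"]))  # placeholder names
manager.run_passes(my_model)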
@@ -367,7 +367,7 @@ bool ov::pass::ConvertPrecision::run_on_model(const std::shared_ptr<ov::Model>&
return false;

bool has_fp16_compression = m_precisions.count(element::f32) > 0 && m_precisions[element::f32] == element::f16;

// todo: comment/uncomment here
if (m_keep_precision_sensitive_in_fp32 && has_fp16_compression) {
pass::Manager manager(get_pass_config());
// Mark subgraphs with disable_fp16_compression to keep them in FP32
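As background for why precision-sensitive subgraphs are kept out of FP16 (an illustration, not part of the diff): the canonical failure mode is an epsilon constant that underflows in half precision. A quick NumPy check:

import numpy as np

# FP16 flushes values below ~6e-8 to zero, so x / (denom + eps)
# degenerates into a division by zero once eps is cast down.
eps = 1e-12
print(np.float16(eps))                                         # 0.0
print(np.float16(1.0) / (np.float16(0.0) + np.float16(eps)))   # inf
print(np.float64(1.0) / (np.float64(0.0) + eps))               # 1000000000000.0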
@@ -18,7 +18,9 @@
#include "openvino/op/maximum.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/mvn.hpp"
#include "openvino/op/tanh.hpp"
#include "openvino/op/normalize_l2.hpp"
#include "openvino/pass/serialize.hpp"
#include "openvino/op/power.hpp"
#include "openvino/op/reduce_max.hpp"
#include "openvino/op/reduce_mean.hpp"
@@ -82,6 +84,7 @@ void erase_fq_path(const std::shared_ptr<Node>& node) {
const std::shared_ptr<Node> propagate_through_ops =
pattern::wrap_type<ov::op::v0::Squeeze,
ov::op::v0::Unsqueeze,
ov::op::v1::Transpose,
ov::op::v1::Reshape,
op::util::BroadcastBase,
op::util::BinaryElementwiseArithmetic,
@@ -92,6 +95,7 @@ const std::shared_ptr<Node> propagate_through_ops =
ov::op::v0::Sqrt,
ov::op::v1::StridedSlice,
ov::op::v1::ReduceSum,
// ov::op::v0::Tanh,
ov::op::v1::ReduceMean,
ov::op::v8::Slice,
ov::op::v1::VariadicSplit,
@@ -364,6 +368,57 @@ class MarkDivWithEps : public MatcherPass {
}
};

/*
* MarkNormalizationOps marks MVN and NormalizeL2 to be kept in f32 precision.
*/
class MarkNormalizationOps : public MatcherPass {
public:
OPENVINO_RTTI("MarkNormalizationOps", "0");

MarkNormalizationOps() {
MATCHER_SCOPE(MarkNormalizationOps);
auto ops_to_be_kept_fp32 = pattern::wrap_type<ov::op::v0::MVN, ov::op::v6::MVN, ov::op::v0::NormalizeL2>();

matcher_pass_callback callback = [=](pattern::Matcher& m) {
const auto& node = m.get_match_root();
if (!node)
return false;

disable_fp16_compression(node);
return true;
};
auto m = make_shared<pattern::Matcher>(ops_to_be_kept_fp32, matcher_name);
register_matcher(m, callback);
}
};

class MarkReduceWithPow : public MatcherPass {
public:
OPENVINO_RTTI("MarkReduceWithPow", "0");
MarkReduceWithPow() {
MATCHER_SCOPE(MarkReduceWithPow);

auto input_1 = pattern::any_input();
auto pow_const = pattern::wrap_type<ov::op::v0::Constant>(); // value_is_equal_to<float>({2.0}));
auto pow_pattern = pattern::wrap_type<ov::op::v1::Power>({input_1, pow_const});
auto mean3_axes = pattern::wrap_type<ov::op::v0::Constant>();
auto reduce_pattern = pattern::wrap_type<ov::op::v1::ReduceMean>({pow_pattern, mean3_axes});

matcher_pass_callback callback = [=](pattern::Matcher& m) {
const auto& pattern_to_output = m.get_pattern_map();
if (!m.get_match_root())
return false;
auto pow_node = pattern_to_output.at(pow_pattern);
disable_fp16_compression(pow_node);
disable_fp16_compression(m.get_match_root());
return true;
};

auto m = make_shared<pattern::Matcher>(reduce_pattern, matcher_name);
register_matcher(m, callback);
}
};

class PropagateDownDisableSensitivityForQuantized : public pass::MatcherPass {
public:
OPENVINO_RTTI("DisableMarkingForQuantizedNodes", "0");
@@ -432,6 +487,8 @@ bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model
Manager manager(get_pass_config());
// Mark root of Division with eps pattern to keep in FP32
REGISTER_PASS(manager, MarkDivWithEps)
// REGISTER_PASS(manager, MarkReduceWithPow)
// REGISTER_PASS(manager, MarkNormalizationOps)
REGISTER_PASS(manager, MarkExpInReduceOpPath)
REGISTER_PASS(manager, PropagateDownDisableSensitivityForQuantized)

@@ -449,6 +506,11 @@ bool MarkSugraphsToKeepInMixedPrecision::run_on_model(const shared_ptr<ov::Model
for (auto& node : m->get_ops()) {
erase_reduceop_path(node);
erase_fq_path(node);
auto& rt_info = node->get_rt_info();
bool is_disabled = rt_info.count(DisableFP16Compression::get_type_info_static());
if (is_disabled) {
rt_info[DisableFP16Compression::get_type_info_static()] = DisableFP16Compression{};
}
}

return false; // no need to revalidate
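The rationale for the (currently disabled) MarkReduceWithPow matcher can be reproduced numerically: FP16 tops out at 65504, so the ReduceMean(Pow(x, 2)) pattern overflows for activations of only a few hundred. A hedged NumPy illustration:

import numpy as np

# Squares of moderately large activations exceed the FP16 maximum (65504),
# so a mean of squares becomes inf unless the subgraph stays in FP32.
x = np.full((8,), 300.0)
print(np.mean(x.astype(np.float16) ** 2))   # inf, since 300**2 = 90000 > 65504
print(np.mean(x.astype(np.float32) ** 2))   # 90000.0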
39 changes: 39 additions & 0 deletions src/core/include/openvino/pass/upcast_to_fp32_by_name.hpp
@@ -0,0 +1,39 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <string>
#include <utility>
#include <vector>

#include "openvino/pass/pass.hpp"


namespace ov {
namespace pass {
/**
 * @brief UpcastToFP32ByName marks the nodes with the given friendly names to be kept in FP32
 * by disabling FP16 compression for them
* @ingroup ov_pass_cpp_api
*/
class OPENVINO_API UpcastToFP32ByName : public ModelPass {
public:
OPENVINO_RTTI("ov::pass::UpcastToFP32ByName");

explicit UpcastToFP32ByName(std::vector<std::string> node_names_list) : nodes_to_keep_in_fp32(std::move(node_names_list)) {}

bool run_on_model(const std::shared_ptr<ov::Model>&) override;
private:
std::vector<std::string> nodes_to_keep_in_fp32;

};
} // namespace pass
} // namespace ov
44 changes: 44 additions & 0 deletions src/core/src/pass/upcast_to_fp32_by_name.cpp
@@ -0,0 +1,44 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "openvino/pass/upcast_to_fp32_by_name.hpp"
#include "itt.hpp"

#include <algorithm>
#include <vector>
#include "openvino/cc/pass/itt.hpp"
#include "openvino/core/type.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/util/op_types.hpp"
#include "openvino/util/common_util.hpp"

#include "openvino/pass/manager.hpp"
#include "openvino/pass/graph_rewrite.hpp"
#include "transformations/rt_info/disable_fp16_compression.hpp"

using namespace std;

namespace ov {
namespace pass {


bool ov::pass::UpcastToFP32ByName::run_on_model(const std::shared_ptr<ov::Model> &f) {
RUN_ON_MODEL_SCOPE(UpcastToFP32ByName);

bool is_changed = false;
for (auto& node : f->get_ops()) {
    if (std::find(nodes_to_keep_in_fp32.begin(), nodes_to_keep_in_fp32.end(), node->get_friendly_name()) !=
        nodes_to_keep_in_fp32.end()) {
        disable_fp16_compression(node);
        is_changed = true;
    }
}

return is_changed;
}

}  // namespace pass
}  // namespace ov
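A sketch of the new pass's observable behavior from the Python side, assuming this commit's bindings are built; the toy model and node names are made up for illustration:

import openvino.runtime as ov
from openvino.runtime import opset8 as ops
from openvino.runtime.passes import Manager, UpcastToFP32ByName

# Build a toy model, then mark the node whose friendly name matches.
param = ops.parameter([2, 2], ov.Type.f32, name="input")
square = ops.multiply(param, param, name="square")
model = ov.Model([square], [param], "demo")

manager = Manager()
manager.register_pass(UpcastToFP32ByName(["square"]))
manager.run_passes(model)

# The matched node now carries the disable_fp16_compression runtime attribute.
for node in model.get_ordered_ops():
    if "disable_fp16_compression_0" in node.get_rt_info():
        print(node.get_friendly_name())   # expected: square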
4 changes: 4 additions & 0 deletions src/core/src/pass/visualize_tree.cpp
@@ -148,6 +148,10 @@ static std::string get_attribute_values(const std::map<std::string, ov::Any>& at
std::stringstream ss;
bool first = true;
for (const auto& item : attributes) {
if (item.first == "fused_names_0") {
continue;
}

ss << (first ? " " : delimiter) << item.first;
if (item.second.is<ov::RuntimeAttribute>()) {
ss << "{" << item.second.as<ov::RuntimeAttribute>().to_string() << "}";
7 changes: 7 additions & 0 deletions src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -236,6 +236,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
keep_precision_sensitive_in_fp32_1,
convert_input_output_precision);

// manager.register_pass<ov::pass::VisualizeTree>("after_conver_prec.svg");
manager.register_pass<ov::pass::CommonOptimizations>();

manager.register_pass<ov::pass::WrapInterpolateIntoTransposes>();
@@ -628,8 +629,14 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
return num_iter >= 16;
});
manager.register_pass<ov::pass::ResolveNameCollisions>(true);
manager.register_pass<ov::pass::VisualizeTree>("at_the_end_of_GPU.svg");

manager.run_passes(func);
// for (auto& node : func->get_ops()) {
// if (fp16_compression_is_disabled(node) && as_type_ptr<ov::op::v0::Constant>(node) == nullptr) {
// std::cout << node->get_friendly_name() << std::endl;
// }
// }
}

{
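The commented-out verification loop above has a Python analogue that is handy when debugging the same pipeline from the tooling side (a sketch; model is a placeholder ov.Model):

# List every non-Constant node that ended up with FP16 compression disabled.
for node in model.get_ordered_ops():
    if node.get_type_name() != "Constant" and \
            "disable_fp16_compression_0" in node.get_rt_info():
        print(node.get_friendly_name())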
17 changes: 10 additions & 7 deletions tools/ovc/openvino/tools/ovc/partially_upcast_nodes_to_fp32.py
@@ -21,7 +21,7 @@

thresholds_per_op = {
'Convolution': (0.1, 0.05),
-# 'MatMul': (0.1, 0.05),
+'MatMul': (0.1, 0.05),
}


@@ -138,14 +138,17 @@ def mark_nodes_to_upcast_to_fp32(model: Model, nodes: List[Node], fp16_infer_val
for node, fp16_val, fp32_val in zip(nodes, fp16_infer_vals, fp32_infer_vals):
if compare_tensors(node, fp16_val[0], fp32_val):
nodes_with_errors.append(node.get_friendly_name())

for node in model.get_ordered_ops():
if node.get_friendly_name() in nodes_with_errors:
node.get_rt_info()['disable_fp16_compression_0'] = ''

from openvino.runtime.passes import VisualizeTree, Manager
# todo: uncomment when xxx-122082 is fixed
# for node in model.get_ordered_ops():
# if node.get_friendly_name() in nodes_with_errors:
# node.get_rt_info()['disable_fp16_compression_0'] = ''

# todo: a dirty workaround until xxx-122082 is fixed
from openvino.runtime.passes import VisualizeTree, Manager, UpcastToFP32ByName, PartiallyConvertToFP16
manager = Manager()
manager.register_pass(VisualizeTree("upcasted.svg"))
manager.register_pass(UpcastToFP32ByName(nodes_with_errors))
manager.register_pass(PartiallyConvertToFP16())
manager.run_passes(model)


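For reference, a hedged sketch of the kind of comparison mark_nodes_to_upcast_to_fp32 relies on, under the assumption that each (0.1, 0.05) pair holds a relative and an absolute error threshold; the real compare_tensors body is outside this hunk:

import numpy as np

def compare_tensors_sketch(fp16_val: np.ndarray, fp32_val: np.ndarray,
                           rel_tol: float = 0.1, abs_tol: float = 0.05) -> bool:
    # Flag the node when FP16 inference diverges from the FP32 reference
    # beyond both tolerances anywhere in the tensor.
    abs_err = np.abs(fp16_val.astype(np.float32) - fp32_val)
    rel_err = abs_err / np.maximum(np.abs(fp32_val), np.finfo(np.float32).eps)
    return bool(np.any((rel_err > rel_tol) & (abs_err > abs_tol)))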
