forked from openvinotoolkit/openvino
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
92 additions
and
109 deletions.
There are no files selected for viewing
29 changes: 29 additions & 0 deletions
29
...ansformations/include/transformations/common_optimizations/fuse_u4_weights_zero_point.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
// Copyright (C) 2018-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "openvino/pass/graph_rewrite.hpp" | ||
#include "transformations_visibility.hpp" | ||
|
||
namespace ov { | ||
namespace pass { | ||
|
||
// Forward declaration that carries the TRANSFORMATIONS_API macro; the class body is | ||
// defined later in this header through its fully qualified name. | ||
class TRANSFORMATIONS_API FuseU4WeightsAndZeroPoint; | ||
|
||
} // namespace pass | ||
} // namespace ov | ||
|
||
/** | ||
* @ingroup ie_transformation_common_api | ||
 * @brief Applies the zero point to U4 weights and fuses the result into an I4 constant if all resulting values are inside the I4 range. | ||
 * If some values are outside the I4 range, converts the zero point constant to a scalar. | ||
 * Limitations: works only when the zero point is equal to 8. | ||
*/ | ||
|
||
// Definition of the pass declared above inside ov::pass; it derives publicly from ov::pass::MatcherPass. | ||
class ov::pass::FuseU4WeightsAndZeroPoint : public ov::pass::MatcherPass { | ||
public: | ||
// Registers the type name "FuseU4WeightsAndZeroPoint"; the second argument "0" is presumably the version id (confirm against OPENVINO_RTTI). | ||
OPENVINO_RTTI("FuseU4WeightsAndZeroPoint", "0"); | ||
// Constructor declaration only; the definition is not part of this header. | ||
FuseU4WeightsAndZeroPoint(); | ||
}; |
29 changes: 0 additions & 29 deletions
29
...ransformations/include/transformations/common_optimizations/weights_zero_point_fusion.hpp
This file was deleted.
Oops, something went wrong.
61 changes: 61 additions & 0 deletions
61
...n/transformations/src/transformations/common_optimizations/fuse_u4_weights_zero_point.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
// Copyright (C) 2018-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "transformations/common_optimizations/fuse_u4_weights_zero_point.hpp" | ||
|
||
#include "itt.hpp" | ||
#include "openvino/core/rt_info.hpp" | ||
#include "openvino/op/constant.hpp" | ||
#include "openvino/op/subtract.hpp" | ||
#include "openvino/pass/pattern/op/wrap_type.hpp" | ||
#include "openvino/reference/autobroadcast_binop.hpp" | ||
#include "transformations/utils/utils.hpp" | ||
|
||
/**
 * Builds and registers the matcher for the sub-graph
 *
 *     Constant(u4) -> Convert -> Subtract <- Constant(zero point)
 *
 * The callback only proceeds when every element of the zero point constant equals 8.
 * It then subtracts 8 from each weight value:
 *   - if all results fit into the I4 range [-8, 7], the u4 weights constant is replaced
 *     by an i4 constant holding the shifted values and the Subtract is bypassed;
 *   - otherwise the zero point constant is replaced by a scalar constant with value 8.
 * In both of these cases the callback reports a change by returning true.
 */
ov::pass::FuseU4WeightsAndZeroPoint::FuseU4WeightsAndZeroPoint() {
    MATCHER_SCOPE(FuseU4WeightsAndZeroPoint);
    auto weights_m = pattern::wrap_type<ov::op::v0::Constant>(pattern::type_matches(ov::element::u4));
    auto convert_m = pattern::wrap_type<ov::op::v0::Convert>({weights_m});
    auto zero_point_m = pattern::wrap_type<ov::op::v0::Constant>();
    auto subtract_m = pattern::wrap_type<ov::op::v1::Subtract>({convert_m, zero_point_m});

    ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
        auto& pattern_map = m.get_pattern_value_map();
        const auto& subtract = pattern_map.at(subtract_m);

        // The only zero point this pass supports, and the inclusive I4 value range.
        const int8_t zero_point_value = 8;
        const int i4_min = -8;
        const int i4_max = 7;

        const auto zero_point =
            ov::as_type_ptr<ov::op::v0::Constant>(pattern_map.at(zero_point_m).get_node_shared_ptr());
        const auto weights = ov::as_type_ptr<ov::op::v0::Constant>(pattern_map.at(weights_m).get_node_shared_ptr());
        // Both nodes are dereferenced below, so bail out if either cast failed.
        if (!zero_point || !weights)
            return false;
        if (!ov::op::util::constantIsEqualTo(zero_point, zero_point_value))
            return false;

        // Element-wise "weights - zero point"; clears the flag as soon as one result leaves the I4 range.
        // The flag is captured by reference, so the lambda does not need to be `mutable`.
        bool can_be_fused = true;
        auto apply_zero_point = [&can_be_fused](int8_t weights_val, int8_t zp_val) {
            const int result_value = weights_val - zp_val;
            if (result_value < i4_min || result_value > i4_max)
                can_be_fused = false;
            return static_cast<int8_t>(result_value);
        };

        const auto& weights_shape = weights->get_shape();
        auto weights_values = weights->cast_vector<int8_t>();
        // The zero point is applied as a single scalar (empty shape) broadcast over the weights.
        std::vector<int8_t> zero_point_values{zero_point_value};
        std::vector<int8_t> new_weights_values(ov::shape_size(weights_shape));
        ov::reference::autobroadcast_binop(weights_values.data(),
                                           zero_point_values.data(),
                                           new_weights_values.data(),
                                           weights_shape,
                                           ov::Shape{},
                                           ov::op::AutoBroadcastType::NUMPY,
                                           apply_zero_point);

        if (can_be_fused) {
            // Swap the u4 constant for an i4 constant with the shifted values, then reroute the
            // Subtract output to the Subtract's first input (the Convert in the matched pattern).
            // NOTE(review): the weights constant is replaced as a node, which presumably affects
            // every consumer of that constant, not only this Convert - confirm this is intended.
            auto new_weights = ov::op::v0::Constant::create(ov::element::i4, weights_shape, new_weights_values);
            ov::replace_node_update_name(weights, new_weights);
            ov::replace_output_update_name(subtract, subtract.get_node()->input_value(0));
        } else {
            // NOTE(review): for u4 values in [0, 15] and a zero point of 8 the difference always lies
            // in [-8, 7], so this branch looks unreachable - confirm before relying on it.
            const auto new_zp = ov::op::v0::Constant::create(zero_point->get_element_type(), {}, {zero_point_value});
            ov::replace_node_update_name(zero_point, new_zp);
        }
        return true;
    };

    auto m = std::make_shared<ov::pass::pattern::Matcher>(subtract_m, matcher_name);
    register_matcher(m, callback);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
78 changes: 0 additions & 78 deletions
78
...on/transformations/src/transformations/common_optimizations/weights_zero_point_fusion.cpp
This file was deleted.
Oops, something went wrong.