Skip to content

Commit

Permalink
Softmax decomposition moved to data flow pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
v-Golubev committed Jan 25, 2024
1 parent c175d24 commit cd881a2
Show file tree
Hide file tree
Showing 43 changed files with 908 additions and 310 deletions.
2 changes: 0 additions & 2 deletions src/common/snippets/include/snippets/lowered/loop_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,6 @@ class LinearIR::LoopManager {

// Updates the corresponding Loop after an expression has been replaced with new expressions (decomposition).
// If ports of the decomposed expression were Loop ports, these Loop ports may be updated via the parameters `new_entries` and `exits`.
// Note: This method should be removed once Softmax decomposition is moved to the data flow pipeline, since
// all decompositions should be called on this pipeline
void expression_replacement(constExprIt new_expr_begin, constExprIt new_expr_end, const ExpressionPtr& decomposed_expr,
size_t loop_id, const std::vector<ExpressionPort>& new_entries, const std::vector<ExpressionPort>& exits);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "pass.hpp"

namespace ov {
namespace snippets {
namespace lowered {
namespace pass {

/**
 * @interface ReduceDecomposition
 * @brief Lowered (linear IR) pass that replaces snippets::Reduce operations
 *        with a sequence of low-level operations
 * @attention Only Reduce by the last dimension is supported
 * @ingroup snippets
 */
class ReduceDecomposition : public Pass {
public:
    OPENVINO_RTTI("ReduceDecomposition", "Pass")

    /// @param vector_size upper bound for the increment of the generated reduction loop
    explicit ReduceDecomposition(size_t vector_size);

    /// @return true if the linear IR was modified
    bool run(LinearIR& linear_ir) override;

private:
    // Upper bound for the reduction loop increment
    size_t m_vector_size;
};

} // namespace pass
} // namespace lowered
} // namespace snippets
} // namespace ov

This file was deleted.

59 changes: 59 additions & 0 deletions src/common/snippets/include/snippets/op/reduce.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/op/op.hpp"
#include "snippets/shape_inference/shape_infer_instances.hpp"

namespace ov {
namespace snippets {
namespace op {

/**
 * @interface ReduceBase
 * @brief Base class for reduce operations.
 * @arg m_axis reduce axis.
 * @ingroup snippets
 */
class ReduceBase : public ov::op::Op {
public:
    OPENVINO_OP("ReduceBase", "SnippetsOpset");

    /// @param x    input of the reduce operation
    /// @param axis reduce axis
    ReduceBase(const Output<Node>& x, size_t axis);
    ReduceBase() = default;

    bool visit_attributes(AttributeVisitor& visitor) override;
    void validate_and_infer_types() override;
    /// @return the reduce axis stored in the operation
    size_t get_axis() const { return m_axis; }

protected:
    // Value-initialized so that a default-constructed operation never exposes
    // an indeterminate axis through get_axis().
    size_t m_axis = 0;
};

class ReduceSum : public ReduceBase {
public:
OPENVINO_OP("ReduceSum", "SnippetsOpset", ReduceBase);
ReduceSum(const Output<Node>& x, size_t axis) : ReduceBase(x, axis) {}
ReduceSum() = default;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
static std::set<ov::element::TypeVector> get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
return {{ov::element::f32}};
}
};

class ReduceMax : public ReduceBase {
public:
OPENVINO_OP("ReduceMax", "SnippetsOpset", ReduceBase);
ReduceMax(const Output<Node>& x, size_t axis) : ReduceBase(x, axis) {}
ReduceMax() = default;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
static std::set<ov::element::TypeVector> get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
return {{ov::element::f32}};
}
};

} // namespace op
} // namespace snippets
} // namespace ov
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,25 @@

#pragma once

#include "openvino/pass/graph_rewrite.hpp"
#include "openvino/pass/pattern/matcher.hpp"
#include "openvino/pass/graph_rewrite.hpp"

namespace ov {
namespace snippets {
namespace pass {

/**
* @interface SetSoftmaxPorts
* @brief The pass updates port descriptors in accordance with the Softmax reduction axis
* @interface ReduceToSnippetsReduce
* @brief Converts ReduceMax and ReduceSum from openvino opset to snippets opset.
* Also checks that reduction operation is supported by snippets.
* @ingroup snippets
*/
class SetSoftmaxPorts: public ov::pass::MatcherPass {
class ReduceToSnippetsReduce: public ov::pass::MatcherPass {
public:
SetSoftmaxPorts();
ReduceToSnippetsReduce();
};


} // namespace pass
} // namespace snippets
} // namespace ov
} // namespace ov
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/pass/graph_rewrite.hpp"
#include "openvino/pass/pattern/matcher.hpp"

namespace ov {
namespace snippets {
namespace pass {

/**
 * @interface SoftmaxDecomposition
 * @brief Matcher pass that decomposes Softmax to a range of low-level operations
 * @ingroup snippets
 */
class SoftmaxDecomposition : public ov::pass::MatcherPass {
public:
    OPENVINO_RTTI("SoftmaxDecomposition", "0");

    SoftmaxDecomposition();
};

} // namespace pass
} // namespace snippets
} // namespace ov
Original file line number Diff line number Diff line change
Expand Up @@ -68,5 +68,12 @@ class BrgemmShapeInfer : public IShapeInferSnippets {
Result infer(const std::vector<VectorDimsRef>& input_shapes) override;
};

/**
 * Shape inference implementation for snippets reduce operations.
 * NOTE(review): m_axis is presumably taken from the node passed to the constructor - confirm in the implementation.
 */
class ReduceShapeInfer : public IShapeInferSnippets {
    size_t m_axis;

public:
    explicit ReduceShapeInfer(const std::shared_ptr<Node>& n);

    Result infer(const std::vector<VectorDimsRef>& input_shapes) override;
};

} // namespace snippets
} // namespace ov
1 change: 1 addition & 0 deletions src/common/snippets/include/snippets/snippets_isa.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "op/vector_buffer.hpp"
#include "op/rank_normalization.hpp"
#include "op/perf_count.hpp"
#include "op/reduce.hpp"

namespace ov {
namespace snippets {
Expand Down
2 changes: 2 additions & 0 deletions src/common/snippets/include/snippets/snippets_isa_tbl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ OV_OP(BroadcastMove, ov::snippets::op)
OV_OP(Scalar, ov::snippets::op)
OV_OP(Nop, ov::snippets::op)
OV_OP(RankNormalization, ov::snippets::op)
OV_OP(ReduceMax, ov::snippets::op)
OV_OP(ReduceSum, ov::snippets::op)

#ifdef SNIPPETS_DEBUG_CAPS
OV_OP(PerfCountBegin, ov::snippets::op)
Expand Down
4 changes: 3 additions & 1 deletion src/common/snippets/src/lowered/linear_ir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,10 @@ void LinearIR::debug_print(bool tds_as_pointers) const {

/**
 * Creates an emitter for every expression that does not have one yet.
 * The emitter factory is looked up in the target machine by the type info of the expression node.
 * Throws (via OPENVINO_ASSERT) if the factory returns an empty emitter.
 */
void LinearIR::init_emitters(const std::shared_ptr<TargetMachine>& target) {
    for (auto& expr : m_expressions) {
        // Expressions that already own an emitter are left untouched
        if (expr->get_emitter())
            continue;
        expr->m_emitter = target->get(expr->get_node()->get_type_info())(expr);
        OPENVINO_ASSERT(expr->m_emitter, "Emitter can't be created for the node ", expr->get_node());
    }
}

Expand Down
2 changes: 1 addition & 1 deletion src/common/snippets/src/lowered/pass/assign_registers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ bool AssignRegisters::run(LinearIR& linear_ir) {
manually_assigned_gprs[expr->get_output_port_connector(0)] =
static_cast<Reg>(num_results + num_parameters + buffer_id);
} else if (ov::is_type<op::HorizonMax>(op) || ov::is_type<op::HorizonSum>(op)) {
// Only in SoftmaxDecomposition ReduceMax and ReduceSum use HorizonMax/HorizonSum and VectorBuffer.
// Only in ReduceDecomposition Reduce ops use HorizonMax/HorizonSum and VectorBuffer.
// We should manually set the one vector register for VectorBuffer and Max/Sum output to simulate an accumulator
// TODO [96351]: We should rewrite accumulator pattern using another way
const auto& input_tensor = expr->get_input_port_connector(0);
Expand Down
130 changes: 130 additions & 0 deletions src/common/snippets/src/lowered/pass/reduce_decomposition.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "snippets/lowered/pass/reduce_decomposition.hpp"

#include "snippets/itt.hpp"
#include "snippets/lowered/linear_ir.hpp"
#include "snippets/lowered/loop_manager.hpp"
#include "snippets/lowered/pass/iter_handler.hpp"
#include "snippets/snippets_isa.hpp"

namespace ov {
namespace snippets {
namespace lowered {
namespace pass {

namespace {
/**
 * Returns the value (f32 in byte representation) the accumulator is filled with before the reduction starts.
 *  - ReduceMax: 0xff7fffff, the bit pattern of the lowest finite f32 value (-FLT_MAX)
 *  - ReduceSum: 0x00000000, the bit pattern of +0.0f
 * Asserts if the type info does not belong to a supported reduce operation.
 */
uint32_t get_initial_value(const ov::DiscreteTypeInfo& type_info) {
    static const std::map<ov::DiscreteTypeInfo, uint32_t> reduce_initial_values{
        {op::ReduceMax::get_type_info_static(), static_cast<uint32_t>(0xff7fffff)},
        {op::ReduceSum::get_type_info_static(), static_cast<uint32_t>(0x00000000)},
    };
    OPENVINO_ASSERT(reduce_initial_values.count(type_info), "Unexpected ReduceType");
    const auto& initial_value = reduce_initial_values.at(type_info);
    return initial_value;
}

/**
 * Creates the elementwise operation that accumulates partial results inside the reduce loop:
 * Maximum for ReduceMax, Add for ReduceSum. Throws for any other type info.
 */
std::shared_ptr<ov::Node> get_accumulation_node(const ov::Output<ov::Node>& input0,
                                                const ov::Output<ov::Node>& input1,
                                                const ov::DiscreteTypeInfo& type_info) {
    if (type_info == op::ReduceMax::get_type_info_static())
        return std::make_shared<ov::op::v1::Maximum>(input0, input1);

    if (type_info == op::ReduceSum::get_type_info_static())
        return std::make_shared<ov::op::v1::Add>(input0, input1);

    OPENVINO_THROW("Unsupported reduce type: ", type_info);
}

/**
 * Creates the horizon operation applied to the accumulator after the reduce loop:
 * HorizonMax for ReduceMax, HorizonSum for ReduceSum. Throws for any other type info.
 */
std::shared_ptr<ov::Node> get_horizon_node(const ov::Output<ov::Node>& input, const ov::DiscreteTypeInfo& type_info) {
    if (type_info == op::ReduceMax::get_type_info_static())
        return std::make_shared<op::HorizonMax>(input);

    if (type_info == op::ReduceSum::get_type_info_static())
        return std::make_shared<op::HorizonSum>(input);

    OPENVINO_THROW("Unsupported reduce type: ", type_info);
}
} // namespace

using LoopInfo = LinearIR::LoopManager::LoopInfo;
using HandlerType = LoopInfo::SpecificIterationHandlers::HandlerType;

// Stores the vector size; it is used as the upper bound for the reduction loop increment.
ReduceDecomposition::ReduceDecomposition(size_t vector_size) : m_vector_size(vector_size) {}

/**
 * @brief Replaces every snippets Reduce expression of the linear IR with the sequence
 *        VectorBuffer -> Fill (initial value) -> loop { Fill -> Maximum/Add } -> HorizonMax/HorizonSum.
 *        The consumers of the Reduce output are reconnected to the output of the horizon operation,
 *        the loops that contained the Reduce expression are updated and the Reduce expression is erased.
 * @param linear_ir linear IR that is modified in place
 * @return true if at least one Reduce expression was decomposed
 */
bool ReduceDecomposition::run(LinearIR& linear_ir) {
    OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ReduceMaxDecompositionLowered")
    const auto& loop_manager = linear_ir.get_loop_manager();
    bool modified = false;
    // The iterator is advanced manually: after a decomposition `erase` already provides the next position,
    // so an unconditional increment would skip the expression that follows the erased Reduce
    // (or step past end() if Reduce was the last expression).
    auto expr_it = linear_ir.begin();
    while (expr_it != linear_ir.end()) {
        const auto& reduce_expr = *expr_it;
        const auto& reduce = ov::as_type_ptr<ov::snippets::op::ReduceBase>(reduce_expr->get_node());
        if (!reduce) {
            ++expr_it;
            continue;
        }

        const auto& reduce_type_info = reduce->get_type_info();
        const auto& input_shape = reduce_expr->get_input_port_descriptor(0)->get_shape();
        // Validate the shape before its last dimension is read
        OPENVINO_ASSERT(!input_shape.empty(), "ReduceDecomposition expects a non-empty input shape.");
        OPENVINO_ASSERT(reduce->get_axis() == input_shape.size() - 1, "ReduceDecomposition supports only Reduce by last dimension.");
        const auto work_amount = *(input_shape.rbegin());
        const auto increment = m_vector_size <= work_amount ? m_vector_size : work_amount;
        // The increment is used as a divisor below, so it must not be zero
        OPENVINO_ASSERT(increment != 0, "ReduceDecomposition expects non-zero work amount and vector size.");
        const bool is_dynamic = reduce->is_dynamic();

        // We need an iterator to the inserted element
        auto push_node = [&](const std::shared_ptr<Node>& n) {
            const auto expr = linear_ir.insert(expr_it, n);
            if (is_dynamic)
                expr->get()->updateShapes();
            return std::make_pair(expr, n);
        };
        // Float constant values in byte representation
        const auto fill_value = get_initial_value(reduce_type_info);
        // Note: VectorBuffer is a special case, since it should go before the initial Load.
        // The buffer must be initialized with fill_value before reduction
        const auto vector_buffer = push_node(std::make_shared<op::VectorBuffer>());
        const auto initial_fill = push_node(std::make_shared<op::Fill>(vector_buffer.second, 0, fill_value));

        // Reduce loop
        const auto fill = push_node(std::make_shared<op::Fill>(reduce->get_input_source_output(0), increment, fill_value));
        const auto accumulation = push_node(get_accumulation_node(fill.second, initial_fill.second, reduce_type_info));

        const auto reduce_loop_id = loop_manager->mark_loop(
            fill.first,
            expr_it,
            work_amount,
            increment,
            0,
            std::vector<ExpressionPort>{(*fill.first)->get_input_port(0), (*accumulation.first)->get_input_port(1)},
            std::vector<ExpressionPort>{(*accumulation.first)->get_output_port(0)});
        // If the work amount is not a multiple of the increment, a SetFillOffset handler
        // is registered for the last iteration with the tail size
        const auto tail_size = work_amount % increment;
        if (tail_size != 0) {
            loop_manager->get_loop_info(reduce_loop_id)->register_handler<HandlerType::LAST_ITER, SetFillOffset>(tail_size);
        }
        const auto horizon = push_node(get_horizon_node(accumulation.second, reduce_type_info));

        // Transfer original ExpressionPorts
        replace_input_port_connectors({fill.first->get()->get_input_port(0)}, reduce_expr->get_input_port_connector(0));
        replace_input_port_connectors(reduce_expr->get_output_port_connector(0)->get_consumers(), horizon.first->get()->get_output_port_connector(0));

        // Update Loop info for outer loops
        const std::vector<ExpressionPort> entry_points{(*fill.first)->get_input_port(0)};
        const std::vector<ExpressionPort> exit_points{(*horizon.first)->get_output_port(0)};
        for (auto loop_id : reduce_expr->get_loop_ids()) {
            loop_manager->expression_replacement(vector_buffer.first,
                                                 expr_it,
                                                 reduce_expr,
                                                 loop_id,
                                                 entry_points,
                                                 exit_points);
        }

        // NOTE(review): relies on LinearIR::erase returning the iterator to the expression that follows
        // the erased one (std::list::erase semantics) - confirm. This position is inspected on the next iteration.
        expr_it = linear_ir.erase(expr_it);
        modified = true;
    }
    return modified;
}

} // namespace pass
} // namespace lowered
} // namespace snippets
} // namespace ov
Loading

0 comments on commit cd881a2

Please sign in to comment.