Skip to content

Commit

Permalink
Alexandra's comments applied: first part
Browse files Browse the repository at this point in the history
  • Loading branch information
v-Golubev committed Jan 18, 2024
1 parent dceb5b4 commit f13cc1e
Show file tree
Hide file tree
Showing 16 changed files with 117 additions and 86 deletions.
2 changes: 1 addition & 1 deletion src/common/snippets/include/snippets/lowered/linear_ir.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ class LinearIR {

const container& get_ops() const { return m_expressions; }
const io_container& get_IO_ops() const { return m_io_expressions; }
Config get_config() const { return m_config; }
const Config& get_config() const { return m_config; }
void set_loop_depth(size_t loop_depth) { m_config.m_loop_depth = loop_depth; }

const ExpressionPtr& get_expr_by_node(const std::shared_ptr<Node>& n) const;
Expand Down
32 changes: 13 additions & 19 deletions src/common/snippets/include/snippets/lowered/loop_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ class LinearIR::LoopManager {
class LoopInfo {
public:
enum {UNDEFINED_DIM_IDX = std::numeric_limits<size_t>::max()};
// This enum is used for loop specific iterations handlers enumeration
enum {FIRST_ITER, MAIN_BODY, LAST_ITER};
LoopInfo() = default;
LoopInfo(size_t work_amount, size_t increment,
const std::vector<LoopPort>& entries,
const std::vector<LoopPort>& exits,
bool outer_splited_loop = false);
const std::vector<LoopPort>& exits);
LoopInfo(size_t work_amount, size_t increment,
const std::vector<ExpressionPort>& entries,
const std::vector<ExpressionPort>& exits,
bool outer_splited_loop = false);
const std::vector<ExpressionPort>& exits);

std::shared_ptr<LoopInfo> clone_with_new_expr(const ExressionMap& expr_map) const;

Expand All @@ -60,18 +60,16 @@ class LinearIR::LoopManager {
size_t get_increment() const;
const std::vector<LoopPort>& get_entry_points() const;
const std::vector<LoopPort>& get_exit_points() const;
bool get_outer_splited_loop() const;
const std::vector<lowered::pass::PassPipeline>& get_handlers() const;

// Sets dim_idx to all entry and exit points
void set_dim_idx(size_t dim_idx);
void set_work_amount(size_t work_amount);
void set_increment(size_t increment);
void set_entry_points(std::vector<LoopPort> entry_points);
void set_exit_points(std::vector<LoopPort> exit_points);
void set_outer_splited_loop(bool outer_splited_loop);

enum {FIRST_ITER, MAIN_BODY, LAST_ITER};
std::vector<lowered::pass::PassPipeline> handlers;
void set_handlers(std::vector<lowered::pass::PassPipeline> handlers);
void set_default_handlers();

private:
size_t m_work_amount = 0;
Expand All @@ -82,8 +80,7 @@ class LinearIR::LoopManager {
// Note: Scalars aren't entry expressions but can be before first entry expr in Linear IR
std::vector<LoopPort> m_entry_points = {};
std::vector<LoopPort> m_exit_points = {};
// True if this Loop is outer Loop for nested Loops that splits the same dimension
bool m_outer_splited_loop = false;
std::vector<lowered::pass::PassPipeline> m_handlers = {};
};
using LoopInfoPtr = std::shared_ptr<LoopInfo>;

Expand Down Expand Up @@ -112,16 +109,14 @@ class LinearIR::LoopManager {
const std::vector<T>& entries,
const std::vector<T>& exits,
bool set_default_handlers = true) {
if (increment > work_amount)
increment = work_amount;
const auto loop_info = std::make_shared<LoopManager::LoopInfo>(work_amount, increment, entries, exits);
const auto loop_info = std::make_shared<LoopManager::LoopInfo>(work_amount, std::min(increment, work_amount), entries, exits);
loop_info->set_dim_idx(dim_idx);
const auto loop_id = this->add_loop_info(loop_info);
for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) {
insert_loop_id(*expr_it, loop_id);
}
if (set_default_handlers) {
set_default_loop_handlers(loop_info);
loop_info->set_default_handlers();
}
return loop_id;
}
Expand All @@ -142,7 +137,7 @@ class LinearIR::LoopManager {
insert_loop_id(*expr_it, loop_id);
}
if (set_default_handlers) {
set_default_loop_handlers(loop_info);
loop_info->set_default_handlers();
}
return loop_id;
}
Expand Down Expand Up @@ -209,7 +204,6 @@ class LinearIR::LoopManager {
size_t loop_id, bool loop_ops_inserted = false);

LoopPort get_loop_port_by_expr_port(const ExpressionPort& expr_port, const size_t loop_id);
static void set_default_loop_handlers(const LoopInfoPtr& loop_info);

private:
static void get_io_loop_ports(LinearIR::constExprIt loop_begin_pos,
Expand All @@ -221,8 +215,8 @@ class LinearIR::LoopManager {
std::vector<LinearIR::LoopManager::LoopPort>& entry_points,
size_t loop_id);
static std::vector<lowered::pass::PassPipeline> fuse_loop_handlers(
std::vector<lowered::pass::PassPipeline>& lhs,
std::vector<lowered::pass::PassPipeline>& rhs);
const std::vector<lowered::pass::PassPipeline>& lhs,
const std::vector<lowered::pass::PassPipeline>& rhs);

/* ===== The methods for work with Loop IDs of Expression ===== */
// Notes:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,19 @@

#include "snippets/lowered/linear_ir.hpp"
#include "snippets/lowered/pass/pass.hpp"
#include "snippets/op/loop.hpp"

namespace ov {
namespace snippets {
namespace lowered {
namespace pass {
/**
* @interface UpdateMemoryAccessOps
* @brief The pass changes counts of all MemoryAccess ops in the Loop
* @attention The pass skips inner loops
* @attention The pass ignores memory access ports which have count == 1
* @param m_count - count which must be set
* @ingroup snippets
*/
class UpdateMemoryAccessOps : public pass::RangedPass {
public:
UpdateMemoryAccessOps(size_t count);
Expand All @@ -22,6 +29,12 @@ class UpdateMemoryAccessOps : public pass::RangedPass {
size_t m_count;
};

/**
* @interface SetFillOffset
* @brief The pass changes offset of all Fill ops in the Loop
* @param m_offset - offset which must be set
* @ingroup snippets
*/
class SetFillOffset : public pass::RangedPass {
public:
SetFillOffset(size_t offset);
Expand All @@ -32,6 +45,12 @@ class SetFillOffset : public pass::RangedPass {
size_t m_offset;
};

/**
* @interface TransformInnerSplitLoop
* @brief The pass updates finalization offsets, work amount and increment of inner Loop based on tail_size of the current Loop
* @param m_tail_size - tail_size of the current Loop
* @ingroup snippets
*/
class TransformInnerSplitLoop : public pass::RangedPass {
public:
TransformInnerSplitLoop(size_t tail_size);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,14 @@ namespace ov {
namespace snippets {
namespace lowered {
namespace pass {

/**
* @interface UpdateSubtensors
* @brief The pass updates subtensors of all operations in Loop based on tail size.
* Firstly, the pass updates subtensors of all Loop entry points.
* After that, shape inference infrastructure is used to update subtensors of all ops in Loop body
* @param tail_size - tail size of the Loop, which is used to update the subtensors
* @ingroup snippets
*/
class UpdateSubtensors : public pass::RangedPass {
public:
UpdateSubtensors(size_t tail_size);
Expand Down
77 changes: 36 additions & 41 deletions src/common/snippets/src/lowered/loop_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,31 +42,35 @@ std::shared_ptr<LoopPort> LoopPort::clone_with_new_expr(const ExpressionPtr& new
LinearIR::LoopManager::LoopInfo::LoopInfo(size_t work_amount,
size_t increment,
const std::vector<LoopPort>& entries,
const std::vector<LoopPort>& exits,
bool outer_splited_loop)
const std::vector<LoopPort>& exits)
: m_work_amount(work_amount),
m_increment(increment),
m_entry_points(entries),
m_exit_points(exits),
m_outer_splited_loop(outer_splited_loop) {
handlers.resize(3);
m_exit_points(exits) {
// Note: loop info always contains at least 3 sets of handlers:
// 1. For first loop iteration
// 2. For main loop body
// 3. For last loop iteration
m_handlers.resize(3);
}

LinearIR::LoopManager::LoopInfo::LoopInfo(size_t work_amount,
size_t increment,
const std::vector<ExpressionPort>& entries,
const std::vector<ExpressionPort>& exits,
bool outer_splited_loop)
const std::vector<ExpressionPort>& exits)
: m_work_amount(work_amount),
m_increment(increment),
m_outer_splited_loop(outer_splited_loop) {
m_increment(increment) {
m_entry_points.reserve(entries.size());
m_exit_points.reserve(exits.size());
for (const auto& port : entries)
m_entry_points.emplace_back(port);
for (const auto& port : exits)
m_exit_points.emplace_back(port);
handlers.resize(3);
// Note: loop info always contains at least 3 sets of handlers:
// 1. For first loop iteration
// 2. For main loop body
// 3. For last loop iteration
m_handlers.resize(3);
}

std::shared_ptr<LoopInfo> LoopInfo::clone_with_new_expr(const ExressionMap& expr_map) const {
Expand All @@ -84,8 +88,8 @@ std::shared_ptr<LoopInfo> LoopInfo::clone_with_new_expr(const ExressionMap& expr
const auto& new_entry_points = clone_loop_ports(m_entry_points);
const auto& new_exit_points = clone_loop_ports(m_exit_points);

auto new_info = std::make_shared<LoopInfo>(m_work_amount, m_increment, new_entry_points, new_exit_points, m_outer_splited_loop);
new_info->handlers = handlers;
auto new_info = std::make_shared<LoopInfo>(m_work_amount, m_increment, new_entry_points, new_exit_points);
new_info->set_handlers(m_handlers);
return new_info;
}

Expand All @@ -105,8 +109,8 @@ const std::vector<LoopPort>& LoopInfo::get_exit_points() const {
return m_exit_points;
}

bool LoopInfo::get_outer_splited_loop() const {
return m_outer_splited_loop;
const std::vector<lowered::pass::PassPipeline>& LoopInfo::get_handlers() const {
return m_handlers;
}

size_t LinearIR::LoopManager::LoopInfo::get_dim_idx() const {
Expand Down Expand Up @@ -144,11 +148,19 @@ void LoopInfo::set_entry_points(std::vector<LoopPort> entry_points) {
}

void LoopInfo::set_exit_points(std::vector<LoopPort> exit_points) {
m_exit_points = std::move(exit_points);;
m_exit_points = std::move(exit_points);
}

// Replaces the iteration handlers of this loop with the given pipelines.
// The argument is taken by value and moved into m_handlers, so the caller may pass a copy or an rvalue.
// NOTE(review): no size check is done here, while other code indexes the handlers by
// FIRST_ITER / MAIN_BODY / LAST_ITER — confirm that callers always pass at least 3 pipelines.
void LoopInfo::set_handlers(std::vector<lowered::pass::PassPipeline> handlers) {
m_handlers = std::move(handlers);
}

void LoopInfo::set_outer_splited_loop(bool outer_splited_loop) {
m_outer_splited_loop = outer_splited_loop;
// Registers the default iteration handlers for this loop.
// If the work amount is not a multiple of the increment, the remainder (tail size) is passed to
// UpdateMemoryAccessOps and UpdateSubtensors, which are registered in the LAST_ITER pipeline.
// If the work amount is divisible by the increment, nothing is registered.
// NOTE(review): the modulo assumes get_increment() != 0 — confirm a loop can never be created with a zero increment
// (e.g. work_amount == 0 combined with std::min(increment, work_amount) in the loop creation helper).
// NOTE(review): assumes m_handlers still has a LAST_ITER slot; set_handlers() can replace the vector with one of any size.
void LoopInfo::set_default_handlers() {
const auto tail_size = get_work_amount() % get_increment();
if (tail_size != 0) {
m_handlers[LoopInfo::LAST_ITER].register_pass<lowered::pass::UpdateMemoryAccessOps>(tail_size);
m_handlers[LoopInfo::LAST_ITER].register_pass<lowered::pass::UpdateSubtensors>(tail_size);
}
}

bool operator==(const LinearIR::LoopManager::LoopPort& lhs, const LinearIR::LoopManager::LoopPort& rhs) {
Expand Down Expand Up @@ -287,14 +299,6 @@ LinearIR::LoopManager::LoopPort LinearIR::LoopManager::get_loop_port_by_expr_por
: get_loop_port(loop_info->get_exit_points());
}

void LinearIR::LoopManager::set_default_loop_handlers(const LoopInfoPtr& loop_info) {
const auto tail_size = loop_info->get_work_amount() % loop_info->get_increment();
if (tail_size != 0) {
loop_info->handlers[LoopInfo::LAST_ITER].register_pass<lowered::pass::UpdateMemoryAccessOps>(tail_size);
loop_info->handlers[LoopInfo::LAST_ITER].register_pass<lowered::pass::UpdateSubtensors>(tail_size);
}
}

void LinearIR::LoopManager::get_io_loop_ports(LinearIR::constExprIt loop_begin_pos,
LinearIR::constExprIt loop_end_pos,
std::vector<ExpressionPort> &entries,
Expand Down Expand Up @@ -444,14 +448,11 @@ void LinearIR::LoopManager::fuse_loops(LinearIR::constExprIt loop_begin_target,
loop_info->set_entry_points(new_entries);
loop_info->set_exit_points(new_exits);

loop_info->handlers = fuse_loop_handlers(loop_info_upper->handlers, loop_info_lower->handlers);
loop_info->set_handlers(fuse_loop_handlers(loop_info_upper->get_handlers(), loop_info_lower->get_handlers()));
// Since fusion can be called for broadcastable loops (one of the loops has work_amount = increment = 1),
// maximum value is set to the fused loop
loop_info->set_work_amount(std::max(loop_info_upper->get_work_amount(), loop_info_lower->get_work_amount()));
loop_info->set_increment(std::max(loop_info_upper->get_increment(), loop_info_lower->get_increment()));
// If one of the Loops is outer for nested loops that splits the same dimension,
// after fusion new common Loop saves this status
loop_info->set_outer_splited_loop(loop_info_upper->get_outer_splited_loop() || loop_info_lower->get_outer_splited_loop());

const auto& from = fuse_into_upper ? loop_id_lower : loop_id_upper;
const auto& to = fuse_into_upper ? loop_id_upper : loop_id_lower;
Expand All @@ -464,15 +465,13 @@ void LinearIR::LoopManager::fuse_loops(LinearIR::constExprIt loop_begin_target,
}

std::vector<lowered::pass::PassPipeline> LinearIR::LoopManager::fuse_loop_handlers(
std::vector<lowered::pass::PassPipeline>& from,
std::vector<lowered::pass::PassPipeline>& to) {
const auto min_size = std::min(from.size(), to.size());
std::vector<lowered::pass::PassPipeline> merged_handlers;
merged_handlers.resize(min_size);
for (size_t i = 0; i < min_size; ++i) {
merged_handlers[i] = from[i];
const std::vector<lowered::pass::PassPipeline>& lhs,
const std::vector<lowered::pass::PassPipeline>& rhs) {
OPENVINO_ASSERT(lhs.size() == rhs.size(), "fuse_loop_handlers supports only handlers vectors with equal sizes.");
auto merged_handlers = lhs;
for (size_t i = 0; i < lhs.size(); ++i) {
const auto& res_passes = merged_handlers[i].get_passes();
for (const auto& pass : to[i].get_passes()) {
for (const auto& pass : rhs[i].get_passes()) {
auto pred = [&pass](const std::shared_ptr<lowered::pass::PassBase>& p) {
return p->get_type_info() == pass->get_type_info();
};
Expand All @@ -481,10 +480,6 @@ std::vector<lowered::pass::PassPipeline> LinearIR::LoopManager::fuse_loop_handle
}
}
}
auto& handlers_with_larger_size = from.size() > to.size() ? from : to;
for (size_t i = min_size; i < handlers_with_larger_size.size(); ++i) {
merged_handlers.emplace_back(std::move(handlers_with_larger_size[i]));
}
return merged_handlers;
}

Expand Down
2 changes: 1 addition & 1 deletion src/common/snippets/src/lowered/pass/allocate_buffers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ bool AllocateBuffers::run(lowered::LinearIR& linear_ir, lowered::LinearIR::const
pipeline.register_pass<NormalizeBufferIDs>();
pipeline.run(linear_ir);
} else {
InitBuffersDefault(m_buffer_scratchpad_size).run(linear_ir, linear_ir.begin(), linear_ir.end());
InitBuffersDefault(m_buffer_scratchpad_size).run(linear_ir, linear_ir.cbegin(), linear_ir.cend());
}

return m_buffer_scratchpad_size > 0;
Expand Down
9 changes: 6 additions & 3 deletions src/common/snippets/src/lowered/pass/fuse_loops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ bool FuseLoops::can_be_fused(const LoopInfoPtr& loop_current, const LoopInfoPtr&
const auto target_work_amount = loop_target->get_work_amount();
const auto current_increment = loop_current->get_increment();
const auto target_increment = loop_target->get_increment();
const auto& current_handlers = loop_current->get_handlers();
const auto& target_handlers = loop_target->get_handlers();
// Loop fusion is supported only if Loops have equal/broadcastable increments and work amounts.
// Note: For example, Broadcastable work amounts are possible in the following case:
// Relu_0 [16x1] Relu_1 [16x128]
Expand All @@ -58,15 +60,16 @@ bool FuseLoops::can_be_fused(const LoopInfoPtr& loop_current, const LoopInfoPtr&
// - Relu_1 and Add with work amount `128` and increment `vector size`
// We can fuse them into one Loop with work amount `128` and increment `vector size`

const bool handlers_sizes_match = current_handlers.size() == target_handlers.size();
// WA: we can't fuse 2 loops if one of them has a first iteration handler but the other doesn't,
// because in this case Main/Tail body handlers of the loop without the first iteration handler must be reset with new parameters
// (e.g. tail size). This logic is not implemented for now, so fusion for such loops is skipped.
const bool first_iter_handlers_match = loop_current->handlers[LoopManager::LoopInfo::FIRST_ITER].empty() ==
loop_target->handlers[LoopManager::LoopInfo::FIRST_ITER].empty();
const bool first_iter_handlers_match = current_handlers[LoopManager::LoopInfo::FIRST_ITER].empty() ==
target_handlers[LoopManager::LoopInfo::FIRST_ITER].empty();
const bool equal_parameters = current_work_amount == target_work_amount && current_increment == target_increment;
const bool current_bcastable = current_work_amount == 1 && current_increment == 1;
const bool target_bcastable = target_work_amount == 1 && target_increment == 1;
return first_iter_handlers_match && (equal_parameters || current_bcastable || target_bcastable);
return handlers_sizes_match && first_iter_handlers_match && (equal_parameters || current_bcastable || target_bcastable);
}

void FuseLoops::move(LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, size_t loop_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ bool InsertSpecificIterations::run(LinearIR& linear_ir, lowered::LinearIR::const
const auto& loop_info = loop_manager->get_loop_info(loop_end->get_id());
const auto work_amount = loop_info->get_work_amount();
const auto increment = loop_info->get_increment();
auto& handlers = loop_info->handlers;
const auto& handlers = loop_info->get_handlers();

const auto main_body_begin_it = linear_ir.find(linear_ir.get_expr_by_node(loop_end->get_loop_begin()));
const auto main_body_end_it = linear_ir.find(linear_ir.get_expr_by_node(loop_end));
Expand All @@ -100,10 +100,11 @@ bool InsertSpecificIterations::run(LinearIR& linear_ir, lowered::LinearIR::const

auto copy_and_run_specific_handlers = [&](const PassPipeline& handlers) {
const auto& cloned_body = copy_loop(linear_ir, loop_end->get_id());
linear_ir.insert(main_body_begin_it, cloned_body.begin(), cloned_body.end());
const auto& loop_end_it = std::prev(cloned_body.end());
handlers.run(linear_ir, cloned_body.begin(), loop_end_it);
return ov::as_type_ptr<op::LoopEnd>(loop_end_it->get()->get_node());
lowered::LinearIR::constExprIt start = linear_ir.insert(main_body_begin_it, cloned_body.begin(), cloned_body.end());
const auto cloned_loop_end = *std::prev(cloned_body.end());
auto end = linear_ir.find_after(start, cloned_loop_end);
handlers.run(linear_ir, start, end);
return ov::as_type_ptr<op::LoopEnd>(cloned_loop_end->get_node());
};

const bool specific_first_iteration = !handlers[LoopInfo::FIRST_ITER].empty();
Expand Down
2 changes: 1 addition & 1 deletion src/common/snippets/src/lowered/pass/iter_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ bool TransformInnerSplitLoop::run(LinearIR& linear_ir, LinearIR::constExprIt beg
const auto inner_loop_begin_it = std::find(begin, it, linear_ir.get_expr_by_node(inner_loop_begin));
const auto inner_loop_end_it = std::next(end);
OPENVINO_ASSERT(inner_loop_begin_it != it, "LoopBegin has not been found!");
const auto& last_iter_handlers = inner_loop_info->handlers[LinearIR::LoopManager::LoopInfo::LAST_ITER];
const auto& last_iter_handlers = inner_loop_info->get_handlers()[LinearIR::LoopManager::LoopInfo::LAST_ITER];
last_iter_handlers.run(linear_ir, inner_loop_begin_it, inner_loop_end_it);
modified = true;
}
Expand Down
Loading

0 comments on commit f13cc1e

Please sign in to comment.