Skip to content

Commit

Permalink
[Snippets] Added SplitLoops support for dynamic Loops
Browse files Browse the repository at this point in the history
[Snippets] Fixed LoopInfo cloning

[Snippets] WA for ComputeBufferAllocationSize::get_allocation_size

[Snippets] Fixed dynamic increment and single increment for Eltwise Loops

[Snippets] Added dynamic SplitLoops support

[Snippets] Added debug prints to RuntimeConfig

[Snippets] Fixed 0 case in get_finalization_offset

[Snippets] Updated ValidateExpandedLoopInfo + Updated SnippetsUnitTest

[Snippets] Supported InnerSplittedLoopInfo in SplitLoops as Unified

[Snippets] DISABLED EXTRACT LOOP INVARIANTS

[Snippets] Fixed ExtractLoopInvariants

[Snippets] Fixed FuseLoops::can_be_fused

[Snippets] Fixes
  • Loading branch information
a-sidorova committed Aug 12, 2024
1 parent 3073c6a commit c0f4282
Show file tree
Hide file tree
Showing 28 changed files with 607 additions and 288 deletions.
98 changes: 63 additions & 35 deletions src/common/snippets/include/snippets/lowered/loop_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,10 @@ class LoopInfo {
enum {UNDEFINED_DIM_IDX = std::numeric_limits<size_t>::max()};

LoopInfo() = default;
LoopInfo(size_t work_amount, size_t increment, const std::vector<LoopPort>& entries, const std::vector<LoopPort>& exits, bool is_wa_const = false);
LoopInfo(size_t work_amount, size_t increment, const std::vector<ExpressionPort>& entries, const std::vector<ExpressionPort>& exits,
bool is_wa_const = false);
LoopInfo(size_t work_amount, size_t increment, const std::vector<LoopPort>& entries, const std::vector<LoopPort>& exits);
LoopInfo(size_t work_amount, size_t increment, const std::vector<ExpressionPort>& entries, const std::vector<ExpressionPort>& exits);
virtual ~LoopInfo() = default;

/**
* @brief Clone LoopInfo with new expressions
* @param expr_map map of new and old expressions
* @return the copy
*/
virtual std::shared_ptr<LoopInfo> clone_with_new_expr(const ExpressionMap& expr_map) const = 0;

/**
* @brief Check if some parameters of Loop are dynamic (undefined)
* @return True if some parameters of Loop are unknown, False if all parameters are static
Expand Down Expand Up @@ -62,7 +54,7 @@ class LoopInfo {
* @brief Returns work amount of the Loop.
* @return m_work_amount
*/
size_t get_work_amount() const;
virtual size_t get_work_amount() const;
/**
* @brief Returns step of loop counter increment.
* @return m_increment
Expand All @@ -83,17 +75,12 @@ class LoopInfo {
* @return m_output_ports
*/
const std::vector<LoopPort>& get_output_ports() const;
/**
* @brief Returns True if `work_amount` cannot be rewritten/updated by passes.
* @return m_is_work_amount_const
*/
bool is_work_amount_const() const;

/**
* @brief Set m_work_amount value
* @param work_amount - work amount of the loop
*/
void set_work_amount(size_t work_amount);
virtual void set_work_amount(size_t work_amount);
/**
* @brief Set m_increment value
* @param increment - step of loop counter increment
Expand All @@ -104,13 +91,8 @@ class LoopInfo {
* @param dim_idx - index
*/
void set_dim_idx(size_t dim_idx);
/**
* @brief Sets `value` to `m_is_work_amount_const`
* @param value - value of the attribute
*/
void set_work_amount_const(bool value);

/**
/**
* @brief Replace the current LoopPort `actual_port` with new `target_ports`
* @param actual_port actual port
* @param target_ports new ports. The ports order is important. Can contain `actual_port`
Expand Down Expand Up @@ -188,10 +170,6 @@ class LoopInfo {
// Note: Scalars aren't input expressions but can be before first input expr in Linear IR
std::vector<LoopPort> m_input_ports = {};
std::vector<LoopPort> m_output_ports = {};

// TODO [143394] : All static values in compilation stage should be `is_const=True` (not only `work_amount`)
// If True, no one pass can rewrite the value of `m_work_amount`
bool m_is_work_amount_const = false;
};
using LoopInfoPtr = std::shared_ptr<LoopInfo>;

Expand Down Expand Up @@ -227,20 +205,20 @@ class UnifiedLoopInfo : public LoopInfo {
UnifiedLoopInfo(size_t work_amount, size_t increment,
const std::vector<LoopPort>& entries, const std::vector<LoopPort>& exits,
const std::vector<LoopPortDesc>& in_descs, const std::vector<LoopPortDesc>& out_descs,
const SpecificIterationHandlers& handlers = SpecificIterationHandlers(), bool is_wa_const = false);
const SpecificIterationHandlers& handlers = SpecificIterationHandlers());
UnifiedLoopInfo(size_t work_amount, size_t increment,
const std::vector<LoopPort>& entries, const std::vector<LoopPort>& exits,
const SpecificIterationHandlers& handlers = SpecificIterationHandlers(), bool is_wa_const = false);
const SpecificIterationHandlers& handlers = SpecificIterationHandlers());
UnifiedLoopInfo(size_t work_amount, size_t increment,
const std::vector<ExpressionPort>& entries, const std::vector<ExpressionPort>& exits,
const SpecificIterationHandlers& handlers = SpecificIterationHandlers(), bool is_wa_const = false);
const SpecificIterationHandlers& handlers = SpecificIterationHandlers());

/**
* @brief Clone LoopInfo with new expressions
* @param expr_map map of new and old expressions
* @return the copy
*/
std::shared_ptr<LoopInfo> clone_with_new_expr(const ExpressionMap& expr_map) const override;
std::shared_ptr<LoopInfo> clone_with_new_expr(const ExpressionMap& expr_map) const;

/**
* @brief Check if some parameters of Loop are dynamic (undefined)
Expand Down Expand Up @@ -362,7 +340,7 @@ class UnifiedLoopInfo : public LoopInfo {
caller(m_output_ports[i], m_output_port_descs[i]);
}

private:
protected:
/**
* @brief Clone LoopPortDesc[actual_port_idx] `new_count` times and insert on the place of current desc
* @param actual_port_idx index of the current descriptor/port
Expand All @@ -377,6 +355,56 @@ class UnifiedLoopInfo : public LoopInfo {
};
using UnifiedLoopInfoPtr = std::shared_ptr<UnifiedLoopInfo>;

/**
* @interface InnerSplittedUnifiedLoopInfo
* @brief Describes the inner loop produced by the `SplitLoops` pass.
* Holds a pointer to the loop info of the outer split loop. The work amount of the inner loop
* is equal to the increment of the outer split loop info, which is why the constructor takes
* no `work_amount` argument.
* @ingroup snippets
*/
class InnerSplittedUnifiedLoopInfo : public UnifiedLoopInfo {
public:
OPENVINO_RTTI("InnerSplittedUnifiedLoopInfo", "0", UnifiedLoopInfo)

// Note: a default-constructed object has `m_outer_splitted_loop_info == nullptr`.
InnerSplittedUnifiedLoopInfo() = default;
/**
* @brief Creates the inner split loop info
* @param increment step of loop counter increment
* @param entries input loop ports
* @param exits output loop ports
* @param in_descs descriptors of the input loop ports
* @param out_descs descriptors of the output loop ports
* @param handlers handlers of specific iterations
* @param outer_splitted_loop_info loop info of the outer split loop
*/
InnerSplittedUnifiedLoopInfo(size_t increment, const std::vector<LoopPort>& entries, const std::vector<LoopPort>& exits,
const std::vector<LoopPortDesc>& in_descs, const std::vector<LoopPortDesc>& out_descs,
const SpecificIterationHandlers& handlers, LoopInfoPtr outer_splitted_loop_info);

/**
* @brief Clones the LoopInfo with new expressions
* @param expr_map map of new and old expressions
* @param new_outer_splitted_loop_info outer split loop info that the copy should reference
* @return the copy
* NOTE(review): this overload takes one more argument than `UnifiedLoopInfo::clone_with_new_expr(expr_map)`,
* so callers must supply the outer loop info explicitly.
*/
std::shared_ptr<LoopInfo> clone_with_new_expr(const ExpressionMap& expr_map, LoopInfoPtr new_outer_splitted_loop_info) const;

/**
* @brief Returns the work amount of the Loop.
* @return work amount of the inner loop. Per the class description this is the increment of the
* outer split loop info rather than a separately stored value - TODO confirm against the implementation.
*/
size_t get_work_amount() const override;
/**
* @brief Returns the outer split loop info
* @return m_outer_splitted_loop_info
*/
LoopInfoPtr get_outer_splitted_loop_info() const;

/**
* @brief Sets the work amount of the loop
* @param work_amount - work amount of the loop
* NOTE(review): the work amount of this loop is described as derived from the outer split loop;
* how this override treats the passed value is not visible in the header - confirm against the implementation.
*/
void set_work_amount(size_t work_amount) override;
/**
* @brief Sets the m_outer_splitted_loop_info value
* @param outer - outer split loop info
*/
void set_outer_splitted_loop_info(LoopInfoPtr outer);

private:
// Loop info of the outer split loop; nullptr until set via the constructor or the setter.
LoopInfoPtr m_outer_splitted_loop_info = nullptr;
};
using InnerSplittedUnifiedLoopInfoPtr = std::shared_ptr<InnerSplittedUnifiedLoopInfo>;

/**
* @interface ExpandedLoopInfo
* @brief The structure describes expanded Loop (specific iterations) after unified loop decomposition into specific loop iterations.
Expand All @@ -390,14 +418,14 @@ class ExpandedLoopInfo : public LoopInfo {
ExpandedLoopInfo(size_t work_amount, size_t increment,
const std::vector<LoopPort>& entries, const std::vector<LoopPort>& exits,
std::vector<int64_t> ptr_increments, std::vector<int64_t> final_offsets, std::vector<int64_t> data_sizes,
SpecificLoopIterType type, std::shared_ptr<UnifiedLoopInfo> unified_loop_info, bool is_wa_const = false,
bool evaluate_once = false);
SpecificLoopIterType type, UnifiedLoopInfoPtr unified_loop_info, bool evaluate_once = false);
/**
* @brief Clone LoopInfo with new expressions
* @param expr_map map of new and old expressions
* @param new_unified_loop_info new unified loop info
* @return the copy
*/
std::shared_ptr<LoopInfo> clone_with_new_expr(const ExpressionMap& expr_map) const override;
std::shared_ptr<LoopInfo> clone_with_new_expr(const ExpressionMap& expr_map, UnifiedLoopInfoPtr new_unified_loop_info) const;

/**
* @brief Check if some parameters of Loop are dynamic (undefined)
Expand Down
16 changes: 7 additions & 9 deletions src/common/snippets/include/snippets/lowered/loop_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,12 @@ class LoopManager {
size_t increment,
const std::vector<T>& entries,
const std::vector<T>& exits,
bool set_default_handlers = true,
bool is_work_amount_const = false) {
bool set_default_handlers = true) {
const auto normalized_increment = utils::is_dynamic_value(work_amount) || work_amount == 0 ? increment : std::min(increment, work_amount);
const auto& handlers = set_default_handlers
? SpecificIterationHandlers(work_amount, normalized_increment)
: SpecificIterationHandlers();
const auto loop_info = std::make_shared<UnifiedLoopInfo>(work_amount, normalized_increment, entries, exits, handlers, is_work_amount_const);
const auto loop_info = std::make_shared<UnifiedLoopInfo>(work_amount, normalized_increment, entries, exits);
if (set_default_handlers)
loop_info->set_handlers(SpecificIterationHandlers(work_amount, normalized_increment, loop_info->get_dim_idx()));

const auto loop_id = this->add_loop_info(loop_info);
for (auto expr_it = loop_begin_pos; expr_it != loop_end_pos; ++expr_it) {
insert_loop_id(*expr_it, loop_id);
Expand All @@ -131,9 +130,8 @@ class LoopManager {
size_t dim_idx,
const std::vector<T>& entries,
const std::vector<T>& exits,
bool set_default_handlers = true,
bool is_work_amount_const = false) {
const auto loop_id = mark_loop(loop_begin_pos, loop_end_pos, work_amount, increment, entries, exits, set_default_handlers, is_work_amount_const);
bool set_default_handlers = true) {
const auto loop_id = mark_loop(loop_begin_pos, loop_end_pos, work_amount, increment, entries, exits, set_default_handlers);
const auto loop_info = get_loop_info<UnifiedLoopInfo>(loop_id);
loop_info->set_dim_idx(dim_idx);
return loop_id;
Expand Down
12 changes: 11 additions & 1 deletion src/common/snippets/include/snippets/lowered/pass/init_loops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,17 @@ class InitLoops : public Pass {
InitLoops() = default;
bool run(LinearIR& linear_ir) override;

static void init_loop_info(const UnifiedLoopInfoPtr& loop_info, size_t loop_id, bool only_runtime_args = false);
/**
* @brief Updates ptr_increments and finalization offsets of the provided "loop_info" based on current work amount
*/
static void update_data_pointer_shifts(const UnifiedLoopInfoPtr& loop_info);
/**
* @brief Updates work amount and updates data pointer shifts of the provided "loop_info"
*/
static void update_runtime_parameters(const UnifiedLoopInfoPtr& loop_info);

private:
static void update_compile_parameters(const UnifiedLoopInfoPtr& loop_info, size_t loop_id);
};

} // namespace pass
Expand Down
28 changes: 5 additions & 23 deletions src/common/snippets/include/snippets/lowered/pass/iter_handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,32 +48,14 @@ class SetFillOffset : public pass::RangedPass {
};

/**
* @interface TransformInnerSplitLoop
* @brief The pass updates finalization offsets, work amount and increment of inner Loop basing on tail_size of the current Loop
* @param m_tail_size - tail_size of the current Loop
* @interface SetLoopIncrementOne
* @brief The pass sets `increment = 1` on the ExpandedLoopInfo that is mapped to the LoopEnd at the passed iterator `end`, and on this LoopEnd itself.
* @ingroup snippets
*/
class TransformInnerSplitLoop : public pass::RangedPass {
class SetLoopIncrementOne : public snippets::lowered::pass::RangedPass {
public:
TransformInnerSplitLoop(size_t tail_size);
OPENVINO_RTTI("TransformInnerSplitLoop", "RangedPass")
bool run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) override;
std::shared_ptr<pass::PassBase> merge(const std::shared_ptr<pass::PassBase>& other) override;

private:
size_t m_tail_size;
};

/**
* @interface SetEvaluateOnce
* @brief The pass set `evaluate once = true` only to ExpandedLoopInfo which is mapped on LoopEnd in the passed iterator `end`.
* The pointer arithmetic should be updated in the separate optimization `OptimizeLoopSingleEvaluation`
* @ingroup snippets
*/
class SetEvaluateOnce : public snippets::lowered::pass::RangedPass {
public:
SetEvaluateOnce() = default;
OPENVINO_RTTI("SetEvaluateOnce", "RangedPass")
SetLoopIncrementOne() = default;
OPENVINO_RTTI("SetLoopIncrementOne", "RangedPass")
bool run(snippets::lowered::LinearIR& linear_ir,
snippets::lowered::LinearIR::constExprIt begin,
snippets::lowered::LinearIR::constExprIt end) override;
Expand Down
16 changes: 16 additions & 0 deletions src/common/snippets/include/snippets/lowered/pass/split_loops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,24 @@ class SplitLoops : public RangedPass {
SplitLoops();
bool run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) override;

static void split(LinearIR& linear_ir, size_t loop_to_split_id, size_t outer_increment);

private:
static bool can_be_split(const UnifiedLoopInfoPtr& current, const UnifiedLoopInfoPtr& target);

/**
* @interface TransformInnerSplitLoop
* @brief The pass replaces the existing LoopInfo of the inner split loop with a new
* InnerSplittedUnifiedLoopInfo and updates the corresponding LoopInfo.
* Declared in the private section of `SplitLoops`.
* @ingroup snippets
*/
class TransformInnerSplitLoop : public pass::RangedPass {
public:
TransformInnerSplitLoop() = default;
OPENVINO_RTTI("TransformInnerSplitLoop", "RangedPass")
// Applies the pass to the expressions in the range [begin, end) of `linear_ir`
// (presumably the body of the loop being processed - TODO confirm against the caller).
bool run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) override;
// Merges this pass with `other`; the merge rules live in the implementation, not in this header.
std::shared_ptr<pass::PassBase> merge(const std::shared_ptr<pass::PassBase>& other) override;
};
};

} // namespace pass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace lowered {
class SpecificIterationHandlers {
public:
SpecificIterationHandlers() = default;
SpecificIterationHandlers(size_t loop_work_amount, size_t loop_increment);
SpecificIterationHandlers(size_t loop_work_amount, size_t loop_increment, size_t processing_dim_idx);
SpecificIterationHandlers(pass::PassPipeline first_iter_handlers,
pass::PassPipeline main_body_handlers,
pass::PassPipeline last_iter_handlers);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace pass {

/**
* @interface AnalyzeBroadcastableInputs
* @brief Analyzes body parameters which affects inputs of broadcastable operations (If needed, `Broadcast` op should be inserted there).s
* @brief Analyzes body parameters which affects inputs of broadcastable operations (If needed, `Broadcast` op should be inserted there).
* Also the pass initializes special map `BroadcastableInputsMap`
* Notes:
* - Must be called after Canonicalization pass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ class RuntimeConfig {
return get_type_info().name;
}

#ifdef SNIPPETS_DEBUG_CAPS
virtual std::string to_string() const;
#endif

size_t tensor_rank = 0;
size_t tile_rank = 0;

Expand Down Expand Up @@ -136,7 +140,8 @@ class RuntimeConfigurator {
std::vector<snippets::lowered::PortDescriptorPtr> m_io_descs = {};
std::vector<size_t> m_io_data_sizes = {};
// [cluster_id -> buffer expressions ]
std::map<size_t, std::set<lowered::ExpressionPtr>> m_dynamic_buffer_clusters;
std::map<size_t, std::set<lowered::ExpressionPtr>> m_dynamic_buffer_clusters = {};
std::vector<size_t> m_ordered_loop_ids = {};

std::vector<ov::snippets::VectorDims> m_latest_shapes = {};
};
Expand Down
6 changes: 4 additions & 2 deletions src/common/snippets/src/generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,11 @@ LoweringResult Generator::generate(const lowered::LinearIRPtr& linear_ir, const
}
result.compiled_snippet = target->get_snippet();
result.kernel_executor_table = target->get_runtime_configurator()->get_kernel_executor_table();
// Some kernel executors might've been registered during code emission.
// In static case some kernel executors might've been registered during code emission.
// We need to update them, so appropriate kernels will be compiled.
result.kernel_executor_table->update_state(linear_ir);
// In dynamic case it should be handled by RuntimeConfigurator
if (!linear_ir->is_dynamic())
result.kernel_executor_table->update_state(linear_ir);

return result;
}
Expand Down
Loading

0 comments on commit c0f4282

Please sign in to comment.