diff --git a/src/common/snippets/include/snippets/generator.hpp b/src/common/snippets/include/snippets/generator.hpp index 8ac9444e331e2c..2991b873002ea1 100644 --- a/src/common/snippets/include/snippets/generator.hpp +++ b/src/common/snippets/include/snippets/generator.hpp @@ -11,7 +11,7 @@ #include "snippets_isa.hpp" #include "snippets/lowered/linear_ir.hpp" -#include "snippets/lowered/pass/transformation.hpp" +#include "snippets/lowered/pass/pass.hpp" namespace ngraph { namespace snippets { @@ -73,11 +73,10 @@ class Generator { * @return pointer to generated code */ struct LoweringResult { - LoweringResult(code c, size_t size) : binary_code(c), buffer_scratchpad_size(size) {} + LoweringResult(code c) : binary_code(c) {} code binary_code = nullptr; - size_t buffer_scratchpad_size = 0; }; - LoweringResult generate(std::shared_ptr& m, const lowered::Config& config, const void* compile_params = nullptr); + LoweringResult generate(lowered::LinearIR& linear_ir, const lowered::Config& config, const void* compile_params = nullptr); /** * @brief gets target machine @@ -107,10 +106,6 @@ class Generator { * @return register type */ virtual opRegType get_specific_op_reg_type(const std::shared_ptr& op) const; - /** - * @brief gets target specific transformations for code generation - */ - virtual lowered::pass::TransformationPipeline target_specific_transformations() const; std::shared_ptr target; // todo: we need to save lowered code to access compiled brgemm kernels on execution time (normally lowered is destructed by then). 
diff --git a/src/common/snippets/include/snippets/lowered/pass/allocate_buffers.hpp b/src/common/snippets/include/snippets/lowered/pass/allocate_buffers.hpp index d1ad2fb2d5296f..7bc202955a1d5a 100644 --- a/src/common/snippets/include/snippets/lowered/pass/allocate_buffers.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/allocate_buffers.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" #include "snippets/snippets_isa.hpp" namespace ngraph { @@ -18,9 +18,9 @@ namespace pass { * @ingroup snippets */ -class AllocateBuffers : public Transformation { +class AllocateBuffers : public Pass { public: - OPENVINO_RTTI("AllocateBuffers", "Transformation") + OPENVINO_RTTI("AllocateBuffers", "Pass") bool run(lowered::LinearIR& linear_ir) override; size_t get_scratchpad_size() const { return m_buffer_scratchpad_size; } diff --git a/src/common/snippets/include/snippets/lowered/pass/assign_registers.hpp b/src/common/snippets/include/snippets/lowered/pass/assign_registers.hpp index 29b889dba27684..91a0a57b43b500 100644 --- a/src/common/snippets/include/snippets/lowered/pass/assign_registers.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/assign_registers.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" #include "snippets/generator.hpp" namespace ngraph { @@ -18,9 +18,9 @@ namespace pass { * Note that changing of the IR is likely to invalidate register assignment. 
* @ingroup snippets */ -class AssignRegisters : public Transformation { +class AssignRegisters : public Pass { public: - OPENVINO_RTTI("AssignRegisters", "Transformation") + OPENVINO_RTTI("AssignRegisters", "Pass") explicit AssignRegisters(const std::function& op)>& mapper) : m_reg_type_mapper(mapper) {} bool run(LinearIR& linear_ir) override; diff --git a/src/common/snippets/include/snippets/lowered/pass/clean_repeated_ptr_shifts.hpp b/src/common/snippets/include/snippets/lowered/pass/clean_repeated_ptr_shifts.hpp index 9ca1b051680d45..8069f944b4a33e 100644 --- a/src/common/snippets/include/snippets/lowered/pass/clean_repeated_ptr_shifts.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/clean_repeated_ptr_shifts.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" namespace ngraph { namespace snippets { @@ -21,9 +21,9 @@ namespace pass { * This condition should be removed when Buffers stop being inplace by default. * @ingroup snippets */ -class CleanRepeatedDataPointerShifts: public Transformation { +class CleanRepeatedDataPointerShifts: public Pass { public: - OPENVINO_RTTI("CleanRepeatedDataPointerShifts", "Transformation") + OPENVINO_RTTI("CleanRepeatedDataPointerShifts", "Pass") CleanRepeatedDataPointerShifts() = default; bool run(LinearIR& linear_ir) override; diff --git a/src/common/snippets/include/snippets/lowered/pass/cleanup_loop_offsets.hpp b/src/common/snippets/include/snippets/lowered/pass/cleanup_loop_offsets.hpp index 4cd7f9f1aefb43..e022f58b889887 100644 --- a/src/common/snippets/include/snippets/lowered/pass/cleanup_loop_offsets.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/cleanup_loop_offsets.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" namespace ngraph { namespace snippets { @@ -17,9 +17,9 @@ namespace pass { * This transformation "fuses" the offsets with an outer loop's ptr_increments, and zeroes the offsets before Results. 
* @ingroup snippets */ -class CleanupLoopOffsets : public Transformation { +class CleanupLoopOffsets : public Pass { public: - OPENVINO_RTTI("CleanupLoopOffsets", "Transformation") + OPENVINO_RTTI("CleanupLoopOffsets", "Pass") bool run(LinearIR& linear_ir) override; }; diff --git a/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp b/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp index ce692cac78c8f4..e5522d20583e76 100644 --- a/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/fuse_loops.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" #include "snippets/lowered/loop_manager.hpp" @@ -36,9 +36,9 @@ namespace pass { * The main conditions of possible fusion is the equal increments and the equal/broadcastable work amounts. * @ingroup snippets */ -class FuseLoops : public Transformation { +class FuseLoops : public Pass { public: - OPENVINO_RTTI("FuseLoops", "Transformation") + OPENVINO_RTTI("FuseLoops", "Pass") FuseLoops(); bool run(LinearIR& linear_ir) override; diff --git a/src/common/snippets/include/snippets/lowered/pass/identify_buffers.hpp b/src/common/snippets/include/snippets/lowered/pass/identify_buffers.hpp index 05bedba6f72453..e7e9d0daa344a2 100644 --- a/src/common/snippets/include/snippets/lowered/pass/identify_buffers.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/identify_buffers.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" #include "snippets/op/buffer.hpp" @@ -28,9 +28,9 @@ namespace pass { * Note: should be called before ResetBuffer() pass to have correct offsets * @ingroup snippets */ -class IdentifyBuffers: public Transformation { +class IdentifyBuffers: public Pass { public: - OPENVINO_RTTI("IdentifyBuffers", "Transformation") + OPENVINO_RTTI("IdentifyBuffers", "Pass") IdentifyBuffers() = default; bool run(LinearIR& linear_ir) override; diff --git 
a/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp b/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp index bffed1594fb356..fcb08c704871e0 100644 --- a/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/init_loops.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" #include "snippets/lowered/loop_manager.hpp" @@ -18,9 +18,9 @@ namespace pass { * @brief The pass explicitly insert LoadBegin and LoadEnd in Linear IR using LoopManager::LoopInfo from Loop markup algorithm * @ingroup snippets */ -class InitLoops : public Transformation { +class InitLoops : public Pass { public: - OPENVINO_RTTI("InsertLoops", "Transformation") + OPENVINO_RTTI("InsertLoops", "Pass") InitLoops(); bool run(LinearIR& linear_ir) override; diff --git a/src/common/snippets/include/snippets/lowered/pass/insert_buffers.hpp b/src/common/snippets/include/snippets/lowered/pass/insert_buffers.hpp index 9abded985e60c7..2add0902de2cc4 100644 --- a/src/common/snippets/include/snippets/lowered/pass/insert_buffers.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/insert_buffers.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" namespace ngraph { namespace snippets { @@ -19,9 +19,9 @@ namespace pass { * @param m_buffer_allocation_rank - rank of shape for memory allocation: shape[shape_rank - normalize(m_allocation_rank) : shape_rank] * @ingroup snippets */ -class InsertBuffers : public Transformation { +class InsertBuffers : public Pass { public: - OPENVINO_RTTI("InsertBuffers", "Transformation") + OPENVINO_RTTI("InsertBuffers", "Pass") InsertBuffers(int32_t buffer_allocation_rank); bool run(LinearIR& linear_ir) override; diff --git a/src/common/snippets/include/snippets/lowered/pass/insert_load_store.hpp b/src/common/snippets/include/snippets/lowered/pass/insert_load_store.hpp index 0f64f54b12593b..bd9044dd20c0f5 100644 --- 
a/src/common/snippets/include/snippets/lowered/pass/insert_load_store.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/insert_load_store.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" #include "snippets/lowered/loop_manager.hpp" @@ -20,10 +20,10 @@ namespace pass { * @param m_vector_size - the count of elements for loading/storing * @ingroup snippets */ -class InsertLoadStore : public Transformation { +class InsertLoadStore : public Pass { public: explicit InsertLoadStore(size_t vector_size); - OPENVINO_RTTI("InsertLoadStore", "Transformation") + OPENVINO_RTTI("InsertLoadStore", "Pass") bool run(LinearIR& linear_ir) override; private: diff --git a/src/common/snippets/include/snippets/lowered/pass/insert_tail_loop.hpp b/src/common/snippets/include/snippets/lowered/pass/insert_tail_loop.hpp index d946933a0bfc61..95711c71ec8b27 100644 --- a/src/common/snippets/include/snippets/lowered/pass/insert_tail_loop.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/insert_tail_loop.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" namespace ngraph { namespace snippets { @@ -17,13 +17,13 @@ namespace pass { * Additional optimizations are performed if a loop body is executed only once. 
* @ingroup snippets */ -class InsertTailLoop : public Transformation { +class InsertTailLoop : public Pass { static void tail_transformations(LinearIR& linear_ir, LinearIR::container::const_iterator tail_begin, LinearIR::container::const_iterator tail_end, size_t tail_size); public: - OPENVINO_RTTI("InsertTailLoop", "Transformation") + OPENVINO_RTTI("InsertTailLoop", "Pass") bool run(LinearIR& linear_ir) override; }; diff --git a/src/common/snippets/include/snippets/lowered/pass/load_movebroadcast_to_broadcastload.hpp b/src/common/snippets/include/snippets/lowered/pass/load_movebroadcast_to_broadcastload.hpp index 589e237bc7957d..14d96d71fd5107 100644 --- a/src/common/snippets/include/snippets/lowered/pass/load_movebroadcast_to_broadcastload.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/load_movebroadcast_to_broadcastload.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" namespace ngraph { namespace snippets { @@ -16,10 +16,10 @@ namespace pass { * @brief Fuses consecutive Load and MoveBroadcast into a single load insctruction. 
* @ingroup snippets */ -class LoadMoveBroadcastToBroadcastLoad: public Transformation { +class LoadMoveBroadcastToBroadcastLoad: public Pass { public: LoadMoveBroadcastToBroadcastLoad() = default; - OPENVINO_RTTI("LoadMoveBroadcastToBroadcastLoad", "Transformation") + OPENVINO_RTTI("LoadMoveBroadcastToBroadcastLoad", "Pass") bool run(LinearIR& linear_ir) override; }; diff --git a/src/common/snippets/include/snippets/lowered/pass/mark_loops.hpp b/src/common/snippets/include/snippets/lowered/pass/mark_loops.hpp index 4f454013f14ecb..5c0185397ee795 100644 --- a/src/common/snippets/include/snippets/lowered/pass/mark_loops.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/mark_loops.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" namespace ngraph { @@ -20,9 +20,9 @@ namespace pass { * - the consumer of the expression is explicitly after this expression - the pass marks the branches * @ingroup snippets */ -class MarkLoops : public Transformation { +class MarkLoops : public Pass { public: - OPENVINO_RTTI("MarkLoops", "Transformation") + OPENVINO_RTTI("MarkLoops", "Pass") MarkLoops(size_t vector_size); bool run(LinearIR& linear_ir) override; diff --git a/src/common/snippets/include/snippets/lowered/pass/move_result_out_of_loop.hpp b/src/common/snippets/include/snippets/lowered/pass/move_result_out_of_loop.hpp index 302d042af517f4..4534ef13afbdbb 100644 --- a/src/common/snippets/include/snippets/lowered/pass/move_result_out_of_loop.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/move_result_out_of_loop.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" namespace ngraph { namespace snippets { @@ -19,9 +19,9 @@ namespace pass { * The pass extracts Result expressions from Loop and insert after. 
* @ingroup snippets */ -class MoveResultOutOfLoop : public Transformation { +class MoveResultOutOfLoop : public Pass { public: - OPENVINO_RTTI("MoveResultOutOfLoop", "Transformation") + OPENVINO_RTTI("MoveResultOutOfLoop", "Pass") MoveResultOutOfLoop() = default; bool run(LinearIR& linear_ir) override; }; diff --git a/src/common/snippets/include/snippets/lowered/pass/move_scalar_to_consumer.hpp b/src/common/snippets/include/snippets/lowered/pass/move_scalar_to_consumer.hpp index d5151e71540c7a..ae46eb30db137f 100644 --- a/src/common/snippets/include/snippets/lowered/pass/move_scalar_to_consumer.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/move_scalar_to_consumer.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" namespace ngraph { namespace snippets { @@ -22,9 +22,9 @@ namespace pass { * To avoid such cases, we move Constants to the places in Linear IR before right Consumer to execute Scalar on each Loop iteration. * @ingroup snippets */ -class MoveScalarToConsumer : public Transformation { +class MoveScalarToConsumer : public Pass { public: - OPENVINO_RTTI("MoveScalarsToConsumer", "Transformation") + OPENVINO_RTTI("MoveScalarsToConsumer", "Pass") MoveScalarToConsumer() = default; bool run(LinearIR& linear_ir) override; }; diff --git a/src/common/snippets/include/snippets/lowered/pass/transformation.hpp b/src/common/snippets/include/snippets/lowered/pass/pass.hpp similarity index 61% rename from src/common/snippets/include/snippets/lowered/pass/transformation.hpp rename to src/common/snippets/include/snippets/lowered/pass/pass.hpp index ef00e881662e3b..e229cd74822b97 100644 --- a/src/common/snippets/include/snippets/lowered/pass/transformation.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/pass.hpp @@ -15,18 +15,18 @@ namespace lowered { namespace pass { /** - * @interface Transformation + * @interface Pass * @brief Base class for transformations on linear IR * @ingroup snippets */ -class Transformation 
{ +class Pass { public: - Transformation() = default; - virtual ~Transformation() = default; + Pass() = default; + virtual ~Pass() = default; // Note that get_type_info_static and get_type_info are needed to mimic OPENVINO_RTTI interface, // so the standard OPENVINO_RTTI(...) macros could be used in derived classes. _OPENVINO_HIDDEN_METHOD static const ::ov::DiscreteTypeInfo& get_type_info_static() { - static ::ov::DiscreteTypeInfo type_info_static {"Transformation"}; + static ::ov::DiscreteTypeInfo type_info_static {"Pass"}; type_info_static.hash(); return type_info_static; } @@ -42,23 +42,23 @@ class Transformation { virtual bool run(lowered::LinearIR& linear_ir) = 0; }; -class TransformationPipeline { +class PassPipeline { public: - TransformationPipeline() = default; + PassPipeline() = default; - void register_transformation(const std::shared_ptr& transformation); + void register_pass(const std::shared_ptr& pass); template - void register_transformation(Args&&... args) { - static_assert(std::is_base_of::value, "Transformation not derived from lowered::Transformation"); - auto transformation = std::make_shared(std::forward(args)...); - register_transformation(transformation); + void register_pass(Args&&... 
args) { + static_assert(std::is_base_of::value, "Pass not derived from lowered::Pass"); + auto pass = std::make_shared(std::forward(args)...); + register_pass(pass); } void run(lowered::LinearIR& linear_ir); private: - std::vector> m_transformations; + std::vector> m_passes; }; } // namespace pass diff --git a/src/common/snippets/include/snippets/lowered/pass/propagate_layout.hpp b/src/common/snippets/include/snippets/lowered/pass/propagate_layout.hpp index 4f7731b45449a6..d22a6397913599 100644 --- a/src/common/snippets/include/snippets/lowered/pass/propagate_layout.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/propagate_layout.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" namespace ngraph { namespace snippets { @@ -17,9 +17,9 @@ namespace pass { * proper data pointer offsets in the Kernel; * @ingroup snippets */ -class PropagateLayout : public Transformation { +class PropagateLayout : public Pass { public: - OPENVINO_RTTI("PropagateLayout", "Transformation") + OPENVINO_RTTI("PropagateLayout", "Pass") bool run(LinearIR& linear_ir) override; }; diff --git a/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp b/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp index 7e86f7107a7611..3fa6748aae6d4c 100644 --- a/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/softmax_decomposition.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" namespace ngraph { namespace snippets { @@ -16,10 +16,10 @@ namespace pass { * @brief Decomposes Softmax to a range of low-level operations on linear IR * @ingroup snippets */ -class SoftmaxDecomposition : public Transformation { +class SoftmaxDecomposition : public Pass { public: explicit SoftmaxDecomposition(size_t vector_size); - OPENVINO_RTTI("SoftmaxDecomposition", "Transformation") + OPENVINO_RTTI("SoftmaxDecomposition", 
"Pass") bool run(LinearIR& linear_ir) override; private: diff --git a/src/common/snippets/include/snippets/lowered/pass/vector_to_scalar.hpp b/src/common/snippets/include/snippets/lowered/pass/vector_to_scalar.hpp index b6cb96e9bb977d..5d8e94c507f9ee 100644 --- a/src/common/snippets/include/snippets/lowered/pass/vector_to_scalar.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/vector_to_scalar.hpp @@ -4,7 +4,7 @@ #pragma once -#include "transformation.hpp" +#include "pass.hpp" namespace ngraph { namespace snippets { @@ -35,10 +35,10 @@ namespace pass { // Result // Note: Load* should be replaced with ScalarLoad in this example to avoid invalid read in vector Loop. -class SetScalarCountForLoadStore : public Transformation { +class SetScalarCountForLoadStore : public Pass { public: explicit SetScalarCountForLoadStore(); - OPENVINO_RTTI("SetScalarCountForLoadStore", "Transformation") + OPENVINO_RTTI("SetScalarCountForLoadStore", "Pass") bool run(lowered::LinearIR& linear_ir) override; }; diff --git a/src/common/snippets/include/snippets/op/subgraph.hpp b/src/common/snippets/include/snippets/op/subgraph.hpp index 8261fbc31525e3..265b41c1f0de9e 100644 --- a/src/common/snippets/include/snippets/op/subgraph.hpp +++ b/src/common/snippets/include/snippets/op/subgraph.hpp @@ -101,14 +101,16 @@ class Subgraph : public ov::op::util::SubGraphOp { bool has_domain_sensitive_ops() const { return config.m_has_domain_sensitive_ops; } snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, - ngraph::pass::Manager& pre_dialect, - ngraph::pass::Manager& post_dialect, + ngraph::pass::Manager& pre_common, + ngraph::pass::Manager& post_common, ngraph::pass::Manager& post_precision, + lowered::pass::PassPipeline& target_lowered_pipeline, const void* compile_params = nullptr); snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, const void* compile_params = nullptr); - 
snippets::Schedule generate(ngraph::pass::Manager& pre_dialect, - ngraph::pass::Manager& post_dialect, + snippets::Schedule generate(ngraph::pass::Manager& pre_common, + ngraph::pass::Manager& post_common, ngraph::pass::Manager& post_precision, + lowered::pass::PassPipeline& target_lowered_pipeline, const void* compile_params = nullptr); snippets::Schedule generate(const void* compile_params = nullptr); ov::PartialShape canonicalize(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes); @@ -142,7 +144,8 @@ class Subgraph : public ov::op::util::SubGraphOp { private: void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes); - void convert_to_snippet_dialect(); + void data_flow_transformations(ngraph::pass::Manager& pre_common, ngraph::pass::Manager& post_common, ngraph::pass::Manager& post_precision); + void control_flow_transformations(lowered::LinearIR& linear_ir, lowered::pass::PassPipeline& target_pipeline, const lowered::Config& config); void init_config(); // Count of Subgraph virtual ports: // - Potential non-scalar Constants that will be created after some transformations (At the moment it's relevant only for FakeQuantize decomposition) diff --git a/src/common/snippets/src/generator.cpp b/src/common/snippets/src/generator.cpp index c1f86206195cd9..037a5bf3afe492 100644 --- a/src/common/snippets/src/generator.cpp +++ b/src/common/snippets/src/generator.cpp @@ -7,21 +7,6 @@ #include "snippets/lowered/linear_ir.hpp" #include "snippets/lowered/pass/assign_registers.hpp" #include "snippets/lowered/pass/insert_tail_loop.hpp" -#include "snippets/lowered/pass/mark_loops.hpp" -#include "snippets/lowered/pass/fuse_loops.hpp" -#include "snippets/lowered/pass/init_loops.hpp" -#include "snippets/lowered/pass/insert_buffers.hpp" -#include "snippets/lowered/pass/insert_load_store.hpp" -#include "snippets/lowered/pass/vector_to_scalar.hpp" -#include 
"snippets/lowered/pass/load_movebroadcast_to_broadcastload.hpp" -#include "snippets/lowered/pass/allocate_buffers.hpp" -#include "snippets/lowered/pass/propagate_layout.hpp" -#include "snippets/lowered/pass/cleanup_loop_offsets.hpp" -#include "snippets/lowered/pass/softmax_decomposition.hpp" -#include "snippets/lowered/pass/move_scalar_to_consumer.hpp" -#include "snippets/lowered/pass/move_result_out_of_loop.hpp" -#include "snippets/lowered/pass/clean_repeated_ptr_shifts.hpp" -#include "snippets/lowered/pass/identify_buffers.hpp" #include "snippets/op/kernel.hpp" @@ -30,52 +15,19 @@ namespace ngraph { namespace snippets { -Generator::LoweringResult Generator::generate(std::shared_ptr& m, const lowered::Config& config, const void* compile_params) { +Generator::LoweringResult Generator::generate(lowered::LinearIR& linear_ir, const lowered::Config& config, const void* compile_params) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::Generator::generate") OV_ITT_TASK_CHAIN(GENERATE, ngraph::pass::itt::domains::SnippetsTransform, "Snippets::Generator", "::Transformations") if (!target->is_supported()) OPENVINO_THROW("unsupported architecture for code generation"); - auto linear_ir = lowered::LinearIR(m, config); - const size_t vector_size = get_target_machine()->get_lanes(); - const int32_t buffer_allocation_rank = static_cast(config.m_loop_depth); - - // Note: The pass InitLoops uses LoopInfo that contains entry and exit points of the corresponding Loop. 
- // To avoid the Loop information corruption, we should call the passes with Load/Store work - // (for example, LoadMoveBroadcastToBroadcastLoad()) after explicit Loop insertion (InitLoops()) - lowered::pass::TransformationPipeline common_pipeline; - common_pipeline.register_transformation(vector_size); - common_pipeline.register_transformation(vector_size); - common_pipeline.register_transformation(); - common_pipeline.register_transformation(); - common_pipeline.register_transformation(buffer_allocation_rank); - common_pipeline.register_transformation(vector_size); - common_pipeline.register_transformation(); - common_pipeline.register_transformation(); - common_pipeline.register_transformation(); - common_pipeline.register_transformation(); - common_pipeline.run(linear_ir); - - lowered::pass::TransformationPipeline target_pipeline = target_specific_transformations(); - target_pipeline.run(linear_ir); - std::function& op)> reg_type_mapper = [&](const std::shared_ptr& op) -> opRegType { return get_op_reg_type(op); }; - - const auto buffer_allocation_pass = std::make_shared(); - lowered::pass::TransformationPipeline buffer_pipeline; - buffer_pipeline.register_transformation(); - buffer_pipeline.register_transformation(); - buffer_pipeline.register_transformation(buffer_allocation_pass); - buffer_pipeline.run(linear_ir); - - lowered::pass::TransformationPipeline final_pipeline; - final_pipeline.register_transformation(); - final_pipeline.register_transformation(); - final_pipeline.register_transformation(reg_type_mapper); - final_pipeline.register_transformation(); - final_pipeline.run(linear_ir); + lowered::pass::PassPipeline lowered_pipeline; + lowered_pipeline.register_pass(reg_type_mapper); + lowered_pipeline.register_pass(); + lowered_pipeline.run(linear_ir); linear_ir.init_emitters(target); @@ -97,7 +49,7 @@ Generator::LoweringResult Generator::generate(std::shared_ptr& m, con if (config.m_save_lowered_code) lowered_saved = linear_ir; - return 
{target->get_snippet(), buffer_allocation_pass->get_scratchpad_size()}; + return { target->get_snippet() }; } std::shared_ptr Generator::get_target_machine() const { @@ -139,9 +91,5 @@ Generator::opRegType Generator::get_specific_op_reg_type(const std::shared_ptrget_type_name()) + " isn't determined!"); } -lowered::pass::TransformationPipeline Generator::target_specific_transformations() const { - return lowered::pass::TransformationPipeline(); -} - }// namespace snippets }// namespace ngraph diff --git a/src/common/snippets/src/lowered/pass/fuse_loops.cpp b/src/common/snippets/src/lowered/pass/fuse_loops.cpp index 85f74bb32677e8..6aea59f81a3e87 100644 --- a/src/common/snippets/src/lowered/pass/fuse_loops.cpp +++ b/src/common/snippets/src/lowered/pass/fuse_loops.cpp @@ -17,7 +17,7 @@ namespace pass { using LoopManager = LinearIR::LoopManager; using LoopInfoPtr = LoopManager::LoopInfoPtr; -FuseLoops::FuseLoops() : Transformation() {} +FuseLoops::FuseLoops() : Pass() {} bool FuseLoops::can_be_fused(const LoopInfoPtr& loop_current, const LoopInfoPtr& loop_target) { auto current_work_amount = loop_current->work_amount; diff --git a/src/common/snippets/src/lowered/pass/init_loops.cpp b/src/common/snippets/src/lowered/pass/init_loops.cpp index f659b781a2ba15..5cd4463c1a0692 100644 --- a/src/common/snippets/src/lowered/pass/init_loops.cpp +++ b/src/common/snippets/src/lowered/pass/init_loops.cpp @@ -53,7 +53,7 @@ int64_t get_dim_stride(const size_t dim, const std::vector& layout, cons } } // namespace -InitLoops::InitLoops() : Transformation() {} +InitLoops::InitLoops() : Pass() {} std::vector InitLoops::init_ptr_increments(const std::vector& loop_inputs, const std::vector& loop_outputs, diff --git a/src/common/snippets/src/lowered/pass/insert_buffers.cpp b/src/common/snippets/src/lowered/pass/insert_buffers.cpp index 1e701117e95a02..5361064a3917a8 100644 --- a/src/common/snippets/src/lowered/pass/insert_buffers.cpp +++ 
b/src/common/snippets/src/lowered/pass/insert_buffers.cpp @@ -16,7 +16,7 @@ namespace lowered { namespace pass { InsertBuffers::InsertBuffers(int32_t buffer_allocation_rank) - : Transformation(), m_buffer_allocation_rank(buffer_allocation_rank) {} + : Pass(), m_buffer_allocation_rank(buffer_allocation_rank) {} LinearIR::constExprIt InsertBuffers::insertion_position(const LinearIR& linear_ir, const LinearIR::LoopManagerPtr& loop_manager, const ExpressionPtr& up_expr, const ExpressionPtr& down_expr) { diff --git a/src/common/snippets/src/lowered/pass/mark_loops.cpp b/src/common/snippets/src/lowered/pass/mark_loops.cpp index 1b13dbcdbbd4b3..4f1b4b6c561e75 100644 --- a/src/common/snippets/src/lowered/pass/mark_loops.cpp +++ b/src/common/snippets/src/lowered/pass/mark_loops.cpp @@ -14,7 +14,7 @@ namespace snippets { namespace lowered { namespace pass { -MarkLoops::MarkLoops(size_t vector_size) : Transformation(), m_vector_size(vector_size) {} +MarkLoops::MarkLoops(size_t vector_size) : Pass(), m_vector_size(vector_size) {} bool MarkLoops::run(LinearIR& linear_ir) { OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::MarkLoops") diff --git a/src/common/snippets/src/lowered/pass/pass.cpp b/src/common/snippets/src/lowered/pass/pass.cpp new file mode 100644 index 00000000000000..2370e1780e2b3a --- /dev/null +++ b/src/common/snippets/src/lowered/pass/pass.cpp @@ -0,0 +1,26 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/lowered/pass/pass.hpp" + + +namespace ngraph { +namespace snippets { +namespace lowered { +namespace pass { + +void PassPipeline::register_pass(const std::shared_ptr& pass) { + m_passes.push_back(pass); +} + +void PassPipeline::run(LinearIR& linear_ir) { + for (const auto& pass : m_passes) { + pass->run(linear_ir); + } +} + +} // namespace pass +} // namespace lowered +} // namespace snippets +} // namespace ngraph diff --git 
a/src/common/snippets/src/lowered/pass/transformation.cpp b/src/common/snippets/src/lowered/pass/transformation.cpp deleted file mode 100644 index 8af054830799e8..00000000000000 --- a/src/common/snippets/src/lowered/pass/transformation.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (C) 2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "snippets/lowered/pass/transformation.hpp" - - -namespace ngraph { -namespace snippets { -namespace lowered { -namespace pass { - -void TransformationPipeline::register_transformation(const std::shared_ptr& transformation) { - m_transformations.push_back(transformation); -} - -void TransformationPipeline::run(LinearIR& linear_ir) { - for (const auto& transformation : m_transformations) { - transformation->run(linear_ir); - } -} - -} // namespace pass -} // namespace lowered -} // namespace snippets -} // namespace ngraph diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index 02a4118b76fd2a..59148fc7f097c2 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -7,6 +7,7 @@ #include "snippets/op/subgraph.hpp" #include "snippets/op/convert_saturation.hpp" + #include "snippets/pass/insert_movebroadcast.hpp" #include "snippets/pass/broadcast_to_movebroadcast.hpp" #include "snippets/pass/propagate_precision.hpp" @@ -17,8 +18,27 @@ #include "snippets/pass/matmul_to_brgemm.hpp" #include "snippets/pass/fuse_transpose_brgemm.hpp" #include "snippets/pass/set_softmax_ports.hpp" + #include "snippets/utils.hpp" + #include "snippets/lowered/port_descriptor.hpp" +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/pass/assign_registers.hpp" +#include "snippets/lowered/pass/mark_loops.hpp" +#include "snippets/lowered/pass/fuse_loops.hpp" +#include "snippets/lowered/pass/init_loops.hpp" +#include "snippets/lowered/pass/insert_buffers.hpp" +#include "snippets/lowered/pass/insert_load_store.hpp" +#include 
"snippets/lowered/pass/vector_to_scalar.hpp" +#include "snippets/lowered/pass/load_movebroadcast_to_broadcastload.hpp" +#include "snippets/lowered/pass/allocate_buffers.hpp" +#include "snippets/lowered/pass/propagate_layout.hpp" +#include "snippets/lowered/pass/cleanup_loop_offsets.hpp" +#include "snippets/lowered/pass/softmax_decomposition.hpp" +#include "snippets/lowered/pass/move_scalar_to_consumer.hpp" +#include "snippets/lowered/pass/move_result_out_of_loop.hpp" +#include "snippets/lowered/pass/clean_repeated_ptr_shifts.hpp" +#include "snippets/lowered/pass/identify_buffers.hpp" #include "transformations/common_optimizations/nop_elimination.hpp" #include "transformations/utils/utils.hpp" @@ -447,34 +467,92 @@ void snippets::op::Subgraph::align_element_types(const BlockedShapeVector& outpu } } -void snippets::op::Subgraph::convert_to_snippet_dialect() { +void snippets::op::Subgraph::data_flow_transformations(ngraph::pass::Manager& pre_common, + ngraph::pass::Manager& post_common, + ngraph::pass::Manager& post_precision) { INTERNAL_OP_SCOPE(Subgraph); - OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::convert_to_snippet_dialect") - const auto& params = body_ptr()->get_parameters(); + OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::data_flow_transformations") + const auto& params = body_ptr()->get_parameters(); bool inputs_has_dynamic_last_dims = std::any_of(params.begin(), params.end(), - [](const shared_ptr& p){ + [](const shared_ptr& p) { return p->get_partial_shape().rbegin()->is_dynamic(); }); - ngraph::pass::Manager manager; + + pre_common.run_passes(body_ptr()); + + ngraph::pass::Manager common_manager; if (config.m_has_domain_sensitive_ops) { - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); + common_manager.register_pass(); + common_manager.register_pass(); + common_manager.register_pass(); + common_manager.register_pass(); } - 
manager.register_pass(); - manager.register_pass(); - manager.register_pass(); + common_manager.register_pass(); + common_manager.register_pass(); + common_manager.register_pass(); // todo: presently dynamic pipeline is activated even if the last two dimension are static // In general, we can use static kernels in this case, but several parameters (src and dst memory pointers for example) // should be passed as run-time args, so it's a mixed mode: kernel is shape-aware, but some additional runtime args are required // Presently Broadcasting is organized in the following way: // * ALL last dims are static => broadcasting is handled via MoveBroadcast and pointer arithmetics (even for dynamic upper dims) if (!inputs_has_dynamic_last_dims) { - manager.register_pass(); + common_manager.register_pass(); } - manager.run_passes(body_ptr()); + common_manager.run_passes(body_ptr()); + + post_common.run_passes(body_ptr()); + + ngraph::pass::Manager precision_manager; + precision_manager.register_pass(m_generator->get_target_machine()); + precision_manager.register_pass(); + precision_manager.register_pass(); + precision_manager.run_passes(body_ptr()); + + post_precision.run_passes(body_ptr()); +} + +void snippets::op::Subgraph::control_flow_transformations(lowered::LinearIR& linear_ir, + lowered::pass::PassPipeline& target_pipeline, + const lowered::Config& config) { + INTERNAL_OP_SCOPE(Subgraph); + OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::control_flow_transformations") + + linear_ir = lowered::LinearIR(body_ptr(), config); + const size_t vector_size = get_generator()->get_target_machine()->get_lanes(); + const int32_t buffer_allocation_rank = static_cast(config.m_loop_depth); + + // Note: The pass InitLoops uses LoopInfo that contains entry and exit points of the corresponding Loop. 
+ // To avoid the Loop information corruption, we should call the passes with Load/Store work + // (for example, LoadMoveBroadcastToBroadcastLoad()) after explicit Loop insertion (InitLoops()) + lowered::pass::PassPipeline common_pipeline; + common_pipeline.register_pass(vector_size); + common_pipeline.register_pass(vector_size); + common_pipeline.register_pass(); + common_pipeline.register_pass(); + common_pipeline.register_pass(buffer_allocation_rank); + common_pipeline.register_pass(vector_size); + common_pipeline.register_pass(); + common_pipeline.register_pass(); + common_pipeline.register_pass(); + common_pipeline.register_pass(); + common_pipeline.run(linear_ir); + + target_pipeline.run(linear_ir); + + const auto buffer_allocation_pass = std::make_shared(); + lowered::pass::PassPipeline buffer_pipeline; + buffer_pipeline.register_pass(); + buffer_pipeline.register_pass(); + buffer_pipeline.register_pass(buffer_allocation_pass); + buffer_pipeline.run(linear_ir); + + lowered::pass::PassPipeline final_pipeline; + final_pipeline.register_pass(); + final_pipeline.register_pass(); + final_pipeline.run(linear_ir); + + m_buffer_scratchpad = buffer_allocation_pass->get_scratchpad_size(); } snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& output_shapes, @@ -486,49 +564,43 @@ snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& ou snippets::Schedule snippets::op::Subgraph::generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, - ngraph::pass::Manager& pre_dialect, - ngraph::pass::Manager& post_dialect, + ngraph::pass::Manager& pre_common, + ngraph::pass::Manager& post_common, ngraph::pass::Manager& post_precision, + lowered::pass::PassPipeline& target_lowered_pipeline, const void* compile_params) { canonicalize(output_shapes, input_shapes); - return generate(pre_dialect, post_dialect, post_precision, compile_params); + return generate(pre_common, post_common, post_precision, 
target_lowered_pipeline, compile_params); } snippets::Schedule snippets::op::Subgraph::generate(const void* compile_params) { auto mngr = ngraph::pass::Manager(); - return generate(mngr, mngr, mngr, compile_params); + auto lowered = lowered::pass::PassPipeline(); + return generate(mngr, mngr, mngr, lowered, compile_params); } snippets::Schedule snippets::op::Subgraph::generate( - ngraph::pass::Manager& pre_dialect, - ngraph::pass::Manager& post_dialect, + ngraph::pass::Manager& pre_common, + ngraph::pass::Manager& post_common, ngraph::pass::Manager& post_precision, + lowered::pass::PassPipeline& target_lowered_pipeline, const void* compile_params) { INTERNAL_OP_SCOPE(Subgraph); OV_ITT_SCOPED_TASK(ngraph::pass::itt::domains::SnippetsTransform, "Snippets::op::generate") NGRAPH_CHECK(m_generator != nullptr, "generate is called while generator is not set"); - pre_dialect.run_passes(body_ptr()); - convert_to_snippet_dialect(); - post_dialect.run_passes(body_ptr()); - - ngraph::pass::Manager precision_manager; - precision_manager.register_pass(m_generator->get_target_machine()); - precision_manager.register_pass(); - precision_manager.register_pass(); - precision_manager.run_passes(body_ptr()); - - post_precision.run_passes(body_ptr()); - - const auto ops = body_ptr()->get_ops(); - // actual code emission + lowered::LinearIR linear_ir; lowered::Config lowering_config; lowering_config.m_save_lowered_code = config.m_has_domain_sensitive_ops; lowering_config.m_need_fill_tail_register = config.m_has_domain_sensitive_ops; lowering_config.m_loop_depth = tileRank; - const auto& lowering_result = m_generator->generate(body_ptr(), lowering_config, compile_params); - ngraph::snippets::code ptr = lowering_result.binary_code; - m_buffer_scratchpad = lowering_result.buffer_scratchpad_size; + + data_flow_transformations(pre_common, post_common, post_precision); + control_flow_transformations(linear_ir, target_lowered_pipeline, lowering_config); + + // actual code emission + const 
auto& lowering_result = m_generator->generate(linear_ir, lowering_config, compile_params); + const auto ptr = lowering_result.binary_code; return {master_shape, false /*canBeLinearized*/, ptr}; } diff --git a/src/common/snippets/tests/include/lowering_utils.hpp b/src/common/snippets/tests/include/lowering_utils.hpp index dd587f4de994e7..975556c568e0ae 100644 --- a/src/common/snippets/tests/include/lowering_utils.hpp +++ b/src/common/snippets/tests/include/lowering_utils.hpp @@ -55,6 +55,7 @@ class LoweringTests : public TransformationTestsF { ov::pass::Manager pre_dialect = {}, ov::pass::Manager post_dialect = {}, ov::pass::Manager post_precision = {}, + ngraph::snippets::lowered::pass::PassPipeline lowered_pipeline = {}, const std::shared_ptr generator = nullptr); static std::shared_ptr getTokenizedSubgraph(const std::shared_ptr& f); ov::PartialShape master_shape{}; diff --git a/src/common/snippets/tests/src/lowering_utils.cpp b/src/common/snippets/tests/src/lowering_utils.cpp index be7f6514f6cd4b..222ce7932a79c0 100644 --- a/src/common/snippets/tests/src/lowering_utils.cpp +++ b/src/common/snippets/tests/src/lowering_utils.cpp @@ -103,6 +103,7 @@ std::shared_ptr LoweringTests::getLoweredSubgrap ov::pass::Manager pre_dialect, ov::pass::Manager post_dialect, ov::pass::Manager post_precision, + ngraph::snippets::lowered::pass::PassPipeline lowered_pipeline, const std::shared_ptr generator) { auto subgraph = getTokenizedSubgraph(f); subgraph->set_generator(generator == nullptr ? 
std::make_shared() : generator); @@ -124,7 +125,7 @@ std::shared_ptr LoweringTests::getLoweredSubgrap } body_rt_info["PluginShapesOverride"] = new_shapes; subgraph->set_tile_rank(2); - subgraph->generate(pre_dialect, post_precision, post_precision); + subgraph->generate(pre_dialect, post_dialect, post_precision, lowered_pipeline); return subgraph; } diff --git a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp index 7ca7517d5974e4..70ec973eace9f1 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.cpp @@ -184,9 +184,3 @@ ngraph::snippets::Generator::opRegType ov::intel_cpu::CPUGenerator::get_specific else OPENVINO_THROW("Register type of the operation " + std::string(op->get_type_name()) + " isn't determined!"); } - -ngraph::snippets::lowered::pass::TransformationPipeline ov::intel_cpu::CPUGenerator::target_specific_transformations() const { - ngraph::snippets::lowered::pass::TransformationPipeline target_specific_transformation; - target_specific_transformation.register_transformation(); - return target_specific_transformation; -} diff --git a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp index c20a8db060b9c3..9b917af528ad07 100644 --- a/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp +++ b/src/plugins/intel_cpu/src/emitters/x64/cpu_generator.hpp @@ -32,7 +32,6 @@ class CPUGenerator : public ngraph::snippets::Generator { protected: opRegType get_specific_op_reg_type(const std::shared_ptr& op) const override; - ngraph::snippets::lowered::pass::TransformationPipeline target_specific_transformations() const override; }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/nodes/subgraph.cpp b/src/plugins/intel_cpu/src/nodes/subgraph.cpp index 382b9019455595..17e49f9606d162 100644 --- a/src/plugins/intel_cpu/src/nodes/subgraph.cpp +++ 
b/src/plugins/intel_cpu/src/nodes/subgraph.cpp @@ -564,10 +564,14 @@ void Snippet::generate(const jit_snippets_compile_args* jcp) { CPU_REGISTER_PASS_X64(post_precision, ov::intel_cpu::pass::RemoveConverts); CPU_REGISTER_PASS_X64(post_precision, ov::intel_cpu::pass::MulAddToFMA); + ngraph::snippets::lowered::pass::PassPipeline target_specific_pipeline; + CPU_REGISTER_PASS_X64(target_specific_pipeline, ov::intel_cpu::pass::FuseLoadStoreConvert); + schedule = snippet->generate( pre_dialect, post_dialect, post_precision, + target_specific_pipeline, reinterpret_cast(jcp)); } diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.hpp index 45a466b3691aa6..00b33e2b4a2329 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/fuse_load_store_and_convert.hpp @@ -4,7 +4,7 @@ #pragma once -#include "snippets/lowered/pass/transformation.hpp" +#include "snippets/lowered/pass/pass.hpp" namespace ov { namespace intel_cpu { @@ -18,7 +18,7 @@ namespace pass { * Fuse Store and ConvertTruncation into one op StoreConvertTruncation * @ingroup snippets */ -class FuseLoadStoreConvert: public ngraph::snippets::lowered::pass::Transformation { +class FuseLoadStoreConvert: public ngraph::snippets::lowered::pass::Pass { public: FuseLoadStoreConvert() = default; OPENVINO_RTTI("FuseLoadStoreConvert", "LinearIRTransformation");