Commit 0ac8608: Applied comments by Dmitry

a-sidorova committed May 19, 2023
1 parent 13d956f

Showing 35 changed files with 191 additions and 354 deletions.
36 changes: 18 additions & 18 deletions src/common/snippets/include/snippets/lowered/linear_ir.hpp
@@ -15,7 +15,7 @@ namespace lowered {
class Config {
public:
// True if the lowered Emitters need to be accessed during runtime. Normally they're destroyed after code emission.
-bool m_save_lowered_code = false;
+bool m_save_expressions = false;
// True if we should check runtime info of nodes to call specific transformations when needed
bool m_need_fill_tail_register = false;
size_t m_loop_depth = 1;
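A minimal usage sketch of the renamed flag (the enclosing namespace and the call site are assumptions; only the field names come from this header):

    // Keep expressions (and their emitters) alive after code emission so they
    // can be inspected at runtime; this flag was m_save_lowered_code before this commit.
    snippets::lowered::Config config;
    config.m_save_expressions = true;
    config.m_loop_depth = 2;  // allow two nested loop levels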
@@ -36,8 +36,8 @@ class LinearIR {

static LinearIR::container deep_copy_range(LinearIR::container::const_iterator begin, LinearIR::container::const_iterator end);

-const container& get_ops() const {return m_lowered_ops; }
-const io_container& get_IO_ops() const {return m_io_lowered_ops; }
+const container& get_ops() const {return m_expressions; }
+const io_container& get_IO_ops() const {return m_io_expressions; }
Config get_config() {return m_config; }

const ExpressionPtr& get_expr_by_node(const std::shared_ptr<Node>& n) const;
@@ -52,24 +52,24 @@ class LinearIR {
*/
void move(constExprIt from, constExprIt to);

-bool empty() const noexcept {return m_lowered_ops.empty(); }
+bool empty() const noexcept {return m_expressions.empty(); }
void debug_print(bool tds_as_pointers = false) const;

-container::reference back() noexcept {return m_lowered_ops.back();}
-container::const_reference back() const noexcept {return m_lowered_ops.back();}
-container::reference front() noexcept {return m_lowered_ops.front();}
-container::const_reference front() const noexcept {return m_lowered_ops.front();}
+container::reference back() noexcept {return m_expressions.back();}
+container::const_reference back() const noexcept {return m_expressions.back();}
+container::reference front() noexcept {return m_expressions.front();}
+container::const_reference front() const noexcept {return m_expressions.front();}

-exprIt begin() noexcept {return m_lowered_ops.begin();}
-exprIt end() noexcept {return m_lowered_ops.end();}
+exprIt begin() noexcept {return m_expressions.begin();}
+exprIt end() noexcept {return m_expressions.end();}
constExprIt begin() const noexcept {return cbegin();}
constExprIt end() const noexcept {return cend();}
-constExprIt cbegin() const noexcept {return m_lowered_ops.cbegin();}
-constExprIt cend() const noexcept {return m_lowered_ops.cend();}
-container::reverse_iterator rbegin() noexcept {return m_lowered_ops.rbegin();}
-container::reverse_iterator rend() noexcept {return m_lowered_ops.rend();}
-container::const_reverse_iterator crbegin() const noexcept {return m_lowered_ops.crbegin();}
-container::const_reverse_iterator crend() const noexcept {return m_lowered_ops.crend();}
+constExprIt cbegin() const noexcept {return m_expressions.cbegin();}
+constExprIt cend() const noexcept {return m_expressions.cend();}
+container::reverse_iterator rbegin() noexcept {return m_expressions.rbegin();}
+container::reverse_iterator rend() noexcept {return m_expressions.rend();}
+container::const_reverse_iterator crbegin() const noexcept {return m_expressions.crbegin();}
+container::const_reverse_iterator crend() const noexcept {return m_expressions.crend();}

exprIt insert(constExprIt pos, const ov::NodeVector& nodes);
exprIt insert(constExprIt pos, const std::shared_ptr<Node>& n);
@@ -97,9 +97,9 @@ class LinearIR {
void register_expression(const ExpressionPtr& expr, bool io_allowed = false);
void unregister_expression(const ExpressionPtr& expr);

-container m_lowered_ops{};
+container m_expressions{};
std::unordered_map<std::shared_ptr<Node>, std::shared_ptr<Expression>> m_node2expression_map;
-io_container m_io_lowered_ops;
+io_container m_io_expressions;
Config m_config{};
LoopManagerPtr m_loop_manager = nullptr;
};
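All of the container accessors above now forward to m_expressions. A short iteration sketch, assuming a populated LinearIR (get_node() and the printing style are borrowed from debug_print() in linear_ir.cpp below):

    #include <iostream>

    // Walk the expression list in execution order and print each node's name.
    void dump(const LinearIR& linear_ir) {
        for (auto it = linear_ir.cbegin(); it != linear_ir.cend(); ++it) {
            const auto& expr = *it;  // ExpressionPtr
            std::cerr << expr->get_node()->get_friendly_name() << "\n";
        }
    }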
@@ -14,7 +14,12 @@ namespace pass {

/**
* @interface AllocateBuffers
-* @brief The pass calculation common size of buffer scratchpad and propagates Buffer offsets to connected MemoryAccess operations.
+* @brief The pass calculates the common size of the buffer scratchpad and propagates Buffer offsets to connected MemoryAccess operations.
+* Notes:
+* - The pass implicitly handles InPlace processing for some Buffers when possible.
+*   It doesn't allocate new memory for InPlace Buffers; the same offsets are propagated to them.
+* - The pass should be split into two passes: ProcessInplace (markup of Buffers that can use the same memory)
+*   and AllocateBuffer (allocation of memory for Buffers using MemorySolver, which can reuse memory optimally).
* @ingroup snippets
*/
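To make the InPlace note concrete, a toy sketch of the allocation rule it describes; BufferInfo and all of its fields are hypothetical stand-ins for illustration, not the pass's real data structures:

    #include <cstddef>
    #include <vector>

    struct BufferInfo {
        size_t byte_size = 0;  // scratchpad bytes this Buffer needs
        bool inplace = false;  // true: may reuse the previous Buffer's memory
        size_t offset = 0;     // resulting offset into the common scratchpad
    };

    // Returns the common scratchpad size. InPlace Buffers receive the previous
    // Buffer's offset instead of fresh memory, as the note above describes.
    size_t allocate(std::vector<BufferInfo>& buffers) {
        size_t total = 0;
        size_t last_offset = 0;
        for (auto& b : buffers) {
            if (b.inplace) {
                b.offset = last_offset;  // propagate the same offset
            } else {
                b.offset = total;        // claim a new region
                last_offset = b.offset;
                total += b.byte_size;
            }
        }
        return total;
    }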

@@ -33,6 +33,7 @@ class InsertLoadStore : public Pass {
const ExpressionPort& actual_port, const std::vector<ExpressionPort>& target_ports, bool is_entry = true);
void update_loop(const LinearIR::LoopManager::LoopInfoPtr& loop_info,
const ExpressionPort& actual_port, const std::vector<ExpressionPort>& target_ports, bool is_entry = true);
+size_t get_count(const PortDescriptorPtr& port_desc) const;

size_t m_vector_size;
};

This file was deleted.

@@ -62,11 +62,11 @@ class PortDescriptor {
std::vector<size_t> m_layout{};
/// \brief Minimal tensor size that could be processed in one call
std::vector<size_t> m_subtensor_shape{};
-/// \brief The corresponding abstract register
+/// \brief The corresponding abstract/physical register
size_t m_reg = 0;
};

-class PortManager {
+class PortDescriptorUtils {
public:
static void set_port_descriptor_ptr(const ov::Input<ov::Node>& n, const PortDescriptorPtr& desc);
static void set_port_descriptor_ptr(const ov::Output<ov::Node>& n, const PortDescriptorPtr& desc);
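A brief sketch of the renamed utility in use; node and copy are hypothetical ops, while the getter/setter names come from this header and from expression.cpp below:

    // Clone a port descriptor from one op's input onto another's.
    const auto desc = PortDescriptorUtils::get_port_descriptor_ptr(node->input(0));
    PortDescriptorUtils::set_port_descriptor_ptr(copy->input(0), desc->clone());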
2 changes: 1 addition & 1 deletion src/common/snippets/include/snippets/op/subgraph.hpp
@@ -145,7 +145,7 @@ class Subgraph : public ov::op::util::SubGraphOp {
private:
void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes);
void data_flow_transformations(ov::pass::Manager& pre_common, ov::pass::Manager& post_common, ov::pass::Manager& post_precision);
-void control_flow_transformations(lowered::LinearIR& linear_ir, lowered::pass::PassPipeline& target_pipeline, const lowered::Config& config);
+void control_flow_transformations(lowered::LinearIR& linear_ir, lowered::pass::PassPipeline& target_pipeline);
void init_config();
// Count of Subgraph virtual ports:
// - Potential non-scalar Constants that will be created after some transformations (At the moment it's relevant only for FakeQuantize decomposition)
2 changes: 1 addition & 1 deletion src/common/snippets/src/generator.cpp
@@ -46,7 +46,7 @@ Generator::LoweringResult Generator::generate(lowered::LinearIR& linear_ir, cons

// todo: we save the lowered IR to access compiled brgemm kernels at execution time (normally it is destructed by then);
// remove this when kernel caching is implemented. Don't forget to make generate() a const method.
-if (config.m_save_lowered_code)
+if (config.m_save_expressions)
lowered_saved = linear_ir;

return { target->get_snippet() };
4 changes: 2 additions & 2 deletions src/common/snippets/src/lowered/expression.cpp
@@ -20,10 +20,10 @@ Expression::Expression(const std::shared_ptr<Node>& n) : m_source_node{n}, m_emi
m_input_port_descriptors.reserve(n->get_input_size());
m_output_port_descriptors.reserve(n->get_output_size());
for (const auto& input : n->inputs()) {
-m_input_port_descriptors.push_back(PortManager::get_port_descriptor_ptr(input));
+m_input_port_descriptors.push_back(PortDescriptorUtils::get_port_descriptor_ptr(input));
}
for (const auto& output : n->outputs()) {
-m_output_port_descriptors.push_back(PortManager::get_port_descriptor_ptr(output));
+m_output_port_descriptors.push_back(PortDescriptorUtils::get_port_descriptor_ptr(output));
}
}

34 changes: 17 additions & 17 deletions src/common/snippets/src/lowered/linear_ir.cpp
@@ -19,10 +19,10 @@ namespace snippets {
namespace lowered {

LinearIR::LinearIR(const std::shared_ptr<ov::Model>& model, Config config)
-: m_io_lowered_ops{}, m_config{std::move(config)}, m_loop_manager(std::make_shared<LoopManager>()) {
-constExprIt last_param = m_lowered_ops.end();
+: m_io_expressions{}, m_config{std::move(config)}, m_loop_manager(std::make_shared<LoopManager>()) {
+constExprIt last_param = m_expressions.end();
for (const auto& n : get_ordered_ops(model)) {
-constExprIt insertion_pos = m_lowered_ops.end();
+constExprIt insertion_pos = m_expressions.end();
const auto expr = create_expression(n, model);

// Scalars should be placed at the beginning of the Linear IR, right after Parameters, to keep the expression order valid after Loop passes.
@@ -33,10 +33,10 @@ LinearIR::LinearIR(const std::shared_ptr<ov::Model>& model, Config config)
}

register_expression(expr, true);
-const auto& it = m_lowered_ops.insert(insertion_pos, expr);
+const auto& it = m_expressions.insert(insertion_pos, expr);

if (const auto io_expr = std::dynamic_pointer_cast<IOExpression>(expr)) {
-m_io_lowered_ops.push_back(io_expr);
+m_io_expressions.push_back(io_expr);
if (ov::is_type<ov::op::v0::Parameter>(n))
last_param = it;
}
@@ -71,7 +71,7 @@ void LinearIR::serialize(const std::string& xml, const std::string& bin) {
first_node->set_friendly_name("Start");
first_node->get_rt_info()["execTimeMcs"] = 0;
std::shared_ptr<Node> body_node = first_node;
-for (const auto& expr : m_lowered_ops) {
+for (const auto& expr : m_expressions) {
body_node = std::make_shared<op::SerializationNode>(body_node, expr);
}
auto last_node = std::make_shared<ov::op::v0::Result>(body_node);
@@ -116,7 +116,7 @@ void LinearIR::debug_print(bool tds_as_pointers) const {
std::map<TensorPtr, int> td2int;
int td_counter = 0;
int counter = 0;
-for (const auto& expr : m_lowered_ops) {
+for (const auto& expr : m_expressions) {
const auto& node = expr->get_node();
std::cerr << counter++ << " : " <<
node->get_friendly_name() << " : ";
@@ -148,7 +148,7 @@ void LinearIR::debug_print(bool tds_as_pointers) const {
}

void LinearIR::init_emitters(const std::shared_ptr<TargetMachine>& target) {
-for (auto& expr : m_lowered_ops) {
+for (auto& expr : m_expressions) {
if (!expr->get_emitter())
expr->init_emitter(target);
}
@@ -206,12 +206,12 @@ void LinearIR::unregister_expression(const ExpressionPtr& expr) {

LinearIR::exprIt LinearIR::insert(constExprIt pos, container::value_type&& value) {
register_expression(value);
-return m_lowered_ops.insert(pos, value);
+return m_expressions.insert(pos, value);
}

LinearIR::exprIt LinearIR::insert(constExprIt pos, const container::value_type& value) {
register_expression(value);
-return m_lowered_ops.insert(pos, value);
+return m_expressions.insert(pos, value);
}

LinearIR::exprIt LinearIR::insert(constExprIt pos, exprIt begin, exprIt end) {
@@ -223,15 +223,15 @@ LinearIR::exprIt LinearIR::insert(constExprIt pos, exprIt begin, exprIt end) {
LinearIR::exprIt LinearIR::insert(constExprIt pos, constExprIt begin, constExprIt end) {
for (auto b = begin; b != end; b++)
register_expression(*b);
-return m_lowered_ops.insert(pos, begin, end);
+return m_expressions.insert(pos, begin, end);
}

LinearIR::exprIt LinearIR::insert(LinearIR::constExprIt pos, const NodeVector& nodes) {
-auto ret = m_lowered_ops.end();
+auto ret = m_expressions.end();
for (const auto& n : nodes) {
const auto& expr = create_expression(n);
register_expression(expr);
-ret = m_lowered_ops.insert(pos, expr);
+ret = m_expressions.insert(pos, expr);
}
// Return an iterator to the first of the inserted values: `ret` points at the last
// inserted element, so step back nodes.size() - 1 positions.
return std::prev(ret, static_cast<int64_t>(nodes.size()) - 1);
@@ -240,22 +240,22 @@ LinearIR::exprIt LinearIR::insert(LinearIR::constExprIt pos, const NodeVector& n
LinearIR::exprIt LinearIR::insert(LinearIR::constExprIt pos, const std::shared_ptr<Node>& n) {
const auto& expr = create_expression(n);
register_expression(expr);
-return m_lowered_ops.insert(pos, expr);
+return m_expressions.insert(pos, expr);
}

LinearIR::exprIt LinearIR::erase(LinearIR::exprIt pos) {
unregister_expression(*pos);
-return m_lowered_ops.erase(pos);
+return m_expressions.erase(pos);
}

LinearIR::exprIt LinearIR::erase(LinearIR::constExprIt pos) {
unregister_expression(*pos);
-return m_lowered_ops.erase(pos);
+return m_expressions.erase(pos);
}

void LinearIR::move(LinearIR::constExprIt from, LinearIR::constExprIt to) {
// Instead of `insert()` + `erase()`, we use `splice()` for the same list
-m_lowered_ops.splice(to, m_lowered_ops, from);
+m_expressions.splice(to, m_expressions, from);
}
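The splice() call relinks a single list node in O(1): the expression is neither copied nor re-registered, and iterators to all other elements stay valid. A standalone sketch of the semantics, using only the standard library:

    #include <iterator>
    #include <list>

    int main() {
        std::list<int> l = {1, 2, 3, 4};
        auto from = std::next(l.begin());  // points at 2
        l.splice(l.end(), l, from);        // l is now {1, 3, 4, 2}; O(1), no copies
    }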

}// namespace lowered
4 changes: 2 additions & 2 deletions src/common/snippets/src/lowered/pass/insert_buffers.cpp
@@ -103,7 +103,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt
// Need to insert between 2nd and 4th Loops - after 2nd Loop
const auto pos = insertion_position(linear_ir, loop_manager, parent_expr, expr);
const auto buffer = std::make_shared<op::Buffer>(parent->output(parent_port), m_buffer_allocation_rank);
-PortManager::set_port_descriptor_ptr(buffer->output(0), parent_expr_output.get_descriptor_ptr()->clone());
+PortDescriptorUtils::set_port_descriptor_ptr(buffer->output(0), parent_expr_output.get_descriptor_ptr()->clone());
// Output tensor is automatically filled from PortDescriptor
const auto buffer_expr = linear_ir.create_expression(buffer, {input_tensor});
linear_ir.insert(pos, buffer_expr);
@@ -178,7 +178,7 @@ void InsertBuffers::insertion(LinearIR& linear_ir, const LinearIR::LoopManagerPt
const auto pos = insertion_position(linear_ir, loop_manager, expr, (*potential_consumers.begin()).get_expr());

auto buffer = std::make_shared<op::Buffer>(node->output(port), m_buffer_allocation_rank);
-PortManager::set_port_descriptor_ptr(buffer->output(0), exit_point.get_descriptor_ptr()->clone());
+PortDescriptorUtils::set_port_descriptor_ptr(buffer->output(0), exit_point.get_descriptor_ptr()->clone());
// We cannot insert the Node's output tensor on the Buffer output because not all consumers of the Node need the Buffer
// Example:
// Add
18 changes: 14 additions & 4 deletions src/common/snippets/src/lowered/pass/insert_load_store.cpp
@@ -50,6 +50,16 @@ void InsertLoadStore::update_loop(const LinearIR::LoopManager::LoopInfoPtr& loop
ports.insert(port_it, target_ports.cbegin(), target_ports.cend());
}

+size_t InsertLoadStore::get_count(const PortDescriptorPtr& port_desc) const {
+const auto layout = port_desc->get_layout();
+const auto shape = port_desc->get_shape();
+// Find the last dimension by layout
+const auto last_dim_idx = std::find(layout.begin(), layout.end(), layout.size() - 1);
+OPENVINO_ASSERT(last_dim_idx != layout.end(), "Load/Store expression has an incorrect layout");
+const auto dim = shape[*last_dim_idx];
+return dim == 1 ? 1 : m_vector_size;
+}
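A worked example of the new helper, assuming the layout convention visible here (the value layout.size() - 1 marks the innermost dimension): with layout {0, 1, 2, 3} and shape {1, 3, 16, 64}, std::find stops at the element equal to 3, and shape[3] == 64 != 1, so get_count() returns m_vector_size and a full vector Load/Store is emitted. With shape {1, 3, 16, 1} the innermost dimension is 1 and get_count() returns 1, i.e. a scalar access.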

bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExprIt& data_expr_it) {
const auto& loop_manager = linear_ir.get_loop_manager();
const auto& data_expr = *data_expr_it;
@@ -71,8 +81,8 @@ bool InsertLoadStore::insert_load(LinearIR& linear_ir, const LinearIR::constExpr
const auto inner_loop = get_inner_loop_id(loop_ids);
OPENVINO_ASSERT(inner_loop != Expression::LOOP_NULL_ID, "Loop hasn't been found!");

-const auto load = std::make_shared<op::Load>(data_node->output(0), m_vector_size);
-PortManager::set_port_descriptor_ptr(load->output(0), consumer_input.get_descriptor_ptr()->clone());
+const auto load = std::make_shared<op::Load>(data_node->output(0), get_count(data_expr->get_output_port_descriptor(0)));
+PortDescriptorUtils::set_port_descriptor_ptr(load->output(0), consumer_input.get_descriptor_ptr()->clone());
const auto load_expr = linear_ir.create_expression(load, {output_tensor});
linear_ir.insert(std::find(data_expr_it, linear_ir.cend(), consumer_expr), load_expr);
linear_ir.replace_input(consumer_input, load_expr->get_output_tensor(0));
@@ -106,8 +116,8 @@ bool InsertLoadStore::insert_store(LinearIR& linear_ir, const LinearIR::constExp
const auto inner_loop = get_inner_loop_id(loop_ids);
OPENVINO_ASSERT(inner_loop != Expression::LOOP_NULL_ID, "Loop hasn't been found!");

-const auto store = std::make_shared<op::Store>(parent->output(port), m_vector_size);
-PortManager::set_port_descriptor_ptr(store->output(0), parent_output.get_descriptor_ptr()->clone());
+const auto store = std::make_shared<op::Store>(parent->output(port), get_count(data_expr->get_input_port_descriptor(0)));
+PortDescriptorUtils::set_port_descriptor_ptr(store->output(0), parent_output.get_descriptor_ptr()->clone());
const auto store_expr = linear_ir.create_expression(store, {input_tensor});
const auto& reverse_insertion_pos = std::find(std::reverse_iterator<LinearIR::constExprIt>(data_expr_it), linear_ir.crend(), parent_expr);
const auto& insertion_pos = reverse_insertion_pos.base();
@@ -45,7 +45,7 @@ bool LoadMoveBroadcastToBroadcastLoad::run(LinearIR& linear_ir) {
const auto& outshape = move_broadcast->get_output_partial_shape(0);
const auto broadcastload = std::make_shared<snippets::op::BroadcastLoad>(load->input_value(0), outshape, load->get_offset());
const auto move_consumers = expr->get_output_tensor(0)->get_consumers();
-PortManager::set_port_descriptor_ptr(broadcastload->output(0), expr->get_output_port(0).get_descriptor_ptr()->clone());
+PortDescriptorUtils::set_port_descriptor_ptr(broadcastload->output(0), expr->get_output_port(0).get_descriptor_ptr()->clone());
const auto broadcastload_expr = linear_ir.create_expression(broadcastload, { parent_expr->get_input_tensor(0) });
const auto mv_expr_it = expr_it;
const auto insertion_pos = std::next(expr_it);
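For context, the replacement above fuses a Load feeding a MoveBroadcast into a single BroadcastLoad that keeps the Load's offset; schematically (shapes are hypothetical):

    // Before: Load(parent, offset) -> MoveBroadcast([1, n] -> [m, n])
    // After:  BroadcastLoad(parent, outshape, offset)  // one op, same offset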