[Snippets] Added Buffer identification
a-sidorova committed Apr 4, 2023
1 parent 61ef97a commit be72f40
Showing 23 changed files with 829 additions and 138 deletions.
15 changes: 11 additions & 4 deletions src/common/snippets/include/snippets/op/buffer.hpp
@@ -16,18 +16,22 @@ namespace op {
* If Buffer has a parent, the operation is for intermediate data storage - IntermediateMemory type.
* Otherwise, the operation is for allocation of new empty memory with shape `m_shape` - NewMemory type
* Notes:
* - All buffers in a graph have the same memory pointer. So if we have a few buffers,
* - All buffers with the same ID in a graph have the same memory pointer. So if we have a few buffers,
* each corresponding MemoryAccess op for a Buffer should have an offset for the common memory pointer of this Buffer
* - Buffer should be a single consumer for operation output port
* @param m_type - type of Buffer: IntermediateMemory/NewMemory
* @param m_shape - output allocation shape for Buffer with type NewMemory
* @param m_offset - offset in common Buffer scratchpad
* @param m_id - Buffer ID in common Buffer system
* @ingroup snippets
*/
class Buffer : public ngraph::op::Op {
public:
OPENVINO_OP("Buffer", "SnippetsOpset");
Buffer() = default;
Buffer(const ov::Shape& shape);
Buffer(const ov::Output<ov::Node>& arg, const ov::Shape& shape);
Buffer(const ov::Output<ov::Node>& arg, int32_t allocation_rank = -1);
Buffer(const ov::Shape& shape, size_t id = 0);
Buffer(const ov::Output<ov::Node>& arg, const ov::Shape& shape, size_t id = 0);
Buffer(const ov::Output<ov::Node>& arg, int32_t allocation_rank = -1, size_t id = 0);

bool visit_attributes(AttributeVisitor& visitor) override;
void validate_and_infer_types() override;
@@ -38,9 +42,11 @@ class Buffer : public ngraph::op::Op {
IntermediateMemory
};

size_t get_id() const { return m_id; }
Type get_type() const { return m_type; }
ov::Shape get_allocation_shape() const { return m_shape; }
int64_t get_offset() const { return m_offset; }
void set_id(size_t id) { m_id = id; }
void set_offset(int64_t offset) { m_offset = offset; }

size_t get_byte_size() const;
@@ -52,6 +58,7 @@ class Buffer : public ngraph::op::Op {
Type m_type = Type::IntermediateMemory;
ov::Shape m_shape = {};
int64_t m_offset = 0;
size_t m_id = 0; // Default ID - 0. All Buffers are from the same set
};

} // namespace op
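For context on the new m_id field, here is a minimal usage sketch (not part of the commit; the producer outputs, the ID values, and the include path are illustrative, and static input shapes are assumed). Buffers that share an ID are meant to alias the same chunk of the common scratchpad, while set_offset() positions each Buffer inside it:

#include <memory>
#include "snippets/op/buffer.hpp"

void buffer_id_sketch(const ov::Output<ov::Node>& producer_a, const ov::Output<ov::Node>& producer_b) {
    using ngraph::snippets::op::Buffer;

    // Two IntermediateMemory Buffers assigned the same ID: after BufferIdentification
    // they are expected to share one data pointer (and, later, one scratchpad offset).
    const auto buffer_a = std::make_shared<Buffer>(producer_a, /*allocation_rank=*/-1, /*id=*/1);
    const auto buffer_b = std::make_shared<Buffer>(producer_b, /*allocation_rank=*/-1, /*id=*/1);

    // A NewMemory Buffer with a different ID allocates its own region of the scratchpad.
    const auto buffer_new = std::make_shared<Buffer>(ov::Shape{1, 64}, /*id=*/0);

    // Offsets into the common scratchpad are assigned later (e.g. by BufferAllocation);
    // Buffers with the same ID should end up with the same offset.
    buffer_a->set_offset(0);
    buffer_b->set_offset(0);
}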
10 changes: 4 additions & 6 deletions src/common/snippets/include/snippets/op/subgraph.hpp
@@ -93,11 +93,10 @@ class Subgraph : public ov::op::util::SubGraphOp {
ov::Model& body() { return *m_bodies[0]; }

const std::shared_ptr<ngraph::snippets::Generator>& get_generator() const { return m_generator; }
std::shared_ptr<ngraph::snippets::Generator> & get_generator() { return m_generator; }
std::shared_ptr<ngraph::snippets::Generator>& get_generator() { return m_generator; }

size_t get_buffer_scratchpad_size() const { return m_buffer_scratchpad; }
size_t get_virtual_port_count() const { return m_virtual_port_count; }
bool is_buffer_needed() const { return m_buffer_needed; }
bool is_quantized() const { return config.m_is_quantized; }
bool has_type_relaxed_ops() const { return config.m_has_type_relaxed_ops; }
bool has_domain_sensitive_ops() const { return config.m_has_domain_sensitive_ops; }
@@ -122,7 +121,6 @@ class Subgraph : public ov::op::util::SubGraphOp {
void set_generator(std::shared_ptr<ngraph::snippets::Generator> generator);
void set_tile_rank(size_t newRank) {tileRank = newRank;}
void set_virtual_port_count(const size_t count);
void set_buffer_needed(const bool need);

void print() const;
void print_statistics(bool verbose);
@@ -139,19 +137,19 @@ class Subgraph : public ov::op::util::SubGraphOp {
static auto constant_input_should_be_inside_body(const std::shared_ptr<ov::Node>& node) -> bool;

static bool check_broadcast(const std::shared_ptr<const ov::Node>& node) noexcept;
// Return the estimated unique buffer count (an upper bound). It's needed for tokenization
static auto get_estimated_buffer_count(const ov::NodeVector& ops) -> size_t;
static auto is_domain_sensitive_op(const std::shared_ptr<ov::Node>& op) -> bool;

private:
void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes);
void convert_to_snippet_dialect();
void init_config();
// Count of Subgraph virtual ports:
// - Potential non-scalar Constants that will be created after some transformations (At the moment it's relevant only for FakeQuantize decomposition)
// Need Buffer op or not
// - Buffers. All Buffers are considered as one common additional virtual port. So we cannot count them the same way as the potential non-scalar Constants
// NOTE: To avoid overheads in each calculation of this count (for example, in validate_and_type_infer()),
// we should MANUALLY calculate it where it is needed.
size_t m_virtual_port_count = 0;
bool m_buffer_needed = false;
size_t m_buffer_scratchpad = 0lu;
Shape exec_domain = {};
std::shared_ptr<ngraph::snippets::Generator> m_generator = nullptr;
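The new get_estimated_buffer_count() helper above only promises an upper bound used at tokenization time. A purely hypothetical sketch of such an estimate (this is NOT the heuristic the commit implements; the set of ops and the per-op count are assumptions) could simply count the ops that are known to require intermediate Buffers:

#include <cstddef>
#include <memory>
#include "openvino/core/node_vector.hpp"
#include "openvino/core/type.hpp"
#include "openvino/op/matmul.hpp"
#include "openvino/op/softmax.hpp"

// Hypothetical upper bound: assume each MatMul/Softmax needs at most two intermediate Buffers.
size_t estimate_unique_buffer_count(const ov::NodeVector& ops) {
    size_t count = 0;
    for (const auto& op : ops) {
        if (ov::is_type<ov::op::v0::MatMul>(op) ||
            ov::is_type<ov::op::v1::Softmax>(op) ||
            ov::is_type<ov::op::v8::Softmax>(op))
            count += 2;  // input-side and output-side intermediate memory
    }
    return count;
}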
46 changes: 46 additions & 0 deletions src/common/snippets/include/snippets/pass/lowered/buffer_identification.hpp
@@ -0,0 +1,46 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "linear_IR_transformation.hpp"

namespace ngraph {
namespace snippets {
namespace pass {
namespace lowered {

/**
* @interface BufferIdentification
* @brief The pass sets identifiers for Buffers in the common Buffer system.
* Buffers with the same identifier have the same data register.
* The pass uses a greedy graph-coloring algorithm over an adjacency matrix:
* - Buffers are the vertices of the graph
* - Loops, Brgemm (and similar ops) are the "edges" between Buffers (hubs of edges).
* Buffers that are connected to the same Loop are adjacent in the graph sense.
* - The vertices (buffers) are adjacent if they are connected to the same Loop and
* their data pointers cannot be proportionally incremented in Loops: different ptr increments or data sizes.
* - First, the adjacency matrix is created using the definition above
* - Second, the graph vertices (buffers) are colored using the adjacency matrix
* Note: should be called before the BufferReset() pass to have correct offsets
* @ingroup snippets
*/
class BufferIdentification: public LinearIRTransformation {
public:
OPENVINO_RTTI("BufferIdentification", "LinearIRTransformation")
BufferIdentification() = default;

bool run(LoweredExprIR& linear_ir) override;

private:
using BufferSet = std::vector<LoweredExprPtr>;

std::vector<bool> create_adjacency_matrix(const LoweredExprIR& linear_ir, const BufferSet& buffers) const;
std::map<size_t, BufferSet> coloring(BufferSet& buffers, std::vector<bool>& adj);
};

} // namespace lowered
} // namespace pass
} // namespace snippets
} // namespace ngraph
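As an illustration of the coloring step described above (a standalone sketch, not the pass itself: the flat row-major encoding of the adjacency matrix and all names are assumptions), each vertex greedily takes the smallest color not used by an already-colored neighbor, and vertices that land in the same color group would receive the same Buffer ID:

#include <cstddef>
#include <cstdint>
#include <map>
#include <set>
#include <vector>

// Greedy coloring over a flat row-major adjacency matrix of n vertices (here: Buffers).
// Returns color -> vertex indices; all vertices in one group may share a Buffer ID.
std::map<size_t, std::vector<size_t>> greedy_coloring(const std::vector<bool>& adj, size_t n) {
    std::vector<size_t> color(n, SIZE_MAX);   // SIZE_MAX == not colored yet
    std::map<size_t, std::vector<size_t>> groups;
    for (size_t v = 0; v < n; ++v) {
        std::set<size_t> used;                // colors taken by adjacent, already-colored vertices
        for (size_t u = 0; u < n; ++u)
            if (adj[v * n + u] && color[u] != SIZE_MAX)
                used.insert(color[u]);
        size_t c = 0;
        while (used.count(c))                 // smallest free color
            ++c;
        color[v] = c;
        groups[c].push_back(v);
    }
    return groups;
}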
38 changes: 38 additions & 0 deletions src/common/snippets/include/snippets/pass/lowered/buffer_reset.hpp
@@ -0,0 +1,38 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "linear_IR_transformation.hpp"

namespace ngraph {
namespace snippets {
namespace pass {
namespace lowered {

/**
* @interface BufferReset
* @brief The pass fuses (resets) ptr increments and finalization offsets for Loop ports
* that use the same Buffers (with the same ID) to avoid double ptr shifts
* Note: Buffers always employ in-place logic by default. It means that if a loop has both
* an input and an output connected to Buffers, the corresponding register should nevertheless be
* incremented only once (because when the input reg is incremented, the output is incremented automatically).
* This condition should be removed when Buffers stop being in-place by default.
* @ingroup snippets
*/
class BufferReset: public LinearIRTransformation {
public:
OPENVINO_RTTI("BufferReset", "LinearIRTransformation")
BufferReset() = default;

bool run(LoweredExprIR& linear_ir) override;

private:
bool reuse_buffer_increments(const LoweredExprIR& linear_ir, const LoweredExprPtr& loop_end_expr);
};

} // namespace lowered
} // namespace pass
} // namespace snippets
} // namespace ngraph
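To make the "double ptr shift" problem concrete, here is a hedged sketch of the fusing idea (standalone code, not the actual pass API; the sentinel value and the parameter layout are assumptions): among all Loop ports bound to Buffers with the same ID, only the first port keeps its ptr increment and finalization offset, and the rest are zeroed so the shared pointer is shifted exactly once:

#include <cstddef>
#include <cstdint>
#include <map>
#include <vector>

constexpr size_t NOT_A_BUFFER = SIZE_MAX;  // sentinel for loop ports that are not backed by a Buffer

// buffer_id_per_port[i] is the ID of the Buffer behind the i-th loop port (or NOT_A_BUFFER).
// Only the first port of each Buffer ID keeps its shifts; later ports are reset to 0.
void fuse_same_buffer_increments(const std::vector<size_t>& buffer_id_per_port,
                                 std::vector<int64_t>& ptr_increments,
                                 std::vector<int64_t>& finalization_offsets) {
    std::map<size_t, size_t> first_port_of_id;  // Buffer ID -> first port index seen
    for (size_t port = 0; port < buffer_id_per_port.size(); ++port) {
        const size_t id = buffer_id_per_port[port];
        if (id == NOT_A_BUFFER)
            continue;
        if (!first_port_of_id.emplace(id, port).second) {  // this ID already owns an earlier port
            ptr_increments[port] = 0;
            finalization_offsets[port] = 0;
        }
    }
}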
@@ -31,11 +31,6 @@ class LoopInit : public LinearIRTransformation {
std::vector<int64_t> init_finalization_offsets(const std::vector<int64_t>& finalization_offsets, size_t work_amount) const;
std::vector<int64_t> init_element_type_sizes(const std::vector<LoweredExprPort>& loop_inputs,
const std::vector<LoweredExprPort>& loop_outputs);
void reuse_buffer_increments(std::vector<int64_t>& ptr_increments,
std::vector<int64_t>& finalization_offsets,
const LoweredExprIR& linear_ir,
const std::vector<LoweredExprPort>& loop_inputs,
const std::vector<LoweredExprPort>& loop_outputs);
};

} // namespace lowered
15 changes: 11 additions & 4 deletions src/common/snippets/src/generator.cpp
@@ -22,6 +22,8 @@
#include "snippets/pass/lowered/softmax_decomposition.hpp"
#include "snippets/pass/lowered/move_scalar_to_consumer.hpp"
#include "snippets/pass/lowered/move_result_out_of_loop.hpp"
#include "snippets/pass/lowered/buffer_reset.hpp"
#include "snippets/pass/lowered/buffer_identification.hpp"
#include "snippets/tensor_descriptor.hpp"

namespace ngraph {
@@ -40,7 +42,6 @@ Generator::LoweringResult Generator::generate(std::shared_ptr<ov::Model>& m, con
// Note: The pass LoopInit uses LoopInfo that contains entry and exit points of the corresponding Loop.
// To avoid the Loop information corruption, we should call the passes with Load/Store work
// (for example, LoadMoveBroadcastToBroadcastLoad()) after explicit Loop insertion (LoopInit())
const auto buffer_allocation_pass = std::make_shared<pass::lowered::BufferAllocation>();
pass::lowered::LinearIRTransformationPipeline common_pipeline;
common_pipeline.register_transformation<pass::lowered::LoopMarkup>(vector_size);
common_pipeline.register_transformation<pass::lowered::SoftmaxDecomposition>(vector_size);
@@ -52,9 +53,7 @@
common_pipeline.register_transformation<pass::lowered::LoopInit>();
common_pipeline.register_transformation<pass::lowered::MoveScalarToConsumer>();
common_pipeline.register_transformation<pass::lowered::LoadMoveBroadcastToBroadcastLoad>();
common_pipeline.register_transformation<pass::lowered::PropagateLayout>();
common_pipeline.register_transformation(buffer_allocation_pass);
common_pipeline.register_transformation<pass::lowered::CleanupLoopOffsets>();
common_pipeline.register_transformation<pass::lowered::PropagateLayout>(); // or should be in final?
common_pipeline.run(linear_ir);

pass::lowered::LinearIRTransformationPipeline target_pipeline = target_specific_transformations();
Expand All @@ -64,7 +63,15 @@ Generator::LoweringResult Generator::generate(std::shared_ptr<ov::Model>& m, con
return get_op_reg_type(op);
};

const auto buffer_allocation_pass = std::make_shared<pass::lowered::BufferAllocation>();
pass::lowered::LinearIRTransformationPipeline buffer_pipeline;
buffer_pipeline.register_transformation<pass::lowered::BufferIdentification>();
buffer_pipeline.register_transformation<pass::lowered::BufferReset>();
buffer_pipeline.register_transformation(buffer_allocation_pass);
buffer_pipeline.run(linear_ir);

pass::lowered::LinearIRTransformationPipeline final_pipeline;
final_pipeline.register_transformation<pass::lowered::CleanupLoopOffsets>();
final_pipeline.register_transformation<pass::lowered::AssignRegisters>(reg_type_mapper);
final_pipeline.register_transformation<pass::lowered::InsertTailLoop>();
final_pipeline.run(linear_ir);
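For reference, a minimal sketch of how another lowered pass would plug into one of these pipelines, following the pattern of the new headers above (the pass name, its body, and the include path are illustrative):

#include "snippets/pass/lowered/linear_IR_transformation.hpp"

namespace ngraph {
namespace snippets {
namespace pass {
namespace lowered {

// Illustrative no-op pass showing the required interface.
class DumpLinearIR : public LinearIRTransformation {
public:
    OPENVINO_RTTI("DumpLinearIR", "LinearIRTransformation")
    bool run(LoweredExprIR& linear_ir) override {
        // Inspect or mutate linear_ir here; return true if the IR was modified.
        return false;
    }
};

}  // namespace lowered
}  // namespace pass
}  // namespace snippets
}  // namespace ngraph

// Registration next to the existing passes in Generator::generate():
//   buffer_pipeline.register_transformation<pass::lowered::DumpLinearIR>();
//   buffer_pipeline.run(linear_ir);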
17 changes: 9 additions & 8 deletions src/common/snippets/src/op/buffer.cpp
@@ -16,18 +16,22 @@ auto normalize_rank(int32_t allocation_rank, const size_t shape_rank) -> int32_t
return allocation_rank < 0 ? allocation_rank + static_cast<int32_t>(shape_rank) : allocation_rank;
}

snippets::op::Buffer::Buffer(const ov::Shape& shape)
: Op(), m_type(Type::NewMemory), m_shape(shape), m_offset(0) {
snippets::op::Buffer::Buffer(const ov::Shape& shape, size_t id)
: Op(), m_type(Type::NewMemory), m_shape(shape), m_offset(0), m_id(id) {
constructor_validate_and_infer_types();
}

snippets::op::Buffer::Buffer(const ov::Output<ov::Node>& arg, const ov::Shape& shape)
: Op({arg}), m_type(Type::IntermediateMemory), m_shape(shape), m_offset(0) {
snippets::op::Buffer::Buffer(const ov::Output<ov::Node>& arg, const ov::Shape& shape, size_t id)
: Op({arg}), m_type(Type::IntermediateMemory), m_shape(shape), m_offset(0), m_id(id) {
constructor_validate_and_infer_types();
}

snippets::op::Buffer::Buffer(const ov::Output<ov::Node>& arg, int32_t allocation_rank)
: Op({arg}), m_type(Type::IntermediateMemory), m_offset(0) {
snippets::op::Buffer::Buffer(const ov::Output<ov::Node>& arg, int32_t allocation_rank, size_t id)
: Op({arg}), m_type(Type::IntermediateMemory), m_offset(0), m_id(id) {
const auto pshape = arg.get_partial_shape();
OPENVINO_ASSERT(pshape.is_static(), "Buffer supports only static input shape");
const auto shape = pshape.get_shape();
@@ -41,6 +41,7 @@ bool snippets::op::Buffer::visit_attributes(AttributeVisitor& visitor) {
INTERNAL_OP_SCOPE(Buffer_visit_attributes);
visitor.on_attribute("allocation_shape", m_shape);
visitor.on_attribute("offset", m_offset);
visitor.on_attribute("id", m_id);
return true;
}

@@ -68,9 +69,9 @@ std::shared_ptr<Node> snippets::op::Buffer::clone_with_new_inputs(const OutputVe
check_new_args_count(this, new_args);
std::shared_ptr<op::Buffer> new_buffer = nullptr;
if (m_type == Type::NewMemory) {
new_buffer = std::make_shared<Buffer>(m_shape);
new_buffer = std::make_shared<Buffer>(m_shape, m_id);
} else if (m_type == Type::IntermediateMemory) {
new_buffer = std::make_shared<Buffer>(new_args.at(0), m_shape);
new_buffer = std::make_shared<Buffer>(new_args.at(0), m_shape, m_id);
} else {
throw ov::Exception("Buffer supports only the following types: NewMemory and IntermediateMemory");
}