Skip to content

Commit

Permalink
Updated Buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
a-sidorova committed Jan 12, 2023
1 parent d9503fd commit 42e7c79
Show file tree
Hide file tree
Showing 17 changed files with 187 additions and 78 deletions.
53 changes: 44 additions & 9 deletions src/common/snippets/include/snippets/op/buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ namespace op {
/**
* @interface Buffer
* @brief The operation is for intermediate data storage
* TODO
* - m_allocation_rank - rank of shape for memory allocation: shape[shape_rank - normalize(m_allocation_rank) : shape_rank].
* It's needed to allocate needed memory size that depends on Tile rank, for example.
* Default value is -1 (full shape)
Expand All @@ -29,24 +30,58 @@ class Buffer : public ngraph::op::Op {
public:
OPENVINO_OP("Buffer", "SnippetsOpset");

Buffer(const Output<Node>& x, const int32_t allocation_rank = -1);
Buffer(const ov::Shape shape, const ov::element::Type element_type, int32_t allocation_rank = -1);
size_t get_byte_size() const;
virtual ov::PartialShape get_allocation_shape() const = 0;

protected:
Buffer() = default;
};

int32_t get_allocation_rank() const { return m_allocation_rank; }
void set_allocation_rank(int32_t rank) { m_allocation_rank = rank; }
/**
* @interface AllocationBuffer
* @brief The operation is for allocation new empty memory
* TODO
* @ingroup snippets
*/
class AllocationBuffer : public Buffer {
public:
OPENVINO_OP("AllocationBuffer", "SnippetsOpset", Buffer);

size_t get_byte_size() const;
AllocationBuffer() = default;
AllocationBuffer(const ov::Output<ov::Node>& shape, const ov::element::Type element_type);

ov::PartialShape get_allocation_shape() const override;

bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;

private:
int32_t m_allocation_rank = -1;
ov::Shape m_static_shape;
protected:
ov::element::Type m_element_type;
bool m_is_single = false;
};

/**
 * @interface IntermediateBuffer
 * @brief The operation is for intermediate data storage
 *        The buffer can be built either from the data output alone (the allocation shape is
 *        then derived from that input) or from the data output plus an explicit shape input.
 * @ingroup snippets
 */
class IntermediateBuffer : public Buffer {
public:
OPENVINO_OP("IntermediateBuffer", "SnippetsOpset", Buffer);

IntermediateBuffer() = default;
// Buffer over the data produced by `x`; the allocation shape is taken from `x` itself.
IntermediateBuffer(const ov::Output<ov::Node>& x);
// Buffer over the data produced by `x` with an explicitly provided allocation `shape` input.
IntermediateBuffer(const ov::Output<ov::Node>& x, const ov::Output<ov::Node>& shape);

ov::PartialShape get_allocation_shape() const override;

// The op itself carries no serializable attributes.
bool visit_attributes(AttributeVisitor& visitor) override { return true; }
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;

// Helpers that build the shape input as an i32 Constant.
// They return nullptr when `shape` is dynamic (rank-dynamic for the two-argument overload),
// since the exact dimensions are not known in that case.
static std::shared_ptr<ov::Node> create_shape_constant(const ov::PartialShape& shape, size_t allocation_rank);
static std::shared_ptr<ov::Node> create_shape_constant(const ov::PartialShape& shape);
};

} // namespace op
Expand Down
4 changes: 4 additions & 0 deletions src/common/snippets/include/snippets/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ void set_output_layout(const ov::Output<Node>& port, const std::vector<size_t>&
inline ov::Dimension get_inner_dim(const ov::PartialShape &shape) { return *(shape.rbegin()); }
inline ov::Dimension get_outer_dim(const ov::PartialShape &shape) { return *(shape.rbegin() + 1); }

// Normalizes a possibly-negative allocation rank against the given shape rank:
// negative values count back from the full rank, so the default -1 maps to
// shape_rank (i.e. "allocate the full shape"). Non-negative values pass through.
// The explicit cast keeps the addition in signed arithmetic instead of silently
// promoting `allocation_rank` to size_t and narrowing the result back to int32_t.
inline auto normalize_rank(int32_t allocation_rank, const size_t shape_rank) -> int32_t {
    return allocation_rank < 0 ? allocation_rank + static_cast<int32_t>(shape_rank) + 1 : allocation_rank;
}

template <typename T, typename P>
constexpr bool one_of(T val, P item) { return val == item; }

Expand Down
121 changes: 81 additions & 40 deletions src/common/snippets/src/op/buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

#include "snippets/op/buffer.hpp"
#include "snippets/snippets_isa.hpp"
#include "snippets/utils.hpp"

#include <ngraph/runtime/host_tensor.hpp>

using namespace std;
using namespace ngraph;
Expand All @@ -16,62 +16,103 @@ auto normalize_rank(int32_t allocation_rank, const size_t shape_rank) -> int32_t
return allocation_rank < 0 ? allocation_rank + static_cast<int32_t>(shape_rank) : allocation_rank;
}

snippets::op::Buffer::Buffer(const Output<Node>& x, const int32_t allocation_rank)
: Op({x}), m_allocation_rank(allocation_rank), m_is_single(false) {
constructor_validate_and_infer_types();
// Returns the number of bytes this buffer needs: the product of the allocation-shape
// dimensions multiplied by the byte width of the output element type.
size_t ngraph::snippets::op::Buffer::get_byte_size() const {
    const auto allocation_pshape = get_allocation_shape();
    // TODO: Add support of dynamism
    NGRAPH_CHECK(allocation_pshape.is_static(), "Buffer should have static shapes for memory allocation");
    return ngraph::shape_size(allocation_pshape.get_shape()) * get_element_type().size();
}

snippets::op::Buffer::Buffer(const ov::Shape shape, const ov::element::Type element_type, const int32_t allocation_rank)
: Op(), m_static_shape(shape), m_element_type(element_type), m_allocation_rank(allocation_rank), m_is_single(true) {
// Constructs an AllocationBuffer from the node that produces the allocation shape
// and the element type of the memory to be allocated.
snippets::op::AllocationBuffer::AllocationBuffer(const Output<Node>& shape, const ov::element::Type element_type)
: Buffer(), m_element_type(element_type) {
// The shape-producing node is the single input; it must be set before validation runs.
set_arguments({shape});
constructor_validate_and_infer_types();
}

bool snippets::op::Buffer::visit_attributes(AttributeVisitor& visitor) {
INTERNAL_OP_SCOPE(Buffer_visit_attributes);
visitor.on_attribute("allocation_rank", m_allocation_rank);
if (m_is_single) {
visitor.on_attribute("shape", m_static_shape);
visitor.on_attribute("element_type", m_element_type);
}
// Serializes the only attribute of the op: the element type of the allocated memory.
bool snippets::op::AllocationBuffer::visit_attributes(AttributeVisitor& visitor) {
INTERNAL_OP_SCOPE(AllocationBuffer_visit_attributes);
visitor.on_attribute("element_type", m_element_type);
return true;
}

std::shared_ptr<Node> snippets::op::Buffer::clone_with_new_inputs(const OutputVector& new_args) const {
INTERNAL_OP_SCOPE(Buffer_clone_with_new_inputs);
std::shared_ptr<Node> snippets::op::AllocationBuffer::clone_with_new_inputs(const OutputVector& new_args) const {
INTERNAL_OP_SCOPE(AllocationBuffer_clone_with_new_inputs);
check_new_args_count(this, new_args);
if (m_is_single) {
return std::make_shared<Buffer>(m_static_shape, m_element_type, m_allocation_rank);
return std::make_shared<AllocationBuffer>(new_args.at(0), m_element_type);
}

void snippets::op::AllocationBuffer::validate_and_infer_types() {
INTERNAL_OP_SCOPE(AllocationBuffer_validate_and_infer_types);
// Output type comes from the stored element type; output shape is the allocation shape
// extracted from the shape input (dynamic when that input is not a Constant).
set_output_type(0, m_element_type, get_allocation_shape());
}

// Extracts the allocation shape from the shape input.
// When the input is not a Constant the shape cannot be evaluated statically,
// so a fully dynamic shape is reported instead.
ov::PartialShape ngraph::snippets::op::AllocationBuffer::get_allocation_shape() const {
    const auto shape_constant = ov::as_type_ptr<ngraph::op::v0::Constant>(get_input_node_shared_ptr(0));
    if (!shape_constant)
        return ov::PartialShape::dynamic();
    NGRAPH_CHECK(shape_constant->get_element_type() == ov::element::i32,
                 "The AllocationBuffer expects Constant with shape of I32 element type");
    const auto dims = shape_constant->cast_vector<int32_t>();
    NGRAPH_CHECK(!dims.empty(), "The AllocationBuffer got invalid shape Constant");
    return ov::PartialShape(ov::Shape(std::vector<size_t>(dims.begin(), dims.end())));
}

// Constructs an IntermediateBuffer that stores the data produced by `x`;
// the allocation shape is then derived from this data input.
snippets::op::IntermediateBuffer::IntermediateBuffer(const ov::Output<ov::Node>& x) : Buffer() {
set_arguments({x});
constructor_validate_and_infer_types();
}

return std::make_shared<Buffer>(new_args.at(0), m_allocation_rank);
// Constructs an IntermediateBuffer with an explicit allocation shape:
// `x` is the data to store, `shape` is the node producing the allocation shape.
snippets::op::IntermediateBuffer::IntermediateBuffer(const ov::Output<ov::Node>& x, const ov::Output<ov::Node>& shape) : Buffer() {
set_arguments({x, shape});
constructor_validate_and_infer_types();
}

void snippets::op::Buffer::validate_and_infer_types() {
INTERNAL_OP_SCOPE(Buffer_validate_and_infer_types);
ov::PartialShape output_shape;
ov::element::Type output_type;
if (m_is_single) {
output_shape = m_static_shape;
output_type = m_element_type;
} else {
output_shape = get_input_partial_shape(0);
output_type = get_input_element_type(0);
std::shared_ptr<Node> snippets::op::IntermediateBuffer::clone_with_new_inputs(const OutputVector& new_args) const {
INTERNAL_OP_SCOPE(IntermediateBuffer_clone_with_new_inputs);
check_new_args_count(this, new_args);
if (new_args.size() == 2) {
return std::make_shared<IntermediateBuffer>(new_args.at(0), new_args.at(1));
} else if (new_args.size() == 1) {
return std::make_shared<IntermediateBuffer>(new_args.at(0));
}

const auto shape_rank = output_shape.rank();
if (shape_rank.is_static()) {
const auto normalized_rank = normalize_rank(m_allocation_rank, shape_rank.get_length());
NGRAPH_CHECK(normalized_rank >= 0 && normalized_rank <= shape_rank.get_length(),
"Buffer has incorrect allocation rank: " + std::to_string(m_allocation_rank));
throw ngraph_error("The IntermediateBuffer op got invalid input count");
}

void snippets::op::IntermediateBuffer::validate_and_infer_types() {
INTERNAL_OP_SCOPE(IntermediateBuffer_validate_and_infer_types);
// The buffer is transparent for shape/type propagation: output mirrors the data input.
set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
}

ov::PartialShape ngraph::snippets::op::IntermediateBuffer::get_allocation_shape() const {
if (get_input_size() == 1) {
return get_input_partial_shape(0);
}

set_output_type(0, output_type, output_shape);
const auto shape_constant = ov::as_type_ptr<ngraph::op::v0::Constant>(get_input_node_shared_ptr(1));
if (shape_constant) {
NGRAPH_CHECK(shape_constant->get_element_type() == ov::element::i32,
"The AllocationBuffer expects Constant with shape of I32 element type");
const auto dims = shape_constant->cast_vector<int32_t>();
NGRAPH_CHECK(!dims.empty(), "The AllocationBuffer got invalid shape Constant");
return ov::PartialShape(ov::Shape(std::vector<size_t>(dims.begin(), dims.end())));
}
return ov::PartialShape::dynamic();
}

size_t ngraph::snippets::op::Buffer::get_byte_size() const {
const auto pshape = get_output_partial_shape(0);
NGRAPH_CHECK(pshape.is_static(), "Buffer should have static shapes for memory allocation");
const auto shape = pshape.get_shape();
const auto normalized_rank = normalize_rank(m_allocation_rank, shape.size());
return ngraph::shape_size(shape.rbegin(), shape.rbegin() + normalized_rank + 1) * get_element_type().size();
// Creates the shape Constant covering the trailing `allocation_rank` dimensions of `shape`
// (after rank normalization). Returns nullptr when the rank of `shape` is dynamic,
// since the dimensions to take cannot be determined.
std::shared_ptr<ov::Node> ngraph::snippets::op::IntermediateBuffer::create_shape_constant(const ov::PartialShape& shape, size_t allocation_rank) {
    if (shape.rank().is_dynamic())
        return nullptr;
    // The local is renamed so it no longer shadows the utils::normalize_rank function it
    // calls, and the size_t -> int32_t narrowing of `allocation_rank` is made explicit.
    const auto normalized_rank = utils::normalize_rank(static_cast<int32_t>(allocation_rank), shape.size());
    const auto offset = shape.size() - normalized_rank;
    return create_shape_constant(ov::PartialShape(std::vector<ov::Dimension>{shape.begin() + offset, shape.end()}));
}

// Creates an i32 Constant holding all dimensions of `shape`.
// Returns nullptr when `shape` is dynamic, since its exact dimensions are not known.
std::shared_ptr<ov::Node> ngraph::snippets::op::IntermediateBuffer::create_shape_constant(const ov::PartialShape& shape) {
if (shape.is_dynamic())
return nullptr;
return std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{shape.size()}, shape.get_shape());
}
7 changes: 7 additions & 0 deletions src/common/snippets/src/op/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,13 @@ void snippets::op::Subgraph::initialize_buffer_scratchpad_size() {
for (const auto& op : ops) {
if (const auto buffer = ov::as_type_ptr<ngraph::snippets::op::Buffer>(op)) {
const auto buffer_size = buffer->get_byte_size();
if (ov::is_type<op::IntermediateBuffer>(op)) {
if (op->get_input_size() == 2) {
op->set_arguments({op->get_input_source_output(0)});
}
} else if (ov::is_type<op::AllocationBuffer>(op)) {
op->set_arguments(ov::OutputVector{});
}
// We need to allocate memory for first buffer at least
if (m_buffer_scratchpad == 0) {
m_buffer_scratchpad += buffer_size;
Expand Down
8 changes: 5 additions & 3 deletions src/common/snippets/src/pass/assign_registers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,12 @@ bool ngraph::snippets::pass::AssignRegisters::run_on_model(const std::shared_ptr
// here we use the fact that Result input & output tensors are identical by construction
manually_assigned_gprs[op->output(0).get_tensor_ptr()] =
static_cast<Reg>(f->get_result_index(result) + num_parameters);
} else if (const auto& buffer = ov::as_type_ptr<op::Buffer>(op)) {
} else if (ov::is_type<op::Buffer>(op)) {
// All buffers have one common data pointer
manually_assigned_gprs[op->input(0).get_tensor_ptr()] =
static_cast<Reg>(num_results + num_parameters);
if (ov::is_type<op::IntermediateBuffer>(op)) {
manually_assigned_gprs[op->input(0).get_tensor_ptr()] =
static_cast<Reg>(num_results + num_parameters);
}
manually_assigned_gprs[op->output(0).get_tensor_ptr()] =
static_cast<Reg>(num_results + num_parameters);
} else if (ov::is_type<op::HorizonMax>(op) || ov::is_type<op::HorizonSum>(op)) {
Expand Down
8 changes: 6 additions & 2 deletions src/common/snippets/src/pass/insert_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ ngraph::snippets::pass::InsertBuffer::InsertBuffer(const int32_t allocation_rank
if (!ov::is_type<ngraph::snippets::op::Buffer>(input_node) &&
!ov::is_type<ngraph::op::v0::Parameter>(input_node) &&
!ov::is_type<ngraph::op::v0::Constant>(input_node)) {
const auto buffer = std::make_shared<ngraph::snippets::op::Buffer>(input_node, allocation_rank);
const auto constant_shape = op::IntermediateBuffer::create_shape_constant(input.get_partial_shape(), allocation_rank);
const auto buffer = constant_shape ? std::make_shared<op::IntermediateBuffer>(input_node, constant_shape) :
std::make_shared<op::IntermediateBuffer>(input_node);
root->set_argument(input.get_index(), buffer);
rewritten |= true;
}
Expand Down Expand Up @@ -67,7 +69,9 @@ ngraph::snippets::pass::InsertBuffer::InsertBuffer(const int32_t allocation_rank
}
}

const auto buffer = std::make_shared<ngraph::snippets::op::Buffer>(output, allocation_rank);
const auto constant_shape = op::IntermediateBuffer::create_shape_constant(output.get_partial_shape(), allocation_rank);
const auto buffer = constant_shape ? std::make_shared<op::IntermediateBuffer>(output, constant_shape) :
std::make_shared<op::IntermediateBuffer>(output);
for (const auto& consumer : output.get_target_inputs()) {
const auto output_node = consumer.get_node()->shared_from_this();
if (output_node != buffer &&
Expand Down
7 changes: 6 additions & 1 deletion src/common/snippets/src/pass/insert_loops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,10 @@ void insert_loops_explicitly(const ov::NodeVector& ops, const size_t vector_size
// on LoopBegin to guarantee that the constants are executed inside the Loop.
for (const auto& n : body) {
if (auto c = std::dynamic_pointer_cast<ov::op::v0::Constant>(n)) {
c->add_control_dependency(inner_loop_begin);
// Except Constant Shape for Buffers
if (!ov::is_type<op::Buffer>(n->get_output_target_inputs(0).begin()->get_node())) {
c->add_control_dependency(inner_loop_begin);
}
}
}

Expand All @@ -155,6 +158,8 @@ void insert_loops_explicitly(const ov::NodeVector& ops, const size_t vector_size
ov::is_type<ov::op::v0::Result>(op) ||
ov::is_type<op::Buffer>(op))
return true;
if (ov::is_type<ov::op::v0::Constant>(op) && ov::is_type<op::Buffer>(op->get_output_target_inputs(0).begin()->get_node()))
return true;
auto& rt = op->get_rt_info();
auto outside_rt = rt.find("outside_loop");
bool is_outside = false;
Expand Down
11 changes: 5 additions & 6 deletions src/common/snippets/src/pass/loop_fusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ auto can_be_merged(const std::shared_ptr<ngraph::snippets::op::LoopEnd>& loop_en

auto get_buffer_and_loop_end(const std::shared_ptr<ngraph::snippets::op::LoopBegin>& loop_begin_down,
std::shared_ptr<ngraph::snippets::op::LoopEnd>& loop_end_up,
std::shared_ptr<ngraph::snippets::op::Buffer>& buffer) -> bool {
std::shared_ptr<ngraph::snippets::op::IntermediateBuffer>& buffer) -> bool {
size_t fusion_input_num = 0;
for (const auto& parent : loop_begin_down->input_values()) {
const auto parent_shared = parent.get_node_shared_ptr();
Expand All @@ -69,10 +69,9 @@ auto get_buffer_and_loop_end(const std::shared_ptr<ngraph::snippets::op::LoopBeg
continue;

loop_end_up = ngraph::as_type_ptr<ngraph::snippets::op::LoopEnd>(parent_shared);
buffer = ov::as_type_ptr<ngraph::snippets::op::Buffer>(parent_shared);
buffer = ov::as_type_ptr<ngraph::snippets::op::IntermediateBuffer>(parent_shared);
if (buffer) {
if (buffer->output(0).get_target_inputs().size() == 0 ||
buffer->get_input_size() != 1 ||
buffer->get_input_source_output(0).get_target_inputs().size() != 1)
return false;

Expand All @@ -86,7 +85,7 @@ auto get_buffer_and_loop_end(const std::shared_ptr<ngraph::snippets::op::LoopBeg
}

auto collect_loop_inputs(const std::shared_ptr<ngraph::snippets::op::LoopBegin>& loop_begin,
const std::shared_ptr<ngraph::snippets::op::Buffer>& buffer,
const std::shared_ptr<ngraph::snippets::op::IntermediateBuffer>& buffer,
std::vector<Edge>& new_loop_inputs,
std::vector<int64_t>& new_ptr_increments,
std::vector<int64_t>& new_finalization_offsets) -> void {
Expand All @@ -109,7 +108,7 @@ auto collect_loop_inputs(const std::shared_ptr<ngraph::snippets::op::LoopBegin>&
}

auto collect_loop_outputs(const std::shared_ptr<ngraph::snippets::op::LoopEnd>& loop_end,
const std::shared_ptr<ngraph::snippets::op::Buffer>& buffer,
const std::shared_ptr<ngraph::snippets::op::IntermediateBuffer>& buffer,
std::vector<Edge>& new_loop_outputs,
std::vector<int64_t>& new_ptr_increments,
std::vector<int64_t>& new_finalization_offsets,
Expand Down Expand Up @@ -162,7 +161,7 @@ bool ngraph::snippets::pass::LoopFusion::Merge(const std::shared_ptr<op::LoopBeg
}

std::shared_ptr<ngraph::snippets::op::LoopEnd> loop_end_up = nullptr;
std::shared_ptr<ngraph::snippets::op::Buffer> buffer = nullptr;
std::shared_ptr<ngraph::snippets::op::IntermediateBuffer> buffer = nullptr;
// Initialize the corresponding upper LoopEnd and Buffer
if (!get_buffer_and_loop_end(loop_begin_down, loop_end_up, buffer)) {
return false;
Expand Down
Loading

0 comments on commit 42e7c79

Please sign in to comment.