Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Snippets] Added Softmax support #57

Merged
merged 12 commits into from
Dec 21, 2022
10 changes: 10 additions & 0 deletions src/common/snippets/include/snippets/generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,20 @@ class Generator {
public:
// True if the lowered Emitters need to be accessed during runtime. Normally they're destroyed after code emission.
bool m_save_lowered_code = false;
// True if we can optimize tails for single evaluation during code generation.
// More details and optimization examples can be found in the generate() method.
// For example, tails with Buffer ops don't support single evaluation optimizations:
// because of that, we should always reset the memory pointer using finalization offsets
// after storing data to the Buffer.
bool m_optimize_single_evaluation = true;
// True if we should check runtime info for nodes to call specific needed transformations
bool m_need_fill_tail_register = false;
};
/**
* @brief virtual method any specific implementation should implement
* @param m model in canonical form for table-based code generation
* @param config config with transformation and optimization parameters
* @param compile_params parameters for generated code
* @return pointer to generated code
*/
code generate(std::shared_ptr<ov::Model>& m, const GeneratorConfig& config, const void* compile_params = nullptr);
Expand Down
16 changes: 15 additions & 1 deletion src/common/snippets/include/snippets/op/brgemm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,27 @@ namespace op {
class Brgemm : public ngraph::op::v0::MatMul {
public:
    OPENVINO_OP("Brgemm", "SnippetsOpset", ngraph::op::v0::MatMul);

    // Single constructor: all three offsets default to 0, so two-argument construction
    // Brgemm(A, B) is still accepted. A separate (A, B) overload must not be declared
    // next to this one, otherwise two-argument calls become ambiguous.
    Brgemm(const Output<Node>& A, const Output<Node>& B, const size_t offset_a = 0lu, const size_t offset_b = 0lu, const size_t offset_c = 0lu);
    Brgemm() = default;

    bool visit_attributes(AttributeVisitor& visitor) override;
    void validate_and_infer_types() override;
    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

    // This op reports that it provides no evaluate() implementation.
    bool has_evaluate() const override { return false; }

    // Accessors for the per-port offsets (first input, second input, output).
    size_t get_offset_a() const { return m_offset_a; }
    size_t get_offset_b() const { return m_offset_b; }
    size_t get_offset_c() const { return m_offset_c; }

    void set_offset_a(const size_t offset) { m_offset_a = offset; }
    void set_offset_b(const size_t offset) { m_offset_b = offset; }
    void set_offset_c(const size_t offset) { m_offset_c = offset; }

private:
    size_t m_offset_a = 0lu;  // offset for first input
    size_t m_offset_b = 0lu;  // offset for second input
    size_t m_offset_c = 0lu;  // offset for output
};

} // namespace op
Expand Down
10 changes: 7 additions & 3 deletions src/common/snippets/include/snippets/op/broadcastload.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,18 @@ class BroadcastLoad : public BroadcastMove {
public:
OPENVINO_OP("BroadcastLoad", "SnippetsOpset", ngraph::snippets::op::BroadcastMove);

BroadcastLoad(const Output<Node>& x, ov::PartialShape output_shape);
BroadcastLoad(const Output<Node>& x, ov::PartialShape output_shape, size_t offset = 0lu);
BroadcastLoad() = default;

bool visit_attributes(AttributeVisitor& visitor) override;
size_t get_offset() const { return m_offset; }
void set_offset(const size_t offset) { m_offset = offset; }

bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

void validate_and_infer_types() override;

private:
size_t m_offset = 0lu;
};

} // namespace op
Expand Down
56 changes: 56 additions & 0 deletions src/common/snippets/include/snippets/op/buffer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/op/op.hpp>

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface Buffer
* @brief The operation is for intermediate data storage
* - m_offset - offset from common Buffer allocated memory.
* Default value is 0.
* - m_allocation_rank - rank of shape for memory allocation: shape[shape_rank - m_allocation_rank : shape_rank].
* It's needed to allocate needed memory size that depends on Tile rank, for example.
* Default value is -1 (full shape)
* Notes:
* - All buffers in a graph have the same memory pointer. So if we have a few buffers,
* each buffer should have its own offset for common memory
* - Buffer should be a single consumer for operation output port
* @ingroup snippets
*/
class Buffer : public ngraph::op::Op {
public:
OPENVINO_OP("Buffer", "SnippetsOpset");
BWDCMP_RTTI_DECLARATION;

Buffer(const Output<Node>& x, const int32_t allocation_rank = -1);
IvanNovoselov marked this conversation as resolved.
Show resolved Hide resolved
Buffer() = default;

size_t get_offset() const { return m_offset; }
// NOTE: If a buffer offset is changed, we should update it
// in the corresponding memory access operations as well (Load, Store, MatMul)
void set_offset(const size_t offset);

int32_t get_allocation_rank() const { return m_allocation_rank; }
void set_allocation_rank(int32_t rank) { m_allocation_rank = rank; }

size_t get_byte_size() const;

bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;

private:
size_t m_offset = 0lu;
IvanNovoselov marked this conversation as resolved.
Show resolved Hide resolved
int32_t m_allocation_rank = -1;
IvanNovoselov marked this conversation as resolved.
Show resolved Hide resolved
};

} // namespace op
} // namespace snippets
} // namespace ngraph
47 changes: 47 additions & 0 deletions src/common/snippets/include/snippets/op/fill.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/op/op.hpp>

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface Fill
* @brief Generated in Tail Loop vector representation in code generation step for cases when we should
* refill registers by special values.
* For example, for cases with ReduceMax or ReduceSum in Softmax
* Where:
* - offset - starting element index there filling is performed while begging of input data is untouched
IvanNovoselov marked this conversation as resolved.
Show resolved Hide resolved
* - fill_value - hexadecimal filling value
dmitry-gorokhov marked this conversation as resolved.
Show resolved Hide resolved
* @ingroup snippets
*/
class Fill : public ngraph::op::Op {
public:
OPENVINO_OP("Fill", "SnippetsOpset");

Fill(const Output<Node>& x, const size_t offset, const uint32_t fill_value = 0x0);
IvanNovoselov marked this conversation as resolved.
Show resolved Hide resolved
Fill() = default;

size_t get_offset() const { return m_offset; }
uint32_t get_fill_value() const { return m_fill_value; }

void set_offset(const size_t offset) { m_offset = offset; }
void set_fill_value(const uint32_t fill_value) { m_fill_value = fill_value; }

bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;

protected:
size_t m_offset = 0lu;
uint32_t m_fill_value = 0x0;
};

} // namespace op
} // namespace snippets
} // namespace ngraph
32 changes: 32 additions & 0 deletions src/common/snippets/include/snippets/op/horizon_max.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "ngraph/op/op.hpp"

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface HorizonMax
* @brief The operation calculates a horizon maximum of a vector register
* @ingroup snippets
*/
class HorizonMax : public ngraph::op::Op {
public:
OPENVINO_OP("HorizonMax", "SnippetsOpset");

HorizonMax(const Output<Node>& x);
HorizonMax() = default;

bool visit_attributes(AttributeVisitor& visitor) override { return true;}
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;
};

} // namespace op
} // namespace snippets
} // namespace ngraph
32 changes: 32 additions & 0 deletions src/common/snippets/include/snippets/op/horizon_sum.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "ngraph/op/op.hpp"

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface HorizonSum
* @brief The operation calculates a horizon sum of a vector register
* @ingroup snippets
*/
class HorizonSum : public ngraph::op::Op {
public:
OPENVINO_OP("HorizonSum", "SnippetsOpset");

HorizonSum(const Output<Node>& x);
HorizonSum() = default;

bool visit_attributes(AttributeVisitor& visitor) override { return true;}
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;
};

} // namespace op
} // namespace snippets
} // namespace ngraph
10 changes: 5 additions & 5 deletions src/common/snippets/include/snippets/op/load.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ namespace op {

/**
* @interface Load
* @brief Generated by Canonicalization step where explicit instructions should be emitted for data loading
* where number of elements to load is determined by "count"
* Default value is "1" - to load one element
* @brief Generated during Lowering stage (convert_to_snippets_dialect) where explicit instructions should be emitted for data loading
* where number of elements to load is determined by "count" (Default value is "1" - to load one element)
* and memory offset for loading is determined by "offset" (Default value is "0" - to load starting from the first element)
* @ingroup snippets
*/
class Load : public MemoryAccess {
public:
OPENVINO_OP("Load", "SnippetsOpset");

Load(const Output<Node>& x, const size_t count = 1lu);
Load(const Output<Node>& x, const size_t count = 1lu, const size_t offset = 0lu);
Load() = default;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
Expand All @@ -38,7 +38,7 @@ class Load : public MemoryAccess {
class LoadReshape : public Load {
public:
OPENVINO_OP("LoadReshape", "SnippetsOpset");
LoadReshape(const Output<Node>& x, size_t count = 1lu, std::vector<size_t> order = {});
LoadReshape(const Output<Node>& x, size_t count = 1lu, const size_t offset = 0lu, std::vector<size_t> order = {});
bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;
Expand Down
7 changes: 5 additions & 2 deletions src/common/snippets/include/snippets/op/memory_access.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,17 @@ class MemoryAccess : public ngraph::op::Op {
OPENVINO_OP("MemoryAccess", "SnippetsOpset");

size_t get_count() const;
void set_count(size_t count);
size_t get_offset() const;
void set_count(const size_t count);
void set_offset(const size_t offset);
bool visit_attributes(AttributeVisitor& visitor) override;
void validate_and_infer_types() override;

protected:
explicit MemoryAccess(const Output<Node>& x, size_t count = 1lu);
explicit MemoryAccess(const Output<Node>& x, size_t count = 1lu, size_t offset = 0lu);
MemoryAccess() = default;
size_t m_count = 0lu;
size_t m_offset = 0lu;
};

} // namespace op
Expand Down
8 changes: 4 additions & 4 deletions src/common/snippets/include/snippets/op/store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ namespace op {

/**
* @interface Store
* @brief Generated by Canonicalization step where explicit instructions should be emitted for data storing
* where number of elements to store is determined by "count"
* Default value is "1" - to store one element
* @brief Generated during Lowering stage (convert_to_snippets_dialect) where explicit instructions should be emitted for data storing
* where number of elements to store is determined by "count" (Default value is "1" - to store one element)
* and memory offset for storing is determined by "offset" (Default value is "0" - to store starting at start memory ptr)
* @ingroup snippets
*/
class Store : public MemoryAccess {
public:
OPENVINO_OP("Store", "SnippetsOpset");

Store(const Output<Node>& x, const size_t count = 1lu);
Store(const Output<Node>& x, const size_t count = 1lu, const size_t offset = 0lu);
Store() = default;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
Expand Down
41 changes: 19 additions & 22 deletions src/common/snippets/include/snippets/op/subgraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,21 +89,13 @@ class Subgraph : public ngraph::op::Op {
return m_generator;
}

size_t get_non_scalar_constants_count() const {
return m_non_scalar_constants_count;
}

bool is_quantized() const {
return config.m_is_quantized;
}

bool has_type_relaxed_ops() const {
return config.m_has_type_relaxed_ops;
}

bool has_domain_sensitive_ops() const {
return config.m_has_domain_sensitive_ops;
}
// Return common memory size for all buffers in body. Should be called only after tileRank setting
size_t get_buffer_scratchpad_size() const;
size_t get_virtual_port_count() const { return m_virtual_port_count; }
bool is_buffer_needed() const { return m_buffer_needed; }
bool is_quantized() const { return config.m_is_quantized; }
bool has_type_relaxed_ops() const { return config.m_has_type_relaxed_ops; }
bool has_domain_sensitive_ops() const { return config.m_has_domain_sensitive_ops; }

snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, ngraph::pass::Manager& opt,
const void* compile_params = nullptr);
Expand All @@ -117,8 +109,9 @@ class Subgraph : public ngraph::op::Op {
// plugin sets generator for a snippet to some specific generator.
// it's going to be replaced with Jitters table later
void set_generator(std::shared_ptr<ngraph::snippets::Generator> generator);
void set_non_scalar_constants_count(const size_t count);
void set_tile_rank(size_t newRank) {tileRank = newRank;}
void set_virtual_port_count(const size_t count);
void set_buffer_needed(const bool need);

void print() const;
void print_statistics(bool verbose);
Expand All @@ -133,11 +126,14 @@ class Subgraph : public ngraph::op::Op {
void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes);
void convert_to_snippet_dialect();
void init_config();
// Count of potentional non-scalar Consants that will be created after some tranformations
// At the moment it's relevant only for FakeQuantize decomposition
// NOTE: To avoid overheads in each calcution of this count (for example, in validate_and_type_infer()),
// Count of Subgraph virtual ports:
// - Potential non-scalar Constants that will be created after some transformations (At the moment it's relevant only for FakeQuantize decomposition)
// Need Buffer op or not
// - Buffers. All Buffers are considered as one common additional virtual port. So we cannot summarize them as potential non-scalar Constants
// NOTE: To avoid overheads in each calculation of this count (for example, in validate_and_type_infer()),
// we should MANUALLY calculate it where it needed.
size_t m_non_scalar_constants_count = 0;
size_t m_virtual_port_count = 0;
bool m_buffer_needed = false;
Shape exec_domain = {};
std::shared_ptr<ov::Model> m_body = nullptr;
std::shared_ptr<ngraph::snippets::Generator> m_generator = nullptr;
Expand All @@ -162,11 +158,12 @@ class Subgraph : public ngraph::op::Op {
// True if Subgraph contains TypeRelaxed nodes -> for several streams in tp mode we should copy body using mutexes
// because TypeRelaxed::copy_with_new_inputs() isn't a thread-safe method
bool m_has_type_relaxed_ops = false;
// True if we should check runtime info for nodes to call specific needed transformations
bool m_need_fill_tail_register = false;
// True if body has operations that don't support plugin-side domain optimizations
// (e.g. Transpose, Softmax, MatMul in general don't support dimensions collapsing)
bool m_has_domain_sensitive_ops = false;
// True if we should go through whole body to check for where loops should be explicitly inserted.
// Otherwise, we insert Loops on Parameters and Results - for example, it's optimized out for subgraph with only Eltwise ops
bool m_explicit_loop_insertion = false;
} config;
};

Expand Down
Loading