Skip to content

Commit

Permalink
[Snippets] Added Softmax support (#57)
Browse files Browse the repository at this point in the history
  • Loading branch information
a-sidorova authored Dec 21, 2022
1 parent 6cb1bae commit f86fd91
Show file tree
Hide file tree
Showing 73 changed files with 3,176 additions and 284 deletions.
10 changes: 10 additions & 0 deletions src/common/snippets/include/snippets/generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,20 @@ class Generator {
public:
// True if the lowered Emitters need to be accessed during runtime. Normally they're destroyed after code emission.
bool m_save_lowered_code = false;
// True if we can optimize tails for single evaluation during code generation.
// More details and optimization examples can be found in the generate() method.
// For example, tails with Buffer ops don't support single evaluation optimizations:
// after storing data to a Buffer we must always reset the memory pointer
// using finalization offsets.
bool m_optimize_single_evaluation = true;
// True if we should check runtime info for nodes to call specific needed transformations
bool m_need_fill_tail_register = false;
};
/**
* @brief virtual method any specific implementation should implement
* @param m model in canonical form for table-based code generation
* @param config config with transformation and optimization parameters
* @param compile_params parameters for generated code
* @return pointer to generated code
*/
code generate(std::shared_ptr<ov::Model>& m, const GeneratorConfig& config, const void* compile_params = nullptr);
Expand Down
16 changes: 15 additions & 1 deletion src/common/snippets/include/snippets/op/brgemm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,27 @@ namespace op {
// Brgemm: snippets operation derived from ngraph::op::v0::MatMul that additionally
// stores three offsets - one for each of the two inputs and one for the output.
// NOTE(review): the units of the offsets (elements vs. bytes) are not visible in this header - confirm in the implementation.
class Brgemm : public ngraph::op::v0::MatMul {
public:
OPENVINO_OP("Brgemm", "SnippetsOpset", ngraph::op::v0::MatMul);
// NOTE(review): the two-argument constructor below and the five-argument one with all-default offsets
// are both viable for a call with two arguments. This looks like the removed and the added line of a diff
// rendered together - confirm that only one of them remains in the real header.
Brgemm(const Output<Node>& A, const Output<Node>& B);
// A, B - the two inputs; offset_a / offset_b / offset_c - offsets for the first input, the second input and the output.
Brgemm(const Output<Node>& A, const Output<Node>& B, const size_t offset_a = 0lu, const size_t offset_b = 0lu, const size_t offset_c = 0lu);
Brgemm() = default;

bool visit_attributes(AttributeVisitor& visitor) override;
void validate_and_infer_types() override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

// Always reports that no evaluate() implementation is available for this op.
bool has_evaluate() const override { return false; }

// Read access to the stored offsets.
size_t get_offset_a() const { return m_offset_a; }
size_t get_offset_b() const { return m_offset_b; }
size_t get_offset_c() const { return m_offset_c; }

// Write access to the stored offsets; the values are stored as given, no validation happens here.
void set_offset_a(const size_t offset) { m_offset_a = offset; }
void set_offset_b(const size_t offset) { m_offset_b = offset; }
void set_offset_c(const size_t offset) { m_offset_c = offset; }

private:
size_t m_offset_a = 0lu; // offset for first input
size_t m_offset_b = 0lu; // offset for second input
size_t m_offset_c = 0lu; // offset for output
};

} // namespace op
Expand Down
10 changes: 7 additions & 3 deletions src/common/snippets/include/snippets/op/broadcastload.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,18 @@ class BroadcastLoad : public BroadcastMove {
public:
OPENVINO_OP("BroadcastLoad", "SnippetsOpset", ngraph::snippets::op::BroadcastMove);

BroadcastLoad(const Output<Node>& x, ov::PartialShape output_shape);
BroadcastLoad(const Output<Node>& x, ov::PartialShape output_shape, size_t offset = 0lu);
BroadcastLoad() = default;

bool visit_attributes(AttributeVisitor& visitor) override;
size_t get_offset() const { return m_offset; }
void set_offset(const size_t offset) { m_offset = offset; }

bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

void validate_and_infer_types() override;

private:
size_t m_offset = 0lu;
};

} // namespace op
Expand Down
48 changes: 48 additions & 0 deletions src/common/snippets/include/snippets/op/buffer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/op/op.hpp>

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface Buffer
* @brief The operation is for intermediate data storage
* - m_allocation_rank - rank of shape for memory allocation: shape[shape_rank - normalize(m_allocation_rank) : shape_rank].
* It's needed to allocate needed memory size that depends on Tile rank, for example.
* Default value is -1 (full shape)
* Notes:
* - All buffers in a graph have the same memory pointer. So if we have a few buffers,
* each the corresponding MemoryAccess op for Buffer should have offset for common memory pointer of this Buffer
* - Buffer should be a single consumer for operation output port
* @ingroup snippets
*/
class Buffer : public ngraph::op::Op {
public:
OPENVINO_OP("Buffer", "SnippetsOpset");
BWDCMP_RTTI_DECLARATION;

Buffer(const Output<Node>& x, const int32_t allocation_rank = -1);
Buffer() = default;

int32_t get_allocation_rank() const { return m_allocation_rank; }
void set_allocation_rank(int32_t rank) { m_allocation_rank = rank; }

size_t get_byte_size() const;

bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;

private:
int32_t m_allocation_rank = -1;
};

} // namespace op
} // namespace snippets
} // namespace ngraph
47 changes: 47 additions & 0 deletions src/common/snippets/include/snippets/op/fill.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/op/op.hpp>

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface Fill
* @brief Generated in Tail Loop vector representation in code generation step for cases when we should
* refill registers by special values.
* For example, for cases with ReduceMax or ReduceSum in Softmax
* Where:
* - offset - starting element index where filling is performed while beginning of input data is untouched
* - fill_value - hexadecimal filling value
* @ingroup snippets
*/
class Fill : public ngraph::op::Op {
public:
OPENVINO_OP("Fill", "SnippetsOpset");

Fill(const Output<Node>& x, const size_t offset, const uint32_t fill_value = 0x0);
Fill() = default;

size_t get_offset() const { return m_offset; }
uint32_t get_fill_value() const { return m_fill_value; }

void set_offset(const size_t offset) { m_offset = offset; }
void set_fill_value(const uint32_t fill_value) { m_fill_value = fill_value; }

bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;

protected:
size_t m_offset = 0lu;
uint32_t m_fill_value = 0x0;
};

} // namespace op
} // namespace snippets
} // namespace ngraph
32 changes: 32 additions & 0 deletions src/common/snippets/include/snippets/op/horizon_max.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "ngraph/op/op.hpp"

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface HorizonMax
* @brief The operation calculates a horizon maximum of a vector register
* @ingroup snippets
*/
class HorizonMax : public ngraph::op::Op {
public:
OPENVINO_OP("HorizonMax", "SnippetsOpset");

HorizonMax(const Output<Node>& x);
HorizonMax() = default;

bool visit_attributes(AttributeVisitor& visitor) override { return true;}
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;
};

} // namespace op
} // namespace snippets
} // namespace ngraph
32 changes: 32 additions & 0 deletions src/common/snippets/include/snippets/op/horizon_sum.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "ngraph/op/op.hpp"

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface HorizonSum
* @brief The operation calculates a horizon sum of a vector register
* @ingroup snippets
*/
class HorizonSum : public ngraph::op::Op {
public:
OPENVINO_OP("HorizonSum", "SnippetsOpset");

HorizonSum(const Output<Node>& x);
HorizonSum() = default;

bool visit_attributes(AttributeVisitor& visitor) override { return true;}
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;
};

} // namespace op
} // namespace snippets
} // namespace ngraph
10 changes: 5 additions & 5 deletions src/common/snippets/include/snippets/op/load.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ namespace op {

/**
* @interface Load
* @brief Generated by Canonicalization step where explicit instructions should be emitted for data loading
* where number of elements to load is determined by "count"
* Default value is "1" - to load one element
* @brief Generated during Lowering stage (convert_to_snippets_dialect) where explicit instructions should be emitted for data loading
* where number of elements to load is determined by "count" (Default value is "1" - to load one element)
* and memory offset for loading is determined by "offset" (Default value is "0" - to load starting from the first element)
* @ingroup snippets
*/
class Load : public MemoryAccess {
public:
OPENVINO_OP("Load", "SnippetsOpset");

Load(const Output<Node>& x, const size_t count = 1lu);
Load(const Output<Node>& x, const size_t count = 1lu, const size_t offset = 0lu);
Load() = default;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
Expand All @@ -38,7 +38,7 @@ class Load : public MemoryAccess {
class LoadReshape : public Load {
public:
OPENVINO_OP("LoadReshape", "SnippetsOpset");
LoadReshape(const Output<Node>& x, size_t count = 1lu, std::vector<size_t> order = {});
LoadReshape(const Output<Node>& x, size_t count = 1lu, const size_t offset = 0lu, std::vector<size_t> order = {});
bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;
Expand Down
7 changes: 5 additions & 2 deletions src/common/snippets/include/snippets/op/memory_access.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,17 @@ class MemoryAccess : public ngraph::op::Op {
OPENVINO_OP("MemoryAccess", "SnippetsOpset");

size_t get_count() const;
void set_count(size_t count);
size_t get_offset() const;
void set_count(const size_t count);
void set_offset(const size_t offset);
bool visit_attributes(AttributeVisitor& visitor) override;
void validate_and_infer_types() override;

protected:
explicit MemoryAccess(const Output<Node>& x, size_t count = 1lu);
explicit MemoryAccess(const Output<Node>& x, size_t count = 1lu, size_t offset = 0lu);
MemoryAccess() = default;
size_t m_count = 0lu;
size_t m_offset = 0lu;
};

} // namespace op
Expand Down
8 changes: 4 additions & 4 deletions src/common/snippets/include/snippets/op/store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,16 @@ namespace op {

/**
* @interface Store
* @brief Generated by Canonicalization step where explicit instructions should be emitted for data storing
* where number of elements to store is determined by "count"
* Default value is "1" - to store one element
* @brief Generated during Lowering stage (convert_to_snippets_dialect) where explicit instructions should be emitted for data storing
* where number of elements to store is determined by "count" (Default value is "1" - to store one element)
* and memory offset for storing is determined by "offset" (Default value is "0" - to store starting at start memory ptr)
* @ingroup snippets
*/
class Store : public MemoryAccess {
public:
OPENVINO_OP("Store", "SnippetsOpset");

Store(const Output<Node>& x, const size_t count = 1lu);
Store(const Output<Node>& x, const size_t count = 1lu, const size_t offset = 0lu);
Store() = default;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
Expand Down
41 changes: 19 additions & 22 deletions src/common/snippets/include/snippets/op/subgraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,21 +89,13 @@ class Subgraph : public ngraph::op::Op {
return m_generator;
}

size_t get_non_scalar_constants_count() const {
return m_non_scalar_constants_count;
}

bool is_quantized() const {
return config.m_is_quantized;
}

bool has_type_relaxed_ops() const {
return config.m_has_type_relaxed_ops;
}

bool has_domain_sensitive_ops() const {
return config.m_has_domain_sensitive_ops;
}
// Return common memory size for all buffers in body. Should be called only after tileRank setting
size_t get_buffer_scratchpad_size() const;
size_t get_virtual_port_count() const { return m_virtual_port_count; }
bool is_buffer_needed() const { return m_buffer_needed; }
bool is_quantized() const { return config.m_is_quantized; }
bool has_type_relaxed_ops() const { return config.m_has_type_relaxed_ops; }
bool has_domain_sensitive_ops() const { return config.m_has_domain_sensitive_ops; }

snippets::Schedule generate(const BlockedShapeVector& output_shapes, const BlockedShapeVector& input_shapes, ngraph::pass::Manager& opt,
const void* compile_params = nullptr);
Expand All @@ -117,8 +109,9 @@ class Subgraph : public ngraph::op::Op {
// plugin sets generator for a snippet to some specific generator.
// it's going to be replaced with Jitters table later
void set_generator(std::shared_ptr<ngraph::snippets::Generator> generator);
void set_non_scalar_constants_count(const size_t count);
void set_tile_rank(size_t newRank) {tileRank = newRank;}
void set_virtual_port_count(const size_t count);
void set_buffer_needed(const bool need);

void print() const;
void print_statistics(bool verbose);
Expand All @@ -133,11 +126,14 @@ class Subgraph : public ngraph::op::Op {
void align_element_types(const BlockedShapeVector& outputShapes, const BlockedShapeVector& inputShapes);
void convert_to_snippet_dialect();
void init_config();
// Count of potential non-scalar Constants that will be created after some transformations
// At the moment it's relevant only for FakeQuantize decomposition
// NOTE: To avoid overheads in each calculation of this count (for example, in validate_and_type_infer()),
// Count of Subgraph virtual ports:
// - Potential non-scalar Constants that will be created after some transformations (At the moment it's relevant only for FakeQuantize decomposition)
// Need Buffer op or not
// - Buffers. All Buffers are considered as one common additional virtual port. So we cannot summarize them as potential non-scalar Constants
// NOTE: To avoid overheads in each calculation of this count (for example, in validate_and_type_infer()),
// we should MANUALLY calculate it where it needed.
size_t m_non_scalar_constants_count = 0;
size_t m_virtual_port_count = 0;
bool m_buffer_needed = false;
Shape exec_domain = {};
std::shared_ptr<ov::Model> m_body = nullptr;
std::shared_ptr<ngraph::snippets::Generator> m_generator = nullptr;
Expand All @@ -162,11 +158,12 @@ class Subgraph : public ngraph::op::Op {
// True if Subgraph contains TypeRelaxed nodes -> for several streams in tp mode we should copy body using mutexes
// because TypeRelaxed::copy_with_new_inputs() isn't a thread-safe method
bool m_has_type_relaxed_ops = false;
// True if we should check runtime info for nodes to call specific needed transformations
bool m_need_fill_tail_register = false;
// True if body has operations that don't support plugin-side domain optimizations
// (e.g. Transpose, Softmax, MatMul in general don't support dimensions collapsing)
bool m_has_domain_sensitive_ops = false;
// True if we should go through whole body to check for where loops should be explicitly inserted.
// Otherwise, we insert Loops on Parameters and Results - for example, it's optimized out for subgraph with only Eltwise ops
bool m_explicit_loop_insertion = false;
} config;
};

Expand Down
Loading

0 comments on commit f86fd91

Please sign in to comment.