Skip to content

Commit

Permalink
[Snippets][CPU] Added FP32 MHA tokenization support (#14327)
Browse files Browse the repository at this point in the history
  • Loading branch information
a-sidorova authored Jan 18, 2023
1 parent 6ec71c3 commit 6525dd4
Show file tree
Hide file tree
Showing 176 changed files with 9,989 additions and 1,628 deletions.
5 changes: 3 additions & 2 deletions src/common/snippets/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@ ie_faster_build(${TARGET_NAME}
)

target_link_libraries(${TARGET_NAME} PUBLIC openvino::runtime
PRIVATE ngraph_reference openvino::runtime::dev)
PRIVATE ngraph_reference ov_shape_inference openvino::runtime::dev)

target_include_directories(${TARGET_NAME} PUBLIC $<BUILD_INTERFACE:${PUBLIC_HEADERS_DIR}>)
target_include_directories(${TARGET_NAME} PUBLIC $<BUILD_INTERFACE:${PUBLIC_HEADERS_DIR}>
PRIVATE $<BUILD_INTERFACE:${SHAPE_INFER_INCLUDE_DIR}>)

add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})

Expand Down
30 changes: 26 additions & 4 deletions src/common/snippets/include/snippets/generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,15 @@ class Schedule {
* @param f can this kernel be linearided to 1D range
* @param p pointer to generated code
*/
Schedule(const Shape& ws, bool f, code p) : work_size(ws), is_flat(f), ptr(p) {}
Schedule(const ov::PartialShape& ws, bool f, code p) : work_size(ws), is_flat(f), ptr(p) {}
/**
* @brief Returns callable instanse of code pointer
*/
template<typename K> K get_callable() const {
return reinterpret_cast<K>(const_cast<unsigned char*>(ptr));
}

Shape work_size {};
ov::PartialShape work_size {};
bool is_flat {false};
code ptr {nullptr};
};
Expand All @@ -112,21 +112,43 @@ class Generator {
* @brief Default destructor
*/
virtual ~Generator() = default;
/**
* @interface GeneratorConfig
* @brief Allows to tweak the lowering process.
*/
class GeneratorConfig {
public:
// True if the lowered Emitters need to be accessed during runtime. Normally they're destroyed after code emission.
bool m_save_lowered_code = false;
// True if we can optimize tails for single evaluation during code generation
// More details with optimization examples you can see in generate() method
// For example, tails with Buffer ops doesn't support single evaluation optimizations
// because of that we should always reset memory pointer using finalization offsets
// after data storing to Buffer
bool m_optimize_single_evaluation = true;
// True if we should check runtime info for nodes to call specific needed transformations
bool m_need_fill_tail_register = false;
};
/**
* @brief virtual method any specific implementation should implement
* @param m model in canonical for for table-based code generation
* @param config config with transformation and optimization parameters
* @param compile_params parameters for generated code
* @return pointer to generated code
*/
code generate(std::shared_ptr<ov::Model>& m, const void* compile_params = nullptr) const;
code generate(std::shared_ptr<ov::Model>& m, const GeneratorConfig& config, const void* compile_params = nullptr);

/**
* @brief gets target machine
* @return pointer to constant target machine
*/
std::shared_ptr<const TargetMachine> get_target_machine() const { return target; }
std::shared_ptr<const TargetMachine> get_target_machine() const;

protected:
std::shared_ptr<TargetMachine> target;
// todo: we need to save lowered code to access compiled brgemm kernels on execution time (normally lowered is destructed by then).
// This is temporary solution, remove this when kernel caching is implemented. Don't forget to make generate const method.
std::vector<AllocatedEmitter> lowered_saved;
};

} // namespace snippets
Expand Down
47 changes: 47 additions & 0 deletions src/common/snippets/include/snippets/op/brgemm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "ngraph/op/op.hpp"
#include "ngraph/op/matmul.hpp"

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface Brgemm
* @brief Brgemm is a batch-reduced matrix multiplication with the support of arbitrary strides between matrices rows
* @ingroup snippets
*/
class Brgemm : public ngraph::op::v0::MatMul {
public:
OPENVINO_OP("Brgemm", "SnippetsOpset", ngraph::op::v0::MatMul);
Brgemm(const Output<Node>& A, const Output<Node>& B, const size_t offset_a = 0lu, const size_t offset_b = 0lu, const size_t offset_c = 0lu);
Brgemm() = default;

bool visit_attributes(AttributeVisitor& visitor) override;
void validate_and_infer_types() override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

bool has_evaluate() const override { return false; }

size_t get_offset_a() const { return m_offset_a; }
size_t get_offset_b() const { return m_offset_b; }
size_t get_offset_c() const { return m_offset_c; }

void set_offset_a(const size_t offset) { m_offset_a = offset; }
void set_offset_b(const size_t offset) { m_offset_b = offset; }
void set_offset_c(const size_t offset) { m_offset_c = offset; }

private:
size_t m_offset_a = 0lu; // offset for first input
size_t m_offset_b = 0lu; // offset for second input
size_t m_offset_c = 0lu; // offset for output
};

} // namespace op
} // namespace snippets
} // namespace ngraph
10 changes: 8 additions & 2 deletions src/common/snippets/include/snippets/op/broadcastload.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,18 @@ class BroadcastLoad : public BroadcastMove {
public:
OPENVINO_OP("BroadcastLoad", "SnippetsOpset", ngraph::snippets::op::BroadcastMove);

BroadcastLoad(const Output<Node>& x, Shape output_shape);
BroadcastLoad(const Output<Node>& x, ov::PartialShape output_shape, size_t offset = 0lu);
BroadcastLoad() = default;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
size_t get_offset() const { return m_offset; }
void set_offset(const size_t offset) { m_offset = offset; }

bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;

private:
size_t m_offset = 0lu;
};

} // namespace op
Expand Down
7 changes: 2 additions & 5 deletions src/common/snippets/include/snippets/op/broadcastmove.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class BroadcastMove : public ngraph::op::Op {
public:
OPENVINO_OP("BroadcastMove", "SnippetsOpset");

BroadcastMove(const Output<Node>& x, Shape output_shape);
BroadcastMove(const Output<Node>& x, ov::PartialShape output_shape);
BroadcastMove() = default;

bool visit_attributes(AttributeVisitor& visitor) override;
Expand All @@ -28,12 +28,9 @@ class BroadcastMove : public ngraph::op::Op {

void validate_and_infer_types() override;

OPENVINO_SUPPRESS_DEPRECATED_START
bool evaluate(const HostTensorVector& output_values, const HostTensorVector& input_values) const override;
OPENVINO_SUPPRESS_DEPRECATED_END

protected:
Shape output_shape;
ov::PartialShape output_shape;
};

} // namespace op
Expand Down
47 changes: 47 additions & 0 deletions src/common/snippets/include/snippets/op/buffer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/op/op.hpp>

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface Buffer
* @brief The operation is for intermediate data storage
* - m_allocation_rank - rank of shape for memory allocation: shape[shape_rank - normalize(m_allocation_rank) : shape_rank].
* It's needed to allocate needed memory size that depends on Tile rank, for example.
* Default value is -1 (full shape)
* Notes:
* - All buffers in a graph have the same memory pointer. So if we have a few buffers,
* each the corresponding MemoryAccess op for Buffer should have offset for common memory pointer of this Buffer
* - Buffer should be a single consumer for operation output port
* @ingroup snippets
*/
class Buffer : public ngraph::op::Op {
public:
OPENVINO_OP("Buffer", "SnippetsOpset");

Buffer(const Output<Node>& x, const int32_t allocation_rank = -1);
Buffer() = default;

int32_t get_allocation_rank() const { return m_allocation_rank; }
void set_allocation_rank(int32_t rank) { m_allocation_rank = rank; }

size_t get_byte_size() const;

bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;

private:
int32_t m_allocation_rank = -1;
};

} // namespace op
} // namespace snippets
} // namespace ngraph
47 changes: 47 additions & 0 deletions src/common/snippets/include/snippets/op/fill.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <ngraph/op/op.hpp>

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface Fill
* @brief Generated in Tail Loop vector representation in code generation step for cases when we should
* refill registers by special values.
* For example, for cases with ReduceMax or ReduceSum in Softmax
* Where:
* - offset - starting element index where filling is performed while beginning of input data is untouched
* - fill_value - hexadecimal filling value
* @ingroup snippets
*/
class Fill : public ngraph::op::Op {
public:
OPENVINO_OP("Fill", "SnippetsOpset");

Fill(const Output<Node>& x, const size_t offset, const uint32_t fill_value = 0x0);
Fill() = default;

size_t get_offset() const { return m_offset; }
uint32_t get_fill_value() const { return m_fill_value; }

void set_offset(const size_t offset) { m_offset = offset; }
void set_fill_value(const uint32_t fill_value) { m_fill_value = fill_value; }

bool visit_attributes(AttributeVisitor& visitor) override;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;

protected:
size_t m_offset = 0lu;
uint32_t m_fill_value = 0x0;
};

} // namespace op
} // namespace snippets
} // namespace ngraph
32 changes: 32 additions & 0 deletions src/common/snippets/include/snippets/op/horizon_max.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "ngraph/op/op.hpp"

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface HorizonMax
* @brief The operation calculates a horizon maximum of a vector register
* @ingroup snippets
*/
class HorizonMax : public ngraph::op::Op {
public:
OPENVINO_OP("HorizonMax", "SnippetsOpset");

HorizonMax(const Output<Node>& x);
HorizonMax() = default;

bool visit_attributes(AttributeVisitor& visitor) override { return true;}
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;
};

} // namespace op
} // namespace snippets
} // namespace ngraph
32 changes: 32 additions & 0 deletions src/common/snippets/include/snippets/op/horizon_sum.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "ngraph/op/op.hpp"

namespace ngraph {
namespace snippets {
namespace op {

/**
* @interface HorizonSum
* @brief The operation calculates a horizon sum of a vector register
* @ingroup snippets
*/
class HorizonSum : public ngraph::op::Op {
public:
OPENVINO_OP("HorizonSum", "SnippetsOpset");

HorizonSum(const Output<Node>& x);
HorizonSum() = default;

bool visit_attributes(AttributeVisitor& visitor) override { return true;}
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
void validate_and_infer_types() override;
};

} // namespace op
} // namespace snippets
} // namespace ngraph
7 changes: 4 additions & 3 deletions src/common/snippets/include/snippets/op/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,14 @@ class Kernel : public ngraph::op::Op {
public:
OPENVINO_OP("Kernel", "SnippetsOpset");

Kernel(const std::vector<std::pair<std::shared_ptr<ngraph::snippets::Emitter>, ngraph::snippets::RegInfo>>& region);
Kernel(std::vector<AllocatedEmitter> region, std::shared_ptr<const ov::Model> m);
Kernel() = default;

std::vector<std::pair<std::shared_ptr<ngraph::snippets::Emitter>, ngraph::snippets::RegInfo>> region;
std::vector<AllocatedEmitter> region;
const std::shared_ptr<const ov::Model> model;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& inputs) const override {
return std::make_shared<Kernel>(region);
return std::make_shared<Kernel>(region, model);
}
const void *compile_params = nullptr;
};
Expand Down
Loading

0 comments on commit 6525dd4

Please sign in to comment.