-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Snippets][CPU] Added FP32 MHA tokenization support (#14327)
- Loading branch information
1 parent
6ec71c3
commit 6525dd4
Showing
176 changed files
with
9,989 additions
and
1,628 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// Copyright (C) 2018-2022 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "ngraph/op/op.hpp" | ||
#include "ngraph/op/matmul.hpp" | ||
|
||
namespace ngraph { | ||
namespace snippets { | ||
namespace op { | ||
|
||
/** | ||
* @interface Brgemm | ||
* @brief Brgemm is a batch-reduced matrix multiplication with the support of arbitrary strides between matrices rows | ||
* @ingroup snippets | ||
*/ | ||
class Brgemm : public ngraph::op::v0::MatMul { | ||
public: | ||
OPENVINO_OP("Brgemm", "SnippetsOpset", ngraph::op::v0::MatMul); | ||
Brgemm(const Output<Node>& A, const Output<Node>& B, const size_t offset_a = 0lu, const size_t offset_b = 0lu, const size_t offset_c = 0lu); | ||
Brgemm() = default; | ||
|
||
bool visit_attributes(AttributeVisitor& visitor) override; | ||
void validate_and_infer_types() override; | ||
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override; | ||
|
||
bool has_evaluate() const override { return false; } | ||
|
||
size_t get_offset_a() const { return m_offset_a; } | ||
size_t get_offset_b() const { return m_offset_b; } | ||
size_t get_offset_c() const { return m_offset_c; } | ||
|
||
void set_offset_a(const size_t offset) { m_offset_a = offset; } | ||
void set_offset_b(const size_t offset) { m_offset_b = offset; } | ||
void set_offset_c(const size_t offset) { m_offset_c = offset; } | ||
|
||
private: | ||
size_t m_offset_a = 0lu; // offset for first input | ||
size_t m_offset_b = 0lu; // offset for second input | ||
size_t m_offset_c = 0lu; // offset for output | ||
}; | ||
|
||
} // namespace op | ||
} // namespace snippets | ||
} // namespace ngraph |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// Copyright (C) 2018-2022 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include <ngraph/op/op.hpp> | ||
|
||
namespace ngraph { | ||
namespace snippets { | ||
namespace op { | ||
|
||
/** | ||
* @interface Buffer | ||
* @brief The operation is for intermediate data storage | ||
* - m_allocation_rank - rank of shape for memory allocation: shape[shape_rank - normalize(m_allocation_rank) : shape_rank]. | ||
* It's needed to allocate needed memory size that depends on Tile rank, for example. | ||
* Default value is -1 (full shape) | ||
* Notes: | ||
* - All buffers in a graph have the same memory pointer. So if we have a few buffers, | ||
* each the corresponding MemoryAccess op for Buffer should have offset for common memory pointer of this Buffer | ||
* - Buffer should be a single consumer for operation output port | ||
* @ingroup snippets | ||
*/ | ||
class Buffer : public ngraph::op::Op { | ||
public: | ||
OPENVINO_OP("Buffer", "SnippetsOpset"); | ||
|
||
Buffer(const Output<Node>& x, const int32_t allocation_rank = -1); | ||
Buffer() = default; | ||
|
||
int32_t get_allocation_rank() const { return m_allocation_rank; } | ||
void set_allocation_rank(int32_t rank) { m_allocation_rank = rank; } | ||
|
||
size_t get_byte_size() const; | ||
|
||
bool visit_attributes(AttributeVisitor& visitor) override; | ||
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override; | ||
void validate_and_infer_types() override; | ||
|
||
private: | ||
int32_t m_allocation_rank = -1; | ||
}; | ||
|
||
} // namespace op | ||
} // namespace snippets | ||
} // namespace ngraph |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// Copyright (C) 2018-2022 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include <ngraph/op/op.hpp> | ||
|
||
namespace ngraph { | ||
namespace snippets { | ||
namespace op { | ||
|
||
/** | ||
* @interface Fill | ||
* @brief Generated in Tail Loop vector representation in code generation step for cases when we should | ||
* refill registers by special values. | ||
* For example, for cases with ReduceMax or ReduceSum in Softmax | ||
* Where: | ||
* - offset - starting element index where filling is performed while beginning of input data is untouched | ||
* - fill_value - hexadecimal filling value | ||
* @ingroup snippets | ||
*/ | ||
class Fill : public ngraph::op::Op { | ||
public: | ||
OPENVINO_OP("Fill", "SnippetsOpset"); | ||
|
||
Fill(const Output<Node>& x, const size_t offset, const uint32_t fill_value = 0x0); | ||
Fill() = default; | ||
|
||
size_t get_offset() const { return m_offset; } | ||
uint32_t get_fill_value() const { return m_fill_value; } | ||
|
||
void set_offset(const size_t offset) { m_offset = offset; } | ||
void set_fill_value(const uint32_t fill_value) { m_fill_value = fill_value; } | ||
|
||
bool visit_attributes(AttributeVisitor& visitor) override; | ||
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override; | ||
void validate_and_infer_types() override; | ||
|
||
protected: | ||
size_t m_offset = 0lu; | ||
uint32_t m_fill_value = 0x0; | ||
}; | ||
|
||
} // namespace op | ||
} // namespace snippets | ||
} // namespace ngraph |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// Copyright (C) 2018-2022 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "ngraph/op/op.hpp" | ||
|
||
namespace ngraph { | ||
namespace snippets { | ||
namespace op { | ||
|
||
/** | ||
* @interface HorizonMax | ||
* @brief The operation calculates a horizon maximum of a vector register | ||
* @ingroup snippets | ||
*/ | ||
class HorizonMax : public ngraph::op::Op { | ||
public: | ||
OPENVINO_OP("HorizonMax", "SnippetsOpset"); | ||
|
||
HorizonMax(const Output<Node>& x); | ||
HorizonMax() = default; | ||
|
||
bool visit_attributes(AttributeVisitor& visitor) override { return true;} | ||
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override; | ||
void validate_and_infer_types() override; | ||
}; | ||
|
||
} // namespace op | ||
} // namespace snippets | ||
} // namespace ngraph |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// Copyright (C) 2018-2022 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "ngraph/op/op.hpp" | ||
|
||
namespace ngraph { | ||
namespace snippets { | ||
namespace op { | ||
|
||
/** | ||
* @interface HorizonSum | ||
* @brief The operation calculates a horizon sum of a vector register | ||
* @ingroup snippets | ||
*/ | ||
class HorizonSum : public ngraph::op::Op { | ||
public: | ||
OPENVINO_OP("HorizonSum", "SnippetsOpset"); | ||
|
||
HorizonSum(const Output<Node>& x); | ||
HorizonSum() = default; | ||
|
||
bool visit_attributes(AttributeVisitor& visitor) override { return true;} | ||
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override; | ||
void validate_and_infer_types() override; | ||
}; | ||
|
||
} // namespace op | ||
} // namespace snippets | ||
} // namespace ngraph |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.