-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Snippets] Added support for Reshape around Softmax applied comment part Added config parameter to disable MHA ops tokenization
- Loading branch information
1 parent
d00ba5b
commit 0aaea67
Showing
63 changed files
with
2,901 additions
and
254 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
// Copyright (C) 2018-2022 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
namespace ngraph { | ||
namespace snippets { | ||
|
||
|
||
/**
 * @interface SubgraphConfig
 * @brief Config to know which transformations should be called.
 *        It helps to avoid the overhead of extra transformation calls.
 * @ingroup snippets
 */
struct SubgraphConfig {
    // True if Subgraph contains FakeQuantize -> FQ decomposition should be called
    bool m_is_quantized = false;
    // True if we should align element types inside the body
    bool m_is_needed_to_align_precision = false;
    // True if Subgraph contains TypeRelaxed nodes -> for several streams in tp mode we should copy body using mutexes
    // because TypeRelaxed::copy_with_new_inputs() isn't a thread-safe method
    bool m_has_type_relaxed_ops = false;
    // True if we should check runtime info for nodes to call specific needed transformations
    // NOTE(review): this description does not obviously match the field name
    // (tail register filling) - confirm the intended meaning with the users of this flag.
    bool m_need_fill_tail_register = false;
    // True if we should go through whole body to check for where loops should be explicitly inserted.
    // Otherwise, we insert Loops on Parameters and Results - for example, it's optimized out for subgraph with only Eltwise ops
    bool m_explicit_loop_insertion = false;
    // True if body has operations that don't support plugin-side domain optimizations
    // (e.g. Transpose, Softmax, MatMul in general don't support dimensions collapsing)
    bool m_has_domain_sensitive_ops = false;
    // True if one evaluation optimizations are enabled (the only flag that defaults to true)
    bool m_one_evaluation_optimizations = true;
};
|
||
} // namespace snippets | ||
} // namespace ngraph |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
// Copyright (C) 2018-2022 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include <ngraph/op/op.hpp> | ||
|
||
namespace ngraph { | ||
namespace snippets { | ||
namespace op { | ||
|
||
/** | ||
* @interface Buffer | ||
* @brief The operation is for intermediate data storage | ||
* Notes: | ||
* - All buffers in a graph have the same memory pointer. So if we have a few buffers, | ||
* each buffer should have its own offset for common memory | ||
* - If Buffer is an input for operation output, this Buffer should be a single consumer for this port | ||
* @ingroup snippets | ||
*/ | ||
class Buffer : public ngraph::op::Op { | ||
public: | ||
OPENVINO_OP("Buffer", "SnippetsOpset"); | ||
BWDCMP_RTTI_DECLARATION; | ||
|
||
Buffer(const Output<Node>& x); | ||
Buffer() = default; | ||
|
||
size_t get_offset() const { return m_offset; } | ||
void set_offset(const size_t offset); | ||
|
||
bool visit_attributes(AttributeVisitor& visitor) override; | ||
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override; | ||
void validate_and_infer_types() override; | ||
|
||
private: | ||
size_t m_offset = 0lu; | ||
}; | ||
|
||
} // namespace op | ||
} // namespace snippets | ||
} // namespace ngraph |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// Copyright (C) 2018-2022 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include <ngraph/op/op.hpp> | ||
|
||
namespace ngraph { | ||
namespace snippets { | ||
namespace op { | ||
|
||
/** | ||
* @interface Fill | ||
* @brief Generated in Tail Loop vector representation in code generation step for cases when we should | ||
* refill regsiters by special numbers. | ||
* For example, for cases with ReduceMax or ReduceSum in Softmax | ||
* Where: | ||
* - offset - is value shift for filling | ||
* - fill_value - hexadecimal filling value | ||
* @ingroup snippets | ||
*/ | ||
class Fill : public ngraph::op::Op { | ||
public: | ||
OPENVINO_OP("Fill", "SnippetsOpset"); | ||
|
||
Fill(const Output<Node>& x, const size_t offset, const uint32_t fill_value = 0x0); | ||
Fill() = default; | ||
|
||
size_t get_offset() const { return m_offset; } | ||
uint32_t get_fill_value() const { return m_fill_value; } | ||
|
||
void set_offset(const size_t offset) { m_offset = offset; } | ||
void set_fill_value(const uint32_t fill_value) { m_fill_value = fill_value; } | ||
|
||
bool visit_attributes(AttributeVisitor& visitor) override; | ||
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override; | ||
void validate_and_infer_types() override; | ||
|
||
protected: | ||
size_t m_offset = 0lu; | ||
uint32_t m_fill_value = 0x0; | ||
}; | ||
|
||
} // namespace op | ||
} // namespace snippets | ||
} // namespace ngraph |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// Copyright (C) 2018-2022 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "ngraph/op/op.hpp" | ||
|
||
namespace ngraph { | ||
namespace snippets { | ||
namespace op { | ||
|
||
/** | ||
* @interface HorizonMax | ||
* @brief The operation calculates a horizon maximum of a vector register | ||
* @ingroup snippets | ||
*/ | ||
class HorizonMax : public ngraph::op::Op { | ||
public: | ||
OPENVINO_OP("HorizonMax", "SnippetsOpset"); | ||
|
||
HorizonMax(const Output<Node>& x); | ||
HorizonMax() = default; | ||
|
||
bool visit_attributes(AttributeVisitor& visitor) override { return true;} | ||
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override; | ||
void validate_and_infer_types() override; | ||
}; | ||
|
||
} // namespace op | ||
} // namespace snippets | ||
} // namespace ngraph |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// Copyright (C) 2018-2022 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "ngraph/op/op.hpp" | ||
|
||
namespace ngraph { | ||
namespace snippets { | ||
namespace op { | ||
|
||
/** | ||
* @interface HorizonSum | ||
* @brief The operation calculates a horizon sum of a vector register | ||
* @ingroup snippets | ||
*/ | ||
class HorizonSum : public ngraph::op::Op { | ||
public: | ||
OPENVINO_OP("HorizonSum", "SnippetsOpset"); | ||
|
||
HorizonSum(const Output<Node>& x); | ||
HorizonSum() = default; | ||
|
||
bool visit_attributes(AttributeVisitor& visitor) override { return true;} | ||
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override; | ||
void validate_and_infer_types() override; | ||
}; | ||
|
||
} // namespace op | ||
} // namespace snippets | ||
} // namespace ngraph |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.