Skip to content

Commit

Permalink
Merge branch 'master' into river/capi_add_status_for_wait_for
Browse files Browse the repository at this point in the history
  • Loading branch information
riverlijunjie authored Oct 27, 2023
2 parents 8ea781d + fd88a6b commit 98fb56d
Show file tree
Hide file tree
Showing 81 changed files with 1,979 additions and 832 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -565,9 +565,7 @@ def expect_fail(test_case_path, xfail): # type: (str) -> None
),
(
xfail_issue_99955,
"OnnxBackendNodeModelTest.test_group_normalization_epsilon_cpu",
"OnnxBackendNodeModelTest.test_group_normalization_epsilon_expanded_cpu",
"OnnxBackendNodeModelTest.test_group_normalization_example_cpu",
"OnnxBackendNodeModelTest.test_group_normalization_example_expanded_cpu",
),
(
Expand Down
10 changes: 10 additions & 0 deletions src/common/snippets/include/snippets/lowered/port_descriptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,16 @@ class PortDescriptor {
VectorDims m_subtensor_shape{};
/// \brief The corresponding abstract/physical register
size_t m_reg = 0;

/// Notes:
///   - `m_tensor_shape` is the dense shape which is controlled by expression outputs.
///     It means that the result of data writing of expression outputs should be read using this shape by the next expression inputs.
///   - `m_layout` is the order of data reading or writing by MemoryAccess ops. Note that only MemoryAccess ops may have `m_layout`.
///     For other expressions this order parameter is simply ignored for now.
///     If it's an input port of a MemoryAccess expression:
///       - `m_layout` shows how the data should be read (by which strides) using `m_tensor_shape`.
///     If it's an output port of a MemoryAccess expression:
///       - `m_layout` shows how the data should be written (by which strides) to get `m_tensor_shape`.
};

class PortDescriptorUtils {
Expand Down
2 changes: 1 addition & 1 deletion src/common/snippets/include/snippets/op/subgraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include <openvino/op/util/sub_graph_base.hpp>
#include "openvino/op/op.hpp"
#include "openvino/core/rt_info.hpp"
#include "snippets/pass_manager.hpp"
#include "snippets/pass/manager.hpp"
#include "snippets/shape_inference/shape_inference.hpp"
#include "snippets/lowered/pass/pass.hpp"

Expand Down
20 changes: 6 additions & 14 deletions src/common/snippets/include/snippets/pass/common_optimizations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,22 @@
#pragma once

#include "openvino/pass/graph_rewrite.hpp"
#include "snippets/op/subgraph.hpp"
#include "snippets/pass/tokenization.hpp"

namespace ov {
namespace snippets {
namespace pass {

class CommonOptimizations : public ov::pass::MatcherPass {
class SubgraphPass;
class SubgraphManager;
friend class ExtractConstants;
friend class ExtractUnsupportedTransposes;
friend class SplitDimensionM;

public:
OPENVINO_RTTI("CommonOptimizations", "0");
CommonOptimizations(const SnippetsTokenization::Config& config = {});

// Returns True if parallelism work amount can be increased using SplitDimensionM optimization
static bool CanOptimizeParallelWA(const std::shared_ptr<const ov::Node>& node, size_t concurrency);

private:
// Move up Constants which aren't scalars from body to Subgraph and replace them with Parameters inside body
void ExtractConstants(const std::shared_ptr<op::Subgraph>& subgraph);
// Move up unsupported Transposes on Parameter outputs from body
void ExtractUnsupportedTransposes(const std::shared_ptr<op::Subgraph>& subgraph);
// Insert Reshape nodes after and before Parameters and Results in Subgraphs with MatMul inside
// to split dimension M for MatMuls to increase work amount for parallelism
// Note: works only with 3D MHA patterns
void SplitDimensionM(const std::shared_ptr<op::Subgraph>& subgraph, size_t concurrency);
};

} // namespace pass
Expand Down
29 changes: 29 additions & 0 deletions src/common/snippets/include/snippets/pass/extract_constants.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "subgraph_pass.hpp"

namespace ov {
namespace snippets {
namespace pass {

/**
 * @interface ExtractConstants
 * @brief Moves up Constants which aren't scalars outside of the Subgraph's body and replaces them with Parameters inside body
 * @ingroup snippets
 */
class ExtractConstants: public CommonOptimizations::SubgraphPass {
public:
    OPENVINO_RTTI("ExtractConstants", "0");
    ExtractConstants() = default;

    /// @brief Applies the transformation to the given Subgraph op
    /// @param subgraph the Subgraph op whose body is inspected for non-scalar Constants
    /// @return true if the Subgraph was changed, false otherwise
    bool run_on_subgraph(const std::shared_ptr<op::Subgraph>& subgraph) override;
};


} // namespace pass
} // namespace snippets
} // namespace ov
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "subgraph_pass.hpp"

namespace ov {
namespace snippets {
namespace pass {

/**
 * @interface ExtractUnsupportedTransposes
 * @brief Moves up unsupported Transposes on Parameter outputs from body
 * @ingroup snippets
 */
class ExtractUnsupportedTransposes: public CommonOptimizations::SubgraphPass {
public:
    OPENVINO_RTTI("ExtractUnsupportedTransposes", "0");
    ExtractUnsupportedTransposes() = default;

    /// @brief Applies the transformation to the given Subgraph op
    /// @param subgraph the Subgraph op whose Parameter outputs are checked for unsupported Transposes
    /// @return true if the Subgraph was changed, false otherwise
    bool run_on_subgraph(const std::shared_ptr<op::Subgraph>& subgraph) override;
};


} // namespace pass
} // namespace snippets
} // namespace ov
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,17 @@ namespace pass {
/**
* @interface FuseTransposeBrgemm
* @brief Fuses Transpose with Brgemm node, fusing on both Brgemm inputs and output is supported. Applicable to
* Transposes that don't change the position of the last dimension (since Brgemm supports strided rows i/o),
* but only 0213 Transpose is currently supported.
* Transposes that don't change the position of the last dimension (since Brgemm supports strided rows i/o).
* Supported any Transpose order where last index is equal to [rank - 1] - it means that last dimension isn't moved.
* @ingroup snippets
*/
class FuseTransposeBrgemm: public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("FuseTransposeBrgemm", "0");
FuseTransposeBrgemm();
static const std::set<std::vector<int>> supported_cases;

private:
static bool is_supported_transpose(const Output<Node>& transpose_port);
static bool is_supported_transpose(const Output<Node>& transpose_out);
static bool is_supported_transpose_order(const std::vector<int32_t>& order);
};

} // namespace pass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,18 @@
//

#pragma once

#include "openvino/pass/manager.hpp"
#include "openvino/pass/pass.hpp"
#include "openvino/pass/validate.hpp"

#include <typeinfo>


namespace ov {
namespace snippets {
namespace pass {

/**
* @brief Manager is like ov::pass::Manager, but allows to insert new passes at arbitrary places in the pipeline
* @ingroup snippets
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class TokenizeMHASnippets: public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("TokenizeMHASnippets", "0");
TokenizeMHASnippets(const SnippetsTokenization::Config& config = {});

static std::vector<int32_t> get_fusion_transpose_order(size_t rank);
static std::vector<int32_t> get_decomposed_transpose_order(size_t rank);
static bool is_matmul0_supported(const std::shared_ptr<ov::opset1::MatMul>& matmul);
};

Expand Down
44 changes: 44 additions & 0 deletions src/common/snippets/include/snippets/pass/split_dimension_m.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "subgraph_pass.hpp"

namespace ov {
namespace snippets {
namespace pass {

/**
* @interface SplitDimensionM
* @brief Inserts Reshape nodes before inputs and after outputs of Subgraphs with MatMul inside
* to split dimension M for MatMuls. It allows to increase work amount for parallelism
* @ingroup snippets
*/
class SplitDimensionM: public CommonOptimizations::SubgraphPass {
public:
OPENVINO_RTTI("SplitDimensionM", "0");
SplitDimensionM(size_t concurrency) : m_concurrency(concurrency) {}

bool run_on_subgraph(const std::shared_ptr<op::Subgraph>& subgraph) override;

// Return True if the MatMul node is supported by this optimization
static bool is_supported_matmul(const std::shared_ptr<const ov::Node>& node);
// Returns True if parallelism work amount (concurrency) can be increased by this optimization
static bool can_be_optimized(const std::shared_ptr<const ov::Node>& node, size_t concurrency);

private:
static std::shared_ptr<ov::op::v0::MatMul> get_matmul(const std::shared_ptr<op::Subgraph>& subgraph);
static std::pair<size_t, size_t> get_splited_dimensions(size_t batch_dim, size_t m_dim, size_t optimal_parallelism_work_amount);
static bool split(const ov::Shape& shape, size_t optimal_parallelism_work_amount, size_t& batch_m_dim, size_t& new_m_dim);

void reshape_subgraph(const std::shared_ptr<op::Subgraph>& subgraph, const ov::Shape& shape, size_t batch_m_dim, size_t new_m_dim);

size_t m_concurrency;
};


} // namespace pass
} // namespace snippets
} // namespace ov
49 changes: 49 additions & 0 deletions src/common/snippets/include/snippets/pass/subgraph_manager.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <typeinfo>
#include <vector>

#include "snippets/pass/common_optimizations.hpp"

#include "snippets/pass/subgraph_pass.hpp"
#include "snippets/op/subgraph.hpp"

namespace ov {
namespace snippets {
namespace pass {
/**
 * @brief Manager class allows to manage transformation passes (SubgraphPasses) on Subgraph ops.
 *        See the SubgraphPass description for more details.
 *        It is a light version of the ov::pass::Manager implementation, the purpose of which is to change
 *        only the Subgraph as a separate node in the model.
 * @ingroup snippets
 */
class CommonOptimizations::SubgraphManager {
public:
    SubgraphManager() = default;

    /// @brief Register given transformation class type to execution list
    /// @tparam T    the pass type; must derive from SubgraphPass
    /// @tparam Args constructor argument types forwarded to T's constructor
    /// @return shared_ptr to the transformation instance
    template <typename T, class... Args>
    std::shared_ptr<T> register_pass(Args&&... args) {
        static_assert(std::is_base_of<SubgraphPass, T>::value, "pass not derived from SubgraphPass base");
        auto pass = std::make_shared<T>(std::forward<Args>(args)...);
        // Stored upcast to the base interface; the caller keeps the derived-typed handle
        m_pass_list.push_back(std::static_pointer_cast<SubgraphPass>(pass));
        return pass;
    }

    /// @brief Runs registered transformations on a given Subgraph op
    /// @param subgraph the Subgraph op to transform
    /// @return Returns true if the subgraph was changed by transformations, false otherwise.
    bool run_passes(std::shared_ptr<ov::snippets::op::Subgraph> subgraph);

protected:
    // Registered passes in registration order
    std::vector<std::shared_ptr<SubgraphPass>> m_pass_list;
};
} // namespace pass
} // namespace snippets
} // namespace ov
45 changes: 45 additions & 0 deletions src/common/snippets/include/snippets/pass/subgraph_pass.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <typeinfo>

#include "snippets/pass/common_optimizations.hpp"


namespace ov {
namespace snippets {
namespace pass {

/**
 * @brief Base class for Subgraph passes.
 * The pass runs on `Subgraph` op that allows users to transform
 * `Subgraph` as node and `body` of this `Subgraph` as model at the same time.
 * These passes may change `Subgraph` as node, its `body` and other ops around `Subgraph` in model.
 * To avoid unsafe changes of other ops in model, SubgraphPass is not derived from ov::Pass to avoid
 * registration to ov::Model
 * @ingroup snippets
 */
class CommonOptimizations::SubgraphPass {
public:
    SubgraphPass() = default;
    virtual ~SubgraphPass() = default;

    /// @brief Applies the transformation to the given Subgraph op
    /// @param subgraph the Subgraph op to transform
    /// @return true if the Subgraph (or the model around it) was changed, false otherwise
    virtual bool run_on_subgraph(const std::shared_ptr<op::Subgraph>& subgraph) = 0;

    // Sink parameter: taken by value and moved, so rvalue callers pay a move instead of a copy
    void set_name(std::string name) { m_name = std::move(name); }
    std::string get_name() const { return m_name; }

    using type_info_t = DiscreteTypeInfo;
    /// @brief RTTI hook implemented by derived passes (typically via OPENVINO_RTTI)
    virtual const type_info_t& get_type_info() const = 0;

private:
    std::string m_name;
};


} // namespace pass
} // namespace snippets
} // namespace ov
13 changes: 11 additions & 2 deletions src/common/snippets/include/snippets/pass/tokenization.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ class EnumerateNodes : public ov::pass::ModelPass {
* 2. MHA tokenization
* 3. Common tokenization
* 4. Some common transformations for Subgraphs. For example, FakeQuantize decomposition
* Naming policy:
*  - During tokenization, a new Subgraph op takes the name of the last tokenized op.
*    This is needed to preserve the output names of the model in cases when the tokenized op was right before a model Result.
*  - If some transformation (for example, SplitDimensionM) inserts a new op after the Subgraph,
*    that op should take the name of this Subgraph to preserve the output name. The Subgraph name is updated using the suffix "_original".
* @ingroup snippets
*/
class SnippetsTokenization : public ov::pass::ModelPass {
Expand All @@ -61,9 +66,9 @@ class SnippetsTokenization : public ov::pass::ModelPass {
* @ingroup snippets
*/
struct Config {
Config(size_t concurrency = 1, bool split_m_dimension = true, bool enable_transpose_on_output = true)
Config(size_t concurrency = 1, bool split_m_dimension = true, bool enable_transpose_on_output = true, std::set<size_t> mha_transpose_ranks = {3, 4})
: concurrency(concurrency), split_m_dimension(split_m_dimension),
mha_token_enable_transpose_on_output(enable_transpose_on_output) {}
mha_token_enable_transpose_on_output(enable_transpose_on_output), mha_supported_transpose_ranks(std::move(mha_transpose_ranks)) {}

size_t concurrency = 1;
// True if "SplitDimensionM" optimization is enabled. Otherwise, it's disabled.
Expand All @@ -72,6 +77,10 @@ class SnippetsTokenization : public ov::pass::ModelPass {
// Otherwise, it may be fused into Subgraph if possible
// TODO [111813]: Remove please when the ticket 111813 is implemented
bool mha_token_enable_transpose_on_output = true;
// Set of supported Transpose shape ranks for tokenization in MHATokenization pass.
// Note that in general Snippets support Transpose of any ranks.
// But at the moment Transpose is used only in MHA pattern where 3D and 4D tensors are supported.
std::set<size_t> mha_supported_transpose_ranks = { 3, 4 };
};

OPENVINO_RTTI("SnippetsTokenization", "0");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ class TransposeDecomposition: public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("TransposeDecomposition", "0");
TransposeDecomposition();
static const std::set<std::vector<int>> supported_cases;

static bool is_supported_transpose(const Output<Node>& transpose_out);
static bool is_supported_transpose_order(const std::vector<int32_t>& order);
};

} // namespace pass
Expand Down
Loading

0 comments on commit 98fb56d

Please sign in to comment.