[Snippets] Explicit Tiles review comments No. 1
IvanNovoselov committed Nov 23, 2022
1 parent 71c3678 commit 5f8a55e
Showing 12 changed files with 46 additions and 49 deletions.
18 changes: 12 additions & 6 deletions src/common/snippets/include/snippets/op/loop_helpers.hpp
@@ -12,13 +12,9 @@ namespace ngraph {
 namespace snippets {
 namespace op {
 
+/* ==== LoopBegin === */
 std::shared_ptr<LoopBegin> insertLoopBeginAfterOutputs(const OutputVector& originalOutputs);
 
-std::shared_ptr<LoopEnd> insertLoopEndBeforeInputs(const std::vector<Input<Node>>& originalInputs,
-                                                   const std::shared_ptr<LoopBegin>& tileBegin,
-                                                   size_t dimension, size_t work_amount, size_t increment,
-                                                   std::vector<bool> apply_increment = {},
-                                                   std::vector<int64_t> finalization_offsets = {});
 template<typename T>
 std::shared_ptr<LoopBegin> insertLoopBegin(const T& afterTheseNodes) {
     static_assert(std::is_same<T, ParameterVector>() || std::is_same<T, NodeVector>(),
@@ -36,8 +32,16 @@ std::shared_ptr<LoopBegin> insertLoopBegin(const T& afterTheseNodes) {

 template<>
 inline std::shared_ptr<LoopBegin> insertLoopBegin(const OutputVector& afterTheseNodes) {
-        return insertLoopBeginAfterOutputs(afterTheseNodes);
+    return insertLoopBeginAfterOutputs(afterTheseNodes);
 }
+/* ============== */
+
+/* ==== LoopEnd === */
+std::shared_ptr<LoopEnd> insertLoopEndBeforeInputs(const std::vector<Input<Node>>& originalInputs,
+                                                   const std::shared_ptr<LoopBegin>& tileBegin,
+                                                   size_t dimension, size_t work_amount, size_t increment,
+                                                   std::vector<bool> apply_increment = {},
+                                                   std::vector<int64_t> finalization_offsets = {});
 
 template<typename T, typename ...Args>
 std::shared_ptr<LoopEnd> insertLoopEnd(const T& beforeTheseNodes, Args ...args) {
@@ -51,10 +55,12 @@ std::shared_ptr<LoopEnd> insertLoopEnd(const T& beforeTheseNodes, Args ...args)
     }
     return insertLoopEndBeforeInputs(originalInputs, args...);
 }
+
 template<typename ...Args>
 std::shared_ptr<LoopEnd> insertLoopEnd(const std::vector<Input<Node>>& beforeTheseNodes, Args ...args) {
     return insertLoopEndBeforeInputs(beforeTheseNodes, args...);
 }
+/* ============== */
 
 } // namespace op
 } // namespace snippets
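Note: with the declarations regrouped above, the header reads as a bracket-style API: insertLoopBegin opens a loop after the given nodes, and insertLoopEnd closes it against that begin node. A minimal usage sketch, mirroring the call pattern in src/common/snippets/src/pass/insert_loops.cpp below; this is not standalone code, and params, results, dim, work_amount, increment, apply_increments, and finalization_offsets are assumed to be supplied by the caller:

```cpp
// Sketch only, under the assumptions above; names follow insert_loops.cpp.
const auto& loop_begin = op::insertLoopBegin(params);                  // opens the loop after params
const auto& loop_end = op::insertLoopEnd(results, loop_begin,          // closes it before results
                                         dim, work_amount, increment,
                                         apply_increments, finalization_offsets);
loop_end->has_outer_loop = false;  // caller sets this when an outer loop wraps this one
```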
20 changes: 10 additions & 10 deletions src/common/snippets/src/pass/insert_loops.cpp
@@ -20,8 +20,8 @@ bool ngraph::snippets::pass::InsertLoops::run_on_model(const std::shared_ptr<ov:
     const auto inner_dim = master_shape.size() - 1;
     // Note: outer_dim could overflow if master_shape.size() < 2
     const auto outer_dim = master_shape.size() - 2;
-    const auto inner_WA = master_shape[inner_dim].get_length();
-    const auto outer_WA = master_shape.size() >= 2 ? master_shape[outer_dim].get_length() : 1;
+    const auto inner_work_amount = master_shape[inner_dim].get_length();
+    const auto outer_work_amount = master_shape.size() >= 2 ? master_shape[outer_dim].get_length() : 1;
 
     ParameterVector commonParams = model->get_parameters();
     // Note that topological sort parses node arguments in reversed order, but results are added - in direct order
@@ -35,7 +35,7 @@ bool ngraph::snippets::pass::InsertLoops::run_on_model(const std::shared_ptr<ov:
     std::transform(commonResults.begin(), commonResults.end(), std::back_inserter(ioShapes),
                    [](const std::shared_ptr<Node>& n) { return n->get_input_partial_shape(0); });
 
-    if (inner_WA > 0) {
+    if (inner_work_amount > 0) {
         std::vector<bool> apply_increments;
         apply_increments.reserve(ioShapes.size());
         // Inner Loop applies increments if a dimension is not broadcasted
@@ -44,18 +44,18 @@ bool ngraph::snippets::pass::InsertLoops::run_on_model(const std::shared_ptr<ov:
                     return ps[inner_dim] != 1 && master_shape[inner_dim] != 1;
                 });
         std::vector<int64_t> inner_finalization_offsets(ioShapes.size(), 0);
-        if (outer_WA > 1) {
+        if (outer_work_amount > 1) {
             // We need to step back if an outer dim is broadcasted, while the corresponding lower one is not
             std::transform(ioShapes.begin(), ioShapes.end(), inner_finalization_offsets.begin(),
                            [=](const PartialShape& ps) {
-                               return ps[outer_dim] == 1 && ps[inner_dim] != 1 ? -inner_WA : 0;
+                               return ps[outer_dim] == 1 && ps[inner_dim] != 1 ? -inner_work_amount : 0;
                            });
         }
         const auto& inner_loop_begin = op::insertLoopBegin(commonParams);
-        const auto& inner_loop_end = insertLoopEnd(commonResults, inner_loop_begin, inner_dim, inner_WA, vector_size, apply_increments,
-                                                   inner_finalization_offsets);
+        const auto& inner_loop_end = insertLoopEnd(commonResults, inner_loop_begin, inner_dim, inner_work_amount,
+                                                   vector_size, apply_increments, inner_finalization_offsets);
         // set internal flag to enable scalar vs vector loop optimizations
-        inner_loop_end->has_outer_loop = outer_WA > 1;
+        inner_loop_end->has_outer_loop = outer_work_amount > 1;
         // Due to features of topological sort, some Constants (Scalars) may appear right after Parameters in
         // sorted ops (so it's between Parameters and LoopBegin). Consequently, ScalarEmitters would be called
         // outside the Loop, and only the first Loop iteration would yield correct data (assuming the vector reg
@@ -69,7 +69,7 @@ bool ngraph::snippets::pass::InsertLoops::run_on_model(const std::shared_ptr<ov:
         }
     }
 
-    if (outer_WA > 1) {
+    if (outer_work_amount > 1) {
         std::vector<bool> apply_increments;
         apply_increments.reserve(ioShapes.size());
         // Outer Loop applies increments only if a corresponding lower dim was broadcasted (or all lower dims == 1)
@@ -78,7 +78,7 @@ bool ngraph::snippets::pass::InsertLoops::run_on_model(const std::shared_ptr<ov:
                     return ps[outer_dim] != 1 && ps[inner_dim] == 1;
                 });
         const auto& outer_loop_begin = op::insertLoopBegin(commonParams);
-        insertLoopEnd(commonResults, outer_loop_begin, outer_dim, outer_WA, 1, apply_increments);
+        insertLoopEnd(commonResults, outer_loop_begin, outer_dim, outer_work_amount, 1, apply_increments);
     }
 
     return true;
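Note: the increment/offset logic above is compact, so here is a minimal standalone sketch of the same two predicates using plain std types (no ngraph dependencies; the shapes are borrowed from the broadcast test case {{1, 128, 9, 30}, {1, 128, 1, 30}} added later in this commit):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

int main() {
    // master_shape = {1, 128, 9, 30}; one input is broadcast over the outer dim.
    const std::vector<int64_t> master_shape{1, 128, 9, 30};
    const std::vector<int64_t> input_shape{1, 128, 1, 30};
    const size_t inner_dim = master_shape.size() - 1;            // 3
    const size_t outer_dim = master_shape.size() - 2;            // 2
    const int64_t inner_work_amount = master_shape[inner_dim];   // 30

    // The inner loop advances this input's pointer only if its dim is not broadcast:
    const bool apply_increment =
            input_shape[inner_dim] != 1 && master_shape[inner_dim] != 1;  // true: 30 vs 30

    // Finalization offset: step back by the inner work amount when the outer dim
    // is broadcast while the inner one is not, so every outer iteration re-reads
    // the same row of this input:
    const int64_t finalization_offset =
            (input_shape[outer_dim] == 1 && input_shape[inner_dim] != 1) ? -inner_work_amount : 0;

    assert(apply_increment);
    assert(finalization_offset == -30);
    return 0;
}
```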
2 changes: 1 addition & 1 deletion src/common/snippets/tests/src/pass/canonicalization.cpp
@@ -51,7 +51,7 @@ TEST_P(CanonicalizationTests, Add) {
     auto subgraph = getTokenizedSubgraph(function);
     subgraph->set_generator(std::make_shared<DummyGenerator>());
     auto canonical_output_shape = subgraph->canonicalize(output_blocked_shapes, input_blocked_shapes);
-    ASSERT_TRUE(!canonical_output_shape.is_dynamic());
+    ASSERT_TRUE(canonical_output_shape.is_static());
     ASSERT_DIMS_EQ(canonical_output_shape.get_shape(), expected_output_shape);
 }

6 changes: 3 additions & 3 deletions src/plugins/intel_cpu/src/emitters/jit_snippets_emitters.cpp
@@ -246,7 +246,7 @@ LoopBeginEmitter::LoopBeginEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl:
         IE_THROW() << "LoopBeginEmitter invoked with invalid configuration: the last output must be LoopEnd";
     work_amount = loop_begin->get_work_amount();
     evaluate_once = loop_begin->get_evaluate_once();
-    num_inputs = loop_begin->get_output_size() - 1;
+    num_inputs = loop_begin->get_input_size();
     in_out_type_ = emitter_in_out_map::gpr_to_gpr;
 }

@@ -297,8 +297,8 @@ LoopEndEmitter::LoopEndEmitter(dnnl::impl::cpu::x64::jit_generator* h, dnnl::imp
     if (!loop_begin)
         IE_THROW() << "LoopEndEmitter invoked with invalid configuration: the last arg must be LoopBegin";
     // Note that 1 edge connects LoopBegin and LoopEnd
-    num_inputs = loop_begin->get_output_size() - 1;
-    num_outputs = loop_end->get_input_size() - 1;
+    num_inputs = loop_begin->get_input_size();
+    num_outputs = loop_end->get_output_size();
     increment = loop_end->get_increment();
     work_amount = loop_end->get_work_amount();
     apply_increments = loop_end->get_apply_increment();
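Note: the emitter changes rely on the invariant stated in the comment above: exactly one extra edge connects LoopBegin to LoopEnd, so LoopBegin has N data inputs and N + 1 outputs, while LoopEnd has N + 1 inputs and N outputs. A toy check of that accounting (plain ints, hypothetical N) showing that the old and new derivations agree:

```cpp
#include <cassert>

int main() {
    const int n = 3;                       // N data tensors flowing through the loop
    const int loop_begin_inputs = n;       // LoopBegin consumes the data tensors...
    const int loop_begin_outputs = n + 1;  // ...and forwards them plus the edge to LoopEnd
    const int loop_end_inputs = n + 1;     // LoopEnd consumes the data plus that edge...
    const int loop_end_outputs = n;        // ...and forwards only the data

    assert(loop_begin_outputs - 1 == loop_begin_inputs);  // old vs new num_inputs
    assert(loop_end_inputs - 1 == loop_end_outputs);      // old vs new num_outputs
    return 0;
}
```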
17 changes: 4 additions & 13 deletions src/plugins/intel_cpu/src/nodes/subgraph.cpp
@@ -22,6 +22,7 @@

 #include <snippets/op/subgraph.hpp>
 #include "emitters/cpu_generator.hpp"
+#include "utils/cpu_utils.hpp"
 #include "snippets_transformations/fuse_load_store_and_convert.hpp"
 #include "ngraph_transformations/convert_to_swish_cpu.hpp"

@@ -70,14 +71,6 @@ void Snippet::copy_snippet() {
     isa_num_lanes = snippet->get_generator()->get_target_machine()->get_lanes();
 }
 
-VectorDims Snippet::prependWithOnes(const VectorDims& dims, size_t rank) {
-    if (rank <= dims.size())
-        return dims;
-    VectorDims result(rank, 1);
-    std::copy(dims.begin(), dims.end(), &result[rank - dims.size()]);
-    return result;
-}
-
 void Snippet::initSupportedPrimitiveDescriptors() {
     copy_snippet();
     if (!supportedPrimitiveDescriptors.empty())
@@ -439,11 +432,11 @@ void Snippet::prepareParams() {
     // here must be all the stuff that could only be done for static shapes, e.g. offset calculation
     // Here it must be all the stuff that could be done once for both static and dynamic shapes
 
-    masterShape = prependWithOnes(masterShape, tensorRank);
+    masterShape = getNormalizedDimsBySize(masterShape, tensorRank);
     for (auto& pshape : normInputShapes)
-        pshape = prependWithOnes(pshape, tensorRank);
+        pshape = getNormalizedDimsBySize(pshape, tensorRank);
     for (auto& pshape : normOutputShapes)
-        pshape = prependWithOnes(pshape, tensorRank);
+        pshape = getNormalizedDimsBySize(pshape, tensorRank);
 
     tileRank = 1;
     fullWorkAmount = std::accumulate(masterShape.begin(), masterShape.end(), 1, std::multiplies<size_t>());
@@ -486,8 +479,6 @@ void Snippet::prepareParams() {
         dim = 1;
     }
 
-    // ov::pass::Serialize("tile_initial.xml", "tile_initial.bin").run_on_model(snippet->get_body());
-    //
     std::vector<ov::Shape> new_shapes;
     for (const auto& s : normInputShapes) {
         ov::Shape ns(tileRank, 0);
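Note: the removed Snippet::prependWithOnes duplicated the shared getNormalizedDimsBySize helper from utils/cpu_utils.hpp, which prepareParams now reuses. A minimal standalone sketch of the contract both implement, namely left-padding the dims with 1s up to the requested rank and passing already-long-enough dims through unchanged (the local prepend_with_ones name is hypothetical):

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

using VectorDims = std::vector<size_t>;

// Hypothetical stand-in for the behavior shared by the removed
// Snippet::prependWithOnes and the reused getNormalizedDimsBySize.
static VectorDims prepend_with_ones(const VectorDims& dims, size_t rank) {
    if (rank <= dims.size())
        return dims;                 // already at the requested rank (or higher)
    VectorDims result(rank, 1);      // start from all ones...
    std::copy(dims.begin(), dims.end(), result.end() - dims.size());  // ...keep dims right-aligned
    return result;
}

int main() {
    assert((prepend_with_ones({128, 30}, 4) == VectorDims{1, 1, 128, 30}));
    assert((prepend_with_ones({1, 128, 9, 30}, 4) == VectorDims{1, 128, 9, 30}));
    return 0;
}
```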
1 change: 0 additions & 1 deletion src/plugins/intel_cpu/src/nodes/subgraph.h
@@ -58,7 +58,6 @@ class Snippet : public Node {
     // NOTE: Before call mutex should be initialized
     void copy_snippet();
 
-    static VectorDims prependWithOnes(const VectorDims& dims, size_t rank);
     ov::PartialShape canonicalizeBody();
     void optimizeExecDomain(std::vector<VectorDims>&, std::vector<VectorDims>&, VectorDims&, size_t&) const;
     void calcJITParams(std::vector<int64_t>& offsets) const;
@@ -43,10 +43,13 @@ std::vector<std::vector<ov::Shape>> inShapesStatic{
         {{1, 128, 1, 17}, {1, 128, 1, 17}},
         {{1, 128, 1, 29}, {1, 128, 1, 29}},
         {{1, 128, 1, 33}, {1, 128, 1, 33}},
+        {{1, 128, 9, 30}, {1, 128, 1, 30}},
+        {{1, 128, 9, 1}, {1, 128, 1, 30}},
 };
 INSTANTIATE_TEST_SUITE_P(smoke_Snippets_Eltwise, AddSinhPair,
                          ::testing::Combine(
                                  ::testing::ValuesIn(inShapesStatic),
+                                 ::testing::Values(ov::element::f32),
                                  ::testing::Values(3), // Add + 2 converts after inputs
                                  ::testing::Values(1), // Subgraph is created, since the inputs are followed by converts
                                  ::testing::Values(CommonTestUtils::DEVICE_CPU)),
9 changes: 1 addition & 8 deletions src/tests/functional/plugin/shared/include/snippets/add.hpp
@@ -21,6 +21,7 @@ typedef std::tuple<

 typedef std::tuple<
         std::vector<ov::Shape>,        // Input 0, Input 1 Shape
+        ov::element::Type,             // Element type
         size_t,                        // Expected num nodes
         size_t,                        // Expected num subgraphs
         std::string                    // Target Device
@@ -34,14 +35,6 @@ typedef std::tuple<
         std::string                    // Target Device
 > AddConstParams;
 
-typedef std::tuple<
-        InputShape,                    // Input 0 Shape
-        InputShape,                    // Input 1 Shape
-        size_t,                        // Expected num nodes
-        size_t,                        // Expected num subgraphs
-        std::string                    // Target Device
-> AddDynamicParams;
-
 class Add : public testing::WithParamInterface<ov::test::snippets::AddParams>,
             virtual public ov::test::SnippetsTestsCommon {
 public:
8 changes: 6 additions & 2 deletions src/tests/functional/plugin/shared/src/snippets/add.cpp
@@ -91,14 +91,16 @@ void AddRollConst::SetUp() {

 std::string AddSinhPair::getTestCaseName(testing::TestParamInfo<ov::test::snippets::AddParamsPair> obj) {
     std::vector<ov::Shape> input_shapes;
+    ov::element::Type type;
     std::string targetDevice;
     size_t num_nodes, num_subgraphs;
-    std::tie(input_shapes, num_nodes, num_subgraphs, targetDevice) = obj.param;
+    std::tie(input_shapes, type, num_nodes, num_subgraphs, targetDevice) = obj.param;
     if (input_shapes.size() != 2)
         IE_THROW() << "Invalid input shapes vector size";
     std::ostringstream result;
     result << "IS[0]=" << CommonTestUtils::vec2str(input_shapes[0]) << "_";
     result << "IS[1]=" << CommonTestUtils::vec2str(input_shapes[1]) << "_";
+    result << "T=" << type << "_";
     result << "#N=" << num_nodes << "_";
     result << "#S=" << num_subgraphs << "_";
     result << "targetDevice=" << targetDevice;
@@ -107,14 +109,16 @@ std::string AddSinhPair::getTestCaseName(testing::TestParamInfo<ov::test::snippe

 void AddSinhPair::SetUp() {
     std::vector<ov::Shape> input_shapes;
-    std::tie(input_shapes, ref_num_nodes, ref_num_subgraphs, targetDevice) = this->GetParam();
+    ov::element::Type type;
+    std::tie(input_shapes, type, ref_num_nodes, ref_num_subgraphs, targetDevice) = this->GetParam();
     std::vector<InputShape> is;
     for (const auto& s : input_shapes) {
         is.emplace_back(InputShape {{}, {s, }});
     }
     init_input_shapes(is);
     auto f = ov::test::snippets::AddSinhFunction({input_shapes[0], input_shapes[1]});
     function = f.getOriginal();
+    setInferenceType(type);
 }

TEST_P(Add, CompareWithRefImpl) {
@@ -5,7 +5,6 @@
#include "shared_test_classes/base/snippets_test_utils.hpp"
#include "functional_test_utils/skip_tests_config.hpp"
#include "exec_graph_info.hpp"
#include "cpp_interfaces/interface/ie_internal_plugin_config.hpp"

namespace ov {
namespace test {
@@ -72,8 +72,9 @@ class AddSinhConstFunction : public SnippetsFunctionBase {
 // The function is needed to check different input element types (model precision change)
 class AddRollConstFunction : public SnippetsFunctionBase {
 public:
-    explicit AddRollConstFunction(const std::vector<Shape>& inputShapes) : SnippetsFunctionBase(inputShapes) {
+    explicit AddRollConstFunction(const std::vector<PartialShape>& inputShapes) : SnippetsFunctionBase(inputShapes) {
         NGRAPH_CHECK(input_shapes.size() == 1, "Got invalid number of input shapes");
+        NGRAPH_CHECK(input_shapes[0].is_static(), "Only static shapes are supported");
     }
 protected:
     std::shared_ptr<ov::Model> initOriginal() const override;
@@ -56,9 +56,10 @@ std::shared_ptr<ov::Model> AddSinhConstFunction::initOriginal() const {
     return std::make_shared<ov::Model>(NodeVector{add}, ParameterVector{data0});
 }
 std::shared_ptr<ov::Model> AddRollConstFunction::initOriginal() const {
-    auto data0 = std::make_shared<op::v0::Parameter>(precision, input_shapes[0]);
-    const std::vector<float> const_values = CommonTestUtils::generate_float_numbers(shape_size(input_shapes[0]), -10., 10.);
-    auto const_data1 = std::make_shared<op::v0::Constant>(precision, input_shapes[0], const_values);
+    const auto input_shape = input_shapes[0].get_shape();
+    auto data0 = std::make_shared<op::v0::Parameter>(precision, input_shape);
+    const std::vector<float> const_values = CommonTestUtils::generate_float_numbers(shape_size(input_shape), -10., 10.);
+    auto const_data1 = std::make_shared<op::v0::Constant>(precision, input_shape, const_values);
     auto shift = std::make_shared<op::v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector<float>{1});
     auto axes = std::make_shared<op::v0::Constant>(ov::element::i32, ov::Shape{1}, std::vector<float>{0});
     auto roll0 = std::make_shared<ov::op::v7::Roll>(data0, shift, axes);
