Skip to content

Commit

Permalink
Alexandra's comments applied: 1st part
Browse files Browse the repository at this point in the history
  • Loading branch information
v-Golubev committed Jan 26, 2024
1 parent a23670a commit 5961ea2
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 31 deletions.
14 changes: 13 additions & 1 deletion src/common/snippets/include/snippets/op/reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ namespace op {
/**
* @interface ReduceBase
* @brief Base class for reduce operations.
* @arg m_axis reduce axis.
* @param m_axis reduce axis.
* @ingroup snippets
*/
class ReduceBase : public ov::op::Op {
Expand All @@ -41,6 +41,12 @@ class ReduceSum : public ReduceBase {
static std::set<ov::element::TypeVector> get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
return {{ov::element::f32}};
}
/**
* @brief Creates ReduceSum operation, computes and sets input/output subtensors
* @param x Reduce input
* @param axis Reduce axis
*/
static std::shared_ptr<ReduceSum> make_reduce_sum(const Output<Node>& x, size_t axis);
};

class ReduceMax : public ReduceBase {
Expand All @@ -52,6 +58,12 @@ class ReduceMax : public ReduceBase {
static std::set<ov::element::TypeVector> get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
return {{ov::element::f32}};
}
/**
* @brief Creates ReduceMax operation, computes and sets input/output subtensors
* @param x Reduce input
* @param axis Reduce axis
*/
static std::shared_ptr<ReduceMax> make_reduce_max(const Output<Node>& x, size_t axis);
};

} // namespace op
Expand Down
30 changes: 28 additions & 2 deletions src/common/snippets/src/op/reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,28 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "snippets/itt.hpp"

#include "snippets/op/reduce.hpp"

#include "snippets/itt.hpp"
#include "snippets/lowered/port_descriptor.hpp"

namespace ov {
namespace snippets {
namespace op {
namespace {
// Computes the subtensor for a Reduce op and attaches it to the op's input and
// output port descriptors: dimensions before the reduce axis get size 1, while
// the axis itself and every trailing dimension get FULL_DIM.
// The input rank must be static; asserts otherwise.
void compute_and_set_reduce_subtensors(const std::shared_ptr<ReduceBase>& reduce) {
OPENVINO_ASSERT(reduce->get_input_partial_shape(0).rank().is_static(),
"Subtensors can be automatically calculated only for reduce with static rank.");
const auto reduce_rank = reduce->get_input_partial_shape(0).size();
const auto axis = reduce->get_axis();

// Start with an all-ones subtensor, then mark the axis and all dims after it as full.
std::vector<size_t> subtensor(reduce_rank, 1);
for (size_t i = axis; i < reduce_rank; ++i)
subtensor[i] = lowered::PortDescriptor::ServiceDimensions::FULL_DIM;
// Register the same subtensor on both the input and the output port of the op.
lowered::PortDescriptorUtils::set_port_descriptor_ptr(reduce->input(0), std::make_shared<lowered::PortDescriptor>(reduce->input(0), subtensor));
lowered::PortDescriptorUtils::set_port_descriptor_ptr(reduce->output(0), std::make_shared<lowered::PortDescriptor>(reduce->output(0), subtensor));
}
} // namespace

ReduceBase::ReduceBase(const Output<Node>& x, size_t axis) : Op({x}), m_axis(axis) {
constructor_validate_and_infer_types();
Expand All @@ -32,12 +46,24 @@ std::shared_ptr<Node> ReduceSum::clone_with_new_inputs(const OutputVector& new_a
return std::make_shared<ReduceSum>(new_args.at(0), m_axis);
}

// Factory helper: constructs a ReduceSum over `x` along `axis` and immediately
// computes and sets its input/output subtensors, so callers do not have to
// configure port descriptors manually.
std::shared_ptr<ReduceSum> ReduceSum::make_reduce_sum(const Output<Node>& x, size_t axis) {
const auto reduce = std::make_shared<ReduceSum>(x, axis);
compute_and_set_reduce_subtensors(reduce);
return reduce;
}

// Clones this op onto new inputs, preserving the stored reduce axis (m_axis).
std::shared_ptr<Node> ReduceMax::clone_with_new_inputs(const OutputVector& new_args) const {
INTERNAL_OP_SCOPE(ReduceMax);
check_new_args_count(this, new_args);
return std::make_shared<ReduceMax>(new_args.at(0), m_axis);
}

// Factory helper: constructs a ReduceMax over `x` along `axis` and immediately
// computes and sets its input/output subtensors, so callers do not have to
// configure port descriptors manually.
std::shared_ptr<ReduceMax> ReduceMax::make_reduce_max(const Output<Node>& x, size_t axis) {
const auto reduce = std::make_shared<ReduceMax>(x, axis);
compute_and_set_reduce_subtensors(reduce);
return reduce;
}

} // namespace op
} // namespace snippets
} // namespace ov
4 changes: 2 additions & 2 deletions src/common/snippets/src/pass/collapse_subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,10 @@ auto is_supported_op(const std::shared_ptr<const Node> &n) -> bool {
if (ov::is_type<const ov::op::v1::ReduceMax>(n) || ov::is_type<const ov::op::v1::ReduceSum>(n)) {
const auto& reduce_base = ov::as_type_ptr<const ov::op::util::ArithmeticReductionKeepDims>(n);
const auto& axis_constant = ov::as_type_ptr<const ov::op::v0::Constant>(n->get_input_node_shared_ptr(1));
if (!reduce_base->get_keep_dims() || !axis_constant || shape_size(axis_constant->get_shape()) != 1)
const auto rank = n->get_input_partial_shape(0).rank();
if (rank.is_dynamic() || !reduce_base->get_keep_dims() || !axis_constant || shape_size(axis_constant->get_shape()) != 1)
return false;

const auto rank = n->get_input_partial_shape(0).rank();
const auto axis_value = axis_constant->cast_vector<int32_t>(1)[0];
const auto normalized_axis = ov::util::normalize_axis(n->get_friendly_name(), axis_value, rank);
// Note: Reduction only over the last dimension is currently supported
Expand Down
12 changes: 4 additions & 8 deletions src/common/snippets/src/pass/reduce_to_snippets_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,24 +29,20 @@ snippets::pass::ReduceToSnippetsReduce::ReduceToSnippetsReduce() {
const auto& axis_constant = ov::as_type_ptr<ov::op::v0::Constant>(reduce->get_input_node_shared_ptr(1));
// Note: we do not check the Constant value here. If the Reduce was tokenized, then we assume that it is supported
OPENVINO_ASSERT(reduce_base->get_keep_dims() && axis_constant, "Unsupported Reduce was tokenized by Snippets");

const auto& data_input = reduce->get_input_source_output(0);
const auto reduce_rank = reduce->get_input_partial_shape(0).rank();
OPENVINO_ASSERT(reduce_rank.is_static(), "ReduceToSnippetsReduce doesn't support dynamic ranks.");
const auto axis = ov::util::normalize_axis(reduce->get_friendly_name(), axis_constant->cast_vector<int32_t>(1)[0], reduce_rank);

std::shared_ptr<snippets::op::ReduceBase> snippets_reduce = nullptr;
if (ov::is_type<ov::op::v1::ReduceSum>(reduce))
snippets_reduce = std::make_shared<snippets::op::ReduceSum>(data_input, axis);
snippets_reduce = ov::snippets::op::ReduceSum::make_reduce_sum(data_input, axis);
else if (ov::is_type<ov::op::v1::ReduceMax>(reduce))
snippets_reduce = std::make_shared<snippets::op::ReduceMax>(data_input, axis);
snippets_reduce = ov::snippets::op::ReduceMax::make_reduce_max(data_input, axis);
else
OPENVINO_THROW("Reduce ", reduce, " can't be converted to snippets opset.");

std::vector<size_t> subtensor(reduce_rank.get_length(), 1);
for (auto i = axis; i < reduce_rank.get_length(); ++i)
subtensor[i] = PortDescriptor::ServiceDimensions::FULL_DIM;
PortDescriptorUtils::set_port_descriptor_ptr(snippets_reduce->input(0), std::make_shared<PortDescriptor>(snippets_reduce->input(0), subtensor));
PortDescriptorUtils::set_port_descriptor_ptr(snippets_reduce->output(0), std::make_shared<PortDescriptor>(snippets_reduce->output(0), subtensor));

ov::replace_node(reduce, snippets_reduce);
snippets_reduce->set_friendly_name(reduce->get_friendly_name());
ov::copy_runtime_info(reduce, snippets_reduce);
Expand Down
8 changes: 2 additions & 6 deletions src/common/snippets/src/pass/softmax_decomposition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ SoftmaxDecomposition::SoftmaxDecomposition() {
}

const auto& softmax_input = softmax->input_value(0);
const auto reduce_max = std::make_shared<ov::snippets::op::ReduceMax>(softmax_input, axis);
const auto reduce_max = ov::snippets::op::ReduceMax::make_reduce_max(softmax_input, axis);
const auto subtract = std::make_shared<ov::op::v1::Subtract>(softmax_input, reduce_max);
const auto exp = std::make_shared<ov::op::v0::Exp>(subtract);

const auto reduce_sum = std::make_shared<ov::snippets::op::ReduceSum>(exp, axis);
const auto reduce_sum = ov::snippets::op::ReduceSum::make_reduce_sum(exp, axis);
const auto power = std::make_shared<ov::snippets::op::PowerStatic>(reduce_sum, -1.f);
const auto multiply = std::make_shared<ov::op::v1::Multiply>(exp, power);

Expand All @@ -55,10 +55,6 @@ SoftmaxDecomposition::SoftmaxDecomposition() {
for (size_t i = axis; i < rank; ++i)
subtensor[i] = PortDescriptor::ServiceDimensions::FULL_DIM;

PortDescriptorUtils::set_port_descriptor_ptr(reduce_max->input(0), std::make_shared<PortDescriptor>(reduce_max->input(0), subtensor));
PortDescriptorUtils::set_port_descriptor_ptr(reduce_max->output(0), std::make_shared<PortDescriptor>(reduce_max->output(0), subtensor));
PortDescriptorUtils::set_port_descriptor_ptr(reduce_sum->input(0), std::make_shared<PortDescriptor>(reduce_sum->input(0), subtensor));
PortDescriptorUtils::set_port_descriptor_ptr(reduce_sum->output(0), std::make_shared<PortDescriptor>(reduce_sum->output(0), subtensor));
PortDescriptorUtils::set_port_descriptor_ptr(power->input(0), std::make_shared<PortDescriptor>(power->input(0), subtensor));
PortDescriptorUtils::set_port_descriptor_ptr(power->output(0), std::make_shared<PortDescriptor>(power->output(0), subtensor));

Expand Down
10 changes: 4 additions & 6 deletions src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ std::shared_ptr<ov::Model> MHABufferAllocationTest::GetModel() const {
const auto subtensor_scalar = std::vector<size_t>{1};
const auto subtensor_eltwise = std::vector<size_t>{1, m_vector_size};
const auto subtensor_brgemm = std::vector<size_t>{32, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM};
const auto subtensor_softmax = std::vector<size_t>{1, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM};
const auto subtensor_power = std::vector<size_t>{1, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM};

const auto parameter0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape({1, 12, 128, 64}));
const auto parameter1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape({1, 128, 12, 64}));
Expand All @@ -142,11 +142,11 @@ std::shared_ptr<ov::Model> MHABufferAllocationTest::GetModel() const {
const auto relu1 = std::make_shared<ov::op::v0::Relu>(matmul0);

// Decomposed Softmax
const auto reduce_max = std::make_shared<ov::snippets::op::ReduceMax>(relu1, 3);
const auto reduce_max = ov::snippets::op::ReduceMax::make_reduce_max(relu1, 3);
const auto subtract = std::make_shared<ov::op::v1::Subtract>(relu1, reduce_max);
const auto exp = std::make_shared<ov::op::v0::Exp>(subtract);

const auto reduce_sum = std::make_shared<ov::snippets::op::ReduceSum>(exp, 3);
const auto reduce_sum = ov::snippets::op::ReduceSum::make_reduce_sum(exp, 3);
const auto power = std::make_shared<ov::snippets::op::PowerStatic>(reduce_sum, -1.f);
const auto multiply = std::make_shared<ov::op::v1::Multiply>(exp, power);

Expand All @@ -157,9 +157,7 @@ std::shared_ptr<ov::Model> MHABufferAllocationTest::GetModel() const {

MarkOp(load_reshape, subtensor_scalar);
MarkOp(store, subtensor_scalar);
MarkOp(reduce_max, subtensor_softmax);
MarkOp(reduce_sum, subtensor_softmax);
MarkOp(power, subtensor_softmax);
MarkOp(power, subtensor_power);

MarkBrgemm(matmul0, subtensor_brgemm);
MarkBrgemm(matmul1, subtensor_brgemm);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ class MHABF16AMXBufferAllocationTest : public BufferAllocationCPUTest {
protected:
std::shared_ptr<ov::Model> GetModel() const override {
const auto subtensor_scalar = std::vector<size_t>{1};
const auto subtensor_softmax = std::vector<size_t>{1, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM};
const auto subtensor_power = std::vector<size_t>{1, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM};
const auto subtensor_full = std::vector<size_t>(2, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM);

const auto parameter0 = std::make_shared<ov::op::v0::Parameter>(ov::element::bf16, ov::PartialShape({1, 12, 128, 64}));
Expand All @@ -156,11 +156,11 @@ class MHABF16AMXBufferAllocationTest : public BufferAllocationCPUTest {
const auto relu1 = std::make_shared<ov::op::v0::Relu>(brgemm_cpu0);

// Decomposed Softmax
const auto reduce_max = std::make_shared<ov::snippets::op::ReduceMax>(relu1, 3);
const auto reduce_max = ov::snippets::op::ReduceMax::make_reduce_max(relu1, 3);
const auto subtract = std::make_shared<ov::op::v1::Subtract>(relu1, reduce_max);
const auto exp = std::make_shared<ov::op::v0::Exp>(subtract);

const auto reduce_sum = std::make_shared<ov::snippets::op::ReduceSum>(exp, 3);
const auto reduce_sum = ov::snippets::op::ReduceSum::make_reduce_sum(exp, 3);
const auto power = std::make_shared<ov::snippets::op::PowerStatic>(reduce_sum, -1.f);
const auto multiply = std::make_shared<ov::op::v1::Multiply>(exp, power);

Expand All @@ -181,9 +181,7 @@ class MHABF16AMXBufferAllocationTest : public BufferAllocationCPUTest {

MarkOp(load_reshape, subtensor_scalar);
MarkOp(store, subtensor_scalar);
MarkOp(reduce_max, subtensor_softmax);
MarkOp(reduce_sum, subtensor_softmax);
MarkOp(power, subtensor_softmax);
MarkOp(power, subtensor_power);

MarkOp(brgemm_cpu0, subtensor_full);
MarkOp(brgemm_cpu1, subtensor_full);
Expand Down

0 comments on commit 5961ea2

Please sign in to comment.