Skip to content

Commit

Permalink
Alexandra's comments applied: 2nd part
Browse files Browse the repository at this point in the history
  • Loading branch information
v-Golubev committed Jan 29, 2024
1 parent ccba54e commit d469e55
Show file tree
Hide file tree
Showing 10 changed files with 40 additions and 25 deletions.
14 changes: 4 additions & 10 deletions src/common/snippets/include/snippets/op/reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,12 @@ class ReduceSum : public ReduceBase {
ReduceSum(const Output<Node>& x, size_t axis) : ReduceBase(x, axis) {}
ReduceSum() = default;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
static std::set<ov::element::TypeVector> get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
return {{ov::element::f32}};
}
/**
* @brief Creates ReduceSum operation, computes and sets input/output subtensors
* @brief Creates ReduceSum operation, computes and sets subtensors to input/output PortDescriptors
* @param x Reduce input
* @param axis Reduce axis
*/
static std::shared_ptr<ReduceSum> make_reduce_sum(const Output<Node>& x, size_t axis);
static std::shared_ptr<ReduceSum> make(const Output<Node>& x, size_t axis);
};

class ReduceMax : public ReduceBase {
Expand All @@ -55,15 +52,12 @@ class ReduceMax : public ReduceBase {
ReduceMax(const Output<Node>& x, size_t axis) : ReduceBase(x, axis) {}
ReduceMax() = default;
std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
static std::set<ov::element::TypeVector> get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
return {{ov::element::f32}};
}
/**
* @brief Creates ReduceMax operation, computes and sets input/output subtensors
* @brief Creates ReduceMax operation, computes and sets subtensors to input/output PortDescriptors
* @param x Reduce input
* @param axis Reduce axis
*/
static std::shared_ptr<ReduceMax> make_reduce_max(const Output<Node>& x, size_t axis);
static std::shared_ptr<ReduceMax> make(const Output<Node>& x, size_t axis);
};

} // namespace op
Expand Down
6 changes: 6 additions & 0 deletions src/common/snippets/include/snippets/target_machine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ class TargetMachine {
*/
virtual size_t get_lanes() const = 0;

/**
* @brief reports supported precisions set for nodes which don't have emitters
* @param type node type for which the supported precisions set is requested
* @return set of supported precisions for the provided node type
*/
virtual std::set<ov::element::TypeVector> supported_precisions_for_emitterless_node(const ov::DiscreteTypeInfo& type) const;

/**
* @brief called by generator to get the emitter for a target machine
Expand Down
4 changes: 4 additions & 0 deletions src/common/snippets/src/lowered/target_machine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
#include "snippets/target_machine.hpp"

using namespace ov::snippets;
/**
 * @brief Default implementation for target machines that do not describe emitterless nodes.
 *        It always throws; a derived target machine that registers nodes without emitters
 *        is expected to override this method.
 * @param type node type for which the supported precisions set is requested;
 *             it is appended to the error message so the failing node type can be identified
 * @return never returns normally
 */
std::set<ov::element::TypeVector> TargetMachine::supported_precisions_for_emitterless_node(const ov::DiscreteTypeInfo& type) const {
    OPENVINO_THROW("supported_precisions_for_emitterless_node for this class is not implemented. Type info: ", type);
}

std::function<std::shared_ptr<Emitter>(const lowered::ExpressionPtr&)> TargetMachine::get(const ov::DiscreteTypeInfo& type) const {
auto jitter = jitters.find(type);
if (jitter == jitters.end()) {
Expand Down
4 changes: 2 additions & 2 deletions src/common/snippets/src/op/reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ std::shared_ptr<Node> ReduceSum::clone_with_new_inputs(const OutputVector& new_a
return std::make_shared<ReduceSum>(new_args.at(0), m_axis);
}

std::shared_ptr<ReduceSum> ReduceSum::make_reduce_sum(const Output<Node>& x, size_t axis) {
std::shared_ptr<ReduceSum> ReduceSum::make(const Output<Node>& x, size_t axis) {
const auto reduce = std::make_shared<ReduceSum>(x, axis);
compute_and_set_reduce_subtensors(reduce);
return reduce;
Expand All @@ -58,7 +58,7 @@ std::shared_ptr<Node> ReduceMax::clone_with_new_inputs(const OutputVector& new_a
return std::make_shared<ReduceMax>(new_args.at(0), m_axis);
}

std::shared_ptr<ReduceMax> ReduceMax::make_reduce_max(const Output<Node>& x, size_t axis) {
std::shared_ptr<ReduceMax> ReduceMax::make(const Output<Node>& x, size_t axis) {
const auto reduce = std::make_shared<ReduceMax>(x, axis);
compute_and_set_reduce_subtensors(reduce);
return reduce;
Expand Down
4 changes: 2 additions & 2 deletions src/common/snippets/src/pass/reduce_to_snippets_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ snippets::pass::ReduceToSnippetsReduce::ReduceToSnippetsReduce() {

std::shared_ptr<snippets::op::ReduceBase> snippets_reduce = nullptr;
if (ov::is_type<ov::op::v1::ReduceSum>(reduce))
snippets_reduce = ov::snippets::op::ReduceSum::make_reduce_sum(data_input, axis);
snippets_reduce = ov::snippets::op::ReduceSum::make(data_input, axis);
else if (ov::is_type<ov::op::v1::ReduceMax>(reduce))
snippets_reduce = ov::snippets::op::ReduceMax::make_reduce_max(data_input, axis);
snippets_reduce = ov::snippets::op::ReduceMax::make(data_input, axis);
else
OPENVINO_THROW("Reduce ", reduce, " can't be converted to snippets opset.");

Expand Down
4 changes: 2 additions & 2 deletions src/common/snippets/src/pass/softmax_decomposition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ SoftmaxDecomposition::SoftmaxDecomposition() {
}

const auto& softmax_input = softmax->input_value(0);
const auto reduce_max = ov::snippets::op::ReduceMax::make_reduce_max(softmax_input, axis);
const auto reduce_max = ov::snippets::op::ReduceMax::make(softmax_input, axis);
const auto subtract = std::make_shared<ov::op::v1::Subtract>(softmax_input, reduce_max);
const auto exp = std::make_shared<ov::op::v0::Exp>(subtract);

const auto reduce_sum = ov::snippets::op::ReduceSum::make_reduce_sum(exp, axis);
const auto reduce_sum = ov::snippets::op::ReduceSum::make(exp, axis);
const auto power = std::make_shared<ov::snippets::op::PowerStatic>(reduce_sum, -1.f);
const auto multiply = std::make_shared<ov::op::v1::Multiply>(exp, power);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,11 +142,11 @@ std::shared_ptr<ov::Model> MHABufferAllocationTest::GetModel() const {
const auto relu1 = std::make_shared<ov::op::v0::Relu>(matmul0);

// Decomposed Softmax
const auto reduce_max = ov::snippets::op::ReduceMax::make_reduce_max(relu1, 3);
const auto reduce_max = ov::snippets::op::ReduceMax::make(relu1, 3);
const auto subtract = std::make_shared<ov::op::v1::Subtract>(relu1, reduce_max);
const auto exp = std::make_shared<ov::op::v0::Exp>(subtract);

const auto reduce_sum = ov::snippets::op::ReduceSum::make_reduce_sum(exp, 3);
const auto reduce_sum = ov::snippets::op::ReduceSum::make(exp, 3);
const auto power = std::make_shared<ov::snippets::op::PowerStatic>(reduce_sum, -1.f);
const auto multiply = std::make_shared<ov::op::v1::Multiply>(exp, power);

Expand Down
20 changes: 15 additions & 5 deletions src/plugins/intel_cpu/src/emitters/snippets/x64/cpu_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,12 @@ static bool is_segfault_detector_emitter(const intel_cpu::jit_emitter *emitter)
} \
}

#define CREATE_UNDEFINED_EMITTER(node_type) { \
#define CREATE_UNDEFINED_EMITTER() { \
[](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr<snippets::Emitter> { \
return nullptr; \
}, \
[](const std::shared_ptr<ov::Node>& n) -> std::set<std::vector<element::Type>> { \
return node_type::get_supported_precisions(n); \
[this](const std::shared_ptr<ov::Node>& n) -> std::set<std::vector<element::Type>> { \
return supported_precisions_for_emitterless_node(n->get_type_info()); \
} \
}

Expand Down Expand Up @@ -213,8 +213,8 @@ intel_cpu::CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::x64::cpu_isa_t ho
jitters[snippets::op::LoopEnd::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_loop_end_emitter);
jitters[intel_cpu::BrgemmCPU::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_brgemm_emitter);
jitters[intel_cpu::BrgemmCopyB::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(intel_cpu::jit_brgemm_copy_b_emitter);
jitters[snippets::op::ReduceMax::get_type_info_static()] = CREATE_UNDEFINED_EMITTER(snippets::op::ReduceMax);
jitters[snippets::op::ReduceSum::get_type_info_static()] = CREATE_UNDEFINED_EMITTER(snippets::op::ReduceSum);
jitters[snippets::op::ReduceMax::get_type_info_static()] = CREATE_UNDEFINED_EMITTER();
jitters[snippets::op::ReduceSum::get_type_info_static()] = CREATE_UNDEFINED_EMITTER();

#ifdef SNIPPETS_DEBUG_CAPS
jitters[snippets::op::PerfCountBegin::get_type_info_static()] = CREATE_CPU_EMITTER(ov::intel_cpu::jit_perf_count_chrono_start_emitter);
Expand All @@ -241,6 +241,16 @@ bool intel_cpu::CPUTargetMachine::is_supported() const {
return dnnl::impl::cpu::x64::mayiuse(isa);
}

/**
 * @brief Reports the supported precisions set for a node type that is registered without an emitter.
 *        The lookup table below currently lists snippets::op::ReduceMax and snippets::op::ReduceSum,
 *        each with a single {f32} precision combination.
 * @param type type info of the node for which the supported precisions set is requested
 * @return copy of the precisions set stored in the table for the provided type
 */
std::set<ov::element::TypeVector> intel_cpu::CPUTargetMachine::supported_precisions_for_emitterless_node(const ov::DiscreteTypeInfo& type) const {
// Function-local static: the table is constructed once and shared by all calls.
static const std::map<ov::DiscreteTypeInfo, std::set<ov::element::TypeVector>> supported_precisions_map{
{snippets::op::ReduceMax::get_type_info_static(), {{ov::element::f32}}},
{snippets::op::ReduceSum::get_type_info_static(), {{ov::element::f32}}},
};
auto it = supported_precisions_map.find(type);
// A type missing from the table is reported via OPENVINO_ASSERT instead of returning an empty set.
// NOTE(review): presumably every type registered with CREATE_UNDEFINED_EMITTER must also be listed here - confirm.
OPENVINO_ASSERT(it != supported_precisions_map.end(), "supported precisions set for node without emitter is not set. Type info: ", type);
return it->second;
}

snippets::CompiledSnippetPtr intel_cpu::CPUTargetMachine::get_snippet() {
if (h->create_kernel() != dnnl::impl::status::success) {
OPENVINO_THROW("Failed to create jit_kernel in get_snippet()");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class CPUTargetMachine : public snippets::TargetMachine {
snippets::CompiledSnippetPtr get_snippet() override;
size_t get_lanes() const override;
dnnl::impl::cpu::x64::cpu_isa_t get_isa() const;
std::set<ov::element::TypeVector> supported_precisions_for_emitterless_node(const ov::DiscreteTypeInfo& type) const override;
#ifdef SNIPPETS_DEBUG_CAPS
SnippetsDebugCapsConfig debug_config;
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,11 +156,11 @@ class MHABF16AMXBufferAllocationTest : public BufferAllocationCPUTest {
const auto relu1 = std::make_shared<ov::op::v0::Relu>(brgemm_cpu0);

// Decomposed Softmax
const auto reduce_max = ov::snippets::op::ReduceMax::make_reduce_max(relu1, 3);
const auto reduce_max = ov::snippets::op::ReduceMax::make(relu1, 3);
const auto subtract = std::make_shared<ov::op::v1::Subtract>(relu1, reduce_max);
const auto exp = std::make_shared<ov::op::v0::Exp>(subtract);

const auto reduce_sum = ov::snippets::op::ReduceSum::make_reduce_sum(exp, 3);
const auto reduce_sum = ov::snippets::op::ReduceSum::make(exp, 3);
const auto power = std::make_shared<ov::snippets::op::PowerStatic>(reduce_sum, -1.f);
const auto multiply = std::make_shared<ov::op::v1::Multiply>(exp, power);

Expand Down

0 comments on commit d469e55

Please sign in to comment.