[Snippets][CPU] Supported Brgemm subtensor update in runtime
a-sidorova committed Jul 4, 2024
1 parent 5cb8116 commit 0453a1c
Showing 7 changed files with 139 additions and 19 deletions.
16 changes: 10 additions & 6 deletions src/common/snippets/include/snippets/kernel_executor_table.hpp
@@ -75,17 +75,21 @@ class KernelExecutor : public snippets::KernelExecutorBase {
void update_by_expression(const ov::snippets::lowered::ExpressionPtr& expr) override final { // NOLINT
m_config = std::static_pointer_cast<Conf>(m_config->clone());
update_config(expr, m_config);
OPENVINO_ASSERT(m_config && m_config->is_completed(), "Failed to update kernel config in update_by_expression");
update_kernel(m_config, m_kernel);
OPENVINO_ASSERT(m_kernel, "Failed to compile kernel executor");
OPENVINO_ASSERT(m_config, "Failed to update kernel config in update_by_expression");
if (m_config->is_completed()) {
update_kernel(m_config, m_kernel);
OPENVINO_ASSERT(m_kernel, "Failed to compile kernel executor");
}
}
void update_by_config(const std::shared_ptr<const GenericConfig>& new_config) override final { // NOLINT
if (*m_config == *new_config)
return;
m_config = std::static_pointer_cast<Conf>(std::const_pointer_cast<GenericConfig>(new_config));
OPENVINO_ASSERT(m_config && m_config->is_completed(), "Failed to update kernel config in get_config");
update_kernel(m_config, m_kernel);
OPENVINO_ASSERT(m_kernel, "Failed to compile kernel executor");
OPENVINO_ASSERT(m_config, "Failed to update kernel config in update_by_config");
if (m_config->is_completed()) {
update_kernel(m_config, m_kernel);
OPENVINO_ASSERT(m_kernel, "Failed to compile kernel executor");
}
}
std::shared_ptr<const GenericConfig> get_config() const override { return m_config; }
std::shared_ptr<const KernelType> get_kernel() const { return m_kernel; }
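
Editorial note (not part of the diff): the reworked methods defer kernel compilation until the config reports is_completed(). A minimal sketch, assuming a hypothetical Brgemm-like config whose M dimension may still be unknown, of what "completed" means here:

// Minimal editorial sketch; names are illustrative, not from the patch.
#include <cstddef>
#include <limits>

struct ExampleBrgemmConfig {
    static constexpr size_t DYNAMIC_DIM = std::numeric_limits<size_t>::max();
    size_t M = DYNAMIC_DIM;  // unknown until runtime
    size_t N = 64;
    size_t K = 64;

    // Mirrors the is_completed() contract used above: no dynamic values remain.
    bool is_completed() const {
        return M != DYNAMIC_DIM && N != DYNAMIC_DIM && K != DYNAMIC_DIM;
    }
};

With this change, update_by_expression() can legitimately run on an incomplete config (e.g. with dynamic M); the kernel is only (re)compiled once a later update sees a completed config.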
37 changes: 32 additions & 5 deletions src/common/snippets/src/lowered/pass/propagate_subtensors.cpp
@@ -16,17 +16,42 @@ namespace lowered {
namespace pass {
namespace {

// SIZE_MAX - dynamic value
constexpr size_t DEFAULT_VALUE = SIZE_MAX - 1;
// The algorithm uses the following special values in subtensors/shapes:
// 1. Dynamic value in subtensor/shape : SIZE_MAX
// 2. Full dimension in subtensor : SIZE_MAX - 1
// 3. Default value of `new_dim_value` : SIZE_MAX - 2
// 4. `Forced` special dynamic value : SIZE_MAX - 3
//
// We have to introduce `FORCED_DYNAMIC_VALUE` to distinguish `new_dim_value = DYNAMIC`
// from the real dynamic values in subtensors and shapes and to force this value in subtensors.
// For example, there is a Brgemm with the following info in the tail Loop:
//   Input 0: shape [?, ?], existing subtensor [32, FULL_DIM]
//   Input 1: shape [?, ?], existing subtensor [FULL_DIM, FULL_DIM]
//   Output : shape [?, ?], existing subtensor [32, FULL_DIM]
// If the user wants to force `?` in place of `32` in the subtensors, the steps are:
// 1. Set `?` in the subtensor and shape of Input 0:
//    shape [?, ?] (the shape has not been changed!), new subtensor [?, FULL_DIM]
// 2. Run shape inference of Brgemm and get the Output:
//    shape [?, ?] (the shape has not been changed!), existing subtensor [32, FULL_DIM]
// 3. Update the output subtensor using the shape:
//    new_subtensor[i] = std::min(planar_shape[i], subtensor[i]); // i = 0: std::min(SIZE_MAX(?), 32)
//    new subtensor [32, FULL_DIM] - has not been changed! But it should be [?, FULL_DIM]
// Conclusion: we have to distinguish the forced dynamic value from the existing dynamic values in shapes and subtensors.

constexpr size_t NEW_DEFAULT_VALUE = SIZE_MAX - 2;
constexpr size_t FORCED_DYNAMIC_VALUE = SIZE_MAX - 3;

void propagate_updated_subtensor_through_loop(const LinearIR& linear_ir,
const LoopInfoPtr& loop_info,
LinearIR::container::const_iterator begin,
LinearIR::container::const_iterator end,
bool most_outer_loop,
const size_t new_dim_value = DEFAULT_VALUE) {
OPENVINO_ASSERT(snippets::utils::implication(most_outer_loop, new_dim_value != DEFAULT_VALUE),
size_t new_dim_value = NEW_DEFAULT_VALUE) {
// Replace a dynamic `new_dim_value` with the forced dynamic marker so it can be distinguished from real dynamic dimensions
new_dim_value = utils::is_dynamic_value(new_dim_value) ? FORCED_DYNAMIC_VALUE : new_dim_value;
OPENVINO_ASSERT(snippets::utils::implication(most_outer_loop, new_dim_value != NEW_DEFAULT_VALUE),
"if the updated subtensor propagation was called for the outer loop, new_dim_value must not be equal to default value");

std::map<lowered::PortDescriptorPtr, snippets::VectorDims> original_shapes;
// First step: set new dim value to the corresponding input_ports' dimensions
if (most_outer_loop) {
@@ -82,7 +107,9 @@ void propagate_updated_subtensor_through_loop(const LinearIR& linear_ir,
const size_t subtensor_start = planar_dims.size() - subtensor.size();
VectorDims new_subtensor(planar_dims.begin() + subtensor_start, planar_dims.end());
for (size_t i = 0; i < new_subtensor.size(); ++i) {
new_subtensor[i] = std::min(new_subtensor[i], subtensor[i]);
// If the user forced a dynamic value in the subtensor, set a real dynamic dimension using `get_dynamic_value<size_t>()`
new_subtensor[i] = new_subtensor[i] == FORCED_DYNAMIC_VALUE ? utils::get_dynamic_value<size_t>()
: std::min(new_subtensor[i], subtensor[i]);
}
desc->set_subtensor(new_subtensor);
}
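
Editorial sketch (not part of the diff): a simplified, standalone version of the output-subtensor update above, showing why the forced marker is needed; the constant values mirror the ones defined in this pass.

// Illustrative sketch of the update performed above.
#include <algorithm>
#include <cstddef>
#include <limits>
#include <vector>

constexpr size_t DYN        = std::numeric_limits<size_t>::max();      // real dynamic value
constexpr size_t FORCED_DYN = std::numeric_limits<size_t>::max() - 3;  // FORCED_DYNAMIC_VALUE

std::vector<size_t> update_subtensor(const std::vector<size_t>& planar_dims,
                                     const std::vector<size_t>& subtensor) {
    const size_t start = planar_dims.size() - subtensor.size();
    std::vector<size_t> result(planar_dims.begin() + start, planar_dims.end());
    for (size_t i = 0; i < result.size(); ++i)
        result[i] = result[i] == FORCED_DYN ? DYN : std::min(result[i], subtensor[i]);
    return result;
}

// With a plain dynamic shape value, std::min(DYN, 32) keeps 32, so the old subtensor value survives.
// With the forced marker, the dimension is explicitly reset to DYN, as the comment above requires.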
3 changes: 2 additions & 1 deletion src/common/snippets/src/op/serialization_node.cpp
@@ -49,7 +49,8 @@ bool SerializationNode::visit_attributes(AttributeVisitor &visitor) {
std::stringstream ss;
for (size_t i = 0; i < subtensor.size(); ++i) {
const auto& v = subtensor[i];
const auto v_str = (v == lowered::PortDescriptor::ServiceDimensions::FULL_DIM) ? "FULL_DIM" : std::to_string(v);
const auto v_str = v == lowered::PortDescriptor::ServiceDimensions::FULL_DIM ? "FULL_DIM" :
(utils::is_dynamic_value(v) ? "?" : std::to_string(v));
const auto del = i < subtensor.size() - 1 ? ", " : "";
ss << v_str << del;
}
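
Editorial note (not part of the diff): with this change, a subtensor such as {32, FULL_DIM, <dynamic>} would serialize as `32, FULL_DIM, ?` instead of printing the raw SIZE_MAX-based sentinel for the dynamic dimension.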
@@ -4,6 +4,7 @@

#include "emitters/snippets/cpu_runtime_configurator.hpp"

#include "transformations/snippets/x64/op/brgemm_cpu.hpp"
#include "snippets/utils.hpp"
#include "snippets/lowered/loop_manager.hpp"

@@ -18,20 +19,50 @@ void CPURuntimeConfigurator::update(const std::shared_ptr<ov::snippets::lowered:
RuntimeConfigurator::update(linear_ir);

if (linear_ir->is_dynamic()) {
const auto& loop_manager = linear_ir->get_loop_manager();
update_loop_args(loop_manager);
update_brgemms(loop_manager);
get_kernel_executor_table()->update_state();
update_loop_args(linear_ir);
}
}
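
Editorial note (not part of the diff): update_brgemms() is invoked before get_kernel_executor_table()->update_state(), so the refreshed Brgemm subtensors are presumably meant to be visible to the kernel executors when their configs are updated.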

void CPURuntimeConfigurator::initialization(const std::shared_ptr<ov::snippets::lowered::LinearIR>& linear_ir) {
RuntimeConfigurator::initialization(linear_ir);

for (const auto& expr : *linear_ir) {
// At the moment only blocking by dynamic M is supported
if (ov::is_type<ov::intel_cpu::BrgemmCPU>(expr->get_node())) {
const auto& in0_desc = expr->get_input_port_descriptor(0);
const auto& in1_desc = expr->get_input_port_descriptor(1);
const auto& out_desc = expr->get_output_port_descriptor(0);

const auto& in0_subtensor = in0_desc->get_subtensor();
const auto& in1_subtensor = in1_desc->get_subtensor();
const auto& out_subtensor = out_desc->get_subtensor();

OPENVINO_ASSERT(!snippets::utils::is_dynamic_value(*in0_subtensor.crbegin()) &&
!snippets::utils::is_dynamic_value(*in1_subtensor.crbegin()) &&
!snippets::utils::is_dynamic_value(*(++in1_subtensor.crbegin())) &&
!snippets::utils::is_dynamic_value(*out_subtensor.crbegin()),
"CPURuntimeConfigurator supports only dynamic M in Brgemm subtensors");
OPENVINO_ASSERT(*(++in0_subtensor.crbegin()) == *(++out_subtensor.crbegin()),
"Incorrect values in subtensors of BrgemmCPU");

if (snippets::utils::is_dynamic_value(*(++in0_subtensor.crbegin())))
m_dynamic_brgemms.push_back(expr);
}
}
}
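
Editorial sketch (not part of the diff): a hypothetical helper that names the subtensor positions the asserts above read, assuming the usual planar GEMM layouts in0 = [M, K], in1 = [K, N], out = [M, N].

// Hypothetical illustration of the reverse-iterator accesses used in the checks above.
#include <cstddef>
#include <vector>

struct BrgemmSubtensorView {
    std::vector<size_t> in0, in1, out;  // last two dimensions of each port's subtensor

    size_t k_in0() const { return *in0.crbegin(); }    // must be static
    size_t m_in0() const { return *++in0.crbegin(); }  // may be dynamic -> blocking by M
    size_t n_in1() const { return *in1.crbegin(); }    // must be static
    size_t k_in1() const { return *++in1.crbegin(); }  // must be static
    size_t n_out() const { return *out.crbegin(); }    // must be static
    size_t m_out() const { return *++out.crbegin(); }  // must equal m_in0()
};

Expressions whose M position is dynamic are cached in m_dynamic_brgemms so that update() can refresh them on every shape change.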

void CPURuntimeConfigurator::init_tensor_rank(const std::shared_ptr<ov::snippets::lowered::LinearIR>& linear_ir) const {
m_config->tensor_rank = std::max(linear_ir->get_master_shape().size(), rank6D);
}

void CPURuntimeConfigurator::update_loop_args(const std::shared_ptr<ov::snippets::lowered::LinearIR>& linear_ir) const {
void CPURuntimeConfigurator::update_loop_args(const ov::snippets::lowered::LoopManagerPtr& loop_manager) const {
const auto& cpu_config = ov::as_type_ptr<CPURuntimeConfig>(m_config);
OPENVINO_ASSERT(cpu_config, "CPURuntimeConfigurator expects CPURuntimeConfig");

const auto& loop_map = linear_ir->get_loop_manager()->get_map();
const auto& loop_map = loop_manager->get_map();
cpu_config->loop_args.resize(loop_map.size());
for (const auto& loop : loop_map) {
const auto& idx = loop.first;
@@ -50,5 +81,24 @@ void CPURuntimeConfigurator::update_loop_args(const std::shared_ptr<ov::snippets
}
}

void CPURuntimeConfigurator::update_brgemms(const ov::snippets::lowered::LoopManagerPtr& loop_manager) const {
for (const auto& brgemm_expr : m_dynamic_brgemms) {
const auto& loop_ids = brgemm_expr->get_loop_ids();
OPENVINO_ASSERT(!loop_ids.empty(), "Dynamic Brgemm must be in loops");
const auto& expanded_loop_info = loop_manager->get_loop_info<snippets::lowered::ExpandedLoopInfo>(loop_ids.front());
const auto& block_size_m = expanded_loop_info->get_work_amount();

const auto& in_desc = brgemm_expr->get_input_port_descriptor(0);
const auto& out_desc = brgemm_expr->get_output_port_descriptor(0);

auto in_subtensor = in_desc->get_subtensor();
auto out_subtensor = out_desc->get_subtensor();
*++in_subtensor.rbegin() = block_size_m;
*++out_subtensor.rbegin() = block_size_m;
in_desc->set_subtensor(in_subtensor);
out_desc->set_subtensor(out_subtensor);
}
}
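
Editorial sketch (not part of the diff): the write pattern used above in standalone form; the M block size is the work amount of the outermost (M) loop and lands in the second-to-last subtensor dimension.

// Illustrative sketch of the subtensor update performed in update_brgemms().
#include <cstddef>
#include <vector>

void set_m_block(std::vector<size_t>& subtensor, size_t block_size_m) {
    // The subtensor holds [..., M, K] for input 0 and [..., M, N] for the output,
    // so M is addressed via the second-to-last position.
    *++subtensor.rbegin() = block_size_m;
}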

} // namespace intel_cpu
} // namespace ov
@@ -30,6 +30,11 @@ class CPURuntimeConfigurator : public ov::snippets::RuntimeConfigurator {
* @param linear_ir LinearIR
*/
void update(const std::shared_ptr<ov::snippets::lowered::LinearIR>& linear_ir) override;
/**
* @brief Allocate and initialize fields in RuntimeConfig and RuntimeConfigurator
* @param linear_ir LinearIR
*/
void initialization(const std::shared_ptr<ov::snippets::lowered::LinearIR>& linear_ir) override;
/**
* @brief Initializes tensor rank of config
* @param linear_ir LinearIR
@@ -39,9 +44,14 @@ class CPURuntimeConfigurator : public ov::snippets::RuntimeConfigurator {
* @brief Calculate Loop parameters of Loop emitters and update these values in CPURuntimeConfig
* @param loop_manager LoopManager
*/
void update_loop_args(const std::shared_ptr<ov::snippets::lowered::LinearIR>& linear_ir) const;
void update_loop_args(const ov::snippets::lowered::LoopManagerPtr& loop_manager) const;
/**
* @brief Update subtensors of cached dynamic Brgemm expressions using the current M block size
* @param loop_manager LoopManager
*/
void update_brgemms(const ov::snippets::lowered::LoopManagerPtr& loop_manager) const;

const size_t rank6D = 6;
std::vector<ov::snippets::lowered::ExpressionPtr> m_dynamic_brgemms = {};
};

} // namespace intel_cpu
@@ -53,7 +53,7 @@ void jit_loop_begin_emitter::emit_code(const std::vector<size_t> &in, const std:

void jit_loop_begin_emitter::emit_impl(const std::vector<size_t>& in, const std::vector<size_t>& out) const {
// If the loop evaluates only once and the work amount is static, we can skip the loop begin code emission
if (evaluate_once)
if (evaluate_once && !is_work_amount_dynamic)
return;

Reg64 reg_work_amount = Reg64(static_cast<int>(out.back()));
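
Editorial note (not part of the diff): the loop-begin code (initializing reg_work_amount) is now skipped only when the loop evaluates once and its work amount is static; with a dynamic work amount, the register presumably still has to be loaded at runtime, so emission cannot be skipped.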
@@ -67,11 +67,39 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, MatMul,


std::vector<std::vector<ov::test::InputShape>> input_shapes_dynamic{
// All dimensions are dynamic
{
{PartialShape{-1, -1, -1, -1}, {{2, 1, 32, 64}, {2, 2, 10, 20}, {2, 2, 100, 80},
{2, 2, 10, 20}, {2, 1, 32, 64}}},
{2, 2, 10, 20}, {2, 1, 32, 64}, {2, 3, 64, 55}}},
{PartialShape{-1, -1, -1, -1}, {{1, 3, 64, 128}, {2, 2, 20, 30}, {2, 2, 80, 120},
{2, 2, 20, 30}, {1, 3, 64, 128}}}
{2, 2, 20, 30}, {1, 3, 64, 128}, {2, 3, 55, 128}}}
},
// Only M dimension is dynamic + only one Loop by M
{
{PartialShape{-1, 2, -1, 64}, {{2, 2, 64, 64}, {2, 2, 64, 64}, {2, 2, 35, 64},
{2, 2, 120, 64}, {2, 2, 15, 64}, {2, 2, 35, 64}}},
{PartialShape{-1, 2, 64, 32}, {{2, 2, 64, 32}, {2, 2, 64, 32}, {1, 2, 64, 32},
{1, 2, 64, 32}, {2, 2, 64, 32}, {1, 2, 64, 32}}}
},
// Only M dimension is dynamic + all Loops (by M, N, K)
{
{PartialShape{2, 2, -1, 550}, {{2, 2, 64, 550}, {2, 2, 16, 550}, {2, 2, 35, 550},
{2, 2, 16, 550}, {2, 2, 50, 550}, {2, 2, 64, 550}}},
{PartialShape{2, 1, 550, 70}, {{2, 1, 550, 70}, {2, 1, 550, 70}, {2, 1, 550, 70},
{2, 1, 550, 70}, {2, 1, 550, 70}, {2, 1, 550, 70}}}
},
// Only K dimension is dynamic
{
{PartialShape{2, 2, 35, -1}, {{2, 2, 35, 128}, {2, 2, 35, 10}, {2, 2, 35, 33},
{2, 2, 35, 35}, {2, 2, 35, 100},}},
{PartialShape{2, 2, -1, 70}, {{2, 2, 128, 70}, {2, 2, 10, 70}, {2, 2, 33, 70},
{2, 2, 35, 70}, {2, 2, 100, 70},}}
},
// Only N dimension is dynamic
{
STATIC_SHAPE(2, 2, 35, 550),
{PartialShape{2, 2, 550, -1}, {{2, 2, 550, 70}, {2, 2, 550, 12}, {2, 2, 550, 70},
{2, 2, 550, 12}, {2, 2, 550, 10},}}
},
};

