Skip to content

Commit

Permalink
Integrate recompilation infrastructure into RuntimeConfigurator (open…
Browse files Browse the repository at this point in the history
…vinotoolkit#24955)

### Details:
- *Integrate dynamic executors recompilation infrastructure into
RuntimeConfigurator*
- *Allow RuntimeConfigurator to recompile dynamic kernel executors in
runtime*
 - *Employ this approach to enable dynamic MatMul tests (fp32)*

### Tickets:
 - *143257*
  • Loading branch information
IvanNovoselov authored Jun 21, 2024
1 parent 080f22e commit b660da8
Show file tree
Hide file tree
Showing 31 changed files with 517 additions and 320 deletions.
114 changes: 102 additions & 12 deletions src/common/snippets/include/snippets/kernel_executor_table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

#pragma once

#include "snippets/lowered/expression.hpp"

#include "snippets/lowered/linear_ir.hpp"
#if defined(SNIPPETS_DEBUG_CAPS) && !defined(_WIN32)
#include <cxxabi.h>
#endif
namespace ov {
namespace snippets {

Expand All @@ -23,8 +25,38 @@ class KernelExecutorBase {
* while dynamic kernels will be completed only in runtime, when all the shapes are known.
*/
virtual bool is_completed() const = 0;

/*** Return deep copy of the config */
virtual std::shared_ptr<GenericConfig> clone() const = 0;

/*** Compute hash for fast comparison operations or caching support */
virtual size_t hash() const = 0;

bool operator==(const GenericConfig& rhs) const { return hash() == rhs.hash(); }
bool operator!=(const GenericConfig& rhs) const { return hash() != rhs.hash(); }

virtual ~GenericConfig() = default;
/** serialize config for debug purposes */
#ifdef SNIPPETS_DEBUG_CAPS
virtual std::string to_string() const = 0;
#endif
};
/**
* @brief Update current kernel config in accordance with the passed expression. Corresponding kernel is recompiled if necessary.
* This method should be called to update KernelExecutor based on runtime info (e.g. shapes) available through expression ptr
*/
virtual void update_by_expression(const ov::snippets::lowered::ExpressionPtr& expr) = 0;
/**
* @brief Replace current kernel config with the provided value. Corresponding kernel is recompiled if necessary.
* This method should be called to restore a saved state of the executor, that was configured using update_by_expression().
*/
virtual void update_by_config(const std::shared_ptr<const GenericConfig>& new_config) = 0;

virtual std::shared_ptr<const GenericConfig> get_config() const = 0;
/** serialize for debug purposes */
#ifdef SNIPPETS_DEBUG_CAPS
virtual std::string to_string() const = 0;
#endif
virtual ~KernelExecutorBase() = default;

private:
Expand All @@ -38,17 +70,47 @@ template<typename Conf, typename KernelType,
class KernelExecutor : public snippets::KernelExecutorBase {
public:
explicit KernelExecutor(std::shared_ptr<Conf> c) : KernelExecutorBase(), m_config{std::move(c)} {}
/**
* @brief check current config and recompile kernel if necessary. Use kernel caching to avoid redundant recompilations.
* This method must be called only for complete configs. It's the user responsibility to check is_completed() before calling.
*/
virtual void update_kernel() = 0;

// Note: `override` is redundant next to `final`, but it is kept to avoid warnings on some compilers
void update_by_expression(const ov::snippets::lowered::ExpressionPtr& expr) override final { // NOLINT
    // update_config() is given a fresh clone, so the config object that was stored before this call
    // (and may have been handed out via get_config() or adopted in update_by_config()) is left untouched
    const auto cloned_config = m_config->clone();
    m_config = std::static_pointer_cast<Conf>(cloned_config);
    update_config(expr, m_config);
    OPENVINO_ASSERT(m_config && m_config->is_completed(), "Failed to update kernel config in update_by_expression");

    // The config is complete at this point, so the kernel can be refreshed
    update_kernel(m_config, m_kernel);
    OPENVINO_ASSERT(m_kernel, "Failed to compile kernel executor");
}
/**
 * @brief Adopt new_config as the stored config and refresh the kernel through update_kernel().
 * Nothing is done when the stored config compares equal to new_config
 * (GenericConfig::operator== compares the hashes of the two configs).
 * Note that the passed object itself is stored (constness is cast away), not a copy of it.
 * @param new_config config to adopt; must be non-null and complete. It is downcast to Conf with
 *        static_pointer_cast, so its dynamic type is not verified here.
 */
void update_by_config(const std::shared_ptr<const GenericConfig>& new_config) override final { // NOLINT
    // Dereferencing a null pointer in the comparison below would be undefined behavior
    OPENVINO_ASSERT(new_config, "Null config was passed to update_by_config");
    if (m_config && *m_config == *new_config)
        return;
    m_config = std::static_pointer_cast<Conf>(std::const_pointer_cast<GenericConfig>(new_config));
    OPENVINO_ASSERT(m_config && m_config->is_completed(), "Failed to update kernel config in update_by_config");
    update_kernel(m_config, m_kernel);
    OPENVINO_ASSERT(m_kernel, "Failed to compile kernel executor");
}
/*** Returns the currently stored config as a pointer to const GenericConfig */
std::shared_ptr<const GenericConfig> get_config() const override {
    return m_config;
}
/*** Returns the currently stored kernel as a pointer to const KernelType */
std::shared_ptr<const KernelType> get_kernel() const {
    return m_kernel;
}
#ifdef SNIPPETS_DEBUG_CAPS
/**
 * @brief Serialize the executor for debug purposes.
 * @return string with the kernel type name (demangled on non-Windows builds when demangling succeeds,
 *         otherwise the name reported by typeid) followed by the string representation of the stored config
 */
std::string to_string() const override {
    std::string type_name = typeid(KernelType).name();
#ifndef _WIN32
    int status = 0;
    std::unique_ptr<char, void (*)(void*)> demangled_name(
            abi::__cxa_demangle(type_name.c_str(), nullptr, nullptr, &status),
            std::free);
    // __cxa_demangle returns nullptr on failure, and assigning a null char pointer to std::string
    // is undefined behavior, so keep the mangled name in that case
    if (status == 0 && demangled_name)
        type_name = demangled_name.get();
#endif
    return "KernelExecutorType: " + type_name + " KernelConfig: " + m_config->to_string();
}
#endif

protected:
/**
* @brief Takes shared_ptr to compilation config, returns shared_ptr to compiled kernel.
* Should be called only if actual compilation is required. Kernel caching must be implemented in update_kernel().
*/
virtual std::shared_ptr<KernelType> compile_kernel(const std::shared_ptr<Conf>& c) const = 0;
/*** Updates stored kernel config based on runtime info from expression (e.g. new input shapes). */
virtual void update_config(const ov::snippets::lowered::ExpressionPtr& expr, std::shared_ptr<Conf>& config) const = 0;
/*** Updates stored kernel in accordance with the passed config. Recompilation of the kernel is
* performed only if necessary, otherwise an appropriate kernel is retrieved from cache. */
virtual void update_kernel(const std::shared_ptr<const Conf>& c, std::shared_ptr<KernelType>& kernel) const = 0;

private:
/** Contains all the necessary information to compile a desired kernel*/
std::shared_ptr<Conf> m_config = nullptr;
/** Stores pointer to compiled kernel since the last update_kernel() call */
Expand All @@ -57,6 +119,7 @@ class KernelExecutor : public snippets::KernelExecutorBase {

class KernelExecutorTable {
public:
/*** Register KernelExecutor in the KernelExecutorTable so it can be later updated in runtime. */
template<typename T, class ...C,
typename std::enable_if<std::is_base_of<KernelExecutorBase, T>::value, bool>::type = true>
std::shared_ptr<T> register_kernel(const snippets::lowered::ExpressionPtr& expr, C... args) {
Expand All @@ -69,10 +132,37 @@ class KernelExecutorTable {
OPENVINO_ASSERT(m_table.count(expr), "This expression doesn't have a registered kernel executor");
return m_table.at(expr);
}
/*** Brings each registered KernelExecutor up to date using the expression it is registered for */
void update_state() const {
    for (auto it = m_table.cbegin(); it != m_table.cend(); ++it) {
        const auto& expr = it->first;
        const auto& executor = it->second;
        executor->update_by_expression(expr);
    }
}

/*** Returns lambda function that contains current state of the table, and restores this state when called */
std::function<void()> get_state_reset() {
auto current_state = get_state();
return [=]() { reset_state(current_state); };
}

/**
* @brief Replace originally registered ExpressionPtr with a new value.
* Note that code emission is performed on a copy of LIR, so all expression pointers visible from emitters won't
* be accessible from RuntimeConfigurator. In order to replace these cloned ExpressionPtrs with the original ones,
* we need to call this method.
*/
void replace_key_expression(const snippets::lowered::ExpressionPtr& from, const snippets::lowered::ExpressionPtr& to);

virtual ~KernelExecutorTable() = default;

protected:
std::unordered_map<snippets::lowered::ExpressionPtr, std::shared_ptr<KernelExecutorBase>> m_table{};
typedef std::vector<std::pair<snippets::lowered::ExpressionPtr, std::shared_ptr<const KernelExecutorBase::GenericConfig>>> ExecTableState;

/*** Restore the table state previously obtained by get_state() */
void reset_state(const ExecTableState& state);

/*** Return cumulative state of all the executors in the table. The returned ExecTableState object can be passed to reset_state */
ExecTableState get_state() const;
};

using KernelExecutorTablePtr = std::shared_ptr<KernelExecutorTable>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,14 @@ class LinearIRBuilder {
/**
* @brief Make a full copy of LinearIR by rules described in `m_config`
* @param linear_ir Linear IR
* @param expression_map expression map
* @return clone of `linear_ir`
*/
std::shared_ptr<LinearIR> clone(const std::shared_ptr<LinearIR>& linear_ir) const;
std::shared_ptr<LinearIR> clone(const std::shared_ptr<LinearIR>& linear_ir, ExpressionMap& expression_map) const;
inline std::shared_ptr<LinearIR> clone(const std::shared_ptr<LinearIR>& linear_ir) const {
ExpressionMap expression_map;
return clone(linear_ir, expression_map);
}
/**
* @brief Make a copy of LinearIR range by rules described in `m_config`
* @param begin begin iterator of the target range of LinearIR
Expand Down
5 changes: 2 additions & 3 deletions src/common/snippets/include/snippets/op/brgemm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,16 @@ class Brgemm : virtual public modifier::MemoryAccess, public ov::op::Op {
protected:
ov::element::Type get_output_type() const;
std::vector<ov::PartialShape> get_planar_input_shapes(const std::vector<ov::Input<ov::Node>>& inputs) const;
ov::PartialShape get_output_partial_shape(const std::vector<ov::PartialShape>& input_shapes) const;
ov::PartialShape infer_output_partial_shape(const std::vector<ov::PartialShape>& input_shapes) const;
ov::PartialShape get_planar_output_shape(const ov::PartialShape& output_shape) const;
void compute_block_size_values(size_t blk_size_m, size_t blk_size_k, size_t blk_size_n);
void set_block_size_values(size_t blk_size_m, size_t blk_size_k, size_t blk_size_n);
size_t m_M_blk = 0;
size_t m_K_blk = 0;
size_t m_N_blk = 0;
float m_beta = 0.f;

private:
void custom_constructor_validate_and_infer_types(std::vector<size_t> layout_a, std::vector<size_t> layout_b, std::vector<size_t> layout_c);
void validate_inputs() const;
};

} // namespace op
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#pragma once

#include "snippets/lowered/linear_ir.hpp"
#include "snippets/kernel_executor_table.hpp"
#include "snippets/lowered/pass/pass.hpp"

namespace ov {
Expand Down Expand Up @@ -42,7 +43,8 @@ class RuntimeConfig {
ov::snippets::VectorDims master_shape = {};

size_t buffer_scratchpad_size = 0;
std::vector<size_t> buffer_cluster_offsets;
std::vector<size_t> buffer_cluster_offsets {};
KernelExecutorTablePtr kernel_executor_table = std::make_shared<ov::snippets::KernelExecutorTable>();
};

/**
Expand All @@ -60,6 +62,8 @@ class RuntimeConfigurator {
* @return updated config
*/
const std::shared_ptr<RuntimeConfig>& get_updated_config(const std::shared_ptr<lowered::LinearIR>& linear_ir);
/*** Gives access to the KernelExecutorTable pointer held by the config (m_config->kernel_executor_table) */
const std::shared_ptr<KernelExecutorTable>& get_kernel_executor_table() const {
    return m_config->kernel_executor_table;
}

protected:
/**
Expand Down
2 changes: 0 additions & 2 deletions src/common/snippets/include/snippets/target_machine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

#include "emitter.hpp"
#include "snippets/lowered/expression.hpp"
#include "kernel_executor_table.hpp"

namespace ov {
namespace snippets {
Expand Down Expand Up @@ -94,7 +93,6 @@ class TargetMachine {

protected:
std::map<const ov::DiscreteTypeInfo, jitters_value> jitters;
std::shared_ptr<KernelExecutorTable> kernel_executor_table;
std::shared_ptr<RuntimeConfigurator> configurator;
};

Expand Down
38 changes: 38 additions & 0 deletions src/common/snippets/include/snippets/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,44 @@ std::shared_ptr<ov::Node> get_leaf_node_of_first_child_shape_infer_seq(const std
*/
std::shared_ptr<ov::Node> get_leaf_node_of_first_parent_shape_infer_seq(const std::shared_ptr<ov::Node>& start_node);

/**
* @brief Calculate leading dimension of the shape that should be read according to the layout
* @param shape original (not reordered) input shape
* @param layout specifies the order in which the dimensions of the input shape should be read
* @return stride of the dimension idx = layout[layout.size() - 2] in the original shape
Example:
Original shape (shape) = [1, 49, 2, 23]
Layout (transpose order) = [2, 0, 1, 3]
dim_idx = layout.size() - 2 = 2
// Since layout specifies the order of dimensions in which the shape should be read
dim = layout[dim_idx] = 1
stride(shape[1]) = shape[2] * shape[3] = 2 * 23
*/
size_t get_in_leading_dim(const VectorDims& shape, const std::vector<size_t>& layout);
/*** Convenience overload: takes the shape and the layout from the port descriptor `pd` */
inline size_t get_in_leading_dim(const lowered::PortDescriptorPtr& pd) {
    const auto& shape = pd->get_shape();
    const auto& layout = pd->get_layout();
    return get_in_leading_dim(shape, layout);
}
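/**
* @brief Calculate leading dimension of the shape that should be written according to the layout.
* The output shape is already transposed, so the data must be correctly written with the original shape by the order.
* @param shape reordered input shape that is stored according to the layout
* @param layout specifies the order in which the dimensions of the input shape are stored
* @return stride of the dimension dim in the reordered shape, where layout[dim] == layout.size() - 2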
Example:
Original transposed shape (shape) = [49, 2, 7, 39]
Layout (transpose order) = [2, 0, 1, 3]
dim_idx = layout.size() - 2 = 2
// Since the shape dimensions are already reordered according to the layout
dim = /find dim_idx index in layout/ = 0
stride(shape[0]) = shape[1] * shape[2] * shape[3] = 2 * 7 * 39
*/
size_t get_out_leading_dim(const VectorDims& shape, const std::vector<size_t>& layout);
/*** Convenience overload: takes the shape and the layout from the port descriptor `pd` */
inline size_t get_out_leading_dim(const lowered::PortDescriptorPtr& pd) {
    const auto& shape = pd->get_shape();
    const auto& layout = pd->get_layout();
    return get_out_leading_dim(shape, layout);
}

} // namespace utils
} // namespace snippets
} // namespace ov
39 changes: 39 additions & 0 deletions src/common/snippets/src/kernel_executor_table.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "snippets/kernel_executor_table.hpp"

namespace ov {
namespace snippets {

/**
 * Re-registers the executor stored under `from` so that it is stored under `to` instead.
 * Does nothing when `from` has no registered executor.
 * The OPENVINO_ASSERT check fails when `to` is already present in the table.
 */
void KernelExecutorTable::replace_key_expression(const snippets::lowered::ExpressionPtr& from, const snippets::lowered::ExpressionPtr& to) {
    const auto found = m_table.find(from);
    if (found == m_table.end())
        return;
    OPENVINO_ASSERT(m_table.count(to) == 0, "Attempt to replace a value that is already in the KernelExecutorTable");
    // Erase before inserting: an insertion into std::unordered_map may trigger rehashing, which invalidates
    // all iterators, so `found` must not be used after insert()
    const auto executor = found->second;
    m_table.erase(found);
    m_table.insert({to, executor});
}

/**
 * Restores the executors' configs from `state`.
 * Records of `state` are matched with the table records positionally, in the iteration order of m_table
 * (the same order get_state() uses to build the state), and every pair is checked to refer to the same expression.
 */
void KernelExecutorTable::reset_state(const ExecTableState& state) {
    OPENVINO_ASSERT(state.size() == m_table.size(), "Invalid state in reset_state: size mismatch");
    auto state_it = state.begin();
    for (const auto& table_record : m_table) {
        const auto& state_record = *state_it++;
        OPENVINO_ASSERT(table_record.first == state_record.first, "Invalid state in reset_state: expressions mismatch");
        table_record.second->update_by_config(state_record.second);
    }
}

/**
 * Collects (expression, config) records for every executor in the table, in the iteration order of m_table.
 */
KernelExecutorTable::ExecTableState KernelExecutorTable::get_state() const {
    ExecTableState result;
    // The final size is known in advance, so allocate once
    result.reserve(m_table.size());
    // Note: we need to clone configs when saving the state, since the configs still stored in the table can
    // be modified e.g. by calling update_by_expression();
    for (const auto& record : m_table)
        result.emplace_back(record.first, record.second->get_config()->clone());
    return result;
}

}// namespace snippets
}// namespace ov
3 changes: 1 addition & 2 deletions src/common/snippets/src/lowered/linear_ir_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,10 @@ std::vector<std::shared_ptr<ov::Node>> clone_nodes(const std::vector<std::shared
}
} // namespace

std::shared_ptr<LinearIR> LinearIRBuilder::clone(const std::shared_ptr<LinearIR>& linear_ir) const {
std::shared_ptr<LinearIR> LinearIRBuilder::clone(const std::shared_ptr<LinearIR>& linear_ir, ExpressionMap& expression_map) const {
auto cloned = std::make_shared<LinearIR>();
cloned->m_config = linear_ir->m_config;

ExpressionMap expression_map;
cloned->m_expressions = clone_range(linear_ir->m_expressions.cbegin(), linear_ir->m_expressions.cend(), expression_map);
for (const auto& expr : cloned->m_expressions) {
cloned->register_expression(expr, true);
Expand Down
Loading

0 comments on commit b660da8

Please sign in to comment.