Skip to content

Commit

Permalink
[Snippets][CPU] Added Kernel Executor table caching with binary code (#…
Browse files Browse the repository at this point in the history
…25638)

### Details:
- *The Kernel Executor table is now keyed by Expression execution numbers
instead of Expression pointers to avoid a dependency between `LinearIR` and
the binary code*
- *Added Kernel Executor table to `SubgraphCodeGenerator` to be cached
with binary code*
 - *Added Subgraph caching test*

### Tickets:
 - *N/A*
 
 ### Prerequisites:
- [x] #25623
- [x] #25378
  • Loading branch information
a-sidorova authored Aug 7, 2024
1 parent 5058a88 commit 2080aad
Show file tree
Hide file tree
Showing 15 changed files with 227 additions and 71 deletions.
4 changes: 3 additions & 1 deletion src/common/snippets/include/snippets/generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "snippets_isa.hpp"

#include "snippets/lowered/linear_ir.hpp"
#include "snippets/kernel_executor_table.hpp"
#include "snippets/shape_types.hpp"
#include "target_machine.hpp"

Expand All @@ -32,7 +33,8 @@ class LoweringResult {
std::vector<std::shared_ptr<Emitter>> m_saved_emitters{};

public:
std::shared_ptr<CompiledSnippet> compiled_snippet = nullptr;
CompiledSnippetPtr compiled_snippet = nullptr;
KernelExecutorTablePtr kernel_executor_table = nullptr;
};

/**
Expand Down
42 changes: 22 additions & 20 deletions src/common/snippets/include/snippets/kernel_executor_table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class KernelExecutorBase {
* @brief Update current kernel config in accordance with the passed expression. Corresponding kernel is recompiled if necessary.
* This method should be called to update KernelExecutor based on runtime info (e.g. shapes) available through expression ptr
*/
virtual void update_by_expression(const lowered::ExpressionPtr& expr, const lowered::LinearIRPtr& linear_ir) = 0;
virtual void update_by_expression(const lowered::ExpressionPtr& expr, const lowered::LinearIRCPtr& linear_ir) = 0;
/**
* @brief Replace current kernel config with the provided value. Corresponding kernel is recompiled if necessary.
* This method should be called to restore a saved state of the executor, that was configured using update_by_expression().
Expand All @@ -70,7 +70,7 @@ class KernelExecutor : public KernelExecutorBase {
explicit KernelExecutor(Conf c) : KernelExecutorBase(), m_config{std::move(c)} {}

// Note: override when final is redundant, but needed to avoid warnings on some compilers
void update_by_expression(const lowered::ExpressionPtr& expr, const lowered::LinearIRPtr& linear_ir) override final { // NOLINT
void update_by_expression(const lowered::ExpressionPtr& expr, const lowered::LinearIRCPtr& linear_ir) override final { // NOLINT
update_config(expr, linear_ir, m_config);
OPENVINO_ASSERT(m_config.is_completed(), "Failed to update kernel config in update_by_expression");
update_kernel(m_config, m_kernel);
Expand Down Expand Up @@ -103,7 +103,7 @@ class KernelExecutor : public KernelExecutorBase {

protected:
/*** Updates stored kernel config based on runtime info from expression (e.g. new input shapes). */
virtual void update_config(const lowered::ExpressionPtr& expr, const lowered::LinearIRPtr& linear_ir, Conf& config) const = 0;
virtual void update_config(const lowered::ExpressionPtr& expr, const lowered::LinearIRCPtr& linear_ir, Conf& config) const = 0;
/*** Updates stored kernel in accordance with the passed config. Recompilation of the kernel is
* performed if necessary. */
virtual void update_kernel(const Conf& c, std::shared_ptr<KernelType>& kernel) const = 0;
Expand All @@ -122,17 +122,26 @@ class KernelExecutorTable {
typename std::enable_if<std::is_base_of<KernelExecutorBase, T>::value, bool>::type = true>
std::shared_ptr<T> register_kernel(const lowered::ExpressionPtr& expr, C... args) {
const auto& instance = std::make_shared<T>(args...);
OPENVINO_ASSERT(m_table.insert({expr, instance}).second, "This expression already has an alterable kernel");
OPENVINO_ASSERT(m_table.insert({expr->get_exec_num(), instance}).second, "This expression execution number already has an alterable kernel");
return instance;
}
const std::shared_ptr<KernelExecutorBase>& get_kernel_executor(const lowered::ExpressionPtr& expr) const {
OPENVINO_ASSERT(m_table.count(expr), "This expression doesn't have a registered kernel executor");
return m_table.at(expr);

const std::shared_ptr<KernelExecutorBase>& get_kernel_executor(const lowered::ExpressionPtr& expr) const {
return get_kernel_executor(expr->get_exec_num());
}
const std::shared_ptr<KernelExecutorBase>& get_kernel_executor(double expr_exec_num) const {
OPENVINO_ASSERT(m_table.count(expr_exec_num), "This expression execution number doesn't have a registered kernel executor");
return m_table.at(expr_exec_num);
}

/*** Updates every registered KernelExecutor in accordance with the corresponding expression */
void update_state(const lowered::LinearIRPtr& linear_ir) const {
for (const auto& record : m_table)
record.second->update_by_expression(record.first, linear_ir);
void update_state(const lowered::LinearIRCPtr& linear_ir) const {
// Walk the expressions of the passed LinearIR and look each one up in the table
// by its execution number (the table is keyed by execution number, not by expression pointer).
for (const auto& expr : *linear_ir) {
const auto& found = m_table.find(expr->get_exec_num());
// Expressions that have no registered kernel executor are skipped.
if (found != m_table.end()) {
found->second->update_by_expression(expr, linear_ir);
}
}
}

/*** Returns lambda function that contains current state of the table, and restores this state when called */
Expand All @@ -141,19 +150,12 @@ class KernelExecutorTable {
return [=]() { reset_state(current_state); };
}

/**
* @brief Replace originally registered ExpressionPtr with a new value.
* Note that code emission is performed on a copy of LIR, so all expression pointers visible from emitters won't
* be accessible from RuntimeConfigurator. In order to replace these cloned ExpressionPtrs with the original ones,
* we need to call this method.
*/
void replace_key_expression(const lowered::ExpressionPtr& from, const lowered::ExpressionPtr& to);

virtual ~KernelExecutorTable() = default;

protected:
std::unordered_map<lowered::ExpressionPtr, std::shared_ptr<KernelExecutorBase>> m_table{};
typedef std::vector<std::pair<lowered::ExpressionPtr, std::shared_ptr<const KernelExecutorBase::GenericConfig>>> ExecTableState;
std::unordered_map<double, std::shared_ptr<KernelExecutorBase>> m_table {};

typedef std::vector<std::pair<double, std::shared_ptr<const KernelExecutorBase::GenericConfig>>> ExecTableState;

/*** Restore the table state previously obtained by get_state() */
void reset_state(const ExecTableState& state);
Expand Down
1 change: 1 addition & 0 deletions src/common/snippets/include/snippets/lowered/linear_ir.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ class LinearIR {
size_t m_static_buffer_scratchpad_size = 0;
};
using LinearIRPtr = std::shared_ptr<LinearIR>;
using LinearIRCPtr = std::shared_ptr<const LinearIR>;

template<typename iterator>
iterator LinearIR::find(iterator begin, iterator end, const ExpressionPtr& target) const {
Expand Down
1 change: 1 addition & 0 deletions src/common/snippets/include/snippets/op/subgraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ class Subgraph : public ov::op::util::SubGraphOp {

std::shared_ptr<Subgraph> clone() const;

const std::shared_ptr<RuntimeConfigurator>& get_runtime_configurator() const;
const std::shared_ptr<RuntimeConfig>& update_runtime_config() const;

static auto wrap_node_as_subgraph(const std::shared_ptr<ov::Node>& node) -> std::shared_ptr<Subgraph>;
Expand Down
26 changes: 17 additions & 9 deletions src/common/snippets/include/snippets/runtime_configurator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,52 +61,60 @@ class RuntimeConfigurator {
* @param linear_ir LinearIR
* @return updated config
*/
const std::shared_ptr<RuntimeConfig>& get_updated_config(const lowered::LinearIRPtr& linear_ir);
/*** Returns pointer to KernelExecutorTable owned by the config */
const std::shared_ptr<RuntimeConfig>& get_updated_config(const lowered::LinearIRCPtr& linear_ir);
/**
* @brief Returns pointer to KernelExecutorTable owned by the config
* @return updated KernelExecutorTable
*/
const std::shared_ptr<KernelExecutorTable>& get_kernel_executor_table() const { return m_config->kernel_executor_table; }
/**
* @brief Set new KernelExecutorTable to the config
* @param table new KernelExecutorTable
*/
void set_kernel_executor_table(std::shared_ptr<KernelExecutorTable> table) const;

protected:
/**
* @brief Update RuntimeConfig based on LinearIR
* @param linear_ir LinearIR
*/
virtual void update(const lowered::LinearIRPtr& linear_ir);
virtual void update(const lowered::LinearIRCPtr& linear_ir);
/**
* @brief Allocate and initialize fields in RuntimeConfig and RuntimeConfigurator
* @param linear_ir LinearIR
*/
virtual void initialization(const lowered::LinearIRPtr& linear_ir);
virtual void initialization(const lowered::LinearIRCPtr& linear_ir);

/**
* @brief Initializes input and data information of LinearIR:
* descriptors (that contains shapes and layouts) and data_sizes
* @param linear_ir LinearIR
*/
void init_data_info(const lowered::LinearIRPtr& linear_ir);
void init_data_info(const lowered::LinearIRCPtr& linear_ir);
/**
* @brief Initializes information of buffers:
* - static buffer_scratchpad_size
* - offsets of static clusters (with static buffers)
* - clusters with dynamic buffers (`m_dynamic_buffer_clusters`) for the quick access in `update()`
* @param linear_ir LinearIR
*/
void init_buffer_info(const lowered::LinearIRPtr& linear_ir);
void init_buffer_info(const lowered::LinearIRCPtr& linear_ir);
/**
* @brief Initializes tensor rank of config
* @param linear_ir LinearIR
*/
virtual void init_tensor_rank(const lowered::LinearIRPtr& linear_ir) const;
virtual void init_tensor_rank(const lowered::LinearIRCPtr& linear_ir) const;
/**
* @brief Update Loop information in LinearIR: Unified and ExpandedLoopInfo
* @param linear_ir LinearIR
*/
void update_loop_info(const lowered::LinearIRPtr& linear_ir) const;
void update_loop_info(const lowered::LinearIRCPtr& linear_ir) const;
/**
* @brief Update Buffer scratchpad size and offsets if needed
* Note: `update_loop_info` must be called before
* @param linear_ir LinearIR
*/
void update_buffer_scratchpad_size(const lowered::LinearIRPtr& linear_ir) const;
void update_buffer_scratchpad_size(const lowered::LinearIRCPtr& linear_ir) const;
/**
* @brief Calculate data offsets of LinearIR and update these values in RuntimeConfig
*/
Expand Down
2 changes: 2 additions & 0 deletions src/common/snippets/src/generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "snippets/generator.hpp"

#include "snippets/itt.hpp"
#include "snippets/runtime_configurator.hpp"
#include "snippets/lowered/linear_ir.hpp"
#include "snippets/lowered/expression.hpp"
#include "snippets/op/kernel.hpp"
Expand Down Expand Up @@ -46,6 +47,7 @@ LoweringResult Generator::generate(lowered::LinearIR& linear_ir, const void* com
result.m_saved_emitters.emplace_back(emitter);
}
result.compiled_snippet = target->get_snippet();
result.kernel_executor_table = target->get_runtime_configurator()->get_kernel_executor_table();

return result;
}
Expand Down
12 changes: 2 additions & 10 deletions src/common/snippets/src/kernel_executor_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,13 @@
namespace ov {
namespace snippets {

void KernelExecutorTable::replace_key_expression(const snippets::lowered::ExpressionPtr& from, const snippets::lowered::ExpressionPtr& to) {
const auto& found = m_table.find(from);
if (found != m_table.end()) {
OPENVINO_ASSERT(m_table.count(to) == 0, "Attempt to replace a value that is already in the KernelExecutorTable");
m_table.insert({to, found->second});
m_table.erase(found);
}
}

void KernelExecutorTable::reset_state(const ExecTableState& state) {
OPENVINO_ASSERT(state.size() == m_table.size(), "Invalid state in restore_state: size mismatch");
auto state_it = state.begin();
for (const auto& table_record : m_table) {
const auto& state_record = *state_it++;
OPENVINO_ASSERT(table_record.first == state_record.first, "Invalid state in restore_state: expressions mismatch");
OPENVINO_ASSERT(table_record.first == state_record.first,
"Invalid state in restore_state: expression execution numbers mismatched");
table_record.second->update_by_config(*state_record.second);
}
}
Expand Down
15 changes: 7 additions & 8 deletions src/common/snippets/src/op/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -544,22 +544,21 @@ snippets::Schedule Subgraph::generate(const void* compile_params) const {
}

auto lowering_result = m_generator->generate(linear_ir, compile_params);

// Note: Since the code emission is performed on a copy of LIR, but RuntimeConfigurator works with the initial instance,
// we need to replace cloned expression pointers to original ones in the KernelExecutorTable. Ticket: 129772
const auto& exec_table = m_generator->get_target_machine()->get_runtime_configurator()->get_kernel_executor_table();
for (const auto& expr : *m_linear_ir)
exec_table->replace_key_expression(expression_map.at(expr.get()), expr);
// Some kernel executors might've been registered during code emission.
// We need to update them, so appropriate kernels will be compiled.
const auto& exec_table = get_runtime_configurator()->get_kernel_executor_table();
exec_table->update_state(m_linear_ir);
return {std::move(lowering_result)};
}

const std::shared_ptr<RuntimeConfig>& Subgraph::update_runtime_config() const {
const std::shared_ptr<RuntimeConfigurator>& Subgraph::get_runtime_configurator() const {
OPENVINO_ASSERT(m_generator, "Generator has not been inited!");
return m_generator->get_target_machine()->get_runtime_configurator();
}

const std::shared_ptr<RuntimeConfig>& Subgraph::update_runtime_config() const {
OPENVINO_ASSERT(m_linear_ir, "LoweredLinearIR has not been inited!");
return m_generator->get_target_machine()->get_runtime_configurator()->get_updated_config(m_linear_ir);
return get_runtime_configurator()->get_updated_config(m_linear_ir);
}

void Subgraph::print() const {
Expand Down
21 changes: 13 additions & 8 deletions src/common/snippets/src/runtime_configurator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ RuntimeConfigurator::RuntimeConfigurator(std::shared_ptr<RuntimeConfig> c) :
OPENVINO_ASSERT(m_config, "Runtime config is nullptr!");
}

const std::shared_ptr<RuntimeConfig>& RuntimeConfigurator::get_updated_config(const lowered::LinearIRPtr& linear_ir) {
const std::shared_ptr<RuntimeConfig>& RuntimeConfigurator::get_updated_config(const lowered::LinearIRCPtr& linear_ir) {
// First initialization
if (m_io_num == 0)
initialization(linear_ir);
Expand All @@ -44,7 +44,7 @@ const std::shared_ptr<RuntimeConfig>& RuntimeConfigurator::get_updated_config(co
return m_config;
}

void RuntimeConfigurator::initialization(const lowered::LinearIRPtr& linear_ir) {
void RuntimeConfigurator::initialization(const lowered::LinearIRCPtr& linear_ir) {
init_data_info(linear_ir);
init_tensor_rank(linear_ir);
init_buffer_info(linear_ir);
Expand All @@ -55,7 +55,7 @@ void RuntimeConfigurator::initialization(const lowered::LinearIRPtr& linear_ir)
m_config->tile_rank = linear_ir->get_config().m_loop_depth;
}

void RuntimeConfigurator::update(const lowered::LinearIRPtr& linear_ir) {
void RuntimeConfigurator::update(const lowered::LinearIRCPtr& linear_ir) {
if (linear_ir->is_dynamic()) {
update_loop_info(linear_ir);
update_buffer_scratchpad_size(linear_ir);
Expand All @@ -67,11 +67,11 @@ void RuntimeConfigurator::update(const lowered::LinearIRPtr& linear_ir) {
update_latest_shapes();
}

void RuntimeConfigurator::init_tensor_rank(const lowered::LinearIRPtr& linear_ir) const {
void RuntimeConfigurator::init_tensor_rank(const lowered::LinearIRCPtr& linear_ir) const {
m_config->tensor_rank = linear_ir->get_master_shape().size();
}

void RuntimeConfigurator::init_data_info(const lowered::LinearIRPtr& linear_ir) {
void RuntimeConfigurator::init_data_info(const lowered::LinearIRCPtr& linear_ir) {
const auto& parameters = linear_ir->get_parameters();
const auto& results = linear_ir->get_results();
m_in_num = parameters.size();
Expand Down Expand Up @@ -113,7 +113,7 @@ void RuntimeConfigurator::init_data_info(const lowered::LinearIRPtr& linear_ir)
}
}

void RuntimeConfigurator::init_buffer_info(const lowered::LinearIRPtr& linear_ir) {
void RuntimeConfigurator::init_buffer_info(const lowered::LinearIRCPtr& linear_ir) {
std::map<size_t, std::set<lowered::ExpressionPtr>> dynamic_buffer_clusters, static_buffer_clusters;

// All needed checks are in Validate pass
Expand Down Expand Up @@ -143,7 +143,7 @@ void RuntimeConfigurator::init_buffer_info(const lowered::LinearIRPtr& linear_ir
m_dynamic_buffer_clusters = std::move(dynamic_buffer_clusters);
}

void RuntimeConfigurator::update_loop_info(const lowered::LinearIRPtr& linear_ir) const {
void RuntimeConfigurator::update_loop_info(const lowered::LinearIRCPtr& linear_ir) const {
// Initialized UnifiedLoopInfo
struct CurrentUnifiedLoopInfo {
size_t current_work_amount = 0;
Expand Down Expand Up @@ -202,7 +202,7 @@ void RuntimeConfigurator::update_loop_info(const lowered::LinearIRPtr& linear_ir
}
}

void RuntimeConfigurator::update_buffer_scratchpad_size(const lowered::LinearIRPtr& linear_ir) const {
void RuntimeConfigurator::update_buffer_scratchpad_size(const lowered::LinearIRCPtr& linear_ir) const {
const auto& loop_manager = linear_ir->get_loop_manager();
m_config->buffer_scratchpad_size = linear_ir->get_static_buffer_scratchpad_size();

Expand Down Expand Up @@ -278,5 +278,10 @@ void RuntimeConfigurator::update_latest_shapes() {
}
}

/**
 * @brief Replace the KernelExecutorTable stored in the runtime config.
 * @param table new KernelExecutorTable; must not be nullptr (checked with OPENVINO_ASSERT)
 */
void RuntimeConfigurator::set_kernel_executor_table(std::shared_ptr<KernelExecutorTable> table) const {
    // Reject a null table up front so that the config never ends up holding an empty pointer.
    OPENVINO_ASSERT(table, "Failed to update Kernel Executor Table: passed table is nullptr");
    m_config->kernel_executor_table = std::move(table);
}

} // namespace snippets
} // namespace ov
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ namespace intel_cpu {
CPURuntimeConfigurator::CPURuntimeConfigurator() : ov::snippets::RuntimeConfigurator(std::make_shared<CPURuntimeConfig>()) {
}

void CPURuntimeConfigurator::update(const ov::snippets::lowered::LinearIRPtr& linear_ir) {
void CPURuntimeConfigurator::update(const ov::snippets::lowered::LinearIRCPtr& linear_ir) {
if (linear_ir->is_dynamic()) {
update_loop_info(linear_ir);
update_loop_args(linear_ir);
Expand All @@ -30,11 +30,11 @@ void CPURuntimeConfigurator::update(const ov::snippets::lowered::LinearIRPtr& li
update_latest_shapes();
}

void CPURuntimeConfigurator::init_tensor_rank(const ov::snippets::lowered::LinearIRPtr& linear_ir) const {
void CPURuntimeConfigurator::init_tensor_rank(const ov::snippets::lowered::LinearIRCPtr& linear_ir) const {
m_config->tensor_rank = std::max(linear_ir->get_master_shape().size(), rank6D);
}

void CPURuntimeConfigurator::update_loop_args(const ov::snippets::lowered::LinearIRPtr& linear_ir) const {
void CPURuntimeConfigurator::update_loop_args(const ov::snippets::lowered::LinearIRCPtr& linear_ir) const {
const auto& cpu_config = ov::as_type_ptr<CPURuntimeConfig>(m_config);
OPENVINO_ASSERT(cpu_config, "CPURuntimeConfigurator expects CPURuntimeConfig");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,17 @@ class CPURuntimeConfigurator : public ov::snippets::RuntimeConfigurator {
* @brief Update RuntimeConfig based on LinearIR
* @param linear_ir LinearIR
*/
void update(const ov::snippets::lowered::LinearIRPtr& linear_ir) override;
void update(const ov::snippets::lowered::LinearIRCPtr& linear_ir) override;
/**
* @brief Initializes tensor rank of config
* @param linear_ir LinearIR
*/
void init_tensor_rank(const ov::snippets::lowered::LinearIRPtr& linear_ir) const override;
void init_tensor_rank(const ov::snippets::lowered::LinearIRCPtr& linear_ir) const override;
/**
* @brief Calculate Loop parameters of Loop emitters and update these values in CPURuntimeConfig
* @param linear_ir LinearIR
*/
void update_loop_args(const ov::snippets::lowered::LinearIRPtr& linear_ir) const;
void update_loop_args(const ov::snippets::lowered::LinearIRCPtr& linear_ir) const;

const size_t rank6D = 6;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ float BrgemmKernelExecutor::get_beta(const ov::snippets::lowered::LoopManagerPtr
return 0;
}
void BrgemmKernelExecutor::update_config(const ov::snippets::lowered::ExpressionPtr& expr,
const ov::snippets::lowered::LinearIRPtr& linear_ir,
const ov::snippets::lowered::LinearIRCPtr& linear_ir,
BrgemmKernelConfig& config) const {
const auto& input_pds = expr->get_input_port_descriptors();
const auto& output_pds = expr->get_output_port_descriptors();
Expand Down
Loading

0 comments on commit 2080aad

Please sign in to comment.