Handle pointer shifts for repacked inputs
v-Golubev committed Oct 11, 2024
1 parent 5dbab73 commit 26faff5
Showing 10 changed files with 68 additions and 41 deletions.
6 changes: 6 additions & 0 deletions src/common/snippets/include/snippets/runtime_configurator.hpp
@@ -157,6 +157,12 @@ class RuntimeConfigurator {
*/
std::vector<std::vector<size_t>> extract_layouts() const;

static void compute_offsets(const ov::snippets::VectorDims& shape,
ov::snippets::VectorDims& offsets,
size_t offsets_size,
size_t dim_step,
size_t idx_stride);

class MHAParallelWAOptimizer {
public:
MHAParallelWAOptimizer() = default;
10 changes: 10 additions & 0 deletions src/common/snippets/include/snippets/utils/utils.hpp
@@ -305,6 +305,16 @@ void visit_path(const lowered::ExpressionPtr& expr,
std::function<void(lowered::ExpressionPtr)> func,
bool visit_parent_path);

/**
* @brief Checks if layout is planar
*/
inline bool is_planar_layout(const std::vector<size_t>& layout) {
for (size_t i = 0; i < layout.size(); ++i)
if (layout[i] != i)
return false;
return true;
}

} // namespace utils
} // namespace snippets
} // namespace ov
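
For reference, a layout in snippets is a permutation of dimension indices, so a planar layout is simply the identity permutation. A minimal usage sketch of the new helper (hypothetical layouts, not part of this commit):

    #include <cassert>
    #include "snippets/utils/utils.hpp"

    int main() {
        // {0, 1, 2, 3}: dimensions in original order -> planar
        assert(ov::snippets::utils::is_planar_layout({0, 1, 2, 3}));
        // {0, 2, 1, 3}: dims 1 and 2 swapped (e.g. a transposed input) -> non-planar
        assert(!ov::snippets::utils::is_planar_layout({0, 2, 1, 3}));
        // an empty layout has no out-of-place entries, so the helper returns true;
        // callers in this commit guard with !layout.empty() where that matters
        assert(ov::snippets::utils::is_planar_layout({}));
        return 0;
    }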
9 changes: 2 additions & 7 deletions src/common/snippets/src/lowered/expression.cpp
@@ -170,11 +170,6 @@ ExpressionPtr Expression::clone() const {
}

bool Expression::visit_attributes(AttributeVisitor &visitor) {
auto is_planar_layout = [](const std::vector<size_t>& layout) {
for (size_t i = 0; i < layout.size(); ++i)
if (layout[i] != i) return false;
return true;
};
auto subtensor2str = [](const VectorDims& subtensor) {
std::stringstream ss;
for (size_t i = 0; i < subtensor.size(); ++i) {
@@ -203,7 +198,7 @@ bool Expression::visit_attributes(AttributeVisitor &visitor) {
subtensors.emplace_back("in_subtensor_" + std::to_string(i), subtensor2str(subtensor));

const auto& layout = desc->get_layout();
if (!layout.empty() && !is_planar_layout(layout))
if (!layout.empty() && !utils::is_planar_layout(layout))
layouts.emplace_back("in_layout_" + std::to_string(i), layout);

in_reg_types.emplace_back(regTypeToStr(desc->get_reg().type));
@@ -220,7 +215,7 @@ bool Expression::visit_attributes(AttributeVisitor &visitor) {
subtensors.emplace_back("out_subtensor_" + std::to_string(i), subtensor2str(subtensor));

const auto& layout = desc->get_layout();
if (!layout.empty() && !is_planar_layout(layout))
if (!layout.empty() && !utils::is_planar_layout(layout))
layouts.emplace_back("out_layout_" + std::to_string(i), layout);

out_reg_types.emplace_back(regTypeToStr(desc->get_reg().type));
33 changes: 19 additions & 14 deletions src/common/snippets/src/runtime_configurator.cpp
@@ -271,27 +271,18 @@ void RuntimeConfigurator::update_data_offsets(const std::vector<VectorDims>& sha
// shape: s0, s1, s2 == 1, s3
// offsets: s1*s3, s3, 0, 1
const auto& shape = shapes[i];
OPENVINO_ASSERT(m_config->tensor_rank >= shape.size(), "Incorrect tensor rank!");
if (shape == m_latest_shapes[i])
continue;

const auto& layout = layouts[i];
auto& offsets = m_config->io_data_offsets[i];

offsets.resize(m_config->tensor_rank);
std::fill(offsets.begin(), offsets.end(), 0);
if (utils::is_dynamic_vdims(shape))
return;

size_t dim_step = m_io_data_sizes[i];
offsets[offsets.size() - 1] = dim_step;

OPENVINO_ASSERT(m_config->tensor_rank >= shape.size(), "Incorrect tensor rank!");
auto& offsets = m_config->io_data_offsets[i];
const auto idx_stride = m_config->tensor_rank - shape.size();
for (int i = static_cast<int>(shape.size()) - 2; i >= 0; i--) {
dim_step *= shape[i + 1];
offsets[i + idx_stride] = shape[i] != 1 ? dim_step : 0;
}
compute_offsets(shape, offsets, m_config->tensor_rank, m_io_data_sizes[i], idx_stride);

std::cout << "offsets[" << i << "] = " << ov::PartialShape(offsets) << std::endl;
const auto& layout = layouts[i];
if (!layout.empty()) {
std::vector<size_t> reordered_offsets(offsets.size());
const auto is_input = i < m_in_num;
Expand Down Expand Up @@ -319,6 +310,20 @@ std::vector<std::vector<size_t>> RuntimeConfigurator::extract_layouts() const {
return layouts;
}

void RuntimeConfigurator::compute_offsets(const ov::snippets::VectorDims& shape,
ov::snippets::VectorDims& offsets,
size_t offsets_size,
size_t dim_step,
size_t idx_stride) {
offsets.resize(offsets_size);
std::fill(offsets.begin(), offsets.end(), 0);
offsets[offsets.size() - 1] = dim_step;
for (int i = static_cast<int>(shape.size()) - 2; i >= 0; i--) {
dim_step *= shape[i + 1];
offsets[i + idx_stride] = shape[i] != 1 ? dim_step : 0;
}
}

void RuntimeConfigurator::set_kernel_executor_table(std::shared_ptr<KernelExecutorTable> table) const {
OPENVINO_ASSERT(table, "Failed to update Kernel Executor Table: passed table is missed");
m_config->kernel_executor_table = std::move(table);
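
To make the new helper concrete, a worked trace of compute_offsets under assumed f32 inputs (data size 4 bytes) and hypothetical dims:

    // shape = {2, 3, 1, 4}, offsets_size = 4, dim_step = 4, idx_stride = 0
    // innermost: offsets[3] = 4
    // i = 2: dim_step = 4 * shape[3] = 16;  shape[2] == 1 -> offsets[2] = 0
    // i = 1: dim_step = 16 * shape[2] = 16; shape[1] != 1 -> offsets[1] = 16
    // i = 0: dim_step = 16 * shape[1] = 48; shape[0] != 1 -> offsets[0] = 48
    // result: offsets = {48, 16, 0, 4} -- the "s1*s3, s3, 0, 1" pattern from the
    // update_data_offsets comment above, scaled by the element size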
@@ -43,7 +43,7 @@ void CPURuntimeConfigurator::update(const ov::snippets::lowered::LinearIRCPtr& l
RuntimeConfigurator::update(linear_ir);
if (linear_ir->is_dynamic())
update_loop_args(linear_ir);
adjust_offsets_from_descs();
adjust_offsets_from_descs(linear_ir);
}

void CPURuntimeConfigurator::update_tensor_rank(const ov::snippets::VectorDims& master_shape) {
@@ -105,15 +105,28 @@ void CPURuntimeConfigurator::update_requested_descs(const ov::snippets::lowered:
}
}
}
void CPURuntimeConfigurator::adjust_offsets_from_descs() const {
void CPURuntimeConfigurator::adjust_offsets_from_descs(const ov::snippets::lowered::LinearIRCPtr& linear_ir) const {
const auto& cpu_config = ov::as_type_ptr<CPURuntimeConfig>(m_config);
auto& optimal_descs = cpu_config->m_in_requested_descs;
for (size_t i = 0; i < m_in_num; ++i) {
if (optimal_descs[i]) {
const auto& optimal_desc = optimal_descs[i];
if (optimal_desc) {
// It is assumed that shape is planar
const auto& parameter = linear_ir->get_parameters()[i];
const auto& original_shape = parameter->get_output_port_descriptor(0)->get_shape();
const auto& blocked_shape = optimal_desc->as<DnnlBlockedMemoryDesc>()->getBlockDims();

ov::snippets::VectorDims shape_for_offset(m_config->tensor_rank - original_shape.size(), 1);
// Parallel work amount is copied from original shape
shape_for_offset.insert(shape_for_offset.end(), original_shape.begin(), original_shape.end() - m_config->tile_rank);
// Only first dim is batch, the rest are repacked KN
shape_for_offset.insert(shape_for_offset.end(), blocked_shape.begin() + 1, blocked_shape.end());
std::cout << "shape_for_offset = " << ov::PartialShape(shape_for_offset) << std::endl;

auto& offsets = m_config->io_data_offsets[i];
// TODO: how exactly should offsets be corrected using info from blocking descriptor?
if (i == 1)
offsets[3] = 2048 * 2;
compute_offsets(shape_for_offset, offsets, shape_for_offset.size(), m_io_data_sizes[i], 0);
std::cout << "offsets[*] = " << ov::PartialShape(offsets) << std::endl;
OPENVINO_ASSERT(ov::snippets::utils::is_planar_layout(parameter->get_output_port_descriptor(0)->get_layout()));
}
}
}
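
For intuition on shape_for_offset: it pads with leading ones up to tensor_rank, keeps the parallel (batch) dims from the original planar shape, and substitutes the innermost tile_rank dims with the repacked dims from the blocked descriptor, so that compute_offsets produces pointer increments over the repacked buffer. A hypothetical trace (tensor_rank = 4, tile_rank = 2, dims invented for illustration):

    // original planar shape = {5, K, N}   -> leading pad {1}; parallel dims kept: {5}
    // repacked blockDims    = {5, Kb, Nb} -> first (batch) dim skipped: {Kb, Nb}
    // shape_for_offset      = {1, 5, Kb, Nb}
    // compute_offsets(shape_for_offset, offsets, 4, dtype_size, 0) then walks the
    // repacked buffer instead of the original tensor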
@@ -54,7 +54,7 @@ class CPURuntimeConfigurator : public ov::snippets::RuntimeConfigurator {
void update_loop_args(const ov::snippets::lowered::LinearIRCPtr& linear_ir) const;

void update_requested_descs(const ov::snippets::lowered::LinearIRCPtr& linear_ir) const;
void adjust_offsets_from_descs() const;
void adjust_offsets_from_descs(const ov::snippets::lowered::LinearIRCPtr& linear_ir) const;

static const size_t rank6D;
};
@@ -6,7 +6,6 @@

#include "snippets/lowered/loop_manager.hpp"
#include "emitters/plugin/x64/utils.hpp"
#include "nodes/common/cpu_memcpy.h"
#include "transformations/snippets/x64/op/brgemm_utils.hpp"

#define DTYPE_CAST(X) static_cast<dnnl_data_type_t>(DnnlExtensionUtils::ElementTypeToDataType(X))
@@ -41,8 +41,14 @@ pass::MoveBrgemmRepackingOut::MoveBrgemmRepackingOut() {
const auto& copy_b_in = pattern_map.at(m_param);
const auto& copy_b_out = pattern_map.at(m_copy_b);
const auto copy_b_node = copy_b_out.get_node_shared_ptr();
// TODO: how to handle copyB with compensations?
if (copy_b_node->get_output_size() != 1 || transformation_callback(copy_b_node))

const auto& in_desc = PortDescriptorUtils::get_port_descriptor_ptr(copy_b_node->input(0));
const auto& layout = in_desc->get_layout();
// TODO:
// 1. handle copyB with compensations
// 2. handle non-planar layout
if (!ov::snippets::utils::is_planar_layout(layout) || copy_b_node->get_output_size() != 1 ||
transformation_callback(copy_b_node))
return false;
return ov::replace_output_update_name(copy_b_out, copy_b_in);
};
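
In effect, when the BrgemmCopyB input layout is planar and the node has a single output (no compensations), the matcher removes in-kernel repacking by reconnecting consumers directly to the producer; roughly (a sketch, using the pattern variables from above):

    // before: copy_b_in (kernel input) -> BrgemmCopyB -> copy_b_out -> consumers
    // after:  copy_b_in (kernel input) -> consumers
    // ov::replace_output_update_name rewires copy_b_out's consumers to copy_b_in,
    // leaving the repacking to be performed outside the snippets kernel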
@@ -16,13 +16,6 @@ namespace tpp {
namespace pass {
namespace {
using ExpressionPort = snippets::lowered::ExpressionPort;
bool is_planar_layout(const std::vector<size_t>& layout) {
for (size_t i = 0; i < layout.size(); i++) {
if (layout[i] != i)
return false;
}
return true;
}
// Note: Buffer is directly connected to the port if it remains in the same loops with the port's expression
// Directly connected Buffers store data densely, so strides are defined by subtensor dims
// Indirectly connected Buffers (with loops between the expr and Buffer) store data according
@@ -81,12 +74,12 @@ size_t get_leading_dim(ExpressionPort port, const snippets::lowered::LoopManager
subtensor[idx] = shape[shape.size() - i];
}
}
OPENVINO_ASSERT(!full_dim_substituted || is_planar_layout(layout),
OPENVINO_ASSERT(!full_dim_substituted || ov::snippets::utils::is_planar_layout(layout),
"Only planar layouts are supported for FULL_DIM substitution");

if (has_directly_connected_buffer(port, loop_mngr)) {
shape = port_desc->get_subtensor();
OPENVINO_ASSERT(is_planar_layout(layout), "Only planar layouts are supported for Buffers");
OPENVINO_ASSERT(ov::snippets::utils::is_planar_layout(layout), "Only planar layouts are supported for Buffers");
const auto rank_diff = static_cast<int64_t>(layout.size()) - static_cast<int64_t>(shape.size());
if (rank_diff > 0)
layout.erase(layout.end() - rank_diff, layout.end());
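
To trace the Buffer branch above with hypothetical numbers (assuming the usual dense-subtensor convention; dims invented, not verified against the full function):

    // directly connected Buffer: shape <- subtensor = {M_blk, N_blk} (dense storage)
    // layout = {0, 1, 2, 3} (planar, as asserted); rank_diff = 4 - 2 = 2
    // trailing layout entries erased -> layout = {0, 1}
    // with dense storage the leading dimension is then the innermost dim, N_blk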
@@ -48,7 +48,7 @@ size_t B1 = std::getenv("B1") ? std::atoi(std::getenv("B1")) : 1;
size_t B2 = std::getenv("B2") ? std::atoi(std::getenv("B2")) : 1;

std::vector<std::vector<ov::test::InputShape>> input_shapes{
{ {{}, {{B1, 1, 1, K}}}, {{}, {{1, B2, K, N}}} },
{ {{}, {{B1, 1, 1, K}}}, {{}, {{B2, 5, K, N}}} },
/*
{ {{}, {{2, 1, 3, 5}}}, {{}, {{1, 3, 5, 3}}} },
{ {{}, {{3, 1, 32, 14}}}, {{}, {{1, 3, 14, 37}}} },
