From 7a51f3c97d3fc26361970be2af7b51598b4d106b Mon Sep 17 00:00:00 2001
From: Vladislav Golubev
Date: Wed, 25 Sep 2024 12:24:22 +0200
Subject: [PATCH] [TMP] MatMul experiments via benchmark_app

---
 samples/cpp/benchmark_app/main.cpp            | 53 ++++++++++++++++++-
 .../x64/pass/lowered/brgemm_cpu_blocking.cpp  | 14 ++++-
 2 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/samples/cpp/benchmark_app/main.cpp b/samples/cpp/benchmark_app/main.cpp
index 2cfd15b77afb6e..c6a5dbd85266e4 100644
--- a/samples/cpp/benchmark_app/main.cpp
+++ b/samples/cpp/benchmark_app/main.cpp
@@ -12,6 +12,11 @@
 
 // clang-format off
 #include "openvino/openvino.hpp"
+#include <cstdlib>
+#include <numeric>
+#include <stdexcept>
+#include "openvino/op/constant.hpp"
+#include "openvino/op/matmul.hpp"
 #include "openvino/pass/serialize.hpp"
 
 #ifndef IN_OV_COMPONENT
@@ -406,6 +411,7 @@ int main(int argc, char* argv[]) {
                 // set to default value
                 device_config[ov::enable_profiling.name()] = FLAGS_pc;
             }
+            device_config[ov::enable_profiling.name()] = true;
             perf_counts = (device_config.at(ov::enable_profiling.name()).as<bool>()) ? true : perf_counts;
 
             auto supported_properties = core.get_property(device, ov::supported_properties);
@@ -596,7 +602,38 @@ int main(int argc, char* argv[]) {
             slog::info << "Loading model files" << slog::endl;
 
             auto startTime = Time::now();
-            auto model = core.read_model(FLAGS_m);
+            // [TMP] Instead of reading FLAGS_m, build a single-MatMul model whose shapes come from the
+            // B, M, K and N environment variables: data is f32 {1, B, M, K}, weights are a f32 {K, N} constant.
+            auto model = []() {
+                // Parses a strictly positive decimal dimension from the environment variable `name`.
+                // Throws std::runtime_error instead of handing a null or garbage string to the parser.
+                const auto read_dim = [](const char* name) -> size_t {
+                    const char* raw = std::getenv(name);
+                    char* end = nullptr;
+                    const bool starts_with_digit = raw != nullptr && *raw >= '0' && *raw <= '9';
+                    const unsigned long long value = starts_with_digit ? std::strtoull(raw, &end, 10) : 0;
+                    if (!starts_with_digit || *end != '\0' || value == 0) {
+                        throw std::runtime_error(std::string("Environment variable '") + name +
+                                                 "' must be set to a positive integer");
+                    }
+                    return static_cast<size_t>(value);
+                };
+                const size_t batch = read_dim("B");
+                const size_t M = read_dim("M");
+                const size_t K = read_dim("K");
+                const size_t N = read_dim("N");
+
+                const ov::Shape data_shape{1, batch, M, K};
+                auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, data_shape);
+                // Deterministic weights: -1000, -999, ... as produced by std::iota.
+                std::vector<float> weights_vals(K * N);
+                std::iota(weights_vals.begin(), weights_vals.end(), -1000);
+                auto weights = ov::op::v0::Constant::create(ov::element::f32, {K, N}, weights_vals);
+                auto matmul = std::make_shared<ov::op::v0::MatMul>(data, weights, false, false);
+                // The runtime-model report further down looks the node up by this friendly name.
+                matmul->set_friendly_name("MatMul");
+                return std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{data});
+            }();
             auto duration_ms = get_duration_ms_till_now(startTime);
             slog::info << "Read model took " << double_to_string(duration_ms) << " ms" << slog::endl;
             slog::info << "Original model I/O parameters:" << slog::endl;
@@ -1218,6 +1255,20 @@ int main(int argc, char* argv[]) {
             slog::info << "OpenVINO Runtime configuration settings were dumped to " << FLAGS_dump_config
                        << slog::endl;
         }
+        // [TMP] Print the rt_info entries attached to every runtime-model node named "MatMul".
+        // Keys are looked up with find() so a missing entry is reported instead of throwing.
+        const auto runtime_model = compiledModel.get_runtime_model();
+        for (const auto& op : runtime_model->get_ordered_ops()) {
+            if (op->get_friendly_name() != "MatMul") {
+                continue;
+            }
+            const auto& rt_info = op->get_rt_info();
+            for (const char* key : {"execTimeMcs", "layerType", "primitiveType"}) {
+                const auto it = rt_info.find(key);
+                std::cout << key << ": " << (it != rt_info.end() ? it->second.as<std::string>() : "<not available>")
+                          << std::endl;
+            }
+        }
         if (!FLAGS_exec_graph_path.empty()) {
             try {
                 ov::serialize(compiledModel.get_runtime_model(), FLAGS_exec_graph_path);
diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_cpu_blocking.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_cpu_blocking.cpp
index 51565537c43568..f10078507cbe63 100644
--- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_cpu_blocking.cpp
+++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_cpu_blocking.cpp
@@ -70,7 +70,19 @@ std::tuple<size_t, size_t, size_t> BrgemmCPUBlocking::get_blocking_params(const
         n_blk = get_full_dim_value();
         k_blk = get_full_dim_value();
     }
-    return std::make_tuple(m_blk, n_blk, k_blk);
+    // [TMP] Experiment hook: the M_b / N_b / K_b environment variables may override the block sizes
+    // computed above. An unset, non-numeric or zero value keeps the computed block size, so the
+    // pass still works when the variables are absent.
+    const auto env_or = [](const char* name, size_t fallback) -> size_t {
+        const char* raw = std::getenv(name);
+        if (raw == nullptr || *raw < '0' || *raw > '9') {
+            return fallback;
+        }
+        char* end = nullptr;
+        const unsigned long long value = std::strtoull(raw, &end, 10);
+        return (*end == '\0' && value > 0) ? static_cast<size_t>(value) : fallback;
+    };
+    return std::make_tuple(env_or("M_b", m_blk), env_or("N_b", n_blk), env_or("K_b", k_blk));
 }
 
 SpecificIterationHandlers BrgemmCPUBlocking::get_k_loop_handlers(size_t work_amount, size_t block_size) const {