From 17a7c77ece4e948a13df0d56b2c92aa471db4739 Mon Sep 17 00:00:00 2001 From: Sergey Ivanov Date: Tue, 23 Jul 2024 14:02:29 +0000 Subject: [PATCH 01/54] [NPU] Add possibility to set up different layout per input/output (#25518) ### Details: - Adding per-input/output layout configuration through command line arguments ### Tickets: - E-130481 --------- Co-authored-by: Maksim Doronin --- .../tools/common/include/tensor_utils.hpp | 10 + .../tools/common/src/tensor_utils.cpp | 24 ++ .../tools/single-image-test/main.cpp | 206 ++++++++++++++---- 3 files changed, 200 insertions(+), 40 deletions(-) diff --git a/src/plugins/intel_npu/tools/common/include/tensor_utils.hpp b/src/plugins/intel_npu/tools/common/include/tensor_utils.hpp index c6ca8f50fd3f94..87b2301a7ae4fb 100644 --- a/src/plugins/intel_npu/tools/common/include/tensor_utils.hpp +++ b/src/plugins/intel_npu/tools/common/include/tensor_utils.hpp @@ -5,8 +5,11 @@ #pragma once +#include #include +#include + namespace npu { namespace utils { @@ -58,5 +61,12 @@ inline ov::Tensor toFP32(const ov::Tensor& in, void* ptr = nullptr) { */ std::vector> parseTensorsAsFP32(const std::map& tensors); +/** + * @brief Join several non-batched tensors having the same shapes and precisions into a batched one. 
+ * + * @param tensors The source non-batched tensors + * @return The merged batched tensor + */ +ov::Tensor joinTensors(const std::list& tensors, const ov::Layout& layout); } // namespace utils } // namespace npu diff --git a/src/plugins/intel_npu/tools/common/src/tensor_utils.cpp b/src/plugins/intel_npu/tools/common/src/tensor_utils.cpp index 470d737a2b9d31..32616b86135243 100644 --- a/src/plugins/intel_npu/tools/common/src/tensor_utils.cpp +++ b/src/plugins/intel_npu/tools/common/src/tensor_utils.cpp @@ -468,5 +468,29 @@ std::vector> parseTensorsAsFP32(const std::map& tensors, const ov::Layout& layout) { + if (tensors.empty()) { + OPENVINO_THROW("Cannot join tensors: nothing to join"); + } + if (!ov::layout::has_batch(layout)) { + OPENVINO_THROW("Cannot join tensors: has no batch_idx in layout", layout.to_string()); + } + auto pivotShape = tensors.front().get_shape(); + auto pivotPrecision = tensors.front().get_element_type(); + if (!std::all_of(tensors.begin(), tensors.end(), [&pivotShape, &pivotPrecision](const auto& t) { + return t.get_shape() == pivotShape && t.get_element_type() == pivotPrecision; + })) { + OPENVINO_THROW("Cannot join tensors with different shapes, expected: ", pivotPrecision, ", ", pivotShape); + } + pivotShape[ov::layout::batch_idx(layout)] *= tensors.size(); + ov::Tensor out(pivotPrecision, pivotShape); + const auto outputBuffer = out.data(); + size_t bytesOffset = 0; + for (const auto& t : tensors) { + memcpy(reinterpret_cast(outputBuffer) + bytesOffset, t.data(), t.get_byte_size()); + bytesOffset += t.get_byte_size(); + } + return out; +} } // namespace utils } // namespace npu diff --git a/src/plugins/intel_npu/tools/single-image-test/main.cpp b/src/plugins/intel_npu/tools/single-image-test/main.cpp index 2d14dbd23e0d7e..44cbe246ae2ec3 100644 --- a/src/plugins/intel_npu/tools/single-image-test/main.cpp +++ b/src/plugins/intel_npu/tools/single-image-test/main.cpp @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include 
#include #include @@ -66,10 +68,18 @@ DEFINE_string(device, "", "Device to use"); DEFINE_string(config, "", "Path to the configuration file (optional)"); DEFINE_string(ip, "", "Input precision (default: U8, available: FP32, FP16, I32, I64, U8)"); DEFINE_string(op, "", "Output precision (default: FP32, available: FP32, FP16, I32, I64, U8)"); -DEFINE_string(il, "", "Input layout"); -DEFINE_string(ol, "", "Output layout"); -DEFINE_string(iml, "", "Model input layout"); -DEFINE_string(oml, "", "Model output layout"); +DEFINE_string( + il, "", + "Input layout for all inputs, or ';' separated list of pairs :. Regex in is supported"); +DEFINE_string(ol, "", + "Output layout for all outputs, or ';' separated list of pairs :. Regex in is " + "supported"); +DEFINE_string(iml, "", + "Model input layout for all model inputs, or ';' separated list of pairs :. Regex in " + " is supported"); +DEFINE_string(oml, "", + "Model output layout for all outputs, or ';' separated list of pairs :. Regex in " + " is supported"); DEFINE_bool(img_as_bin, false, "Force binary input even if network expects an image"); DEFINE_bool(pc, false, "Report performance counters"); @@ -156,6 +166,25 @@ std::vector splitStringList(const std::string& str, char delim) { return out; } +std::map parseArgMap(std::string argMap) { + argMap.erase(std::remove_if(argMap.begin(), argMap.end(), ::isspace), argMap.end()); + + const auto pairs = splitStringList(argMap, ';'); + + std::map parsedMap; + for (auto&& pair : pairs) { + const auto lastDelimPos = pair.find_last_of(':'); + auto key = pair.substr(0, lastDelimPos); + std::string value; + if (lastDelimPos != std::string::npos) { + value = pair.substr(lastDelimPos + 1); + } + parsedMap[std::move(key)] = std::move(value); + } + + return parsedMap; +} + void parseCommandLine(int argc, char* argv[]) { std::ostringstream usage; usage << "Usage: " << argv[0] << "[]"; @@ -531,6 +560,38 @@ std::vector> parseMeanOrScale(const std::string& mean_scale, return result; } 
+using RegexPtr = std::unique_ptr; +std::map parseLayoutRegex(std::string layouts) { + std::map input_output_layouts = parseArgMap(std::move(layouts)); + + std::map out; + for (const auto& input_output_layout : input_output_layouts) { + auto [name, value] = input_output_layout; + if (value.empty()) { + if (name.empty()) { + throw std::runtime_error("Can't parse layouts string \"" + layouts + + "\" into valid \"input:layout;input:layout\" pairs"); + } + // there is no value only name, thus we consider input/output name as "any" and + // apply layout value as the parsed name + out.emplace(std::make_unique(".*"), name); + continue; + } + std::string valid_regex_str = name.empty() ? ".*" : "^" + name + "$"; + out.emplace(std::make_unique(std::move(valid_regex_str)), std::move(value)); + } + return out; +} + +template +std::optional getRegexSubstitutionIfExist(const std::string& haystack, const std::map& substitutions) { + for (const auto& s : substitutions) { + if (std::regex_search(haystack, *s.first)) { + return {s.second}; + } + } + return {}; +} // // File utils // @@ -569,27 +630,70 @@ ov::Tensor loadImage(const ov::element::Type& precision, const ov::Shape& shape, return tensor; } -ov::Tensor loadBinary(const ov::element::Type& modelPrecision, const ov::Shape& shape, const std::string& filePath, - const ov::element::Type& dataPrecision) { +ov::Tensor loadBinary(const ov::element::Type& modelPrecision, const ov::Shape& shape, const ov::Layout& layout, + const std::string& filePath, const ov::element::Type& dataPrecision) { std::ifstream binaryFile(filePath, std::ios_base::binary | std::ios_base::ate); OPENVINO_ASSERT(binaryFile, "Failed to open input binary file: ", filePath); - const auto fileBytes = binaryFile.tellg(); + const auto fileSize = binaryFile.tellg(); binaryFile.seekg(0, std::ios_base::beg); OPENVINO_ASSERT(binaryFile.good(), "While reading a file an error is encountered"); - - const ov::Tensor requestedTensor(modelPrecision, shape); - const int 
reqTensorBytes = static_cast(requestedTensor.get_byte_size()); + const size_t fileBytes = static_cast(fileSize); + ov::Tensor requestedTensor(modelPrecision, shape); + const size_t reqTensorBytes = static_cast(requestedTensor.get_byte_size()); if (dataPrecision != modelPrecision && dataPrecision != ov::element::Type_t::undefined) { std::cout << "Converting " << filePath << " input from " << dataPrecision << " to " << modelPrecision << std::endl; const ov::Tensor inputTensor(dataPrecision, shape); - binaryFile.read(reinterpret_cast(inputTensor.data()), static_cast(fileBytes)); - npu::utils::convertTensorPrecision(inputTensor, requestedTensor); + if (fileBytes == inputTensor.get_byte_size()) { + binaryFile.read(reinterpret_cast(inputTensor.data()), static_cast(fileBytes)); + npu::utils::convertTensorPrecision(inputTensor, requestedTensor); + } else { + std::cout << "File contains " << fileBytes + << " bytes, but it expected to be: " << inputTensor.get_byte_size() + << " while converting precision from " << dataPrecision << " to " << modelPrecision + << ". 
Check whether it is possible to batch loading " << std::endl; + OPENVINO_ASSERT(ov::layout::has_batch(layout), + "Input layout has no batch dimenstion: ", layout.to_string()); + size_t N = shape[ov::layout::batch_idx(layout)]; + OPENVINO_ASSERT(fileBytes * N == inputTensor.get_byte_size(), "File contains ", fileBytes, " bytes, but ", + inputTensor.get_byte_size() * N, " total in batch size ", N, + " expected while converting precision from ", dataPrecision, " to ", modelPrecision); + ov::Shape debatchedInputTensorShape(shape); + debatchedInputTensorShape[ov::layout::batch_idx(layout)] = 1; + const ov::Tensor inputDebatchedTensor(dataPrecision, debatchedInputTensorShape); + binaryFile.read(reinterpret_cast(inputDebatchedTensor.data()), + static_cast(fileBytes)); + const ov::Tensor convertedPrecisionTensor(modelPrecision, debatchedInputTensorShape); + npu::utils::convertTensorPrecision(inputDebatchedTensor, convertedPrecisionTensor); + std::list tensorsToJoin; + std::generate_n(std::back_inserter(tensorsToJoin), N, [&convertedPrecisionTensor]() { + return convertedPrecisionTensor; + }); + requestedTensor = npu::utils::joinTensors(tensorsToJoin, layout); + } + } else { - OPENVINO_ASSERT(fileBytes == reqTensorBytes, "File contains ", fileBytes, " bytes, but ", reqTensorBytes, - " expected"); - binaryFile.read(reinterpret_cast(requestedTensor.data()), static_cast(reqTensorBytes)); + if (fileBytes == reqTensorBytes) { + binaryFile.read(reinterpret_cast(requestedTensor.data()), + static_cast(reqTensorBytes)); + } else { + std::cout << "File contains " << fileBytes << " bytes, but it expected to be: " << reqTensorBytes + << " when datatypes match. " + << ". 
Check whether it is possible to batch loading " << std::endl; + OPENVINO_ASSERT(ov::layout::has_batch(layout), + "Input layout has no batch dimenstion: ", layout.to_string()); + size_t N = shape[ov::layout::batch_idx(layout)]; + OPENVINO_ASSERT(fileBytes * N == reqTensorBytes, "File contains ", fileBytes, " bytes, but ", + reqTensorBytes, " in batch size ", N, " expected"); + + // duplicate a binary into tensor memory if the tensor batched + for (size_t n = 0; n < N; ++n) { + binaryFile.seekg(0, std::ios_base::beg); + binaryFile.read(reinterpret_cast(requestedTensor.data()) + fileBytes * n, + static_cast(fileBytes)); + } + } } return requestedTensor; @@ -617,7 +721,7 @@ ov::Tensor loadInput(const ov::element::Type& modelPrecision, const ov::Shape& s if (isImage(shape, layout) && !FLAGS_img_as_bin) { return loadImage(modelPrecision, shape, layout, filePath, colorFormat); } else { - return loadBinary(modelPrecision, shape, filePath, dataPrecision); + return loadBinary(modelPrecision, shape, layout, filePath, dataPrecision); } } @@ -1620,10 +1724,10 @@ static int runSingleImageTest() { throw std::logic_error("Parameter -op " + FLAGS_op + " is not supported"); } - ov::Layout inUserLayout(FLAGS_il); - ov::Layout outUserLayout(FLAGS_ol); - ov::Layout inModelLayout(FLAGS_iml); - ov::Layout outModelLayout(FLAGS_oml); + std::map inUserLayouts = parseLayoutRegex(FLAGS_il); + std::map outUserLayouts = parseLayoutRegex(FLAGS_ol); + std::map inModelLayouts = parseLayoutRegex(FLAGS_iml); + std::map outModelLayouts = parseLayoutRegex(FLAGS_oml); std::vector inputFilesPerCase; std::vector> inputFilesForOneInfer; @@ -1712,10 +1816,16 @@ static int runSingleImageTest() { } // Input layout - if (!inUserLayout.empty()) { - for (size_t i = 0; i < inputInfo.size(); ++i) { + for (size_t i = 0; i < inputInfo.size(); ++i) { + if (std::optional inUserLayout = + getRegexSubstitutionIfExist(inputInfo[i].get_any_name(), inUserLayouts); + inUserLayout.has_value()) { ov::Layout 
inLayerModelLayout; - if (inModelLayout.empty()) { + if (std::optional inModelLayout = + getRegexSubstitutionIfExist(inputInfo[i].get_any_name(), inModelLayouts); + inModelLayout.has_value()) { + inLayerModelLayout = inModelLayout.value(); + } else { const auto shape = inputInfo[i].get_shape(); inLayerModelLayout = getLayoutByRank(shape.size()); std::cout << "WARNING: Configuring preprocessing. Since --iml option isn't set, input model " @@ -1723,11 +1833,12 @@ static int runSingleImageTest() { << inputInfo[i].get_any_name() << "\" is infered from shape: " << toString(shape) << " rank (" << shape.size() << ") as " << inLayerModelLayout.to_string() << std::endl; - } else { - inLayerModelLayout = inModelLayout; } + std::cout << "Set layouts for the input: \"" << inputInfo[i].get_any_name() << "\", model " + << inLayerModelLayout.to_string() << ", user " << inUserLayout.value().to_string() + << std::endl; ppp.input(i).model().set_layout(inLayerModelLayout); - ppp.input(i).tensor().set_layout(inUserLayout); + ppp.input(i).tensor().set_layout(inUserLayout.value()); } } @@ -1766,10 +1877,16 @@ static int runSingleImageTest() { } // Output layout - if (!outUserLayout.empty()) { - for (size_t i = 0; i < outputInfo.size(); ++i) { + for (size_t i = 0; i < outputInfo.size(); ++i) { + if (std::optional outUserLayout = + getRegexSubstitutionIfExist(outputInfo[i].get_any_name(), outUserLayouts); + outUserLayout.has_value()) { ov::Layout outLayerModelLayout; - if (outModelLayout.empty()) { + if (std::optional outModelLayout = + getRegexSubstitutionIfExist(outputInfo[i].get_any_name(), outModelLayouts); + outModelLayout.has_value()) { + outLayerModelLayout = outModelLayout.value(); + } else { const auto shape = outputInfo[i].get_shape(); outLayerModelLayout = getLayoutByRank(shape.size()); std::cout << "WARNING: Configuring preprocessing. 
Since --oml option isn't set, output model " @@ -1777,11 +1894,12 @@ static int runSingleImageTest() { << outputInfo[i].get_any_name() << "\" is infered from shape: " << toString(shape) << " rank (" << shape.size() << ") as " << outLayerModelLayout.to_string() << std::endl; - } else { - outLayerModelLayout = outModelLayout; } + std::cout << "Set layouts for the output: \"" << outputInfo[i].get_any_name() << "\", model " + << outLayerModelLayout.to_string() << ", user " << outUserLayout.value().to_string() + << std::endl; ppp.output(i).model().set_layout(outLayerModelLayout); - ppp.output(i).tensor().set_layout(outUserLayout); + ppp.output(i).tensor().set_layout(outUserLayout.value()); } } @@ -1852,10 +1970,14 @@ static int runSingleImageTest() { // Determine the input layout ov::Layout inputLayout; - if (!inUserLayout.empty()) { - inputLayout = inUserLayout; - } else if (!inModelLayout.empty()) { - inputLayout = inModelLayout; + if (std::optional inUserLayout = + getRegexSubstitutionIfExist(inputInfo.get_any_name(), inUserLayouts); + inUserLayout.has_value()) { + inputLayout = inUserLayout.value(); + } else if (std::optional inModelLayout = + getRegexSubstitutionIfExist(inputInfo.get_any_name(), inModelLayouts); + inModelLayout.has_value()) { + inputLayout = inModelLayout.value(); } else { inputLayout = getLayoutByRank(shape.size()); std::cout << "WARNING: Loading input data. 
Since --iml option isn't set, input model layout for " @@ -1922,10 +2044,14 @@ static int runSingleImageTest() { // Determine the output layout ov::Layout outputLayout; - if (!outUserLayout.empty()) { - outputLayout = outUserLayout; - } else if (!outModelLayout.empty()) { - outputLayout = outModelLayout; + if (std::optional outUserLayout = + getRegexSubstitutionIfExist(tensorName, outUserLayouts); + outUserLayout.has_value()) { + outputLayout = outUserLayout.value(); + } else if (std::optional outModelLayout = + getRegexSubstitutionIfExist(tensorName, outModelLayouts); + outModelLayout.has_value()) { + outputLayout = outModelLayout.value(); } else { outputLayout = getLayoutByRank(shape.size()); std::cout << "WARNING: Since --oml option isn't set, output model layout for layer \"" From d0c07cd1ed9631baff95100f3aa0076f520af882 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jul 2024 16:52:12 +0200 Subject: [PATCH 02/54] Update pytest requirement from <8.3,>=5.0 to >=5.0,<8.4 in /src/bindings/python (#25660) Updates the requirements on [pytest](https://github.com/pytest-dev/pytest) to permit the latest version.
Release notes

Sourced from pytest's releases.

8.3.1

pytest 8.3.1 (2024-07-20)

The 8.3.0 release failed to include the change notes and docs for the release. This patch release remedies this. There are no other changes.

Commits
  • de98446 Prepare release version 8.3.1
  • bd0a042 Merge pull request #12636 from pytest-dev/update-release-notes
  • 664325b doc/changelog: update 8.3.0 notes
  • 19d225d Merge pull request #12635 from pytest-dev/release-8.3.0
  • bc33028 Prepare release version 8.3.0
  • a7d5a8e Merge pull request #12557 from x612skm/maintainence/11771-pypy-3.9-bump
  • ced7072 Add a change note for PR #11771
  • d42b76d Adjust test_errors_in_xfail_skip_expressions for PyPy
  • 9eee45a Bump PyPy runtime to v3.9 @ GHA
  • d489247 Fix caching of parameterized fixtures (#12600)
  • Additional commits viewable in compare view

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Alina Kladieva --- src/bindings/python/constraints.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt index 6127d46c62a103..9c3eef6d75a286 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -2,7 +2,7 @@ numpy>=1.16.6,<2.1.0 # Python bindings, frontends # pytest -pytest>=5.0,<8.3 +pytest>=5.0,<8.4 pytest-dependency==0.6.0 pytest-html==4.1.1 pytest-timeout==2.2.0 From bb5a9d48059abb5223e785b04b0d0042ec80a3dc Mon Sep 17 00:00:00 2001 From: Andrew Kwangwoong Park Date: Wed, 24 Jul 2024 03:09:52 +0900 Subject: [PATCH 03/54] [GPU] Fix issue to calculate present layout's padding for KVCache (#25682) ### Details: - Fix issue to calculate present layout's padding for KVCache ### Tickets: - 146876 --- src/plugins/intel_gpu/src/graph/primitive_inst.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 522fb03f15c5bd..9fb822955c41a4 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -1189,6 +1189,7 @@ void primitive_inst::do_runtime_in_place_kv_cache() { } const auto& desc = _node->as().get_primitive(); auto& past_layout = _impl_params->input_layouts[0]; + auto& new_layout = _impl_params->input_layouts[1]; auto& present_layout = _impl_params->output_layouts[0]; const auto& sequence_axis = desc->concat_axis; const auto& gather_axis = desc->gather_axis; @@ -1209,8 +1210,12 @@ void primitive_inst::do_runtime_in_place_kv_cache() { auto max_pad = kv_cache_inst::get_max_pad(past_layout, _deps[0].first->_max_output_layout_count[0], sequence_axis_legacy, "past_layout"); if (max_pad > 0) { - 
kv_cache_inst::update_pad(present_layout, max_pad - 1, sequence_axis_legacy); - GPU_DEBUG_TRACE_DETAIL << "[do runtime_in_place_kv_cache] " << id() << " Updated present_layout's pad : " << present_layout.to_string() << std::endl; + const auto new_seq_len = static_cast(new_layout.get_shape()[sequence_axis]); + if (max_pad - new_seq_len >= 0) { + kv_cache_inst::update_pad(present_layout, max_pad - new_seq_len, sequence_axis_legacy); + GPU_DEBUG_TRACE_DETAIL << "[do runtime_in_place_kv_cache] " << id() << " Updated present_layout's pad : " + << present_layout.to_string() << std::endl; + } auto& variable = get_network().get_variable(desc->variable_info.variable_id); variable.set_layout(present_layout); GPU_DEBUG_TRACE_DETAIL << "[do_runtime_in_place_kv_cache] " << id() << "Updated variable with present_layout" From 8f795130652a7546fe01b052d34a376f52806fa2 Mon Sep 17 00:00:00 2001 From: Haiqi Pan Date: Wed, 24 Jul 2024 11:43:40 +0800 Subject: [PATCH 04/54] [API][BATCH] add PERF_COUNT to support properties (#25271) ### Details: - *[API][BATCH] add PERF_COUNT to support properties* ### Tickets: - *CVS-130236* --------- Co-authored-by: Wang, Yang Co-authored-by: Chen Peter --- src/plugins/auto_batch/src/plugin.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/plugins/auto_batch/src/plugin.cpp b/src/plugins/auto_batch/src/plugin.cpp index de125cb12551af..d97987bea6f39a 100644 --- a/src/plugins/auto_batch/src/plugin.cpp +++ b/src/plugins/auto_batch/src/plugin.cpp @@ -19,9 +19,10 @@ namespace ov { namespace autobatch_plugin { -std::vector supported_configKeys = {ov::device::priorities.name(), - ov::auto_batch_timeout.name(), - ov::enable_profiling.name()}; +std::vector supported_configKeys = { + ov::PropertyName{ov::device::priorities.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::auto_batch_timeout.name(), ov::PropertyMutability::RW}, + ov::PropertyName{ov::enable_profiling.name(), ov::PropertyMutability::RW}}; 
inline ov::AnyMap merge_properties(ov::AnyMap config, const ov::AnyMap& user_config) { for (auto&& kvp : user_config) { @@ -82,9 +83,13 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument return {it->second}; } } else if (name == ov::supported_properties.name()) { - return std::vector{ - ov::PropertyName{ov::supported_properties.name(), ov::PropertyMutability::RO}, - ov::PropertyName{ov::device::full_name.name(), ov::PropertyMutability::RO}}; + std::vector property_name; + property_name.push_back(ov::PropertyName{ov::supported_properties.name(), ov::PropertyMutability::RO}); + property_name.push_back(ov::PropertyName{ov::device::full_name.name(), ov::PropertyMutability::RO}); + for (auto& it : supported_configKeys) { + property_name.push_back(it); + } + return decltype(ov::supported_properties)::value_type(std::move(property_name)); } else if (name == ov::internal::supported_properties.name()) { return decltype(ov::internal::supported_properties)::value_type{}; } else if (name == ov::device::full_name.name()) { @@ -113,6 +118,7 @@ OV_DEFINE_PLUGIN_CREATE_FUNCTION(Plugin, version) Plugin::Plugin() { set_device_name("BATCH"); m_plugin_config.insert(ov::auto_batch_timeout(1000)); // default value (ms) + m_plugin_config.insert(ov::enable_profiling(false)); } std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, @@ -132,7 +138,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< auto full_properties = merge_properties(m_plugin_config, properties); auto device_batch = full_properties.find(ov::device::priorities.name()); if (device_batch == full_properties.end()) { - OPENVINO_THROW("ov::device::priorities key for AUTO NATCH is not set for BATCH device"); + OPENVINO_THROW("ov::device::priorities key for AUTO BATCH is not set for BATCH device"); } auto meta_device = parse_meta_device(device_batch->second.as(), properties); From ba57be36eaa74814fbfc1962042dd9036e2a7266 Mon Sep 17 00:00:00 2001 From: Jade Cho Date: 
Wed, 24 Jul 2024 17:08:54 +0900 Subject: [PATCH 05/54] [GPU] Disable onednn globally when model has convolutions with weights zero points (#25669) ### Tickets: - *142818* --- .../src/graph/graph_optimizer/reorder_inputs.cpp | 15 ++++++++++++++- .../graph_optimizer/select_preferred_formats.cpp | 9 +++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index c6de09403c1cef..88dcb8865d937a 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -59,7 +59,20 @@ std::map get_preferred_formats(program& p, layout_o onednn_impls_counter++; } - if (onednn_impls_counter < 1 && lo.get_optimization_attributes().use_onednn_impls) { + // Fallback to ocl when asymmetric weights convolution is existed. + size_t total_convs = 0; + size_t num_asym_wei_convs = 0; + for (auto n : p.get_processing_order()) { + if (n->is_type()) { + total_convs++; + if (n->as().weights_zero_points_term()) + num_asym_wei_convs++; + } + } + + GPU_DEBUG_LOG << "Number of convolutions with weights zero points: " << num_asym_wei_convs << "/" << total_convs << std::endl; + + if (lo.get_optimization_attributes().use_onednn_impls && (onednn_impls_counter < 1 || num_asym_wei_convs > 0)) { should_update_fmt_map = true; lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, 0); GPU_DEBUG_LOG << "Disable oneDNN implementations globally" << std::endl; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp index e44ee477c0812f..70d0b70c7fa9fa 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/select_preferred_formats.cpp @@ 
-31,6 +31,15 @@ void select_preferred_formats::run(program& p) { return; #ifdef ENABLE_ONEDNN_FOR_GPU + + // Fallback to ocl when asymmetric weights convolution is existed. + if (_lo.get_optimization_attributes().use_onednn_impls) { + for (auto n : p.get_processing_order()) { + if (n->is_type() && n->as().weights_zero_points_term()) + return; + } + } + auto forcing_map = _lo.get_implementation_forcing(); engine.create_onednn_engine(p.get_config()); From e723e050761800c98ae174c04765d4fd2d777a90 Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Wed, 24 Jul 2024 10:20:16 +0200 Subject: [PATCH 06/54] [GHA] Artifacts to share (#25330) ### Tickets: - 146008 --- .github/actions/create_manifest/action.yml | 44 +++ .../create_manifest/create_manifest.py | 128 +++++++ .../create_manifest/manifest_manager.py | 336 ++++++++++++++++++ .../actions/create_manifest/requirements.txt | 2 + .github/actions/store_artifacts/action.yml | 39 ++ .../actions/store_artifacts/requirements.txt | 2 + .../store_artifacts/store_artifacts.py | 134 +++++++ .github/workflows/linux.yml | 37 ++ .github/workflows/linux_arm64.yml | 28 ++ .github/workflows/windows.yml | 45 ++- 10 files changed, 786 insertions(+), 9 deletions(-) create mode 100644 .github/actions/create_manifest/action.yml create mode 100644 .github/actions/create_manifest/create_manifest.py create mode 100644 .github/actions/create_manifest/manifest_manager.py create mode 100644 .github/actions/create_manifest/requirements.txt create mode 100644 .github/actions/store_artifacts/action.yml create mode 100644 .github/actions/store_artifacts/requirements.txt create mode 100644 .github/actions/store_artifacts/store_artifacts.py diff --git a/.github/actions/create_manifest/action.yml b/.github/actions/create_manifest/action.yml new file mode 100644 index 00000000000000..66d59930e93712 --- /dev/null +++ b/.github/actions/create_manifest/action.yml @@ -0,0 +1,44 @@ +name: 'Create manifest' +description: 'Creates manifest containing versions 
of the product and the corresponding repositories' +inputs: + repos: + description: "Multi-line list of repositories to include to manifest" + required: true + product_type: + description: "Unique string to reflect product configuration" + required: true + save_to: + description: "Path to save manifest to" + required: true + action_path: + description: "Action path, if not set - taken from github context" + required: false + target_arch: + description: "Target architecture" + required: true + build_type: + description: "Build type: release | debug | release_with_debug" + required: true + + +runs: + using: "composite" + steps: + - name: Install Python dependencies + shell: ${{ runner.os == 'Windows' && 'pwsh' || 'bash' }} + run: >- + pip install -r ${{ env.ACTION_PATH }}/requirements.txt + env: + ACTION_PATH: ${{ runner.os == 'Windows' && '$env:GITHUB_ACTION_PATH' || '$GITHUB_ACTION_PATH' }} + + - name: 'Create manifest' + id: create_manifest + shell: ${{ runner.os == 'Windows' && 'pwsh' || 'bash' }} + run: >- + python ${{ env.ACTION_PATH }}/create_manifest.py + --target_arch "${{ inputs.target_arch }}" --build_type "${{ inputs.build_type }}" + --save_to "${{ inputs.save_to }}" --product_type "${{ inputs.product_type }}" -r "${{ inputs.repos }}" + env: + BASE_SHA: ${{ github.event.pull_request.base.sha }} + PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} + ACTION_PATH: ${{ runner.os == 'Windows' && '$env:GITHUB_ACTION_PATH' || '$GITHUB_ACTION_PATH' }} diff --git a/.github/actions/create_manifest/create_manifest.py b/.github/actions/create_manifest/create_manifest.py new file mode 100644 index 00000000000000..1fb3a4712807e6 --- /dev/null +++ b/.github/actions/create_manifest/create_manifest.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import argparse +import logging +import os +from datetime import timezone +from pathlib import Path +import re +import git + +from manifest_manager import Manifest, Repository, Component + + +def parse_args(): + 
parser = argparse.ArgumentParser(description='Creates manifest with product and repositories version') + parser.add_argument('-e', '--event_name', help='Name of GitHub event', required=False) + parser.add_argument('-r', '--repos', type=str, help='Paths to repositories to lon in manifest', + required=True) + parser.add_argument('--product_type', help='Unique string to reflect product configuration', required=True) + parser.add_argument('--target_arch', help='Target architecture', required=True) + parser.add_argument('--build_type', help='Build type: release | debug | release_with_debug', required=True) + parser.add_argument('--save_to', help='Path to save manifest to', required=True) + args = parser.parse_args() + return args + + +def init_logger(): + logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s %(name)-15s %(levelname)-8s %(message)s', + datefmt='%m-%d-%Y %H:%M:%S') + + +def set_github_output(name: str, value: str, github_output_var_name: str = 'GITHUB_OUTPUT'): + """Sets output variable for a GitHub Action""" + logger = logging.getLogger(__name__) + # In an environment variable "GITHUB_OUTPUT" GHA stores path to a file to write outputs to + with open(os.environ.get(github_output_var_name), 'a+') as file: + logger.info(f"Add {name}={value} to {github_output_var_name}") + print(f'{name}={value}', file=file) + + +def get_repo_data(repo_dir: str | Path) -> dict: + repo = git.Repo(str(repo_dir)) + repo_url = next(repo.remote().urls) + repo_name_match = re.search(r'github\.com/[^/]+/([^/]+)', repo_url) + repo_name = repo_name_match.group(1) if repo_name_match else None + + trigger_repo_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}" + is_trigger_repo = repo_url == trigger_repo_url + + branch = os.getenv('GITHUB_REF') if is_trigger_repo else repo.references[0].name + target_branch = os.getenv('GITHUB_BASE_REF') if is_trigger_repo else None + revision = os.getenv('PR_HEAD_SHA') or os.getenv('GITHUB_SHA') if is_trigger_repo else 
repo.head.commit.hexsha + target_revision = os.getenv('BASE_SHA') if is_trigger_repo else None + # Commit time of a merge commit (in case of PR merged to target) + # TODO: Save commit time of a head commit in PR as well? + commit_time = repo.head.commit.committed_datetime.astimezone(timezone.utc) + merge_target = branch.endswith('/merge') + return { + 'name': repo_name, + 'url': repo_url, + 'branch': branch.replace('refs/heads/', ''), # To align with internal manifest + 'target_branch': target_branch, + 'revision': revision, + 'target_revision': target_revision, + 'commit_time': commit_time, + 'merge_target': merge_target, + 'trigger': is_trigger_repo, + } + + +def parse_ov_version(header_file: str | Path) -> str: + header_code = Path(header_file).read_text() + major, minor, patch = (re.search(rf"#define OPENVINO_VERSION_{name} (\d+)", header_code).group(1) + for name in ["MAJOR", "MINOR", "PATCH"]) + return f"{major}.{minor}.{patch}" + + +def generate_manifest(repos: list, product_type: str, event_type: str, build_type: str, target_arch: str) -> Manifest: + manifest = Manifest() + component_name = 'dldt' # historical, keep for internal compatibility + repositories = [] + ov_version = None + trigger_repo = None + + for repo_dir in repos: + repo = Repository(**get_repo_data(repo_dir)) + repositories.append(repo) + if repo.name == 'openvino': + version_file = Path(repo_dir) / 'src' / 'core' / 'include' / 'openvino' / 'core' / 'version.hpp' + ov_version = parse_ov_version(version_file) + if repo.trigger: + trigger_repo = repo + + custom_branch_name = f'-{trigger_repo.branch}' if trigger_repo.branch != 'master' else '' + run_number_postfix = f'-{os.environ.get("GITHUB_RUN_NUMBER")}' if os.environ.get("GITHUB_RUN_NUMBER") else '' + product_version = f"{ov_version}{run_number_postfix}-{trigger_repo.revision[:11]}{custom_branch_name}" + ci_build_dev_tag = f'dev{trigger_repo.commit_time.strftime("%Y%m%d")}' + wheel_product_version = f'{ov_version}.{ci_build_dev_tag}' + + 
set_github_output('CI_BUILD_NUMBER', product_version, 'GITHUB_ENV') + set_github_output('CI_BUILD_DEV_TAG', ci_build_dev_tag, 'GITHUB_ENV') + + component = Component(name=component_name, version=product_version, product_type=product_type, + target_arch=target_arch, build_type=build_type, build_event=event_type, + repositories=repositories, custom_params={'wheel_product_version': wheel_product_version}) + + manifest.add_component(component) + return manifest + + +def main(): + init_logger() + logger = logging.getLogger(__name__) + args = parse_args() + + event_name = args.event_name or os.getenv('GITHUB_EVENT_NAME') + event_type = 'pre_commit' if event_name == 'pull_request' else 'commit' + + repos = args.repos.split() + manifest = generate_manifest(repos, args.product_type, event_type, args.build_type, args.target_arch) + + logger.info(f"Saving manifest to {args.save_to}") + manifest.save_manifest(args.save_to) + + +if __name__ == '__main__': + main() diff --git a/.github/actions/create_manifest/manifest_manager.py b/.github/actions/create_manifest/manifest_manager.py new file mode 100644 index 00000000000000..9978f65c0d560e --- /dev/null +++ b/.github/actions/create_manifest/manifest_manager.py @@ -0,0 +1,336 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import yaml +from pathlib import Path +from copy import deepcopy +from typing import Optional, Dict, List, Union, Iterator, Any + + +class ManifestException(Exception): + """Base Manifest file manager exception""" + + +class ManifestDoesNotExist(ManifestException): + """ManifestDoesNotExist Manifest file manager exception""" + + +class ManifestSavingError(ManifestException): + """ManifestSavingError Manifest file manager exception""" + + +class WrongComponentFormatError(ManifestException): + """WrongComponentFormatError Manifest file manager exception""" + + +class WrongRepositoryFormatError(ManifestException): + 
"""WrongRepositoryFormatError Manifest file manager exception""" + + +class Manifest: + """Manifest wrapper""" + + default_manifest_name = "manifest.yml" + + def __init__(self, manifest_path: Optional[str] = None): + """ + :param manifest_path: Path to a manifest file + """ + self._manifest_file = Path(manifest_path or self.default_manifest_name) + if self._manifest_file.is_dir(): + self._manifest_file = self._manifest_file / self.default_manifest_name + + self._manifest_version = "1.0" + self._components: Dict[str, Component] = {} + + if manifest_path is not None: + self._prepare_manifest() + + def __repr__(self) -> str: + return str(self._manifest_file) + + def _prepare_manifest(self) -> None: + """Read manifest file and convert its data to objects""" + if not self._manifest_file.is_file(): + raise ManifestDoesNotExist(f'Cannot find manifest "{self._manifest_file}"') + + with self._manifest_file.open("r") as manifest: + manifest_info = yaml.safe_load(manifest) + + if not isinstance(manifest_info, dict): + raise ManifestDoesNotExist(f'Incorrect manifest "{self._manifest_file}"') + + self._manifest_version = manifest_info.get("manifest_version", self._manifest_version) + + for name, info in manifest_info["components"].items(): + self._components[name] = Component.from_dict({ + "name": name, + "version": info["version"], + "repository": info["repository"], + "product_type": info["product_type"], + "target_arch": info["target_arch"], + "build_type": info["build_type"], + "build_event": info["build_event"], + "custom_params": info.get("custom_params") + }) + + @property + def version(self) -> str: + return self._manifest_version + + @property + def components(self) -> List[Component]: + return list(self._components.values()) + + def get_component(self, component_name: str) -> Optional[Component]: + return self._components.get(component_name) + + def add_component(self, component: Component, replace: bool = False) -> bool: + if not replace and component.name in 
self._components: + return False + self._components[component.name] = component + return True + + def delete_component(self, component_name: str) -> bool: + return self._components.pop(component_name, None) is not None + + def save_manifest(self, save_to: Union[str, Path]) -> None: + class YamlDumper(yaml.SafeDumper): + """Formatting PyYAML dump() output""" + + def write_line_break(self, data=None): + super().write_line_break(data) + if len(self.indents) in {1, 2, 4}: + super().write_line_break() + + path_to_save = Path(save_to) + if path_to_save.is_dir(): + path_to_save = path_to_save / self.default_manifest_name + else: + path_to_save.parent.mkdir(parents=True, exist_ok=True) + + manifest_data = {"components": {}, "manifest_version": self._manifest_version} + for comp_name, comp_data in self._components.items(): + comp = dict(comp_data) + manifest_data["components"][comp_name] = { + "version": comp["version"], + "product_type": comp["product_type"], + "target_arch": comp["target_arch"], + "build_type": comp["build_type"], + "build_event": comp["build_event"], + "trigger_repo_name": comp["trigger_repo_name"], + "custom_params": comp["custom_params"], + "repository": comp["repositories"], + } + + try: + with path_to_save.open("w") as manifest: + yaml.dump(manifest_data, stream=manifest, Dumper=YamlDumper, default_flow_style=False, sort_keys=False) + except Exception as ex: + raise ManifestSavingError(ex) from ex + + def as_dict(self) -> Dict[str, Union[str, Dict]]: + """Return manifest as dictionary""" + if not self._manifest_file.is_file(): + raise ManifestDoesNotExist(f'Cannot find manifest "{self._manifest_file}"') + + with self._manifest_file.open("r") as manifest: + manifest_dict = yaml.safe_load(manifest) + + if not isinstance(manifest_dict, dict): + raise ManifestDoesNotExist(f'Incorrect manifest "{self._manifest_file}"') + + return manifest_dict + + +class Repository: + def __init__(self, **kwargs) -> None: + self._state: dict = { + "name": None, + "url": 
None, + "branch": None, + "revision": None, + "commit_id": None, + "commit_time": None, + "target_branch": None, + "target_revision": None, + "target_commit_id": None, + "merge_target": False, + "revert_time": None, + "trigger": False, + "default_branch": None, + "type": "git", + } + for arg_name, arg_value in kwargs.items(): + if arg_name in self._state: + self._state[arg_name] = arg_value + + def __getattr__(self, attr_name: str) -> Any: + if attr_name in self._state: + return self._state.get(attr_name) + raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{attr_name}'") + + def __iter__(self) -> Iterator: + for name in self._state: + yield name, self._state.get(name) + + def get_git_repo_state(self) -> dict: + state = deepcopy(self._state) + state.pop("revision") + state.pop("target_revision") + state.pop("commit_time") + state.pop("type") + state["commit_id"] = self._state["revision"] + state["target_commit_id"] = self._state["target_revision"] + return state + + +class Component: + def __init__( + self, + name: str, + version: str, + repositories: list, + product_type: str, + target_arch: str, + build_type: str, + build_event: str, + custom_params: Optional[dict] = None + ): + """ + Initialize the product component. 
+ + :param name: Name of component + :param version: Version of component + :param repositories: List of repositories + :param product_type: Unique key to describe a product type (can include OS, arch, build variant, etc) + :param target_arch: Target architecture + :param build_type: Type of build (release, debug) + :param build_event: Build event (pre_commit, commit) + :param custom_params: Custom parameters (optional) + """ + self._name = name + self._version = version + self._repositories = {} + self._product_type = product_type + self._target_arch = target_arch + self._build_type = build_type + self._build_event = build_event + self._custom_params = custom_params if custom_params is not None else {} + self._trigger_repo_name = None + + self._prepare_repositories(repositories) + + def __iter__(self) -> Iterator: + yield "name", self._name + yield "version", self._version + yield "product_type", self._product_type + yield "target_arch", self._target_arch + yield "build_type", self._build_type + yield "build_event", self._build_event + yield "trigger_repo_name", self._trigger_repo_name + yield "custom_params", self._custom_params + yield "repositories", [dict(repo) for repo in self._repositories.values()] + + def _prepare_repositories(self, repositories: list) -> None: + for repo in repositories: + repo_name, repo_obj = self._parse_repository(repo) + self._repositories[repo_name] = repo_obj + + if repo_obj.trigger: + if self._trigger_repo_name: + raise WrongRepositoryFormatError( + f"Found trigger repo duplicates: {self._trigger_repo_name}, {repo_name}" + ) + self._trigger_repo_name = repo_name + + @staticmethod + def _parse_repository(repo: Union[dict, Repository]) -> tuple[str, Repository]: + if isinstance(repo, dict): + repo_name = repo["name"] + repo_obj = Repository(**repo) + elif isinstance(repo, Repository): + repo_name = repo.name + repo_obj = repo + return repo_name, repo_obj + + @staticmethod + def from_dict(comp_data: dict) -> Component: + """ + Convert 
a dictionary to a Component object. + + :param comp_data: Component data dictionary + :return: Component object + """ + try: + return Component( + comp_data["name"], + comp_data["version"], + comp_data["repository"], + comp_data["product_type"], + comp_data["target_arch"], + comp_data["build_type"], + comp_data["build_event"], + comp_data.get("custom_params"), + ) + except Exception as ex: + raise WrongComponentFormatError(ex) from ex + + @property + def name(self) -> str: + return self._name + + @property + def version(self) -> str: + return self._version + + @property + def product_type(self) -> str: + return self._product_type + + @property + def target_arch(self) -> str: + return self._target_arch + + @property + def build_type(self) -> str: + return self._build_type + + @property + def build_event(self) -> str: + return self._build_event + + @property + def repositories(self) -> List[Repository]: + return list(self._repositories.values()) + + @property + def trigger_repo_name(self) -> Optional[str]: + return self._trigger_repo_name + + @property + def trigger_repository(self) -> Optional[Repository]: + return next((repo for repo in self._repositories.values() if repo.trigger), None) + + def get_repository(self, repository_name: str) -> Optional[Repository]: + return self._repositories.get(repository_name) + + def add_repository(self, repository: Repository, replace: bool = False) -> bool: + if not replace and repository.name in self._repositories: + return False + self._repositories[repository.name] = repository + return True + + def delete_repository(self, repository_name: str) -> bool: + return self._repositories.pop(repository_name, None) is not None + + def get_custom_param(self, name: str) -> Optional[Any]: + return self._custom_params.get(name) + + def add_custom_param(self, name: str, value: Any) -> None: + self._custom_params[name] = value + + def delete_custom_param(self, name: str) -> bool: + return self._custom_params.pop(name, None) is not None 
diff --git a/.github/actions/create_manifest/requirements.txt b/.github/actions/create_manifest/requirements.txt new file mode 100644 index 00000000000000..eb0abf060b23b0 --- /dev/null +++ b/.github/actions/create_manifest/requirements.txt @@ -0,0 +1,2 @@ +GitPython~=3.1.43 +pyyaml~=6.0.1 diff --git a/.github/actions/store_artifacts/action.yml b/.github/actions/store_artifacts/action.yml new file mode 100644 index 00000000000000..d9c4184a622cce --- /dev/null +++ b/.github/actions/store_artifacts/action.yml @@ -0,0 +1,39 @@ +name: 'Store artifacts' +description: 'Store given artifacts in a proper place on a shared drive' +inputs: + artifacts: + description: "Multi-line list of artifacts to store" + required: true + storage_dir: + description: "Directory name to store artifacts in" + required: true + storage_root: + description: "Root path of the storage to place artifacts to" + required: true + + +outputs: + artifacts_storage_path: + description: "Path where the artifacts are stored" + value: ${{ steps.copy_artifacts.outputs.artifacts_storage_path }} + +runs: + using: "composite" + steps: + - name: Install Python dependencies + shell: ${{ runner.os == 'Windows' && 'pwsh' || 'bash' }} + run: >- + pip install -r ${{ env.ACTION_PATH }}/requirements.txt + env: + ACTION_PATH: ${{ runner.os == 'Windows' && '$env:GITHUB_ACTION_PATH' || '$GITHUB_ACTION_PATH' }} + + - name: 'Copy artifacts' + id: copy_artifacts + shell: ${{ runner.os == 'Windows' && 'pwsh' || 'bash' }} + run: >- + python ${{ env.ACTION_PATH }}/store_artifacts.py + --storage_dir "${{ inputs.storage_dir }}" --storage_root "${{ inputs.storage_root }}" + -a "${{ inputs.artifacts }}" + env: + PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} + ACTION_PATH: ${{ runner.os == 'Windows' && '$env:GITHUB_ACTION_PATH' || '$GITHUB_ACTION_PATH' }} diff --git a/.github/actions/store_artifacts/requirements.txt b/.github/actions/store_artifacts/requirements.txt new file mode 100644 index 00000000000000..eb0abf060b23b0 
--- /dev/null +++ b/.github/actions/store_artifacts/requirements.txt @@ -0,0 +1,2 @@ +GitPython~=3.1.43 +pyyaml~=6.0.1 diff --git a/.github/actions/store_artifacts/store_artifacts.py b/.github/actions/store_artifacts/store_artifacts.py new file mode 100644 index 00000000000000..7dde088dc91593 --- /dev/null +++ b/.github/actions/store_artifacts/store_artifacts.py @@ -0,0 +1,134 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import argparse +import logging +import os +import re +import sys +import git +import shutil +from contextlib import contextmanager +from pathlib import Path + + +def parse_args(): + parser = argparse.ArgumentParser(description='Returns product components changed in a given PR or commit') + parser.add_argument('-e', '--event_name', help='Name of GitHub event', required=False) + parser.add_argument('-b', '--branch_name', help='Name of GitHub branch', required=False) + parser.add_argument('-s', '--commit_sha', help='Commit hash for which artifacts were generated', required=False) + parser.add_argument('-a', '--artifacts', type=str, help='Paths to artifacts to store (files/dirs)', required=True) + parser.add_argument('--storage_dir', help='Directory name to store artifacts in', required=True) + parser.add_argument('--storage_root', help='Root path of the storage to place artifacts to', required=True) + args = parser.parse_args() + return args + + +def init_logger(): + logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s %(name)-15s %(levelname)-8s %(message)s', + datefmt='%m-%d-%Y %H:%M:%S') + + +def set_github_output(name: str, value: str, github_output_var_name: str = 'GITHUB_OUTPUT'): + """Sets output variable for a GitHub Action""" + logger = logging.getLogger(__name__) + # In an environment variable "GITHUB_OUTPUT" GHA stores path to a file to write outputs to + with open(os.environ.get(github_output_var_name), 'a+') as file: + logger.info(f"Add {name}={value} 
to {github_output_var_name}") + print(f'{name}={value}', file=file) + + +@contextmanager +def preserve_stats_context(): + """ + Workaround for copying to samba share on Linux + to avoid issues while setting Linux permissions. + """ + _orig_copystat = shutil.copystat + shutil.copystat = lambda x, y, follow_symlinks=True: x + try: + yield + finally: + shutil.copystat = _orig_copystat + + +def rotate_dir(directory: Path) -> bool: + """ + Renames directory if exists: + dir -> dir_1 + """ + log = logging.getLogger('rotate_dir') + + if not directory.exists(): + return False + + dir_parent = directory.parent + dir_name = directory.name + max_dir_num = 0 + for redir in dir_parent.iterdir(): + dir_num = redir.name.split('_')[-1] + if redir.name.startswith(dir_name) and dir_num.isdigit() and int(dir_num) > max_dir_num: + max_dir_num = int(dir_num) + + duplicate = dir_parent / f'{dir_name}_{max_dir_num + 1}' + log.info(f"Move previous directory to {duplicate}") + directory.rename(duplicate) + return True + + +def main(): + init_logger() + logger = logging.getLogger(__name__) + args = parse_args() + + event_name = args.event_name or os.getenv('GITHUB_EVENT_NAME') + branch_name = args.branch_name or os.getenv('GITHUB_BASE_REF') or os.getenv('GITHUB_REF_NAME') + + # TODO: return, once we decide to get rid of post-commit and choose artifacts generated for a merged PR in queue? 
+ # merge_queue_matcher = re.search(r'gh-readonly-queue/(.*?)/pr-', branch_name) + # if merge_queue_matcher: + # branch_name = merge_queue_matcher.group(1) + + commit_hash = args.commit_sha or os.getenv('PR_HEAD_SHA') or os.getenv('GITHUB_SHA') + event_type = 'pre_commit' if event_name == 'pull_request' else 'commit' + storage_root = args.storage_root or os.getenv('ARTIFACTS_SHARE') + + storage = Path(storage_root) / 'dldt' / branch_name / event_type / commit_hash / args.storage_dir + set_github_output("artifacts_storage_path", str(storage)) + + logger.info(f"Storing artifacts to {storage}") + rotate_dir(storage) # TODO: use more stable approach to handle storing artifacts from re-runs + + error_found = False + for artifact in args.artifacts.split(): + artifact_path = Path(artifact) + logger.debug(f"Copying {artifact_path} to {storage / artifact_path.name}") + try: + with preserve_stats_context(): + if artifact_path.is_dir(): + shutil.copytree(artifact_path, storage / artifact_path.name) + else: + storage.mkdir(parents=True, exist_ok=True) + shutil.copy2(artifact_path, storage / artifact_path.name) + except Exception as e: + logger.error(f'Failed to copy {artifact}: {e}') + error_found = True + + github_server = os.getenv('GITHUB_SERVER_URL') + if github_server: # If running from GHA context + # TODO: write an exact job link, but it's not trivial to get + workflow_link = f"{github_server}/{os.getenv('GITHUB_REPOSITORY')}/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + with open(storage / 'workflow_link.txt', 'w') as file: + file.write(workflow_link) + + logger.debug(f"Copying finished") + (storage / 'copying_finished').touch() + if error_found: + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index da0309e20b37bd..eee6b4c73ec7f8 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -109,6 +109,9 @@ jobs: BUILD_DIR: /__w/openvino/openvino/openvino_build 
SCCACHE_AZURE_KEY_PREFIX: ubuntu20_x86_64_Release ONNX_RUNTIME_UTILS: /__w/openvino/openvino/openvino/src/frontends/onnx/tests/ci_utils/onnxruntime + ARTIFACTS_SHARE: "/mount/build-artifacts" + MANIFEST_PATH: '/__w/openvino/openvino/manifest.yml' + PRODUCT_TYPE: 'public_linux_ubuntu_20_04_release' if: "!needs.smart_ci.outputs.skip_workflow" steps: @@ -135,6 +138,18 @@ jobs: submodules: 'true' ref: 'master' + - name: Generate product manifest and set CI_BUILD_NUMBER & CI_BUILD_DEV_TAG + id: create_manifest + uses: ./openvino/.github/actions/create_manifest + with: + repos: | + ${{ env.OPENVINO_REPO }} + ${{ env.OPENVINO_CONTRIB_REPO }} + product_type: ${{ env.PRODUCT_TYPE }} + target_arch: 'intel64' + build_type: 'release' + save_to: ${{ env.MANIFEST_PATH }} + # # Print system info # @@ -301,6 +316,28 @@ jobs: path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz if-no-files-found: 'error' + - name: Prepare debian packages for storage on share + if: ${{ always() }} + continue-on-error: true + run: | + pushd ${{ env.BUILD_DIR }} + mkdir deb && mv *.deb deb/ + popd + + - name: Store artifacts to a shared drive + id: store_artifacts + if: ${{ always() }} + uses: ./openvino/.github/actions/store_artifacts + with: + artifacts: | + ${{ env.BUILD_DIR }}/openvino_package.tar.gz + ${{ env.BUILD_DIR }}/openvino_developer_package.tar.gz + ${{ env.BUILD_DIR }}/openvino_tests.tar.gz + ${{ env.BUILD_DIR }}/deb + ${{ env.MANIFEST_PATH }} + storage_dir: ${{ env.PRODUCT_TYPE }} + storage_root: ${{ env.ARTIFACTS_SHARE }} + Debian_Packages: name: Debian Packages needs: Build diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 825a4b076d631d..feb0ffee81f8f0 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -104,6 +104,9 @@ jobs: BUILD_DIR: /__w/openvino/openvino/openvino_build SCCACHE_AZURE_KEY_PREFIX: 'ubuntu20_aarch64_Release' ONNX_RUNTIME_UTILS: 
/__w/openvino/openvino/openvino/src/frontends/onnx/tests/ci_utils/onnxruntime + ARTIFACTS_SHARE: "/mount/build-artifacts" + MANIFEST_PATH: '/__w/openvino/openvino/manifest.yml' + PRODUCT_TYPE: 'public_linux_ubuntu_20_04_arm64_release' if: "!needs.smart_ci.outputs.skip_workflow" steps: @@ -121,6 +124,18 @@ jobs: submodules: 'true' ref: 'master' + - name: Generate product manifest and set CI_BUILD_NUMBER & CI_BUILD_DEV_TAG + id: create_manifest + uses: ./openvino/.github/actions/create_manifest + with: + repos: | + ${{ env.OPENVINO_REPO }} + ${{ env.OPENVINO_CONTRIB_REPO }} + product_type: ${{ env.PRODUCT_TYPE }} + target_arch: 'aarch64' + build_type: 'release' + save_to: ${{ env.MANIFEST_PATH }} + # # Print system info # @@ -294,6 +309,19 @@ jobs: path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz if-no-files-found: 'error' + - name: Store artifacts to a shared drive + id: store_artifacts + if: ${{ always() }} + uses: ./openvino/.github/actions/store_artifacts + with: + artifacts: | + ${{ env.BUILD_DIR }}/openvino_package.tar.gz + ${{ env.BUILD_DIR }}/openvino_developer_package.tar.gz + ${{ env.BUILD_DIR }}/openvino_tests.tar.gz + ${{ env.MANIFEST_PATH }} + storage_dir: ${{ env.PRODUCT_TYPE }} + storage_root: ${{ env.ARTIFACTS_SHARE }} + Debian_Packages: name: Debian Packages needs: Build diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 0e49752fc92968..24fd5946cbcf2c 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -64,6 +64,9 @@ jobs: INSTALL_DIR_JS: "${{ github.workspace }}\\openvino_install\\js" INSTALL_TEST_DIR: "${{ github.workspace }}\\tests_install" BUILD_DIR: "${{ github.workspace }}\\openvino_build" + ARTIFACTS_SHARE: "C:\\mount\\build-artifacts" + MANIFEST_PATH: "${{ github.workspace }}\\manifest.yml" + PRODUCT_TYPE: 'public_windows_vs2019_release' # TODO: specify version of compiler here if: ${{ !needs.smart_ci.outputs.skip_workflow && github.event_name != 'merge_group' }} @@ -81,6 +84,27 @@ 
jobs: path: 'openvino_contrib' ref: 'master' + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python + with: + version: ${{ env.PYTHON_VERSION }} + pip-cache-path: ${{ env.PIP_CACHE_PATH }} + should-setup-pip-paths: 'true' + self-hosted-runner: 'true' + show-cache-info: 'true' + + - name: Generate product manifest and set CI_BUILD_NUMBER & CI_BUILD_DEV_TAG + id: create_manifest + uses: ./openvino/.github/actions/create_manifest + with: + repos: | + ${{ env.OPENVINO_REPO }} + ${{ env.OPENVINO_CONTRIB_REPO }} + product_type: ${{ env.PRODUCT_TYPE }} + target_arch: 'intel64' + build_type: 'release' + save_to: ${{ env.MANIFEST_PATH }} + # # Print system info # @@ -92,15 +116,6 @@ jobs: # Dependencies # - - name: Setup Python ${{ env.PYTHON_VERSION }} - uses: ./openvino/.github/actions/setup_python - with: - version: ${{ env.PYTHON_VERSION }} - pip-cache-path: ${{ env.PIP_CACHE_PATH }} - should-setup-pip-paths: 'true' - self-hosted-runner: 'true' - show-cache-info: 'true' - - name: Install python dependencies run: | # For Python API: build and wheel packaging @@ -240,6 +255,18 @@ jobs: path: ${{ env.INSTALL_DIR_JS }} if-no-files-found: 'error' + - name: Store artifacts to a shared drive + id: store_artifacts + if: ${{ always() }} + uses: ./openvino/.github/actions/store_artifacts + with: + artifacts: | + ${{ env.BUILD_DIR }}/openvino_package.zip + ${{ env.BUILD_DIR }}/openvino_tests.zip + ${{ env.MANIFEST_PATH }} + storage_dir: ${{ env.PRODUCT_TYPE }} + storage_root: ${{ env.ARTIFACTS_SHARE }} + Samples: needs: [ Build, Smart_CI ] if: fromJSON(needs.smart_ci.outputs.affected_components).samples From bf2aab667be9f033ce9da38a66667486c320c613 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Wed, 24 Jul 2024 11:09:09 +0200 Subject: [PATCH 07/54] Update torch version in tests (#25636) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- .../openvino/frontend/pytorch/fx_decoder.py | 36 ++++++++++++------- 
.../pytorch/src/translate_session.cpp | 5 +-- tests/constraints.txt | 2 +- .../pytorch_tests/test_batch_norm.py | 2 ++ tests/layer_tests/pytorch_tests/test_full.py | 8 ++--- 5 files changed, 31 insertions(+), 22 deletions(-) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py index a1293f89a1ffc5..182dbd5c766101 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py @@ -16,6 +16,11 @@ logger.setLevel(logging.WARNING) +class InlinedInput: + def __init__(self, data) -> None: + self.data = data + + class TorchFXPythonDecoder (Decoder): def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, input_shapes=[], input_types=[]): @@ -59,7 +64,7 @@ def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, i for arg in uargs if arg[1] is not None] for idx, shape in enumerate(found_shapes): if shape is not None: - new_shape=[] + new_shape = [] for dim in range(0, len(shape)): if (type(shape[dim]).__name__ == "SymInt"): new_shape.append(-1) @@ -81,7 +86,7 @@ def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, i # None in inputs mean the input is inlined or None (also considered inlined) self._inputs = [self._nodes.index( - arg) if arg in self._nodes else (arg,) for arg in pt_module.args] + arg) if arg in self._nodes else InlinedInput(arg) for arg in pt_module.args] # FIXME: Find a better way to pass nested tuples to OV frontend. This is a temporary solution to flatten arguments. 
new_inputs = [] @@ -92,22 +97,22 @@ def __init__(self, pt_module, fx_gm=None, nodes=None, mark_node_callback=None, i if arg in self._nodes: new_inputs.append(self._nodes.index(arg)) else: - new_inputs.append((arg,)) + new_inputs.append(InlinedInput(arg)) self.input_types.append(OVAny(DecoderType.List( TorchFXPythonDecoder.get_type_for_value(arg)))) else: v = self._inputs[i] new_inputs.append(v) self.input_types.append( - TorchFXPythonDecoder.get_type_for_value(v[0] if isinstance(v, tuple) else self._nodes[v])) + TorchFXPythonDecoder.get_type_for_value(v.data if isinstance(v, InlinedInput) else self._nodes[v])) self._inputs = new_inputs def inputs(self): # Consider 0 a special case which may mean the input is inlined, but not guaranteed - return [x if not isinstance(x, tuple) else 0 for x in self._inputs] + return [x if not isinstance(x, InlinedInput) else 0 for x in self._inputs] def is_input_inlined(self, index): - return isinstance(self._inputs[index], tuple) + return isinstance(self._inputs[index], InlinedInput) @staticmethod def unpack_containers(arg): @@ -142,19 +147,24 @@ def arg_to_constant(arg): return make_constant(OVType.i64, Shape([]), [arg]) elif isinstance(arg, float): return make_constant(OVType.f32, Shape([]), [arg]) + elif isinstance(arg, str): + u8_tensor = torch.frombuffer(str.encode(arg), dtype=torch.uint8) + return torch_tensor_to_ov_const(u8_tensor, shared_memory=True) return None def inlined_input(self, index): assert index < len(self._inputs), "Requested input doesn't exist" assert isinstance( - self._inputs[index], tuple), "Requested input which is not inlined" - assert self._inputs[index][0] is not None, "Requested None inlined input" + self._inputs[index], InlinedInput), "Requested input which is not inlined" + arg = self._inputs[index].data + assert arg is not None, f"Requested None inlined input for op {self.get_op_type()}" constant = None - arg = self._inputs[index][0] constant = self.arg_to_constant(arg) - assert constant is not None, 
f"Constant wasn't created for inlined input {index}" - return constant.outputs() + if constant is not None: + return constant.outputs() + else: + return [] def input(self, index): # TODO: remove return self.inputs()[index] # TODO: find specialized method @@ -309,7 +319,7 @@ def _raw_output(self, index): return self._raw_outputs()[index] def _raw_inputs(self): - return [self._nodes[x] if not isinstance(x, tuple) and x < len(self._nodes) else x[0] for x in self._inputs] + return [self._nodes[x] if not isinstance(x, InlinedInput) and x < len(self._nodes) else x.data for x in self._inputs] def _raw_input(self, index): return self._raw_inputs()[index] @@ -347,7 +357,7 @@ def as_string(self): return None def input_is_none(self, index): - if index >= len(self._inputs) or (isinstance(self._inputs[index], tuple) and self._inputs[index][0] is None): + if index >= len(self._inputs) or (isinstance(self._inputs[index], InlinedInput) and self._inputs[index].data is None): return True else: r_input = self._raw_input(index) diff --git a/src/frontends/pytorch/src/translate_session.cpp b/src/frontends/pytorch/src/translate_session.cpp index 9295b388048baa..a5c84c319087ba 100644 --- a/src/frontends/pytorch/src/translate_session.cpp +++ b/src/frontends/pytorch/src/translate_session.cpp @@ -368,10 +368,7 @@ void TranslateSession::encode_tensor_name(Output output, namespace { bool is_number(const std::string& s) { - std::string::const_iterator it = s.begin(); - while (it != s.end() && std::isdigit(*it)) - ++it; - return !s.empty() && it == s.end(); + return !s.empty() && std::all_of(s.begin(), s.end(), ::isdigit); } } // namespace diff --git a/tests/constraints.txt b/tests/constraints.txt index c0ab1a660164f4..6fb62db1d86d6e 100644 --- a/tests/constraints.txt +++ b/tests/constraints.txt @@ -29,4 +29,4 @@ networkx<=3.3 keras>=2.0.0,<3.0.0 --extra-index-url https://download.pytorch.org/whl/cpu -torch>=1.13,<2.3 \ No newline at end of file +torch>=1.13,<2.4 \ No newline at end of file diff 
--git a/tests/layer_tests/pytorch_tests/test_batch_norm.py b/tests/layer_tests/pytorch_tests/test_batch_norm.py index 577a036af70240..8e72ae33eaa15e 100644 --- a/tests/layer_tests/pytorch_tests/test_batch_norm.py +++ b/tests/layer_tests/pytorch_tests/test_batch_norm.py @@ -60,5 +60,7 @@ def forward(self, x): @pytest.mark.precommit_fx_backend @pytest.mark.precommit_torch_export def test_batch_norm(self, weights, bias, eps, train, running_stats, ie_device, precision, ir_version, kwargs_to_prepare_input): + if running_stats and self.use_torch_export(): + pytest.skip("running_mean not supported by torch.export") self._test(*self.create_model(weights, bias, eps, train, running_stats), ie_device, precision, ir_version, kwargs_to_prepare_input=kwargs_to_prepare_input, dynamic_shapes=False, use_mo_convert=False) diff --git a/tests/layer_tests/pytorch_tests/test_full.py b/tests/layer_tests/pytorch_tests/test_full.py index 20a70367e047f5..6ef8ca25a692a0 100644 --- a/tests/layer_tests/pytorch_tests/test_full.py +++ b/tests/layer_tests/pytorch_tests/test_full.py @@ -93,7 +93,7 @@ def test_full(self, shape, value, ie_device, precision, ir_version): @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("value", [0, 1, -1, 0.5]) @pytest.mark.parametrize("dtype", ["int8", "int32", "int64", "float32", "float64"]) - @pytest.mark.parametrize("with_names", [True, False]) + @pytest.mark.parametrize("with_names", [skip_if_export(True), False]) @pytest.mark.nightly @pytest.mark.precommit_fx_backend @pytest.mark.precommit_torch_export @@ -104,7 +104,7 @@ def test_full_dtype(self, shape, value, dtype, with_names, ie_device, precision, @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("value", [0, 1, -1, 0.5]) @pytest.mark.parametrize("dtype", ["int8", "int32", "int64", "float32", "float64"]) - @pytest.mark.parametrize("with_names", [True, False]) + 
@pytest.mark.parametrize("with_names", [skip_if_export(True), False]) @pytest.mark.nightly def test_full_out(self, shape, value, dtype, with_names, ie_device, precision, ir_version): self._test(*self.create_model(shape, dtype=dtype, use_out=True, with_names=with_names), ie_device, precision, @@ -496,7 +496,7 @@ def test_zeros_ones(self, op_type, shape, ie_device, precision, ir_version): @pytest.mark.parametrize("shape", [(1, 1), (1, 2), (1, 2, 3), (1, 2, 3, 4), (2, 3, 4, 5, 6)]) @pytest.mark.parametrize("op_type", ["aten::zeros", "aten::ones"]) @pytest.mark.parametrize("dtype", ["int8", "int32", "int64", "float32", "float64"]) - @pytest.mark.parametrize("with_names", [True, False]) + @pytest.mark.parametrize("with_names", [skip_if_export(True), False]) @pytest.mark.nightly @pytest.mark.precommit_fx_backend @pytest.mark.precommit_torch_export @@ -508,7 +508,7 @@ def test_zeros_ones_with_dtype(self, op_type, shape, dtype, with_names, ie_devic @pytest.mark.parametrize("shape", [(1, 1), (1, 2), (1, 2, 3), (1, 2, 3, 4), (2, 3, 4, 5, 6)]) @pytest.mark.parametrize("op_type", ["aten::zeros", "aten::ones"]) @pytest.mark.parametrize("dtype", ["int8", "int32", "int64", "float32", "float64"]) - @pytest.mark.parametrize("with_names", [True, False]) + @pytest.mark.parametrize("with_names", [skip_if_export(True), False]) @pytest.mark.nightly def test_zeros_ones_with_out(self, op_type, shape, dtype, with_names, ie_device, precision, ir_version): self._test(*self.create_model(op_type, dtype=dtype, with_out=True, with_names=with_names), ie_device, precision, From 83bb6a04a10e85ac48a45eca136e067e11c8140f Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Wed, 24 Jul 2024 13:18:27 +0200 Subject: [PATCH 08/54] [DOCS] Update model caching for GPU (#25670) Adding details on model caching for GPU and other devices. 
This PR addresses JIRA ticket: 146449 --- docs/articles_en/assets/snippets/ov_caching.cpp | 4 ++-- docs/articles_en/assets/snippets/ov_caching.py | 1 + .../optimizing-latency/model-caching-overview.rst | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/articles_en/assets/snippets/ov_caching.cpp b/docs/articles_en/assets/snippets/ov_caching.cpp index cefb3da55c7827..891d3e9368292d 100644 --- a/docs/articles_en/assets/snippets/ov_caching.cpp +++ b/docs/articles_en/assets/snippets/ov_caching.cpp @@ -1,10 +1,10 @@ #include +//! [ov:caching:part0] void part0() { std::string modelPath = "/tmp/myModel.xml"; - std::string device = "GPU"; + std::string device = "GPU"; // For example: "CPU", "GPU", "NPU". ov::AnyMap config; -//! [ov:caching:part0] ov::Core core; // Step 1: create ov::Core object core.set_property(ov::cache_dir("/path/to/cache/dir")); // Step 1b: Enable caching auto model = core.read_model(modelPath); // Step 2: Read Model diff --git a/docs/articles_en/assets/snippets/ov_caching.py b/docs/articles_en/assets/snippets/ov_caching.py index c03e8b34cfe9ce..4ce0b91ccd7506 100644 --- a/docs/articles_en/assets/snippets/ov_caching.py +++ b/docs/articles_en/assets/snippets/ov_caching.py @@ -8,6 +8,7 @@ import openvino.properties as props +# For example: "CPU", "GPU", "NPU". 
device_name = 'CPU' model_path = get_path_to_model() path_to_cache_dir = get_temp_dir() diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst index 38af00d3796d5d..09701ab97d23fd 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimizing-latency/model-caching-overview.rst @@ -61,7 +61,8 @@ To enable model caching, the application must specify a folder to store the cach With this code, if the device specified by ``device_name`` supports import/export model capability, -a cached blob is automatically created inside the ``/path/to/cache/dir`` folder. +a cached blob (the ``.cl_cache`` and ``.blob`` file for GPU and CPU respectively) is automatically +created inside the ``/path/to/cache/dir`` folder. If the device does not support the import/export capability, cache is not created and no error is thrown. Note that the first ``compile_model`` operation takes slightly longer, as the cache needs to be created - From b17cf7e474a94e894537e6094a3c850f86355074 Mon Sep 17 00:00:00 2001 From: Maciej Smyk Date: Wed, 24 Jul 2024 13:56:26 +0200 Subject: [PATCH 09/54] [DOCS] Preprocessing Use Case update for master (#25634) * Removed info on the Model Optimizer from the article. * Moved the article to the new location in repo. 
--- .../openvino-workflow/model-preparation.rst | 1 + .../optimize-preprocessing.rst | 1 - .../preprocessing-api-details.rst | 5 +++ .../integrate-save-preprocessing-use-case.rst | 36 ++++++++----------- 4 files changed, 20 insertions(+), 23 deletions(-) rename docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/{ => preprocessing-api-details}/integrate-save-preprocessing-use-case.rst (60%) diff --git a/docs/articles_en/openvino-workflow/model-preparation.rst b/docs/articles_en/openvino-workflow/model-preparation.rst index c6c7eaeb17fb31..bea0fcdba5311b 100644 --- a/docs/articles_en/openvino-workflow/model-preparation.rst +++ b/docs/articles_en/openvino-workflow/model-preparation.rst @@ -267,6 +267,7 @@ Before saving the model to OpenVINO IR, consider :doc:`Post-training Optimization ` to achieve more efficient inference and a smaller model. +.. _convert_model_cli_ovc: Convert a Model in CLI: ``ovc`` ############################### diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing.rst index 7d19e17a70f2c6..3fa01212b6d86b 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing.rst @@ -10,7 +10,6 @@ Optimize Preprocessing optimize-preprocessing/preprocessing-api-details optimize-preprocessing/layout-api-overview - optimize-preprocessing/integrate-save-preprocessing-use-case Torchvision preprocessing converter .. 
meta:: diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details.rst index cb03e3b4e8129f..ef8613b84f0626 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details.rst @@ -3,6 +3,11 @@ Preprocessing API - details =========================== +.. toctree:: + :maxdepth: 1 + :hidden: + + preprocessing-api-details/integrate-save-preprocessing-use-case .. meta:: :description: Learn the details on capabilities of pre-processing API and post-processing. diff --git a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/integrate-save-preprocessing-use-case.rst b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details/integrate-save-preprocessing-use-case.rst similarity index 60% rename from docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/integrate-save-preprocessing-use-case.rst rename to docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details/integrate-save-preprocessing-use-case.rst index aeb59c2e37a08e..2563b9270082b0 100644 --- a/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/integrate-save-preprocessing-use-case.rst +++ b/docs/articles_en/openvino-workflow/running-inference/optimize-inference/optimize-preprocessing/preprocessing-api-details/integrate-save-preprocessing-use-case.rst @@ -10,8 +10,8 @@ Use Case - Integrate and Save Preprocessing Steps Into IR OpenVINO Intermediate Representation. 
-Previous sections covered the topic of the :doc:`preprocessing steps ` -and the overview of :doc:`Layout ` API. +Previous sections covered the :doc:`preprocessing steps <../preprocessing-api-details>` +and the overview of :doc:`Layout API <../layout-api-overview>`. For many applications, it is also important to minimize read/load time of a model. Therefore, performing integration of preprocessing steps every time on application @@ -20,25 +20,18 @@ once pre and postprocessing steps have been added, it can be useful to store new model to OpenVINO Intermediate Representation (OpenVINO IR, `.xml` format). Most available preprocessing steps can also be performed via command-line options, -using Model Optimizer. For details on such command-line options, refer to the -:doc:`Optimizing Preprocessing Computation <../../../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation>`. +using ``ovc``. For details on such command-line options, refer to the +:ref:`Model Conversion `. Code example - Saving Model with Preprocessing to OpenVINO IR ############################################################# -When some preprocessing steps cannot be integrated into the execution graph using -Model Optimizer command-line options (for example, ``YUV``->``RGB`` color space conversion, -``Resize``, etc.), it is possible to write a simple code which: +In the following example: -* Reads the original model (OpenVINO IR, TensorFlow, TensorFlow Lite, ONNX, PaddlePaddle). -* Adds the preprocessing/postprocessing steps. -* Saves resulting model as IR (``.xml`` and ``.bin``). +* Original ONNX model takes one ``float32`` input with the ``{1, 3, 224, 224}`` shape, the ``RGB`` channel order, and mean/scale values applied. +* Application provides ``BGR`` image buffer with a non-fixed size and input images as batches of two. 
-Consider the example, where an original ONNX model takes one ``float32`` input with the -``{1, 3, 224, 224}`` shape, the ``RGB`` channel order, and mean/scale values applied. -In contrast, the application provides ``BGR`` image buffer with a non-fixed size and -input images as batches of two. Below is the model conversion code that can be applied -in the model preparation script for such a case. +Below is the model conversion code that can be applied in the model preparation script for this case: * Includes / Imports @@ -62,7 +55,6 @@ in the model preparation script for such a case. * Preprocessing & Saving to the OpenVINO IR code. - .. tab-set:: .. tab-item:: Python @@ -83,8 +75,8 @@ in the model preparation script for such a case. Application Code - Load Model to Target Device ############################################## -After this, the application code can load a saved file and stop preprocessing. In this case, enable -:doc:`model caching <../optimizing-latency/model-caching-overview>` to minimize load +Next, the application code can load a saved file and stop preprocessing. In this case, enable +:doc:`model caching <../../optimizing-latency/model-caching-overview>` to minimize load time when the cached model is available. @@ -108,10 +100,10 @@ time when the cached model is available. 
Additional Resources #################### -* :doc:`Preprocessing Details ` -* :doc:`Layout API overview ` -* :doc:`Model Optimizer - Optimize Preprocessing Computation <../../../../documentation/legacy-features/transition-legacy-conversion-api/legacy-conversion-api/[legacy]-embedding-preprocessing-computation>` -* :doc:`Model Caching Overview <../optimizing-latency/model-caching-overview>` +* :doc:`Preprocessing Details <../preprocessing-api-details>` +* :doc:`Layout API overview <../layout-api-overview>` +* :doc:`Model Caching Overview <../../optimizing-latency/model-caching-overview>` +* :doc:`Model Preparation <../../../../model-preparation>` * The `ov::preprocess::PrePostProcessor `__ C++ class documentation * The `ov::pass::Serialize `__ - pass to serialize model to XML/BIN * The ``ov::set_batch`` - update batch dimension for a given model From f387838a87ce59f10586cd9c10a1452e9bc1f7fe Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Wed, 24 Jul 2024 13:01:11 +0100 Subject: [PATCH 10/54] [CPU] [ARM] JIT Tanh & Mod (GFI) fix (#25701) ### Details: - *[CPU] [ARM] JIT Tanh & Mod (GFI) fix* --- .../plugin/aarch64/jit_eltwise_emitters.cpp | 26 +++++++++---------- .../plugin/aarch64/jit_eltwise_emitters.hpp | 2 ++ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 1dec30581dd71a..3d29173788d658 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -705,23 +705,18 @@ void jit_gelu_tanh_emitter::emit_isa(const std::vector &in_vec_idxs, con h->ld1r(vmm_aux1.s, table_val2("gelu_tanh_sqrt_two_over_pi")); h->fmul(vmm_aux0.s, vmm_aux1.s, vmm_aux2.s); - const bool store_src = vmm_src.getIdx() == vmm_dst.getIdx(); - if (store_src) { - h->mov(vmm_aux2.b16, vmm_src.b16); - } - 
tanh_emitter->emit_code( { vmm_aux0.getIdx() }, - { vmm_aux0.getIdx() }, + { vmm_aux2.getIdx() }, aux_vec_idxs, aux_gpr_idxs); // compute 0.5 * x * (1 + tanh(G(x))) h->ld1r(vmm_aux1.s, table_val2("one")); - h->fadd(vmm_aux0.s, vmm_aux1.s, vmm_aux0.s); + h->fadd(vmm_aux0.s, vmm_aux1.s, vmm_aux2.s); h->ld1r(vmm_aux1.s, table_val2("half")); h->fmul(vmm_aux0.s, vmm_aux1.s, vmm_aux0.s); - h->fmul(vmm_dst.s, store_src ? vmm_aux2.s : vmm_src.s, vmm_aux0.s); + h->fmul(vmm_dst.s, vmm_src.s, vmm_aux0.s); } void jit_gelu_tanh_emitter::register_table_entries() { @@ -1219,6 +1214,8 @@ jit_mod_emitter::jit_mod_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, size_t jit_mod_emitter::get_inputs_count() const { return 2; } +size_t jit_mod_emitter::get_aux_vecs_count() const { return 1; } + void jit_mod_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { emit_isa(in_vec_idxs, out_vec_idxs); @@ -1233,14 +1230,15 @@ void jit_mod_emitter::emit_isa(const std::vector &in_vec_idxs, const std using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; - TReg divend = TReg(in_vec_idxs[0]); + TReg dividend = TReg(in_vec_idxs[0]); TReg divisor = TReg(in_vec_idxs[1]); TReg r = TReg(out_vec_idxs[0]); + TReg aux = TReg(aux_vec_idxs[0]); - h->uni_fdiv(r.s, divend.s, divisor.s); - h->frintz(r.s, r.s); - h->uni_fmul(r.s, r.s, divisor.s); - h->uni_fsub(r.s, divend.s, r.s); + h->fdiv(aux.s, dividend.s, divisor.s); + h->frintz(aux.s, aux.s); + h->fmul(aux.s, aux.s, divisor.s); + h->fsub(r.s, dividend.s, aux.s); } std::set> jit_mod_emitter::get_supported_precisions(const std::shared_ptr& node) { @@ -1874,7 +1872,7 @@ void jit_tanh_emitter::emit_isa(const std::vector &in_vec_idxs, const st TReg src = TReg(in_vec_idxs[0]); TReg dst = TReg(out_vec_idxs[0]); - TReg aux = TReg(aux_vec_idxs.back()); + TReg aux = TReg(aux_vec_idxs[0]); h->ld1r(aux.s, table_val2("two")); h->uni_fmul(aux.s, src.s, aux.s); 
diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index b71fb0a67f2a19..0152a5bd3d99e1 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -477,6 +477,8 @@ class jit_mod_emitter : public jit_emitter { size_t get_inputs_count() const override; + size_t get_aux_vecs_count() const override; + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); private: From fa5877ea033792837fd17882769ca32d4704be18 Mon Sep 17 00:00:00 2001 From: Haiqi Pan Date: Wed, 24 Jul 2024 22:57:51 +0800 Subject: [PATCH 11/54] enable ov_core_compile_model_with_property (#25526) ### Details: - *enable ov_core_compile_model_with_property* ### Tickets: - *CVS-126283* --- src/bindings/c/tests/ov_core_test.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/bindings/c/tests/ov_core_test.cpp b/src/bindings/c/tests/ov_core_test.cpp index 3e8ceebcaa0e49..40aab57b2c7e6b 100644 --- a/src/bindings/c/tests/ov_core_test.cpp +++ b/src/bindings/c/tests/ov_core_test.cpp @@ -126,12 +126,7 @@ TEST_P(ov_core_test, ov_core_compile_model) { ov_core_free(core); } -#ifdef OPENVINO_ARCH_ARM64 -// Ticket: 126283 -TEST_P(ov_core_test, DISABLED_ov_core_compile_model_with_property) { -#else TEST_P(ov_core_test, ov_core_compile_model_with_property) { -#endif auto device_name = GetParam(); ov_core_t* core = nullptr; OV_EXPECT_OK(ov_core_create(&core)); @@ -149,12 +144,7 @@ TEST_P(ov_core_test, ov_core_compile_model_with_property) { char* property_value = nullptr; OV_EXPECT_OK(ov_compiled_model_get_property(compiled_model, key, &property_value)); -#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64) - // TODO: fix once ARM plugin supports multi-stream - EXPECT_STREQ(property_value, "1"); -#else EXPECT_STREQ(property_value, "2"); -#endif 
ov_free(property_value); ov_compiled_model_free(compiled_model); From e653ebc7c8c11508c7e5fd4f797174d21e4382bc Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Wed, 24 Jul 2024 18:45:54 +0200 Subject: [PATCH 12/54] [CONDA] Fix for conda build (#25695) ### Details: - Extended setup.py to set additional cmake args from environment - do not cache ENABLE_PYTHON option, because it should be set conditionally ### Tickets: - *ticket-id* --------- Co-authored-by: Zlobin Vladimir --- src/bindings/python/wheel/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bindings/python/wheel/setup.py b/src/bindings/python/wheel/setup.py index 610c4e744e32e3..25a51027a9082c 100644 --- a/src/bindings/python/wheel/setup.py +++ b/src/bindings/python/wheel/setup.py @@ -266,7 +266,7 @@ def finalize_options(self): self.jobs = multiprocessing.cpu_count() if self.jobs is None else int(self.jobs) if self.cmake_args is None: - self.cmake_args = "" + self.cmake_args = os.getenv("CMAKE_ARGS", "") def cmake_build_and_install(self, install_cfg): """Runs cmake (configure, build and install) if artfiacts are not already built / installed.""" @@ -297,6 +297,7 @@ def cmake_build_and_install(self, install_cfg): f"-DPython3_EXECUTABLE={sys.executable}", f"-DCMAKE_BUILD_TYPE={CONFIG}", f"-DCPACK_GENERATOR={CPACK_GENERATOR}", + "-DENABLE_PYTHON=ON", "-DENABLE_WHEEL=OFF", self.cmake_args, "-S", source_dir, From ff54835695c305ca765217f880bfc854c9202768 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Wed, 24 Jul 2024 18:56:58 +0200 Subject: [PATCH 13/54] [PT FE] Update tests for timm models (#25696) ### Details: - *Update timm model list according to version 1.0.7* - *Update for torch==2.3.1* - *Update torchvision models list for export scenario* ### Tickets: - *ticket-id* --- .../workflows/job_pytorch_models_tests.yml | 8 +- .github/workflows/linux.yml | 2 +- .../openvino/frontend/pytorch/fx_decoder.py | 8 +- .../pytorch/src/translate_session.cpp | 3 +- tests/constraints.txt
| 1 + .../models_hub_common/utils.py | 1 - .../pytorch/hf_transformers_models | 6 +- tests/model_hub_tests/pytorch/test_timm.py | 16 ++-- .../pytorch/test_torchvision_models.py | 14 ++- tests/model_hub_tests/pytorch/timm_models | 85 +++++++++++++------ .../pytorch/torchvision_models | 16 ++-- 11 files changed, 101 insertions(+), 59 deletions(-) diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index c740cd89079ec2..f0a01847da0be3 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -134,7 +134,7 @@ jobs: if: ${{ inputs.model_scope == 'precommit' || inputs.model_scope == 'nightly_scope1' }} run: | export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH - python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch/ -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_timm_tv_tests.html --self-contained-html -v -n 4 -k "TestTimmConvertModel or TestTorchHubConvertModel" + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch/ -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_timm_tv_${{ inputs.model_scope }}_tests.html --self-contained-html -v -n 2 -k "TestTimmConvertModel or TestTorchHubConvertModel" env: TYPE: ${{ inputs.model_scope == 'precommit' && 'precommit' || 'nightly' }} TEST_DEVICE: CPU @@ -144,7 +144,7 @@ jobs: if: ${{ inputs.model_scope == 'precommit' || inputs.model_scope == 'nightly_scope2' }} run: | export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH - python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_tests.html --self-contained-html -v -k "not (TestTimmConvertModel or TestTorchHubConvertModel)" + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/pytorch -m ${TYPE} --html=${INSTALL_TEST_DIR}/TEST-torch_model_${{ inputs.model_scope }}_tests.html --self-contained-html -v -k "not (TestTimmConvertModel or TestTorchHubConvertModel)" env: TYPE: ${{ 
inputs.model_scope == 'precommit' && 'precommit' || 'nightly' }} TEST_DEVICE: CPU @@ -155,7 +155,7 @@ jobs: if: ${{ inputs.model_scope == 'precommit' }} run: | export PYTHONPATH=${MODEL_HUB_TESTS_INSTALL_DIR}:$PYTHONPATH - python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/transformation_tests/test_pa_transformation.py -m precommit --html=${INSTALL_TEST_DIR}/TEST-torch_pagedattention_tests.html --self-contained-html -v --tb=short -n 4 + python3 -m pytest ${MODEL_HUB_TESTS_INSTALL_DIR}/transformation_tests/test_pa_transformation.py -m precommit --html=${INSTALL_TEST_DIR}/TEST-torch_pagedattention_tests.html --self-contained-html -v --tb=short -n 2 env: TEST_DEVICE: CPU USE_SYSTEM_CACHE: False @@ -183,7 +183,7 @@ jobs: uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 if: ${{ !cancelled() }} with: - name: test-results-torch-models + name: test-results-torch-models-${{ inputs.model_scope == 'precommit' }} path: | ${{ env.INSTALL_TEST_DIR }}/TEST-torch* if-no-files-found: 'error' diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index eee6b4c73ec7f8..dce369b6fe4dd9 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -590,7 +590,7 @@ jobs: # - /mount:/mount PyTorch_Models_Tests: name: PyTorch Models tests - if: fromJSON(needs.smart_ci.outputs.affected_components).PyTorch_FE.test + if: ${{ github.event_name != 'schedule' && fromJSON(needs.smart_ci.outputs.affected_components).PyTorch_FE.test }} needs: [ Build, Smart_CI, Openvino_tokenizers ] uses: ./.github/workflows/job_pytorch_models_tests.yml with: diff --git a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py index 182dbd5c766101..d9dae251aa64e7 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py @@ -267,9 +267,7 @@ def get_named_input(self, name): raise 
RuntimeError("This input is not a Node") def get_subgraph_size(self): - if issubclass(type(self.pt_module), torch.fx.Node): - return 0 - return len(self.get_subgraphs()) if hasattr(self.pt_module, 'blocks') else 1 + return len(self.get_subgraphs()) def decoder_type_name(self) -> str: return "fx" @@ -287,9 +285,7 @@ def visit_subgraph(self, node_visitor): node_visitor(decoder) def get_subgraphs(self): - if issubclass(type(self.pt_module), torch.fx.Node): - return [] - return list(self.pt_module.blocks()) + return [] def get_subgraph_decoder(self, index): decoder = TorchFXPythonDecoder(self.get_subgraphs()[index], diff --git a/src/frontends/pytorch/src/translate_session.cpp b/src/frontends/pytorch/src/translate_session.cpp index a5c84c319087ba..a39c6b067528fe 100644 --- a/src/frontends/pytorch/src/translate_session.cpp +++ b/src/frontends/pytorch/src/translate_session.cpp @@ -225,7 +225,8 @@ std::shared_ptr TranslateSession::convert_pytorch_model( } }; - FRONT_END_GENERAL_CHECK(pytorch_model->get_subgraph_size() == 1, "Model should have exactly 1 subgraph."); + FRONT_END_GENERAL_CHECK(pytorch_model->decoder_type_name() != "ts" || pytorch_model->get_subgraph_size() == 1, + "Model should have exactly 1 subgraph for TorchScript."); pytorch_model->visit_subgraph(node_visitor); ResultVector results; diff --git a/tests/constraints.txt b/tests/constraints.txt index 6fb62db1d86d6e..0427c504395950 100644 --- a/tests/constraints.txt +++ b/tests/constraints.txt @@ -27,6 +27,7 @@ jaxlib<=0.4.14 kornia==0.7.0 networkx<=3.3 keras>=2.0.0,<3.0.0 +timm==1.0.7 --extra-index-url https://download.pytorch.org/whl/cpu torch>=1.13,<2.4 \ No newline at end of file diff --git a/tests/model_hub_tests/models_hub_common/utils.py b/tests/model_hub_tests/models_hub_common/utils.py index 6dac33640162de..068826669fab5b 100644 --- a/tests/model_hub_tests/models_hub_common/utils.py +++ b/tests/model_hub_tests/models_hub_common/utils.py @@ -27,7 +27,6 @@ def get_models_list(file_name: str): 
model_name, model_link = model_info.split(',') elif len(model_info.split(',')) == 4: model_name, model_link, mark, reason = model_info.split(',') - assert mark in ["skip", "xfail"], "Incorrect failure mark for model info {}".format(model_info) models.append((model_name, model_link, mark, reason)) return models diff --git a/tests/model_hub_tests/pytorch/hf_transformers_models b/tests/model_hub_tests/pytorch/hf_transformers_models index 5da9db39095810..f79f32b6d93ee8 100644 --- a/tests/model_hub_tests/pytorch/hf_transformers_models +++ b/tests/model_hub_tests/pytorch/hf_transformers_models @@ -4,7 +4,7 @@ abeja/gpt-neox-japanese-2.7b,gpt_neox_japanese acl-submission-anonym/EAM-spectral,examuse,skip,Load problem adalbertojunior/modular-test,modular,skip,Load problem adept/persimmon-8b-base,persimmon -aerner/lm-v2,open-llama,xfail,Example input problem +aerner/lm-v2,open-llama afonsosamarques/ardt-vanilla-combo_train_hopper_v2-2508_1336-33,decision_transformer,xfail,Tracing problem aihijo/gec-zh-gector-bert-large,gector,skip,Load problem albert-base-v2,albert @@ -170,6 +170,7 @@ huggingface/time-series-transformer-tourism-monthly,time_series_transformer,skip HuggingFaceM4/tiny-random-idefics,idefics,xfail,Unsupported op aten::any aten::einsum prim::TupleConstruct prim::TupleUnpack HuggingFaceM4/tiny-random-vllama-clip,vllama,skip,Load problem HuggingFaceM4/tiny-random-vopt-clip,vopt,skip,Load problem +HuggingFaceH4/zephyr-7b-beta,mistral HuiHuang/gpt3-damo-base-zh,gpt3,skip,Load problem hustvl/yolos-tiny,yolos iakarshu/tilt_base,tilt_base_configuration,skip,Load problem @@ -184,7 +185,7 @@ jaketae/fastspeech2-ljspeech,fastspeech2,skip,Load problem jambran/depression-classification,DepressionDetection,skip,Load problem Jellywibble/dalio-reward-charlie-v1,reward-model,skip,Load problem JonasGeiping/crammed-bert-legacy,crammedBERT,skip,Load problem -jonatasgrosman/wav2vec2-large-xlsr-53-english,wav2vec2,xfail,Unsupported op aten::index_put_ prim::TupleConstruct 
+jonatasgrosman/wav2vec2-large-xlsr-53-english,wav2vec2 Joqsan/test-my-fnet,my_fnet,skip,Load problem jozhang97/deta-swin-large,deta,skip,Load problem jploski/retnet-mini-shakespeare,retnet,skip,Load problem @@ -257,7 +258,6 @@ microsoft/xclip-base-patch32,xclip microsoft/xprophetnet-large-wiki100-cased,xlm-prophetnet miguelvictor/python-fromzero-lstmlm,lstmlm,skip,Load problem mingzi151/test-hf-wav2vec2bert,wav2vec2bert,skip,Load problem -mistralai/Mistral-7B-v0.1,mistral MIT/ast-finetuned-audioset-10-10-0.4593,audio-spectrogram-transformer Mizuiro-sakura/luke-japanese-large-sentiment-analysis-wrime,luke mlml-chip/thyme2_colon_e2e,cnlpt,skip,Load problem diff --git a/tests/model_hub_tests/pytorch/test_timm.py b/tests/model_hub_tests/pytorch/test_timm.py index 1e168de83a50d5..78bd632179be6f 100644 --- a/tests/model_hub_tests/pytorch/test_timm.py +++ b/tests/model_hub_tests/pytorch/test_timm.py @@ -8,7 +8,7 @@ import torch from models_hub_common.utils import get_models_list -from torch_utils import TestTorchConvertModel, process_pytest_marks +from torch_utils import TestTorchConvertModel def filter_timm(timm_list: list) -> list: @@ -42,10 +42,6 @@ def filter_timm(timm_list: list) -> list: return sorted([v[1] for v in unique_models.values()]) -def get_all_models() -> list: - return process_pytest_marks(os.path.join(os.path.dirname(__file__), "timm_models")) - - # To make tests reproducible we seed the random generator torch.manual_seed(0) @@ -82,10 +78,16 @@ def test_convert_model_precommit(self, name, ie_device): self.run(name, None, ie_device) @pytest.mark.nightly - @pytest.mark.parametrize("name", get_all_models()) + @pytest.mark.parametrize("name,link,mark,reason", get_models_list(os.path.join(os.path.dirname(__file__), "timm_models"))) @pytest.mark.parametrize("mode", ["trace", "export"]) - def test_convert_model_all_models(self, mode, name, ie_device): + def test_convert_model_all_models(self, mode, name, link, mark, reason, ie_device): self.mode = mode + 
assert mark is None or mark in [ + 'skip', 'xfail', 'xfail_trace', 'xfail_export'], f"Incorrect test case for {name}" + if mark == 'skip': + pytest.skip(reason) + elif mark in ['xfail', f'xfail_{mode}']: + pytest.xfail(reason) self.run(name, None, ie_device) @pytest.mark.nightly diff --git a/tests/model_hub_tests/pytorch/test_torchvision_models.py b/tests/model_hub_tests/pytorch/test_torchvision_models.py index 9aeabbbe09b032..31aeaedb2366d4 100644 --- a/tests/model_hub_tests/pytorch/test_torchvision_models.py +++ b/tests/model_hub_tests/pytorch/test_torchvision_models.py @@ -7,8 +7,9 @@ import pytest import torch import torchvision.transforms.functional as F +from models_hub_common.utils import get_models_list -from torch_utils import process_pytest_marks, TestTorchConvertModel +from torch_utils import TestTorchConvertModel def get_all_models() -> list: @@ -103,10 +104,15 @@ def test_convert_model_precommit_export(self, model_name, ie_device): self.mode = "export" self.run(model_name, None, ie_device) - @pytest.mark.parametrize("name", - process_pytest_marks(os.path.join(os.path.dirname(__file__), "torchvision_models"))) + @pytest.mark.parametrize("name,link,mark,reason", get_models_list(os.path.join(os.path.dirname(__file__), "torchvision_models"))) @pytest.mark.parametrize("mode", ["trace", "export"]) @pytest.mark.nightly - def test_convert_model_all_models(self, mode, name, ie_device): + def test_convert_model_all_models(self, mode, name, link, mark, reason, ie_device): self.mode = mode + assert mark is None or mark in [ + 'skip', 'xfail', 'xfail_trace', 'xfail_export'], f"Incorrect test case for {name}" + if mark == 'skip': + pytest.skip(reason) + elif mark in ['xfail', f'xfail_{mode}']: + pytest.xfail(reason) self.run(name, None, ie_device) diff --git a/tests/model_hub_tests/pytorch/timm_models b/tests/model_hub_tests/pytorch/timm_models index 9087edc24ffe2c..6aa64a90c19071 100644 --- a/tests/model_hub_tests/pytorch/timm_models +++ 
b/tests/model_hub_tests/pytorch/timm_models @@ -13,7 +13,7 @@ cait_s36_384.fb_dist_in1k,None cait_xs24_384.fb_dist_in1k,None cait_xxs24_224.fb_dist_in1k,None cait_xxs36_224.fb_dist_in1k,None -coat_tiny.in1k,None +coat_tiny.in1k,None,xfail_export,Requested None inlined input coatnet_bn_0_rw_224.sw_in1k,None coatnet_nano_rw_224.sw_in1k,None coatnet_rmlp_1_rw2_224.sw_in12k,None @@ -23,7 +23,7 @@ convformer_b36.sail_in1k,None convformer_m36.sail_in1k,None convformer_s18.sail_in1k,None convformer_s36.sail_in1k,None -convit_base.fb_in1k,None,xfail,Trace failed +convit_base.fb_in1k,None,xfail_trace,Trace failed convmixer_1024_20_ks9_p14.in1k,None convmixer_1536_20.in1k,None convnext_atto_ols.a2_in1k,None @@ -102,10 +102,10 @@ edgenext_xx_small.in1k,None efficientformer_l1.snap_dist_in1k,None efficientformer_l3.snap_dist_in1k,None efficientformer_l7.snap_dist_in1k,None -efficientformerv2_l.snap_dist_in1k,None -efficientformerv2_s0.snap_dist_in1k,None -efficientformerv2_s1.snap_dist_in1k,None -efficientformerv2_s2.snap_dist_in1k,None +efficientformerv2_l.snap_dist_in1k,None,xfail_export,Requested None inlined input +efficientformerv2_s0.snap_dist_in1k,None,xfail_export,Requested None inlined input +efficientformerv2_s1.snap_dist_in1k,None,xfail_export,Requested None inlined input +efficientformerv2_s2.snap_dist_in1k,None,xfail_export,Requested None inlined input efficientnet_b0.ra_in1k,None efficientnet_b1.ft_in1k,None efficientnet_b1_pruned.in1k,None @@ -144,6 +144,9 @@ eva02_base_patch14_224.mim_in22k,None eva02_base_patch16_clip_224.merged2b,None eva02_large_patch14_clip_224.merged2b,None fastvit_ma36.apple_dist_in1k,None +fastvit_mci0.apple_mclip,None +fastvit_mci1.apple_mclip,None +fastvit_mci2.apple_mclip,None,xfail_trace,Accuracy validation failed fastvit_s12.apple_dist_in1k,None fastvit_sa12.apple_dist_in1k,None fastvit_sa24.apple_dist_in1k,None @@ -167,7 +170,7 @@ gcresnext50ts.ch_in1k,None gcvit_base.in1k,None gernet_s.idstcv_in1k,None ghostnet_100.in1k,None 
-ghostnetv2_100.in1k,None +ghostnetv2_100.in1k,None,xfail_export,Requested None inlined input gmixer_24_224.ra3_in1k,None gmlp_s16_224.ra3_in1k,None halo2botnet50ts_256.a1h_in1k,None @@ -188,16 +191,18 @@ hgnetv2_b3.ssld_stage1_in22k_in1k,None hgnetv2_b4.ssld_stage1_in22k_in1k,None hgnetv2_b5.ssld_stage1_in22k_in1k,None hgnetv2_b6.ssld_stage1_in22k_in1k,None -hrnet_w18_small.gluon_in1k,None -hrnet_w18_small_v2.gluon_in1k,None -hrnet_w18_ssld.paddle_in1k,None -hrnet_w30.ms_in1k,None -hrnet_w32.ms_in1k,None -hrnet_w40.ms_in1k,None -hrnet_w44.ms_in1k,None -hrnet_w48.ms_in1k,None -hrnet_w48_ssld.paddle_in1k,None -hrnet_w64.ms_in1k,None +hiera_base_224.mae,None +hiera_base_plus_224.mae,None +hrnet_w18_small.gluon_in1k,None,xfail_export,Requested None inlined input +hrnet_w18_small_v2.gluon_in1k,None,xfail_export,Requested None inlined input +hrnet_w18_ssld.paddle_in1k,None,xfail_export,Requested None inlined input +hrnet_w30.ms_in1k,None,xfail_export,Requested None inlined input +hrnet_w32.ms_in1k,None,xfail_export,Requested None inlined input +hrnet_w40.ms_in1k,None,xfail_export,Requested None inlined input +hrnet_w44.ms_in1k,None,xfail_export,Requested None inlined input +hrnet_w48.ms_in1k,None,xfail_export,Requested None inlined input +hrnet_w48_ssld.paddle_in1k,None,xfail_export,Requested None inlined input +hrnet_w64.ms_in1k,None,xfail_export,Requested None inlined input inception_next_base.sail_in1k,None inception_resnet_v2.tf_ens_adv_in1k,None inception_v3.gluon_in1k,None @@ -236,6 +241,9 @@ mobilenetv2_110d.ra_in1k,None mobilenetv2_120d.ra_in1k,None mobilenetv3_rw.rmsp_in1k,None mobilenetv3_small_050.lamb_in1k,None +mobilenetv4_conv_blur_medium.e500_r224_in1k,None +mobilenetv4_conv_small.e1200_r224_in1k,None +mobilenetv4_hybrid_medium.e500_r224_in1k,None mobileone_s0.apple_in1k,None mobileone_s1.apple_in1k,None mobileone_s2.apple_in1k,None @@ -324,6 +332,8 @@ resnest50d.in1k,None resnest50d_1s4x24d.in1k,None resnest50d_4s2x40d.in1k,None resnet101.a1_in1k,None 
+resnet101_clip.openai,None +resnet101_clip_gap.openai,None resnet101c.gluon_in1k,None resnet101d.gluon_in1k,None resnet101s.gluon_in1k,None @@ -344,10 +354,18 @@ resnet33ts.ra2_in1k,None resnet34.a1_in1k,None resnet34d.ra2_in1k,None resnet50.a1_in1k,None +resnet50_clip.openai,None +resnet50_clip_gap.openai,None resnet50_gn.a1h_in1k,None resnet50c.gluon_in1k,None resnet50d.a1_in1k,None resnet50s.gluon_in1k,None +resnet50x16_clip.openai,None +resnet50x16_clip_gap.openai,None +resnet50x4_clip.openai,None +resnet50x4_clip_gap.openai,None +resnet50x64_clip.openai,None +resnet50x64_clip_gap.openai,None resnet51q.ra2_in1k,None resnet61q.ra2_in1k,None resnetaa101d.sw_in12k,None @@ -388,7 +406,7 @@ selecsls60.in1k,None selecsls60b.in1k,None semnasnet_075.rmsp_in1k,None senet154.gluon_in1k,None -sequencer2d_s.in1k,None +sequencer2d_s.in1k,None,xfail_export,No conversion rule found for operations aten.mkldnn_rnn_layer.default seresnet152d.ra2_in1k,None seresnet33ts.ra2_in1k,None seresnet50.a1_in1k,None @@ -453,7 +471,7 @@ tinynet_b.in1k,None tinynet_c.in1k,None tinynet_d.in1k,None tinynet_e.in1k,None -tnt_s_patch16_224,None +tnt_s_patch16_224,None,xfail_export,Requested None inlined input tresnet_m.miil_in1k,None tresnet_v2_l.miil_in21k,None twins_pcpvt_base.in1k,None @@ -467,25 +485,39 @@ vgg16_bn.tv_in1k,None vgg19.tv_in1k,None vgg19_bn.tv_in1k,None visformer_tiny.in1k,None +vit_base_mci_224.apple_mclip,None vit_base_patch14_dinov2.lvd142m,None vit_base_patch14_reg4_dinov2.lvd142m,None vit_base_patch16_224.augreg2_in21k_ft_in1k,None vit_base_patch16_224_miil.in21k,None vit_base_patch16_clip_224.datacompxl,None vit_base_patch16_clip_quickgelu_224.metaclip_2pt5b,None +vit_base_patch16_rope_reg1_gap_256.sbb_in1k,None,xfail,Argument shapes are inconsistent vit_base_patch16_rpn_224.sw_in1k,None vit_base_patch16_siglip_224.webli,None +vit_base_patch16_siglip_gap_224.webli,None vit_base_patch32_224.augreg_in1k,None vit_base_patch32_clip_224.datacompxl,None 
vit_base_patch32_clip_quickgelu_224.metaclip_2pt5b,None vit_base_patch8_224.augreg2_in21k_ft_in1k,None vit_base_r50_s16_224.orig_in21k,None +vit_betwixt_patch16_reg1_gap_256.sbb_in1k,None +vit_betwixt_patch16_reg4_gap_256.sbb_in12k,None +vit_betwixt_patch16_rope_reg4_gap_256.sbb_in1k,None,xfail,Argument shapes are inconsistent +vit_betwixt_patch32_clip_224.tinyclip_laion400m,None vit_huge_patch14_224.mae,None vit_huge_patch14_gap_224.in1k_ijepa,None vit_large_patch14_clip_224.datacompxl,None vit_large_patch14_clip_quickgelu_224.dfn2b,None vit_large_r50_s32_224.augreg_in21k,None +vit_little_patch16_reg1_gap_256.sbb_in12k,None +vit_little_patch16_reg4_gap_256.sbb_in1k,None vit_medium_patch16_gap_240.sw_in12k,None +vit_medium_patch16_reg1_gap_256.sbb_in1k,None +vit_medium_patch16_reg4_gap_256.sbb_in12k,None +vit_mediumd_patch16_reg4_gap_256.sbb_in12k,None +vit_mediumd_patch16_rope_reg1_gap_256.sbb_in1k,None,xfail,Argument shapes are inconsistent +vit_pwee_patch16_reg1_gap_256.sbb_in1k,None vit_relpos_base_patch16_224.sw_in1k,None vit_relpos_base_patch16_clsgap_224.sw_in1k,None vit_relpos_base_patch32_plus_rpn_256.sw_in1k,None @@ -493,13 +525,18 @@ vit_relpos_medium_patch16_cls_224.sw_in1k,None vit_relpos_medium_patch16_rpn_224.sw_in1k,None vit_small_r26_s32_224.augreg_in21k,None vit_so400m_patch14_siglip_224.webli,None +vit_so400m_patch14_siglip_gap_224.pali_mix,None,skip,Access to model google/paligemma-3b-mix-224-jax is restricted vit_srelpos_small_patch16_224.sw_in1k,None vit_tiny_r_s16_p8_224.augreg_in21k,None -volo_d1_224.sail_in1k,None -volo_d2_224.sail_in1k,None -volo_d3_224.sail_in1k,None -volo_d4_224.sail_in1k,None -volo_d5_224.sail_in1k,None +vit_wee_patch16_reg1_gap_256.sbb_in1k,None +vit_xsmall_patch16_clip_224.tinyclip_yfcc15m,None +vitamin_base_224.datacomp1b_clip,None,xfail,RuntimeError Error in loading state_dict for VisionTransformer +vitamin_large2_224.datacomp1b_clip,None +volo_d1_224.sail_in1k,None,xfail,Cannot get length of dynamic dimension 
+volo_d2_224.sail_in1k,None,xfail,Cannot get length of dynamic dimension +volo_d3_224.sail_in1k,None,xfail,Cannot get length of dynamic dimension +volo_d4_224.sail_in1k,None,xfail,Cannot get length of dynamic dimension +volo_d5_224.sail_in1k,None,xfail,Cannot get length of dynamic dimension wide_resnet101_2.tv2_in1k,None wide_resnet50_2.racm_in1k,None xception41.tf_in1k,None diff --git a/tests/model_hub_tests/pytorch/torchvision_models b/tests/model_hub_tests/pytorch/torchvision_models index 35e6805bd18152..a045925ed54f4a 100644 --- a/tests/model_hub_tests/pytorch/torchvision_models +++ b/tests/model_hub_tests/pytorch/torchvision_models @@ -3,9 +3,9 @@ convnext_base,none convnext_large,none convnext_small,none convnext_tiny,none -deeplabv3_mobilenet_v3_large,none -deeplabv3_resnet101,none -deeplabv3_resnet50,none +deeplabv3_mobilenet_v3_large,none,xfail_export,Requested None inlined input +deeplabv3_resnet101,none,xfail_export,Requested None inlined input +deeplabv3_resnet50,none,xfail_export,Requested None inlined input densenet121,none densenet161,none densenet169,none @@ -21,11 +21,11 @@ efficientnet_b7,none efficientnet_v2_l,none efficientnet_v2_m,none efficientnet_v2_s,none -fcn_resnet101,none -fcn_resnet50,none +fcn_resnet101,none,xfail_export,Requested None inlined input +fcn_resnet50,none,xfail_export,Requested None inlined input googlenet,none inception_v3,none -lraspp_mobilenet_v3_large,none +lraspp_mobilenet_v3_large,none,xfail_export,Requested None inlined input maxvit_t,none mc3_18,none mnasnet0_5,none @@ -39,8 +39,8 @@ mvit_v1_b,none mvit_v2_s,none r2plus1d_18,none r3d_18,none -raft_large,none -raft_small,none +raft_large,none,xfail_export,Mutating module attribute corr_pyramid during export +raft_small,none,xfail_export,Mutating module attribute corr_pyramid during export regnet_x_16gf,none regnet_x_1_6gf,none regnet_x_32gf,none From eeb8fe9a3fec53bbfeb1eb6870861a7393f79c1d Mon Sep 17 00:00:00 2001 From: Andrew Kwangwoong Park Date: Thu, 25 Jul 2024 
02:06:23 +0900 Subject: [PATCH 14/54] [GPU] Fix remaining issue to calculate present layout's padding for KVCache (#25706) ### Details: - Follow up remaining issue from https://github.com/openvinotoolkit/openvino/pull/25682 - Fix issue where kvcache was optimized out even if calculated present layout's padding was negative ### Tickets: - 146876 --- src/plugins/intel_gpu/src/graph/primitive_inst.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 9fb822955c41a4..e130040d372dfe 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -1208,14 +1208,11 @@ void primitive_inst::do_runtime_in_place_kv_cache() { GPU_DEBUG_TRACE_DETAIL << "[do runtime kv_cache opt] " << id() << " initial present_layout : " << present_layout.to_string() << std::endl; GPU_DEBUG_TRACE_DETAIL << "[do runtime kv_cache opt] " << id() << " initial past_layout : " << past_layout.to_string() << std::endl; auto max_pad = kv_cache_inst::get_max_pad(past_layout, _deps[0].first->_max_output_layout_count[0], sequence_axis_legacy, "past_layout"); - - if (max_pad > 0) { - const auto new_seq_len = static_cast(new_layout.get_shape()[sequence_axis]); - if (max_pad - new_seq_len >= 0) { - kv_cache_inst::update_pad(present_layout, max_pad - new_seq_len, sequence_axis_legacy); - GPU_DEBUG_TRACE_DETAIL << "[do runtime_in_place_kv_cache] " << id() << " Updated present_layout's pad : " - << present_layout.to_string() << std::endl; - } + const auto new_seq_len = static_cast(new_layout.get_shape()[sequence_axis]); + // In chatbot scenario, when chat history must be stored in kvcache, new_seq_len may not be 1 even if max_pad is greater than 0 + if (max_pad - new_seq_len >= 0) { + kv_cache_inst::update_pad(present_layout, max_pad - new_seq_len, sequence_axis_legacy); + GPU_DEBUG_TRACE_DETAIL << "[do
runtime_in_place_kv_cache] " << id() << " Updated present_layout's pad : " << present_layout.to_string() << std::endl; auto& variable = get_network().get_variable(desc->variable_info.variable_id); variable.set_layout(present_layout); GPU_DEBUG_TRACE_DETAIL << "[do_runtime_in_place_kv_cache] " << id() << "Updated variable with present_layout" From bb7f8d3f2cf706d7d2eb93f4eaad4e7182a351c3 Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Wed, 24 Jul 2024 22:13:12 +0000 Subject: [PATCH 15/54] [GPU] Minor refactoring (#25629) ### Details: - Refactor according to the comments in PR25449 ### Tickets: - *ticket-id* --- .../intel_gpu/graph/kernel_impl_params.hpp | 9 +++++++ .../include/intel_gpu/runtime/layout.hpp | 11 +++++++- .../intel_gpu/src/graph/primitive_inst.cpp | 26 +++++++++---------- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp index 0fc6cbdac13132..fa8a8807bbd92c 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/kernel_impl_params.hpp @@ -114,6 +114,15 @@ struct kernel_impl_params final { return output_layouts[idx]; } + layout& get_output_layout(size_t idx = 0) { + OPENVINO_ASSERT(output_layouts.size() > idx, + "The size of output layouts must be greater than the requested index: ", + "Requested index is ", idx, ",", + "but the size of output layouts is ", output_layouts.size()); + return output_layouts[idx]; + } + + bool has_fused_primitives() const { return !fused_desc.empty(); } ov::element::Type_t get_output_element_type() const { diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp index a454fc7afdee15..52e9f643c299d7 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp +++ 
b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp @@ -288,6 +288,15 @@ struct layout { return *this; } + layout clone_with_other_shape(const ov::PartialShape& new_shape) { + return layout(new_shape, this->data_type, this->format, this->data_padding); + } + + layout clone_with_other_shape(const ov::Shape& new_shape) { + return clone_with_other_shape(ov::PartialShape(new_shape)); + } + + friend bool operator==(const layout& lhs, const layout& rhs) { return lhs.data_type == rhs.data_type && lhs.format == rhs.format && lhs.size == rhs.size && lhs.data_padding == rhs.data_padding; } @@ -306,7 +315,7 @@ struct layout { return (lhs.data_padding < rhs.data_padding); } - /// Number of elements to be stored in this memory layout + /// Number of elements to be stored in this layout size_t count() const; /// Layout size with padding included diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index e130040d372dfe..f8267673722e64 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -465,7 +465,7 @@ void primitive_inst::update_shape() { auto desc = get_node().as().get_primitive(); auto var_mem_size = get_network().get_variable(desc->variable_info.variable_id).get_actual_mem_size(); // Need to trigger realloc_if_needed - if (var_mem_size < _impl_params->get_output_layout(0).get_buffer_size().count()) + if (var_mem_size < _impl_params->get_output_layout(0).get_linear_size()) set_shape_change(); } } @@ -684,13 +684,13 @@ event::ptr primitive_inst::realloc_if_needed() { prealloc_shape[seq_axis] += tmp_prealloc_count; required_buffer_size = std::accumulate(prealloc_shape.begin(), prealloc_shape.end(), size_t(1), std::multiplies()); } else { - required_buffer_size = (updated_layouts[i].get_buffer_size().count()); + required_buffer_size = (updated_layouts[i].get_linear_size()); } if (required_buffer_size * 10 < _max_output_layout_count[i]) { 
reclaim = true; } if (reclaim) { - GPU_DEBUG_TRACE_DETAIL << id() << ": Updated output[" << i << "] size " << updated_layouts[i].get_buffer_size().count() + GPU_DEBUG_TRACE_DETAIL << id() << ": Updated output[" << i << "] size " << updated_layouts[i].get_linear_size() << " is much smaller than current memory size! " << _max_output_layout_count[i] << "Reset memory of output " << i << std::endl; _max_output_layout_count[i] = 0; @@ -705,7 +705,7 @@ event::ptr primitive_inst::realloc_if_needed() { } for (size_t i = 0; i < actual_layouts.size(); ++i) { - bool can_reuse_buffer = (_outputs[i] && updated_layouts[i].get_buffer_size().count() <= _max_output_layout_count[i]); + bool can_reuse_buffer = (_outputs[i] && updated_layouts[i].get_linear_size() <= _max_output_layout_count[i]); std::pair prealloc_info; if (_node->is_type() && i == 0) { const auto& desc = _node->as().get_primitive(); @@ -717,17 +717,15 @@ event::ptr primitive_inst::realloc_if_needed() { prealloc_info = sp.predict_preallocation_shape(id(), updated_layouts[i], can_reuse_buffer, i, tmp_prealloc_count); } if (prealloc_info.first && sp.can_preallocate(ov::shape_size(prealloc_info.second) * (dt_sizes_in_B[i]))) { - auto new_layout = updated_layouts[i]; - new_layout.set_partial_shape(prealloc_info.second); - updated_params.output_layouts[i] = new_layout; + updated_params.output_layouts[i] = updated_layouts[i].clone_with_other_shape(prealloc_info.second); } - if (updated_params.output_layouts[i].get_buffer_size().count() < updated_layouts[i].get_buffer_size().count()) { + if (updated_params.output_layouts[i].get_linear_size() < updated_layouts[i].get_linear_size()) { updated_params.output_layouts[i] = updated_layouts[i]; } if (can_reuse_buffer) { GPU_DEBUG_TRACE_DETAIL << id() << ": reuse previously allocated output buffer[" << i << "] - " - << actual_layouts[i].get_buffer_size().count() << "/" << _max_output_layout_count[i] + << actual_layouts[i].get_linear_size() << "/" << _max_output_layout_count[i] << 
std::endl; if (_node->is_type() && (i == 0)) { // kv_cache has already assigned memory. @@ -759,7 +757,7 @@ event::ptr primitive_inst::realloc_if_needed() { GPU_DEBUG_TRACE_DETAIL << id() << ": realloc output memory. " << std::endl; GPU_DEBUG_TRACE_DETAIL << " outputs[" << i << "] " << " Current buffer_size=" << _max_output_layout_count[i] - << " Requested buffer_size=" << updated_layouts[i].get_buffer_size().count() + << " Requested buffer_size=" << updated_layouts[i].get_linear_size() << std::endl; _outputs[i] = allocate_output(_network.get_engine(), _network.get_memory_pool(), @@ -773,7 +771,7 @@ event::ptr primitive_inst::realloc_if_needed() { is_output_buffer(this, true), output_memory_ptr(i).get(), true); - _max_output_layout_count[i] = updated_params.output_layouts[i].get_buffer_size().count(); + _max_output_layout_count[i] = updated_params.output_layouts[i].get_linear_size(); GPU_DEBUG_CODE(std::string memalloc_info = ""); GPU_DEBUG_CODE(memalloc_info += (((_outputs.size() > 1) ? ("o" + to_string(i) + ":") : "") + (_outputs[i]->from_memory_pool ? "from_pool" : "new_alloc"));) @@ -1852,7 +1850,7 @@ primitive_inst::primitive_inst(network & network, program_node const& node, bool _impl_params->strm = _network.get_stream_ptr(); for (size_t i = 0; i < get_node().get_output_layouts().size(); ++i) { if (_outputs.size() > i) { - _max_output_layout_count.push_back(_outputs[i] ? _outputs[i]->get_layout().get_buffer_size().count() : 0); + _max_output_layout_count.push_back(_outputs[i] ? 
_outputs[i]->get_layout().get_linear_size() : 0); } else { _outputs.push_back(nullptr); _max_output_layout_count.push_back(0); @@ -1985,9 +1983,9 @@ event::ptr primitive_inst::update_weights() { GPU_DEBUG_TRACE_DETAIL << id() << ": add original weights memory " << original_layout.to_short_string() << " to weights cache; " << "cache_size=" << _reordered_weights_cache.size() << "/" << _reordered_weights_cache.capacity() << std::endl; } else { - auto expected_layout = reorder_kernel_params->get_output_layout(); // Set original partial shape, because it may be lost during kernel_selector::weights_tensor -> layout conversion - expected_layout.set_partial_shape(original_layout.get_partial_shape()); + auto expected_layout = + reorder_kernel_params->get_output_layout().clone_with_other_shape(original_layout.get_partial_shape()); _impl_params->weights_layout = optional_layout(expected_layout); if (_reordered_weights_cache.has(expected_layout)) { From b9d98cb32e0c3b1de46eecf2c3221858723a7eb8 Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Thu, 25 Jul 2024 01:37:14 +0200 Subject: [PATCH 16/54] [CPU] Weights caching: hash computation fix (#25625) ### Details: - *Modify hash computation logic: take into account not only dnnl desc format, but all the desc info. 
Previous logic was not fully correct since the hash could be equal for 2 descs with different `compute_compensations` flag -- this led to accuracy issues* - *Weights repacking hash computation logic is moved to one helper which is reused across the CPU plugin code* ### Tickets: - *CVS-139671* --- .../intel_cpu/src/dnnl_extension_utils.cpp | 16 ++- .../intel_cpu/src/dnnl_extension_utils.h | 8 ++ src/plugins/intel_cpu/src/node.cpp | 17 +-- .../src/nodes/executors/dnnl/dnnl_utils.cpp | 6 +- src/plugins/intel_cpu/src/weights_cache.cpp | 2 - src/plugins/intel_cpu/src/weights_cache.hpp | 28 ----- .../quantized_matmuls_with_shared_weights.cpp | 103 ++++++++++++++++++ 7 files changed, 130 insertions(+), 50 deletions(-) create mode 100644 src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/quantized_matmuls_with_shared_weights.cpp diff --git a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp index 82e4d3fde3ac14..60351d14b3e89e 100644 --- a/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.cpp @@ -3,15 +3,17 @@ // #include "dnnl_extension_utils.h" -#include "memory_desc/dnnl_blocked_memory_desc.h" -#include "onednn/iml_type_mapper.h" -#include "utils/general_utils.h" + #include #include #include - #include +#include "cpu_memory.h" +#include "memory_desc/dnnl_blocked_memory_desc.h" +#include "onednn/iml_type_mapper.h" +#include "utils/general_utils.h" + using namespace dnnl; namespace ov { @@ -254,5 +256,11 @@ bool DnnlExtensionUtils::isUnarySupportedAsPostOp(Algorithm alg) { #endif } +std::string DnnlExtensionUtils::computeWeightsStringHash(const std::shared_ptr memory, + const std::shared_ptr dstDesc) { + const auto desc_hash = dnnl::impl::primitive_hashing::get_md_hash(*dstDesc->getDnnlDesc().get()); + return std::to_string(desc_hash) + "_" + std::to_string(reinterpret_cast(memory->getData())); +} + } // namespace intel_cpu } // namespace ov diff
--git a/src/plugins/intel_cpu/src/dnnl_extension_utils.h b/src/plugins/intel_cpu/src/dnnl_extension_utils.h index 5def48284ab062..cdc6342e8963fd 100644 --- a/src/plugins/intel_cpu/src/dnnl_extension_utils.h +++ b/src/plugins/intel_cpu/src/dnnl_extension_utils.h @@ -22,6 +22,7 @@ namespace intel_cpu { class DnnlMemoryDesc; class DnnlBlockedMemoryDesc; class Shape; +class IMemory; class DnnlExtensionUtils { public: @@ -101,6 +102,13 @@ class DnnlExtensionUtils { static dnnl_memory_desc_t clone_desc(const_dnnl_memory_desc_t cdesc); static const char* query_pd_info(const_dnnl_primitive_desc_t pd); static bool isUnarySupportedAsPostOp(Algorithm alg); + /** + * @brief Computes weights string hash based on weights memory and requested descriptor + * @param memory Weights memory pointer + * @param dstDesc descriptor defining weights representation after repacking + * @return string hash + */ + static std::string computeWeightsStringHash(const std::shared_ptr memory, const std::shared_ptr dstDesc); }; } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index dc24c611861a16..41c3011f8707ec 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -831,16 +831,8 @@ void Node::prepareMemory(const DnnlMemoryDescPtr& intDesc, size_t indx) { MemoryPtr ptr; auto weightCache = context->getWeightsCache(); if (weightCache != nullptr && memory::format_kind::blocked == intDesc->getDnnlDesc().get_format_kind()) { - const auto& format = intDesc->serializeFormat(); - const uint64_t data_hash = - weightCache->GetHashFunc().hash(static_cast(internalBlob->getData()), - internalBlob->getSize()); - - const std::string string_hash = name + "_" + std::to_string(indx) - + "_" + format - + "_" + std::to_string(internalBlob->getSize()) - + "_" + std::to_string(data_hash); - + const auto string_hash = + name + "_" + std::to_string(indx) + "_" + DnnlExtensionUtils::computeWeightsStringHash(internalBlob, 
intDesc); ptr = *weightCache->findOrCreate(string_hash, create); } else { ptr = create(); @@ -905,10 +897,7 @@ MemoryPtr Node::prepareWeightMemory(DnnlMemoryDescPtr dstWeightDesc, DnnlMemoryD auto weightCache = context->getWeightsCache(); if (weightCache != nullptr) { - const std::string string_hash = getName() + "_" + format - + "_" + std::to_string(edgeMem->getSize()) - + "_" + std::to_string(*edgeMem->getDataAs()); - + const auto string_hash = DnnlExtensionUtils::computeWeightsStringHash(edgeMem, dstWeightDesc); ptr = *weightCache->findOrCreate(string_hash, create); } else { ptr = create(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_utils.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_utils.cpp index d9cbd05a847231..c801eca5bbe13a 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_utils.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_utils.cpp @@ -4,12 +4,15 @@ #include "nodes/executors/dnnl/dnnl_utils.hpp" +#include #include #include "cpu_memory.h" #include "memory_desc/dnnl_memory_desc.h" +#include "memory_desc/cpu_memory_desc_utils.h" #include "nodes/executors/executor.hpp" #include "nodes/reorder.h" +#include "utils/cpu_utils.hpp" namespace ov { namespace intel_cpu { @@ -86,8 +89,7 @@ MemoryPtr prepareWeightsMemory(const DnnlMemoryDescPtr srcWeightDesc, MemoryPtr ptr; if (globalWeightCache && dnnl::memory::format_kind::blocked == dstWeightDesc->getDnnlDesc().get_format_kind()) { - const std::string string_hash = format + "_" + std::to_string(weightsMem->getSize()) + "_" + - std::to_string(reinterpret_cast(weightsMem->getData())); + const auto string_hash = DnnlExtensionUtils::computeWeightsStringHash(weightsMem, dstWeightDesc); ptr = *globalWeightCache->findOrCreate(string_hash, create); } else { ptr = create(); diff --git a/src/plugins/intel_cpu/src/weights_cache.cpp b/src/plugins/intel_cpu/src/weights_cache.cpp index eed92f5977cffe..65fd3644ad4215 100644 --- 
a/src/plugins/intel_cpu/src/weights_cache.cpp +++ b/src/plugins/intel_cpu/src/weights_cache.cpp @@ -10,8 +10,6 @@ namespace ov { namespace intel_cpu { -const SimpleDataHash WeightsSharing::simpleCRC; - WeightsSharing::SharedMemory::SharedMemory( std::unique_lock && lock, const MemoryInfo::Ptr & memory, diff --git a/src/plugins/intel_cpu/src/weights_cache.hpp b/src/plugins/intel_cpu/src/weights_cache.hpp index 70c62569cdeb47..f0401700e49719 100644 --- a/src/plugins/intel_cpu/src/weights_cache.hpp +++ b/src/plugins/intel_cpu/src/weights_cache.hpp @@ -22,31 +22,6 @@ namespace ov { namespace intel_cpu { - -class SimpleDataHash { -public: - SimpleDataHash() { - for (int i = 0; i < kTableSize; i++) { - uint64_t c = i; - for (int j = 0; j < 8; j++) - c = ((c & 1) ? 0xc96c5795d7870f42 : 0) ^ (c >> 1); - table[i] = c; - } - } - // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182 - uint64_t hash(const unsigned char* data, size_t size) const { - uint64_t crc = 0; - for (size_t idx = 0; idx < size; idx++) - crc = table[(unsigned char)crc ^ data[idx]] ^ (crc >> 8); - - return ~crc; - } - -protected: - static constexpr int kTableSize = 256; - uint64_t table[kTableSize]; -}; - /** * Caching store of Memory objects * Will return a cached object or create new one @@ -94,12 +69,9 @@ class WeightsSharing { SharedMemory::Ptr get(const std::string& key) const; - static const SimpleDataHash& GetHashFunc () { return simpleCRC; } - protected: mutable std::mutex guard; std::unordered_map sharedWeights; - static const SimpleDataHash simpleCRC; }; /** diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/quantized_matmuls_with_shared_weights.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/quantized_matmuls_with_shared_weights.cpp new file mode 100644 index 00000000000000..107d669f442f80 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/quantized_matmuls_with_shared_weights.cpp @@ -0,0 
+1,103 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "common_test_utils/node_builders/constant.hpp" +#include "common_test_utils/node_builders/fake_quantize.hpp" +#include "common_test_utils/node_builders/reshape.hpp" +#include "openvino/openvino.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" + +namespace ov { +namespace test { + +enum class FQInterval { U8, I8 }; +inline std::ostream& operator<<(std::ostream& os, FQInterval interval) { + switch (interval) { + case FQInterval::U8: + os << "U8"; + break; + case FQInterval::I8: + os << "I8"; + break; + default: + OPENVINO_THROW("Unknown FQInterval"); + } + return os; +} + +typedef std::tuple QuantizedMatMulsWithSharedWeightsParans; + +/* This test verifies the correctness of the hash function computation for the shared weights. + Specifically, it checks that when one op requires compensations computation and second one does not, + the resulting hashes are not identical, and the weights are repacked for each op separately +*/ +class QuantizedMatMulsWithSharedWeightsTest + : public testing::WithParamInterface, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + InputShape shape1; + InputShape shape2; + FQInterval interval1; + FQInterval interval2; + std::tie(shape1, shape2, interval1, interval2) = obj.param; + std::ostringstream result; + result << "IS1=" << shape1 << "IS2=" << shape2 << "FQInterval1=" << interval1 << "FQInterval2=" << interval2; + return result.str(); + } + + void SetUp() override { + targetDevice = ov::test::utils::DEVICE_CPU; + abs_threshold = 1e-4; + + InputShape shape1; + InputShape shape2; + FQInterval interval1; + FQInterval interval2; + std::tie(shape1, shape2, interval1, interval2) = this->GetParam(); + init_input_shapes({shape1, shape2}); + + const auto weights = ov::test::utils::make_constant(ov::element::i8, {16, 16}); + const auto convert = 
std::make_shared(weights, ov::element::f32); + const auto scale = ov::test::utils::make_constant(ov::element::f32, {16, 1}, ov::test::utils::InputGenerateData(0, 1, 5)); + const auto mul = std::make_shared(convert, scale); + + auto build_fq = [](const ov::Output& parent, FQInterval interval_type) { + const auto low = interval_type == FQInterval::I8 ? std::vector{-12.8f} : std::vector{0.f}; + const auto high = interval_type == FQInterval::I8 ? std::vector{12.7f} : std::vector{25.5f}; + return ov::test::utils::make_fake_quantize(parent, ov::element::f32, 256, {1, 1, 1, 1}, low, high, low, high); + }; + + const auto param1 = std::make_shared(ov::element::f32, inputDynamicShapes[0]); + const auto fq1 = build_fq(param1, interval1); + const auto mm1 = std::make_shared(fq1, mul, false, true); + + const auto param2 = std::make_shared(ov::element::f32, inputDynamicShapes[1]); + const auto fq2 = build_fq(param2, interval2); + const auto mm2 = std::make_shared(fq2, mul, false, true); + + function = std::make_shared(ov::OutputVector{mm1, mm2}, ov::ParameterVector{param1, param2}); + } +}; + +TEST_P(QuantizedMatMulsWithSharedWeightsTest, CompareWithRefs) { + run(); +} + +namespace { + +std::vector shapes1{{{-1, -1, -1, 16}, {{1, 1, 15, 16}, {1, 1, 12, 16}, {1, 1, 15, 16}}}}; +std::vector shapes2{{{-1, -1, -1, 16}, {{1, 1, 12, 16}, {1, 1, 15, 16}, {1, 1, 12, 16}}}}; +INSTANTIATE_TEST_SUITE_P(smoke_CustomTest, QuantizedMatMulsWithSharedWeightsTest, + ::testing::Combine( + ::testing::ValuesIn(shapes1), + ::testing::ValuesIn(shapes2), + ::testing::Values(FQInterval::U8, FQInterval::I8), + ::testing::Values(FQInterval::U8, FQInterval::I8)), + QuantizedMatMulsWithSharedWeightsTest::getTestCaseName); +} // namespace +} // namespace test +} // namespace ov From 0c598f4d91cf12915af25ca0fed7970595a095f9 Mon Sep 17 00:00:00 2001 From: Yuan Hu Date: Thu, 25 Jul 2024 12:02:18 +0800 Subject: [PATCH 17/54] [CPU] only allow per-oc or per-tensor FQ fusing into FC (#25530) ### Details: - Add a 
check to reject non-supported FakeQuantize from fusing into FC node, so they can run in standalone mode w/o causing exceptions when composing oneDNN postOps. - port from https://github.com/openvinotoolkit/openvino/pull/23009 - add test case ### Tickets: - *CVS-131890* --------- Signed-off-by: HU Yuan2 Co-authored-by: Li, Tingqian --- .../intel_cpu/src/nodes/fullyconnected.cpp | 21 ++++++++++ .../instances/x64/matmul.cpp | 39 +++++++++++++++++++ .../functional/utils/fusing_test_utils.cpp | 6 ++- .../functional/utils/fusing_test_utils.hpp | 21 ++++++++++ 4 files changed, 86 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 76e41db1cd06c0..da3dcafa4750ef 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -25,6 +25,8 @@ #include "utils/debug_capabilities.h" #include "utils/general_utils.h" +#include "fake_quantize.h" + using namespace dnnl; using namespace ov::element; @@ -94,6 +96,25 @@ bool FullyConnected::canFuse(const NodePtr& node) const { #if defined(OV_CPU_WITH_SHL) return false; #endif + if (node->getType() == Type::FakeQuantize) { + auto* fq = dynamic_cast(node.get()); + if (fq->getBroadcastingPolicy() != FakeQuantize::BroadcastingPolicy::PerTensor) { + const auto& dstShape = getOutputShapeAtPort(0); + auto dataRanks = dstShape.getRank(); + // only per-OC or per-Tensor fakequantize can be postOps + if (fq->getAxis() != dataRanks - 1) { + DEBUG_LOG("reject FakeQuantize ", + fq->getName(), + "(axis=", + fq->getAxis(), + ") from fusing into ", + getName(), + " with dst shape ", + dstShape); + return false; + } + } + } return canFuseSimpleOperation(node); } diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp index 3daa819cd4854d..83faa2c06ec6f6 100644 
--- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/x64/matmul.cpp @@ -1108,6 +1108,45 @@ INSTANTIATE_TEST_SUITE_P( testParamsDynamicFusingFullUndefShapes, MatMulLayerCPUTest::getTestCaseName); +class FCNotFuseFQCPUTest : public MatMulLayerCPUTest { + void SetUp() override { + MatMulLayerCPUTest::SetUp(); + expectPostOpsToBeFused = false; + } +}; + +TEST_P(FCNotFuseFQCPUTest, CompareWithRefs) { + run(); + CheckPluginRelatedResults(compiledModel, cpuNodeType); +} + +const std::vector& notFuseSmoke() { + static const std::vector params = { + {static_shapes_to_test_representation({{59, 1}, {1, 120}}), {false, true}}, + {static_shapes_to_test_representation({{59, 1}, {1, 120}}), {true, true}}, + + {static_shapes_to_test_representation({{59, 120}, {120, 1}}), {false, false}}, + {static_shapes_to_test_representation({{59, 120}, {120, 1}}), {true, true}}, + + {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {true, false}}, + {static_shapes_to_test_representation({{71, 128}, {128, 20}}), {false, true}}, + }; + return params; +} + +const auto notFuseTestParamsSmoke = ::testing::Combine(::testing::Combine(::testing::ValuesIn(notFuseSmoke()), + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::Values(utils::InputLayerType::CONSTANT), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(emptyAdditionalConfig())), + ::testing::Values(MatMulNodeType::FullyConnected), + ::testing::ValuesIn({fusingFakeQuantizePerBatch, fusingFakeQuantizeFullTensor}), + ::testing::ValuesIn({CPUSpecificParams{{}, {}, {""}, "any_type"}})); + +INSTANTIATE_TEST_SUITE_P(smoke_FC, FCNotFuseFQCPUTest, notFuseTestParamsSmoke, FCNotFuseFQCPUTest::getTestCaseName); + } // namespace } // namespace MatMul } // namespace test diff --git 
a/src/plugins/intel_cpu/tests/functional/utils/fusing_test_utils.cpp b/src/plugins/intel_cpu/tests/functional/utils/fusing_test_utils.cpp index 39e60bdfe8a235..6f5e559201b30e 100644 --- a/src/plugins/intel_cpu/tests/functional/utils/fusing_test_utils.cpp +++ b/src/plugins/intel_cpu/tests/functional/utils/fusing_test_utils.cpp @@ -58,7 +58,11 @@ void CpuTestWithFusing::CheckFusingResults(const std::shared_ptr postOpMgrPtr; std::vector fusedOps; bool checkFusingPosition = true; + bool expectPostOpsToBeFused = true; }; static int getChannelAxis(const ov::AxisSet &axes, bool keep_dims) { @@ -304,6 +305,26 @@ const auto fusingFakeQuantizePerChannel = fusingSpecificParams{std::make_shared< return ov::test::utils::make_fake_quantize(cfg.input, localPrc, 256, newShape); }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"}}; +const auto fusingFakeQuantizePerBatch = fusingSpecificParams{std::make_shared(std::vector{ + {[](postNodeConfig& cfg){ + auto localPrc = cfg.input->get_element_type(); + const auto shape = cfg.input->get_output_partial_shape(0); + ov::Shape perBatchSize(shape.size(), 1); + perBatchSize[0] = shape[0].get_length(); + return ov::test::utils::make_fake_quantize(cfg.input, localPrc, 256, perBatchSize); + }, "FakeQuantize(PerBatch)"}}), {"FakeQuantize"}}; + +const auto fusingFakeQuantizeFullTensor = fusingSpecificParams{std::make_shared(std::vector{ + {[](postNodeConfig& cfg){ + auto localPrc = cfg.input->get_element_type(); + const auto shape = cfg.input->get_output_partial_shape(0); + ov::Shape fullTensorShape(shape.size(), 1); + for (size_t axis = 0; axis < shape.size(); axis++) { + fullTensorShape[axis] = shape[axis].get_length(); + } + return ov::test::utils::make_fake_quantize(cfg.input, localPrc, 256, fullTensorShape); + }, "FakeQuantize(FullTensor)"}}), {"FakeQuantize"}}; + const auto fusingFakeQuantizePerChannelRelu = fusingSpecificParams{std::make_shared(std::vector{ {[](postNodeConfig& cfg){ auto localPrc = cfg.input->get_element_type(); From 
8b18e874819c6e54af1998842e83e225288550c2 Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Thu, 25 Jul 2024 06:52:50 +0200 Subject: [PATCH 18/54] Checkout OMZ (#25709) ### Details: - took changes from https://github.com/openvinotoolkit/openvino/pull/25661 ### Tickets: - *ticket-id* --- tests/constraints.txt | 2 +- tests/e2e_tests/requirements.txt | 2 +- thirdparty/open_model_zoo | 2 +- tools/constraints.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/constraints.txt b/tests/constraints.txt index 0427c504395950..16bffdf16967db 100644 --- a/tests/constraints.txt +++ b/tests/constraints.txt @@ -18,7 +18,7 @@ opencv-python>=4.5 paddlepaddle==2.6.1 protobuf>=3.18.1,<4.0.0 py>=1.9.0 -pytest>=5.0,<7.5 +pytest>=5.0,<8.4 pytest-dependency==0.5.1 pytest-html==4.1.1 pytest-timeout==2.2.0 diff --git a/tests/e2e_tests/requirements.txt b/tests/e2e_tests/requirements.txt index 2c37134327f7cc..2d380c682819aa 100644 --- a/tests/e2e_tests/requirements.txt +++ b/tests/e2e_tests/requirements.txt @@ -20,7 +20,7 @@ scikit-image>=0.17.2 tabulate==0.9.0 pytest>=5.0,<=7.0.1; python_version < '3.10' -pytest==7.2.0; python_version >= '3.10' +pytest==8.3.1; python_version >= '3.10' pytest-cov==2.11.1 # pytest-html==1.19.0 pytest-html diff --git a/thirdparty/open_model_zoo b/thirdparty/open_model_zoo index 9c6d95a2a668d6..cec8d2be4baf81 160000 --- a/thirdparty/open_model_zoo +++ b/thirdparty/open_model_zoo @@ -1 +1 @@ -Subproject commit 9c6d95a2a668d6ae41aebda42b15608db7dd3fa0 +Subproject commit cec8d2be4baf81c191091abd83c59507fc12d2e8 diff --git a/tools/constraints.txt b/tools/constraints.txt index 21961ea88f9e14..258ed7a8b3208c 100644 --- a/tools/constraints.txt +++ b/tools/constraints.txt @@ -7,7 +7,7 @@ mxnet~=1.2.0; sys_platform == 'win32' mxnet>=1.7.0.post2,<=1.9.1; sys_platform != 'win32' onnx>=1.8.1,<=1.15.0 networkx<=3.1.0 -pytest>=5.0,<7.3 +pytest>=5.0,<8.4 protobuf>=3.18.1,<4.0.0 defusedxml>=0.7.1 requests>=2.25.1 From 
8f07b923116fb4de9503b0dd725a017356b1f529 Mon Sep 17 00:00:00 2001 From: Roman Lyamin Date: Thu, 25 Jul 2024 10:04:25 +0400 Subject: [PATCH 19/54] [GPU] Fix incorrect selection of preferred formats for weights in case of OneDNN (#25697) ### Tickets: - *[146165](https://jira.devtools.intel.com/browse/CVS-146165)* --- .../intel_gpu/src/graph/layout_optimizer.cpp | 8 ++- src/plugins/intel_gpu/src/graph/reshape.cpp | 3 +- .../unit/test_cases/convolution_gpu_test.cpp | 54 +++++++++++++++++++ 3 files changed, 62 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index b2acc2abf1c173..bcada1fa769fea 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1937,13 +1937,17 @@ void layout_optimizer::select_preferred_formats_for_onednn(program_node& node, d prim_input = node.get_dependency_index(node.as().input()); if (node.is_type()) prim_input = node.get_dependency_index(node.as().input()); + size_t prim_weights = node.get_primitive()->input_size(); // Note: did not handle attribute properly. especially for zero-point cldnn::format src_fmt = format::any; - if (idx == prim_input) + if (idx == prim_input) { src_fmt = onednn::find_data_format(prim_desc.src_desc()); - else // Dep for fused post ops + } else if (idx == prim_weights) { + src_fmt = format::custom; + } else { // Dep for fused post ops src_fmt = onednn::find_data_format(prim_desc.dst_desc()); + } // WA: shallow convolution needs to set input format by bfyx. // onednn recommended byxf for input format. It will insert reorder before shallow conv. 
diff --git a/src/plugins/intel_gpu/src/graph/reshape.cpp b/src/plugins/intel_gpu/src/graph/reshape.cpp index eed87ed759211d..5cbef11dd3b045 100644 --- a/src/plugins/intel_gpu/src/graph/reshape.cpp +++ b/src/plugins/intel_gpu/src/graph/reshape.cpp @@ -109,7 +109,8 @@ layout reshape_inst::calc_output_layout(reshape_node const& node, kernel_impl_pa auto desc = impl_param.typed_desc(); if (desc->output_shape.count() == 0) { if (desc->output_partial_shape.size() != 0) { - return layout{desc->output_partial_shape, input_layout.data_type, input_layout.format}; + format out_fmt = format::adjust_to_rank(input_layout.format, desc->output_partial_shape.rank().get_length()); + return layout{desc->output_partial_shape, input_layout.data_type, out_fmt}; } else { OPENVINO_ASSERT("[GPU] Output shape is not provided"); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp index 0bf595e124db89..132b2378420a03 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -9933,6 +9934,59 @@ TEST(convolution_gpu_onednn, has_proper_synchronization) { } } +// A test that detects crashes in OneDNN convolution selection checks +TEST(convolution_gpu_onednn, grouped_runtime_weights) { + auto& engine = get_test_engine(); + + if (!engine.get_device_info().supports_immad) + return; + + tests::random_generator rg(GET_SUITE_NAME); + + int64_t input_b = 1, input_f = 256, input_y = 29, input_x = 29; + auto input_size = ov::PartialShape{ input_b, input_f, input_y, input_x }; + auto input_data = rg.generate_random_4d(input_b, input_f, input_y, input_x, -1, 1); + auto input_data_byxf = flatten_4d(format::byxf, input_data); + auto input_mem = engine.allocate_memory({ input_size, data_types::f16, format::byxf }); + 
set_values(input_mem, input_data_byxf); + + int64_t weights_b = 1, weights_f = 256, weights_y = 5, weights_x = 5; + auto weights_size = ov::PartialShape{ weights_b, weights_f, weights_y, weights_x }; + auto weights_data = rg.generate_random_4d(weights_b, weights_f, weights_y, weights_x, -1, 1); + auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); + auto weights_mem = engine.allocate_memory({ weights_size, data_types::f16, format::bfyx }); + set_values(weights_mem, weights_data_bfyx); + + auto input = input_layout("input", input_mem->get_layout()); + auto weights = input_layout("weights", weights_mem->get_layout()); + auto weights_reshape = reshape("reshaped_weights", input_info("weights"), true, { 256, 1, 1, 5, 5 }, { 256, 1, 1, 5, 5 }); + auto conv = convolution("conv", input_info("input"), "reshaped_weights", no_bias, 256, { 1, 1 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, true); + auto output_reorder = reorder("reorder", input_info("conv"), { data_types::f32, format::bfyx, { 1, 256, 25, 25 } }); + + topology topology(input, weights, weights_reshape, conv, output_reorder); + + ExecutionConfig config = get_test_default_config(engine); + ov::intel_gpu::ImplementationDesc conv_impl = { format::byxf, "", impl_types::onednn }; + config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "conv", conv_impl }})); + config.set_property(ov::intel_gpu::optimize_data(true)); + + network network(engine, topology, config); + + network.set_input_data("input", input_mem); + network.set_input_data("weights", weights_mem); + + auto output = network.execute(); + + ASSERT_EQ(output.size(), size_t(1)); + ASSERT_EQ(output.begin()->first, "reorder"); + + auto output_memory = output.at("reorder").get_memory(); + auto output_layout = output_memory->get_layout(); + cldnn::mem_lock output_ptr(output_memory, get_test_stream()); + + ASSERT_EQ(output_layout.get_shape(), ov::Shape({1, 256, 25, 25})); +} + #endif // ENABLE_ONEDNN_FOR_GPU template From 
e27c6f614140b6a02c6a2fccc8077e3a91c3f483 Mon Sep 17 00:00:00 2001 From: Alexandru Enache Date: Thu, 25 Jul 2024 09:47:54 +0300 Subject: [PATCH 20/54] [intel-npu] Adding fix for max_tiles and stepping initializations (#25618) ### Details: - MAX_TILES and STEPPING should be initialized with the default values from the used backend if the user does not set them. - these two properties will be initialized by the driver if L0 backend is used. ### Tickets: - *E#130074* --- .../intel_npu/src/plugin/src/plugin.cpp | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index c503605eabc8e0..98f037f7a47271 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -221,15 +221,6 @@ Plugin::Plugin() // parse again env_variables after backend is initialized to get backend proprieties _globalConfig.parseEnvVars(); - // initialize properties which have device-tied default values in global config - // *only if there is a driver available - if (_metrics->GetAvailableDevicesNames().size() > 0) { - _globalConfig.update({{ov::intel_npu::stepping.name(), - std::to_string(_metrics->GetSteppingNumber(get_specified_device_name(_globalConfig)))}}); - _globalConfig.update({{ov::intel_npu::max_tiles.name(), - std::to_string(_metrics->GetMaxTiles(get_specified_device_name(_globalConfig)))}}); - } - // Map from name to function {Config -> ov::Any} // Note that some properties are RW before network is loaded, and become RO after network is loaded _properties = { @@ -472,14 +463,24 @@ Plugin::Plugin() {ov::intel_npu::stepping.name(), {false, ov::PropertyMutability::RW, - [](const Config& config) { - return config.get(); + [&](const Config& config) { + if (!config.has()) { + const auto specifiedDeviceName = get_specified_device_name(config); + return 
static_cast(_metrics->GetSteppingNumber(specifiedDeviceName)); + } else { + return config.get(); + } }}}, {ov::intel_npu::max_tiles.name(), {false, ov::PropertyMutability::RW, - [](const Config& config) { - return config.get(); + [&](const Config& config) { + if (!config.has()) { + const auto specifiedDeviceName = get_specified_device_name(config); + return static_cast(_metrics->GetMaxTiles(specifiedDeviceName)); + } else { + return config.get(); + } }}}, {ov::intel_npu::compilation_mode.name(), {false, @@ -632,7 +633,8 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // Update stepping w/ information from driver, unless provided by user or we are off-device // Ignore, if compilation was requested for platform, different from current if (!localConfig.has() && device != nullptr && - device->getName() == ov::intel_npu::Platform::standardize(platform)) { + device->getName() == ov::intel_npu::Platform::standardize(platform) && + _metrics->GetBackendName() == "level_zero") { try { localConfig.update({{ov::intel_npu::stepping.name(), std::to_string(device->getSubDevId())}}); } catch (...) { @@ -643,7 +645,8 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // Update max_tiles w/ information from driver, unless provided by user or we are off-device // Ignore, if compilation was requested for platform, different from current if (!localConfig.has() && device != nullptr && - device->getName() == ov::intel_npu::Platform::standardize(platform)) { + device->getName() == ov::intel_npu::Platform::standardize(platform) && + _metrics->GetBackendName() == "level_zero") { try { localConfig.update({{ov::intel_npu::max_tiles.name(), std::to_string(device->getMaxNumSlices())}}); } catch (...) 
{ From b50cf21c4287fd2c86dac9d350882285ad310f7e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 Jul 2024 11:18:37 +0200 Subject: [PATCH 21/54] Update tensorflow requirement from <2.17.0,>=1.15.5 to >=1.15.5,<2.18.0 in /src/bindings/python (#25536) Updates the requirements on [tensorflow](https://github.com/tensorflow/tensorflow) to permit the latest version.
Release notes

Sourced from tensorflow's releases.

TensorFlow 2.17.0

Release 2.17.0

TensorFlow

Breaking Changes

  • GPU
    • Support for NVIDIA GPUs with compute capability 5.x (Maxwell generation) has been removed from TF binary distributions (Python wheels).

Major Features and Improvements

  • Add is_cpu_target_available, which indicates whether or not TensorFlow was built with support for a given CPU target. This can be useful for skipping target-specific tests if a target is not supported.

  • tf.data

    • Support data.experimental.distributed_save. distributed_save uses tf.data service (https://www.tensorflow.org/api_docs/python/tf/data/experimental/service) to write distributed dataset snapshots. The call is non-blocking and returns without waiting for the snapshot to finish. Setting wait=True to tf.data.Dataset.load allows the snapshots to be read while they are being written.

Bug Fixes and Other Changes

  • GPU

    • Support for NVIDIA GPUs with compute capability 8.9 (e.g. L4 & L40) has been added to TF binary distributions (Python wheels).
  • Replace DebuggerOptions of TensorFlow Quantizer, and migrate to DebuggerConfig of StableHLO Quantizer.

  • Add TensorFlow to StableHLO converter to TensorFlow pip package.

  • TensorRT support: this is the last release supporting TensorRT. It will be removed in the next release.

  • NumPy 2.0 support: TensorFlow is going to support NumPy 2.0 in the next release. It may break some edge cases of TensorFlow API usage.

  • tf.lite

    • Quantization for the FullyConnected layer is switched from per-tensor to per-channel scales for the dynamic range quantization use case (float32 inputs / outputs and int8 weights). The change enables the new quantization schema globally in the converter and inference engine. The new behaviour can be disabled via the experimental flag converter._experimental_disable_per_channel_quantization_for_dense_layers = True.
    • C API:
      • The experimental TfLiteRegistrationExternal type has been renamed as TfLiteOperator, and likewise for the corresponding API functions.
    • The Python TF Lite Interpreter bindings now have an option experimental_default_delegate_latest_features to enable all default delegate features.
    • Flatbuffer version update:
      • GetTemporaryPointer() bug fixed.
  • tf.data

    • Add wait to tf.data.Dataset.load. If True, for snapshots written with distributed_save, it reads the snapshot while it is being written. For snapshots written with regular save, it waits for the snapshot until it's finished. The default is False for backward compatibility. Users of distributed_save are recommended to set it to True.
  • tf.tpu.experimental.embedding.TPUEmbeddingV2

    • Add compute_sparse_core_stats, which lets sparse core users profile their data to obtain max_ids and max_unique_ids. These numbers are needed to configure the sparse core embedding mid-level API.
    • Remove the preprocess_features method since that's no longer needed.

Thanks to our Contributors

This release contains contributions from many people at Google, as well as:

Abdulaziz Aloqeely, Ahmad-M-Al-Khateeb, Akhil Goel, akhilgoe, Alexander Pivovarov, Amir Samani, Andrew Goodbody, Andrey Portnoy, Ashiq Imran, Ben Olson, Chao, Chase Riley Roberts, Clemens Giuliani, dependabot[bot], Dimitris Vardoulakis, Dragan Mladjenovic, ekuznetsov139, Elfie Guo, Faijul Amin, Gauri1 Deshpande, Georg Stefan Schmid, guozhong.zhuang, Hao Wu, Haoyu (Daniel), Harsha H S, Harsha Hs, Harshit Monish, Ilia Sergachev, Jane Liu, Jaroslav Sevcik, Jinzhe Zeng, Justin Dhillon, Kaixi Hou, Kanvi Khanna, LakshmiKalaKadali, Learning-To-Play, lingzhi98, Lu Teng, Matt Bahr, Max Ren, Meekail Zain, Mmakevic-Amd, mraunak, neverlva, nhatle, Nicola Ferralis, Olli Lupton, Om Thakkar, orangekame3, ourfor, pateldeev, Pearu Peterson, pemeliya, Peng Sun, Philipp Hack, Pratik Joshi, prrathi, rahulbatra85, Raunak, redwrasse, Robert Kalmar, Robin Zhang, RoboSchmied, Ruturaj Vaidya, sachinmuradi, Shawn Wang, Sheng Yang, Surya, Thibaut Goetghebuer-Planchon, Thomas Preud'Homme, tilakrayal, Tj Xu, Trevor Morris, wenchenvincent, Yimei Sun, zahiqbal, Zhu Jianjiang, Zoranjovanovic-Ns

Changelog

Sourced from tensorflow's changelog.

Release 2.17.0

TensorFlow

Breaking Changes

Known Caveats

Major Features and Improvements

Bug Fixes and Other Changes

  • GPU

    • Support for NVIDIA GPUs with compute capability 8.9 (e.g. L4 & L40) has been added to TF binary distributions (Python wheels).
  • Replace DebuggerOptions of TensorFlow Quantizer, and migrate to DebuggerConfig of StableHLO Quantizer.

  • Add TensorFlow to StableHLO converter to TensorFlow pip package.

  • TensorRT support: this is the last release supporting TensorRT. It will be removed in the next release.

  • NumPy 2.0 support: TensorFlow is going to support NumPy 2.0 in the next release. It may break some edge cases of TensorFlow API usage.

Keras

Breaking Changes

  • GPU
    • Support for NVIDIA GPUs with compute capability 5.x (Maxwell generation) has been removed from TF binary distributions (Python wheels).

... (truncated)

Commits
  • ad6d8cc Merge pull request #71345 from tensorflow-jenkins/version-numbers-2.17.0-6959
  • 8ca87bf Update version numbers to 2.17.0
  • b3dcff9 Merge pull request #70600 from tensorflow/r2.17-2d72742d40f
  • 742ccbb Add tensorflow support for 16k page sizes on arm64
  • 8581151 Merge pull request #70475 from tensorflow-jenkins/version-numbers-2.17.0rc1-8204
  • d6b2aa0 Update version numbers to 2.17.0-rc1
  • bb8057c Merge pull request #70454 from vladbelit/gcs_trailing_dot_undo
  • 72f4b02 Fix issues with TF GCS operations not working in certain environments.
  • 6ed0a1a Merge pull request #70358 from tensorflow/r2.17-b24db0b2a85
  • ffca2f5 Add back xla/stream_executor:cuda_platform to tf_additional_binary_deps.
  • Additional commits viewable in compare view

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- src/bindings/python/constraints.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt index 9c3eef6d75a286..38e1770a5c2989 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -19,7 +19,7 @@ patchelf<=0.17.2.1 h5py>=3.1.0,<3.12.0 docopt~=0.6.2 paddlepaddle==2.6.0 -tensorflow>=1.15.5,<2.17.0 +tensorflow>=1.15.5,<2.18.0 six~=1.16.0 protobuf>=3.18.1,<4.0.0 onnx==1.15.0 From 34f7a492a281f086301cf41d391e0233d11ec433 Mon Sep 17 00:00:00 2001 From: Jade Cho Date: Thu, 25 Jul 2024 20:17:29 +0900 Subject: [PATCH 22/54] [GPU] Fix a bug when fusing reorders for data type conversion (#25718) ### Tickets: - *147410* --- .../src/kernel_selector/kernel_base.cpp | 4 ++- .../unit/fusions/eltwise_fusion_test.cpp | 27 +++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernel_base.cpp index 890e086f28a6bd..271e8d6a620890 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernel_base.cpp @@ -140,8 +140,10 @@ JitConstants KernelBase::MakeFusedOpsJitConstants(const kernel_selector::base_pa if (conf.empty()) return jit; - if (params.fused_ops.size() == 1 && params.fused_ops[0].GetType() == KernelType::REORDER) + if (std::all_of(params.fused_ops.cbegin(), params.fused_ops.cend(), + [](fused_operation_desc desc) { return desc.GetType() == KernelType::REORDER; })) { return jit; + } try { for (auto& c : conf) { diff --git a/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp index 883279ed369dd9..d4c50ec84ac78a 100644 --- 
a/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/eltwise_fusion_test.cpp @@ -672,3 +672,30 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, eltwise_quantize_fs_b_yx_fsv32_exception, eltwise_test_params{ CASE_ELTWISE_FP16_BATCH_FS_B, 6, 6 }, eltwise_test_params{ CASE_ELTWISE_FP16_BATCH_B_FS, 6, 6 }, })); + +class eltwise_fusing_reorders : public EltwiseFusingTest { +public: + layout get_input_layout3(eltwise_test_params& p) { + return layout{ {1, 1, 1, p.input_size[3]}, p.input_type, p.input_format }; + } +}; +TEST_P(eltwise_fusing_reorders, reorders_for_data_type) { + auto p = GetParam(); + create_topologies( + input_layout("input", get_input_layout(p)), + data("data", get_mem(get_input_layout3(p))), + eltwise("eltwise", { input_info("input"), input_info("data") }, p.mode, p.default_type), + reorder("reorder1", input_info("eltwise"), format::bfyx, data_types::i32, {}, reorder_mean_mode::subtract, padding(), true), + reorder("reorder2", input_info("reorder1"), format::bfyx, data_types::f16, {}, reorder_mean_mode::subtract, padding(), true), + data("data2", get_mem(get_input_layout3(p))), + eltwise("eltwise_min", { input_info("reorder2"), input_info("data2") }, eltwise_mode::min, p.default_type), + reorder("out", input_info("eltwise_min"), p.default_format, data_types::f32) + ); + + tolerance = default_tolerance(p.input_type); + execute(p, true); +} + +INSTANTIATE_TEST_SUITE_P(fusings_gpu, eltwise_fusing_reorders, ::testing::ValuesIn(std::vector{ + eltwise_test_params{ { 1, 16, 16, 2 }, data_types::f16, data_types::f16, format::bfyx, data_types::f16, format::bfyx, eltwise_mode::max, 4, 6 }, +})); From 34bb671ab7eef5b8375c1df0110dae597e3907f1 Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Thu, 25 Jul 2024 15:55:12 +0200 Subject: [PATCH 23/54] [Docs][PyOV] return back snippet for stateful model (#25694) ### Details: - Snippet was removed here https://github.com/openvinotoolkit/openvino/issues/24510 - 
Returned it back and aligned it with C++ one (generally took it from this https://github.com/openvinotoolkit/openvino/pull/15978) - Added it to documentation ### Tickets: - *ticket-id* --------- Co-authored-by: Ivan Tikhonov --- docs/articles_en/assets/snippets/main.py | 2 +- .../snippets/ov_stateful_model_intro.py | 210 ++++++++++++++++++ .../snippets/ov_stateful_models_intro.cpp | 10 +- .../obtaining-stateful-openvino-model.rst | 38 +++- 4 files changed, 254 insertions(+), 6 deletions(-) create mode 100644 docs/articles_en/assets/snippets/ov_stateful_model_intro.py diff --git a/docs/articles_en/assets/snippets/main.py b/docs/articles_en/assets/snippets/main.py index a063a1645f0ec1..4d5429cd4b7925 100644 --- a/docs/articles_en/assets/snippets/main.py +++ b/docs/articles_en/assets/snippets/main.py @@ -9,7 +9,7 @@ from contextlib import redirect_stdout, redirect_stderr -skip_snippets = ["main.py", "__init__.py", "utils.py", "ov_common.py"] +skip_snippets = ["main.py", "__init__.py", "utils.py", "ov_common.py", "ov_stateful_model_intro.py"] def import_python_modules(directory, subdirectory=""): for item in os.listdir(directory): diff --git a/docs/articles_en/assets/snippets/ov_stateful_model_intro.py b/docs/articles_en/assets/snippets/ov_stateful_model_intro.py new file mode 100644 index 00000000000000..f9e84b2891a7a3 --- /dev/null +++ b/docs/articles_en/assets/snippets/ov_stateful_model_intro.py @@ -0,0 +1,210 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging as log +import numpy as np + +import openvino as ov +from openvino.runtime import opset13 as ops +from openvino.runtime.op.util import VariableInfo, Variable +from openvino.runtime.passes import LowLatency2, MakeStateful, Manager +from openvino.runtime.utils import replace_node + + +def state_model_example(): + #! 
[ov:stateful_model] + input = ops.parameter([1, 1], dtype=np.float32, name="data") + init_const = ops.constant([[0]], dtype=np.float32) + + # Typically ReadValue/Assign operations are presented as pairs in models. + # ReadValue operation reads information from an internal memory buffer, Assign operation writes data to this buffer. + # For each pair, its own Variable object must be created. + # Variable defines name, shape and type of the buffer. + var_info = VariableInfo() + var_info.data_shape = init_const.get_shape() + var_info.data_type = init_const.get_element_type() + var_info.variable_id = "variable0" + variable = Variable(var_info) + + # Creating Model + read = ops.read_value(init_const, variable) + add = ops.add(input, read) + assign = ops.assign(add, variable) + result = ops.result(add) + model = ov.Model(results=[result], sinks=[assign], parameters=[input], name="model") + #! [ov:stateful_model] + + return model + + +def low_latency_2_example(): + #! [ov:low_latency_2] + # Precondition for Model. + # TensorIterator and Parameter are created in body of TensorIterator with names + tensor_iterator_name = "TI_name" + body_parameter_name = "body_parameter_name" + idx = "0" # this is a first variable in the model + + # The State will be named "TI_name/param_name/variable_0" + state_name = tensor_iterator_name + "//" + body_parameter_name + "//" + "variable_" + idx + + #! [ov:get_ov_model] + core = ov.Core() + ov_model = core.read_model("path_to_the_model") + #! [ov:get_ov_model] + + # reshape input if needed + + #! [ov:reshape_ov_model] + ov_model.reshape({"X": ov.PartialShape([1, 1, 16])}) + #! [ov:reshape_ov_model] + + #! [ov:apply_low_latency_2] + manager = Manager() + manager.register_pass(LowLatency2()) + manager.run_passes(ov_model) + #! 
[ov:apply_low_latency_2] + + compied_model = core.compile_model(ov_model) + # Try to find the Variable by name + infer_request = compied_model.create_infer_request() + states = infer_request.query_state() + for state in states: + name = state.get_name() + if (name == state_name): + # some actions + #! [ov:low_latency_2] + pass + + #! [ov:low_latency_2_use_parameters] + manager.register_pass(LowLatency2(False)) + #! [ov:low_latency_2_use_parameters] + + +def replace_non_reshapable_const(): + #! [ov:replace_const] + # OpenVINO example. How to replace a Constant with hardcoded values of shapes in the model with another one with the new values. + # Assume we know which Constant (const_with_hardcoded_shape) prevents the reshape from being applied. + # Then we can find this Constant by name in the model and replace it with a new one with the correct shape. + core = ov.Core() + model = core.read_model("path_to_model"); + # Creating the new Constant with a correct shape. + # For the example shown in the picture above, the new values of the Constant should be 1, 1, 10 instead of 1, 49, 10 + new_const = ops.constant( """value_with_correct_shape, type""") + for node in model.get_ops(): + # Trying to find the problematic Constant by name. + if node.get_friendly_name() != "name_of_non_reshapable_const": + continue + # Replacing the problematic Constant with a new one. Do this for all the problematic Constants in the model, then + # you can apply the reshape feature. + replace_node(node, new_const) + + #! [ov:replace_const] + + +def apply_make_stateful_tensor_names(): + #! [ov:make_stateful_tensor_names] + core = ov.Core() + ov_model = core.read_model("path_to_the_model") + tensor_names = {"tensor_name_1": "tensor_name_4", + "tensor_name_3": "tensor_name_6"} + manager = Manager() + manager.register_pass(MakeStateful(tensor_names)) + manager.run_passes(ov_model) + #! [ov:make_stateful_tensor_names] + + +def apply_make_stateful_ov_nodes(): + #! 
[ov:make_stateful_ov_nodes] + core = ov.Core() + ov_model = core.read_model("path_to_the_model") + # Parameter_1, Result_1, Parameter_3, Result_3 are + # ops.parameter/ops.result in the ov_model + pairs = ["""(Parameter_1, Result_1), (Parameter_3, Result_3)"""] + manager = Manager() + manager.register_pass(MakeStateful(pairs)) + manager.run_passes(ov_model) + #! [ov:make_stateful_ov_nodes] + + +def main(): + + #! [ov:state_api_usage] + # 1. Load inference engine + log.info("Loading OpenVINO") + core = ov.Core() + + # 2. Read a model + log.info("Loading model files") + model = core.read_model("path_to_ir_xml_from_the_previous_section"); + model.get_parameters()[0].set_layout("NC"); + ov.set_batch(model, 1); + + # 3. Load the model to CPU + compiled_model = core.compile_model(model, "CPU") + + # 4. Create Infer Request + infer_request = compiled_model.create_infer_request() + + # 5. Reset memory states before starting + states = infer_request.query_state() + + if len(states) != 1: + log.error(f"Invalid queried state number. Expected 1, but got {str(states.size())}") + return -1 + + infer_request.reset_state() + + # 6. Inference + input_data = np.arange(start=1, stop=12, dtype=np.float32) + + # This example demonstrates how to work with OpenVINO State API. + # Input_data: some array with 12 float numbers + + # Part1: read the first four elements of the input_data array sequentially. + # Expected output for the first utterance: + # sum of the previously processed elements [ 1, 3, 6, 10] + + # Part2: reset state value (set to 0) and read the next four elements. + # Expected output for the second utterance: + # sum of the previously processed elements [ 5, 11, 18, 26] + + # Part3: set state value to 5 and read the next four elements. 
+ # Expected output for the third utterance: + # sum of the previously processed elements + 5 [ 14, 24, 35, 47] + target_state = states[0] + + # Part 1 + log.info("Infer the first utterance") + for next_input in range(len(input_data)/3): + infer_request.infer({0 : input_data[next_input]}) + state_buf = target_state.state.data + log.info(state_buf[0]) + + # Part 2 + log.info("\nReset state between utterances...\n") + target_state.reset() + + log.info("Infer the second utterance") + + for next_input in range(len(input_data)/3, (len(input_data)/3 * 2)): + infer_request.infer({0 : input_data[next_input]}) + state_buf = target_state.state.data + log.info(state_buf[0]) + + # Part 3 + log.info("\nSet state value between utterances to 5...\n") + data = np.asarray([5], dtype=np.float32) + tensor = ov.Tensor(data, shared_memory=True) + target_state.state = tensor + + log.info("Infer the third utterance") + for next_input in range((len(input_data)/3 * 2), len(input_data)): + infer_request.infer({0 : input_data[next_input]}) + + state_buf = target_state.state.data + log.info(state_buf[0]) + + log.info("Execution successful") + #! [ov:state_api_usage] + return 0 diff --git a/docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp b/docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp index 3f3cd2cb713a19..01170795dbea22 100644 --- a/docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp +++ b/docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp @@ -14,14 +14,16 @@ using namespace ov; void state_network_example () { - //! [ov:state_network] + //! [ov:stateful_model] // ... auto input = std::make_shared(ov::element::f32, ov::Shape{1, 1}); auto init_const = ov::opset8::Constant::create(ov::element::f32, ov::Shape{1, 1}, {0}); - // The ReadValue/Assign operations must be used in pairs in the network. - // For each such a pair, its own variable object must be created. + // Typically ReadValue/Assign operations are presented as pairs in models. 
+ // ReadValue operation reads information from an internal memory buffer, Assign operation writes data to this buffer. + // For each pair, its own Variable object must be created. + // Variable defines name, shape and type of the buffer. const std::string variable_name("variable0"); ov::op::util::VariableInfo var_info = {init_const->get_shape(), init_const->get_element_type(), @@ -37,7 +39,7 @@ void state_network_example () { auto model = std::make_shared(ov::ResultVector({result}), ov::SinkVector({save}), ov::ParameterVector({input})); - //! [ov:state_network] + //! [ov:stateful_model] } void low_latency_2_example() { diff --git a/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst b/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst index a350d1bcbb5a77..a7db3317203045 100644 --- a/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst +++ b/docs/articles_en/openvino-workflow/running-inference/stateful-models/obtaining-stateful-openvino-model.rst @@ -60,12 +60,20 @@ Parameter/Result tensor names. If there are no tensor names, .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:make_stateful_tensor_names] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:make_stateful_tensor_names] .. tab-item:: Using Parameter/Result operations .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:make_stateful_ov_nodes] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:make_stateful_ov_nodes] .. tab-item:: command line @@ -114,6 +122,10 @@ To apply LowLatency2 Transformation, follow the instruction below: .. 
doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:get_ov_model] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:get_ov_model] 2. Change the number of iterations inside TensorIterator/Loop nodes in the model using the :doc:`Reshape <../changing-input-shape>` feature. @@ -129,6 +141,10 @@ To apply LowLatency2 Transformation, follow the instruction below: .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:reshape_ov_model] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:reshape_ov_model] **Unrolling**: If the LowLatency2 transformation is applied to a model containing TensorIterator/Loop nodes with exactly one iteration inside, these nodes are unrolled. @@ -143,6 +159,10 @@ To apply LowLatency2 Transformation, follow the instruction below: .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:apply_low_latency_2] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:apply_low_latency_2] (Optional) Use Const Initializer argument: @@ -159,6 +179,10 @@ To apply LowLatency2 Transformation, follow the instruction below: .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:low_latency_2_use_parameters] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:low_latency_2_use_parameters] .. image:: ../../../assets/images/llt2_use_const_initializer.svg @@ -178,6 +202,10 @@ To apply LowLatency2 Transformation, follow the instruction below: .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:low_latency_2] + + .. 
doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:low_latency_2] 4. Use state API. See sections :doc:`OpenVINO State API <../stateful-models>`, @@ -208,6 +236,10 @@ To apply LowLatency2 Transformation, follow the instruction below: .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp :fragment: [ov:replace_const] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:replace_const] Stateful Model from Scratch ################################## @@ -228,7 +260,11 @@ a sink from `ov::Model` after deleting the node from the graph with the `delete_ .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.cpp :language: cpp - :fragment: [ov:state_network] + :fragment: [ov:stateful_model] + + .. doxygensnippet:: docs/articles_en/assets/snippets/ov_stateful_models_intro.py + :language: py + :fragment: [ov:stateful_model] .. note:: From f6ca0e7dec066e994a3f6c0f16e48a9a2842df75 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Thu, 25 Jul 2024 17:48:30 +0200 Subject: [PATCH 24/54] [DOCS] Updating Contribute to OpenVINO articles (#23650) Updating the existing articles about making contributions to OpenVINO, adding an article to docs.openvino.ai. 
--------- Co-authored-by: Tatiana Savina Co-authored-by: Przemyslaw Wysocki --- docs/articles_en/about-openvino.rst | 3 +- .../about-openvino/contributing.rst | 169 ++++++++++++++++++ .../contributing/code-contribution-guide.rst | 88 +++++++++ 3 files changed, 258 insertions(+), 2 deletions(-) create mode 100644 docs/articles_en/about-openvino/contributing.rst create mode 100644 docs/articles_en/about-openvino/contributing/code-contribution-guide.rst diff --git a/docs/articles_en/about-openvino.rst b/docs/articles_en/about-openvino.rst index a9b599960d2e2b..dbe5f6d3c1061f 100644 --- a/docs/articles_en/about-openvino.rst +++ b/docs/articles_en/about-openvino.rst @@ -1,5 +1,3 @@ -.. {#about_openvino} - About OpenVINO ============== @@ -10,6 +8,7 @@ About OpenVINO about-openvino/performance-benchmarks about-openvino/compatibility-and-support + about-openvino/contributing Release Notes OpenVINO is a toolkit for simple and efficient deployment of various deep learning models. diff --git a/docs/articles_en/about-openvino/contributing.rst b/docs/articles_en/about-openvino/contributing.rst new file mode 100644 index 00000000000000..f14e5f58249259 --- /dev/null +++ b/docs/articles_en/about-openvino/contributing.rst @@ -0,0 +1,169 @@ +Contribute to OpenVINO +======================== + +.. toctree:: + :maxdepth: 1 + :hidden: + + contributing/code-contribution-guide + +OpenVINO™ is always looking for opportunities to improve and your contributions +play a big role in this process. Here are four ways you can make OpenVINO better: + +- `Provide feedback <#provide-feedback>`__ +- `Contribute code changes <#contribute-code-changes>`__ +- `Improve documentation <#improve-documentation>`__ +- `Promote and support OpenVINO <#promote-and-support-openvino>`__ + + +:fas:`comments` Provide feedback +################################ + +.. 
rubric:: Report bugs / issues + :name: report-bugs-issues + +If you notice unexpected behavior in OpenVINO or its components, you can +`create a new issue `__ +in the GitHub issue tracker. + +.. rubric:: Propose improvements + :name: propose-improvements + +If you want to share your ideas for improving OpenVINO: + +- Open a new `GitHub Discussion `__. +- Create a `Feature Request Issue `__ + if your idea is already well defined. + +In both cases, provide a detailed description and list potential use cases, +benefits, and challenges. Keep in mind that even if your input is not immediately +prioritized, it may be used at a later time or undertaken by the community. + + +:fas:`code-branch` Contribute code changes +########################################## + +Always check if the change is still needed! Verify if +`the issue `__ or +`request `__ is still open +and nobody has started working on it. If the ticket is already work in progress, +you can always ask if you can help. + +**Address only the issues that affect the master or** +:doc:`LTS release branches <./release-notes-openvino/release-policy>`. + +**Do not start work on contributions if a proper issue/request has not been created.** + +.. tip:: + + If you want to start with something simple, check out + `first-time contributions `__. + + +.. rubric:: Fix bugs + :name: fix-bugs + +Choose one of the issues reported in +`GitHub Issue Tracker `__ and +`create a Pull Request `__ +(PR) addressing it. + +If you find a new bug and want to fix it, you should still +create a new issue before working on the PR. This way, it will be easier for other +developers to track changes. + +.. rubric:: Develop new features + :name: develop-new-features + +If you find a `Feature Request `__ +you want to work on, make sure it is clearly defined. If you have any doubts, +or the change is complex, `discuss it `__ +with OpenVINO developers first. 
+ +If you have an idea for a new feature and want +to develop it, you should still create a Feature Request before working on the +PR. This way, it will be easier for other developers to track changes. + +.. rubric:: Develop a new device plugin + :name: develop-new-device-plugin + +If you want to run inference on a device that is currently not supported, you +can see how to develop a new plugin for it in the +`Plugin Developer Guide `__. + + +:fas:`file-alt` Improve documentation +##################################### + +OpenVINO user documentation is built from several sources, mainly the files in +the `docs/articles_en `__ +folder, using `Sphinx `__ and the +`reStructuredText `__ +markup language. + +OpenVINO `developer documentation `__ +is available only in markdown in the `docs/dev `__ +folder. + +To edit docs, consider using the Editor’s +`guide `__ +and contacting `documentation maintainers `__, +who will help you with information architecture and formatting, as well as +review, adjust, and merge the PR. + +.. rubric:: Review user documentation + :name: review-user-documentation + +In most cases, creating a PR is enough to correct a documentation mistake, improve +the language, and update or extend the information. For your convenience, the +top-right panel of most pages includes the “Edit on GitHub” button that will +take you to the source file of the given article. + +.. rubric:: Write new content + :name: write-new-content + +For more extensive changes in docs, reach out to any of the +`documentation maintainers `__ +to discuss the new content. + + +:fas:`bullhorn` Promote and support OpenVINO +############################################ + +.. rubric:: Popularize OpenVINO + :name: popularize-openvino + +Articles, tutorials, blog posts, demos, videos, and any other involvement in the +OpenVINO community is more than welcome. 
If you discuss or present OpenVINO on +various social platforms, you are raising awareness of the product among AI +enthusiasts and enabling other people to discover the toolkit. + +Feel free to reach out to OpenVINO developers if you need help with making a +contribution. You can also contact +`documentation maintainers `__ +, if you need help with visuals, brand materials, or content creation in general. + +.. rubric:: Help other community members + :name: help-community + +If you are an experienced OpenVINO user and want to help, you can share your +expertise with the community at any time. Check GitHub +`Discussions `__ +and `Issues `__ to see if +you can help someone. + +.. note:: + + By contributing to the OpenVINO project, you agree that your contributions + will be licensed under `the terms of the OpenVINO repository `__. + + +Additional Resources +##################### + +- :doc:`Code Contribution Guide <./contributing/code-contribution-guide>` +- Choose a `"Good First Issue" `__. +- Learn more about `OpenVINO architecture `__. +- Check out a `blog post on contributing to OpenVINO `__. +- Visit `Intel DevHub Discord server `__ to join + discussions and talk to OpenVINO developers. \ No newline at end of file diff --git a/docs/articles_en/about-openvino/contributing/code-contribution-guide.rst b/docs/articles_en/about-openvino/contributing/code-contribution-guide.rst new file mode 100644 index 00000000000000..a74bb586e18130 --- /dev/null +++ b/docs/articles_en/about-openvino/contributing/code-contribution-guide.rst @@ -0,0 +1,88 @@ +Code Contribution Guide +======================= + +This section will start you off with a few simple steps to begin your code contribution. +If you have any doubts, talk to +`the development team `__. +Remember, your questions help us keep improving OpenVINO. + + +1. **Choose the issue you want to work on.** + + Choose one of the existing `issues `__ / + requests. The `“Good First Issue” `__ + board is a good place to start. 
If you have a new idea for the contribution, + make sure to first create a proper issue, discussion, or feature request. + + Here are some of the components you may choose to work on. + + .. tab-set:: + + .. tab-item:: APIs + + - `Core C++ API `__ + - `C API `__ + - `Python API `__ + - `JavaScript (Node.js) API `__ + + .. tab-item:: Frontends + + - `IR Frontend `__ + - `ONNX Frontend `__ + - `PaddlePaddle Frontend `__ + - `PyTorch Frontend `__ + - `TensorFlow Frontend `__ + - `TensorFlow Lite Frontend `__ + + .. tab-item:: Plugins + + - `Auto plugin `__ + - `CPU plugin `__ + - `GPU plugin `__ + - `NPU plugin `__ + - `Hetero plugin `__ + - `Template plugin `__ + + .. tab-item:: Tools + + - `Benchmark Tool `__ + - `Model Conversion `__ + +2. **Assign yourself to the issue.** + + To get assigned to a task, simply leave a comment with the ``.take`` command in + the selected issue. You can always ask OpenVINO developers for guidance, + both technical and organizational: + + - assign users in the **“Contact points”** section, + - visit `Intel DevHub Discord server `__ to ask + questions in the channel dedicated to **“Good First Issue”** support, or any other. + +3. **Build OpenVINO.** + + In order to build OpenVINO, follow the + `build instructions for your specific OS `__. + + Use the local build and the information found in the issue description to + develop your contribution. + +4. **Submit a PR with your changes.** + + Follow the `guidelines `__ + and do not forget to `link your Pull Request to the issue `__ + it addresses. + +5. **Wait for a review.** + + We will make sure to review your **Pull Request** as soon as possible and provide feedback. + You can expect a merge once your changes have been validated with automatic tests and + approved by `maintainers `__. + + +Additional Resources +##################### + +- Choose a `“Good First Issue” `__. +- Learn more about `OpenVINO architecture `__. +- Check out a `blog post on contributing to OpenVINO `__. 
+- Visit `Intel DevHub Discord server `__ to join discussions and talk to OpenVINO developers. \ No newline at end of file From a9c8b990f2116f123a36c56928458789540bc267 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Thu, 25 Jul 2024 18:27:27 +0200 Subject: [PATCH 25/54] [DOCS] Updating NPU device article for master (#25734) Porting: https://github.com/openvinotoolkit/openvino/pull/25727 --- .../npu-device.rst | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst index 4c262b49f6f704..f701774d19e42e 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst @@ -164,8 +164,8 @@ offer a limited set of supported OpenVINO features. **ov::intel_npu::compilation_mode_params** -``ov::intel_npu::compilation_mode_params`` is an NPU-specific property that allows to -control model compilation for NPU. +``ov::intel_npu::compilation_mode_params`` is an NPU-specific property that allows +control of model compilation for NPU. .. note:: @@ -176,7 +176,7 @@ Following configuration options are supported: **optimization-level** -Defines a preset of optimization passes to be applied during compilation. +Defines an optimization effort hint to the compiler. .. list-table:: :widths: 10 200 @@ -185,7 +185,7 @@ Defines a preset of optimization passes to be applied during compilation. * - **Value** - **Description** * - 0 - - Reduced subset of optimization passes. Smaller compile time. + - Reduced subset of optimization passes. May result in smaller compile time. * - 1 - **Default.** Balanced performance/compile time. 
* - 2 @@ -193,7 +193,7 @@ Defines a preset of optimization passes to be applied during compilation. **performance-hint-override** -An extension for LATENCY mode being specified using ``ov::hint::performance_mode`` +The LATENCY mode can be overridden by specifying ``ov::hint::performance_mode`` Has no effect for other ``ov::hint::PerformanceMode`` hints. .. list-table:: @@ -207,15 +207,31 @@ Has no effect for other ``ov::hint::PerformanceMode`` hints. * - latency - Prioritize performance over power efficiency. -.. tab-set:: +Usage example: - .. tab-item:: Usage example +.. code-block:: - .. code-block:: + map config = {ov::intel_npu::compilation_mode_params.name(), ov::Any("optimization-level=1 performance-hint-override=latency")}; + + compile_model(model, config); + +**npu_turbo** + +The turbo mode, where available, provides a hint to the system to maintain the +maximum NPU frequency and memory throughput within the platform TDP limits. +The turbo mode is not recommended for sustainable workloads due to higher power +consumption and potential impact on other compute resources. + +.. code-block:: + + core.set_property("NPU", ov::intel_npu::turbo(true)); + +or + +.. 
code-block:: - map config = {ov::intel_npu::compilation_mode_params.name(), ov::Any("optimization-level=1 performance-hint-override=latency")}; + core.compile_model(ov_model, "NPU", {ov::intel_npu::turbo(true)}); - compile_model(model, config); Limitations ############################# From e110479442bf22c8292d722f07d405a0e38ac58f Mon Sep 17 00:00:00 2001 From: Wilson Seok Date: Thu, 25 Jul 2024 18:05:05 -0700 Subject: [PATCH 26/54] [GPU] Add condition for dynamic shape split_lengths for in place crop buffer fusing (#25595) ### Details: - Add condition for dynamic shape split_lengths for in place crop buffer fusing ### Tickets: - 146739 --- src/plugins/intel_gpu/src/graph/crop.cpp | 2 +- .../graph_optimizer/prepare_buffer_fusing.cpp | 8 ++ .../passes/prepare_buffer_fusing_test.cpp | 85 +++++++++++++++++++ 3 files changed, 94 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/graph/crop.cpp b/src/plugins/intel_gpu/src/graph/crop.cpp index 146a1fa89b400b..09c5f01f216e57 100644 --- a/src/plugins/intel_gpu/src/graph/crop.cpp +++ b/src/plugins/intel_gpu/src/graph/crop.cpp @@ -250,7 +250,7 @@ crop_inst::typed_primitive_inst(network& network, crop_node const& node) : paren "Invalid Batch offset: exceeds data for output!"); } - if (node.can_be_optimized()) { + if (!node.is_dynamic() && node.can_be_optimized()) { update_output_memory(); } } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index e3471b37c05bd9..7f1fb69446edb9 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -500,6 +500,14 @@ bool crop_in_place_optimization::match(const program_node& node, if (node.is_constant()) return false; + // do not optimize variadic_split crop when either input1 or input2 is not constant. 
+ // VariadicSplit ngraph shape infer requires value of axis(input1) and split_lengths(input2). + // And non_constant input1/input2 makes risky execution of runtime buffer fusing. + auto& crop_node = node.as(); + if ((crop_node.get_primitive()->op_mode == cldnn::crop_ngraph_op_mode::variadic_split) && + (!crop_node.get_dependency(1).is_constant() || !crop_node.get_dependency(2).is_constant())) + return false; + if (node.get_users().size() > 0) { if (node.get_program().is_body_program() && node.get_dependency(0).is_type()) { return false; diff --git a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp index e4a077594c7a7e..e5506388eba273 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/prepare_buffer_fusing_test.cpp @@ -777,6 +777,91 @@ TEST(prepare_buffer_fusing, in_place_crop_dynamic) { ASSERT_EQ(output_ptr_3[i], out3[i]); } +TEST(prepare_buffer_fusing, in_place_crop_dynamic_split_lengths) { + auto& engine = get_test_engine(); + + auto in_layout = layout{ ov::PartialShape{-1, -1, -1}, data_types::f32, format::bfyx}; + auto in2_layout = layout{ ov::PartialShape{-1, -1}, data_types::f32, format::bfyx}; + auto input_mem = engine.allocate_memory({ {1, 2, 4}, data_types::f32, format::bfyx }); + auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::u8, format::bfyx }); + auto bias_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); + auto scale_mem = engine.allocate_memory({ {8, 1}, data_types::f32, format::bfyx }); + auto zp_mem = engine.allocate_memory({ {8, 1}, data_types::f32, format::bfyx }); + auto axis_mem = engine.allocate_memory({ {}, data_types::i64, format::bfyx }); + auto shapeof_mem = engine.allocate_memory({ {2, 6}, data_types::f32, format::bfyx }); + + int64_t axis = 2; + set_values(input_mem, { -0.5f, 2.0f, 0.5f, 1.0f, + 0.5f, -2.0f, -0.5f, -1.0f }); 
+ set_values(axis_mem, {axis}); + set_values(shapeof_mem, { 1.0f, 2.0f, 3.0f, 4.0f, + 5.0f, 6.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, 12.0f}); + set_values(weights_mem, { 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 0, + 15, 14, 13, 12, + 11, 10, 9, 8, + 7, 6, 5, 4, + 3, 2, 1, 0}); + set_values(bias_mem, { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, 2.0f }); + set_values(scale_mem, { 2.0f, 4.0f, -2.0f, -4.0f, 0.5f, -0.5f, 2.0f, 2.0f }); + set_values(zp_mem, { 1.0f, 2.0f, 2.0f, 1.0f, 4.0f, 1.0f, 6.0f, 2.0f }); + + std::vector out1 = { 13.f, 58.f, -11.f, -62.f }; + std::vector out2 = { -51.f, -108.f, 18.5f, -18.f, 1.f, -4.f, 57.f, 100.f, -8.5f, 6.f, 13.f, 8.f }; + std::vector out3 = { 13.f, 58.f, -51.f, -108.f, 18.5f, -18.f, 1.f, -4.f, -11.f, -62.f, 57.f, 100.f, -8.5f, 6.f, 13.f, 8.f }; + + cldnn::crop_ngraph_op_mode op_mode = cldnn::crop_ngraph_op_mode::variadic_split; + topology topology( + input_layout("input", in_layout), + input_layout("input_shapeof", in2_layout), + data("axis", axis_mem), + data("weights", weights_mem), + data("bias", bias_mem), + data("scale", scale_mem), + data("zp", zp_mem), + fully_connected("fc", input_info("input"), "weights", "bias", "scale", "zp", data_types::f32, 3, 2), + shape_of("shapeof", input_info("input_shapeof"), cldnn::data_types::i64), + crop("crop1", { input_info("fc"), input_info("axis"), input_info("shapeof") }, cldnn::tensor(1), cldnn::tensor(0), op_mode, 0, axis), + reorder("output1", input_info("crop1"), format::bfyx, data_types::f32), + crop("crop2", { input_info("fc"), input_info("axis"), input_info("shapeof") }, cldnn::tensor(1), cldnn::tensor(0), op_mode, 1, axis), + reshape("reshape", input_info("crop2"), true, std::vector{0, 0, 3, 2}, ov::PartialShape{-1, -1, 3, 2}, cldnn::reshape::reshape_mode::base), + reorder("output2", input_info("reshape"), format::bfyx, data_types::f32, std::vector(), reorder_mean_mode::subtract, padding(), true), + reorder("output3", input_info("fc"), format::bfyx, 
data_types::f32) + ); + + auto config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + network network(engine, topology, config); + + network.set_input_data("input", input_mem); + network.set_input_data("input_shapeof", shapeof_mem); + + std::map outputs; + EXPECT_NO_THROW(outputs = network.execute()); + + auto output = outputs.at("output1").get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + + for (size_t i = 0; i < out1.size(); i++) + ASSERT_EQ(output_ptr[i], out1[i]); + + auto output_2 = outputs.at("output2").get_memory(); + cldnn::mem_lock output_ptr_2(output_2, get_test_stream()); + + for (size_t i = 0; i < out2.size(); i++) + ASSERT_EQ(output_ptr_2[i], out2[i]); + + auto output_3 = outputs.at("output3").get_memory(); + cldnn::mem_lock output_ptr_3(output_3, get_test_stream()); + + for (size_t i = 0; i < out3.size(); i++) + ASSERT_EQ(output_ptr_3[i], out3[i]); +} + // Testing for implicit crop along batch axis and outer padding optimzing. // Outer padding opt includes opt out of reshape and reorder which has padded input only in batch axis // This optimzing also includes offset(outer axis padded input) handling of oneDNN primitive. 
From 4ad2202634724029dedbe518087a1f294aa7181b Mon Sep 17 00:00:00 2001 From: Wilson Seok Date: Thu, 25 Jul 2024 18:27:35 -0700 Subject: [PATCH 27/54] [GPU] Use get_output_pshape() in crop_in_place_optimization::match() to check user output dynamic shape (#25632) ### Details: - Use get_output_pshape() in crop_in_place_optimization::match() to check user output dynamic shape because user would have valid_output_layouts=false ### Tickets: - 146725 --- .../src/graph/graph_optimizer/prepare_buffer_fusing.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index 7f1fb69446edb9..680445296eb606 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -470,7 +470,7 @@ bool crop_in_place_optimization::match(const program_node& node, for (auto user : node.get_users()) { // If the user node's output shape is already static, the padding // w/ dyn pad mask will not be propagated properly at runtime - if (node.is_dynamic() && !user->get_output_layout().is_dynamic()) + if (node.is_dynamic() && !user->get_output_pshape().is_dynamic()) return false; // do not optimize when next node is concatenation which is not output if (user->is_type() && !user->is_output()) From e01704017f2de871f556b49a4fd9d903bbba30d4 Mon Sep 17 00:00:00 2001 From: Wilson Seok Date: Thu, 25 Jul 2024 20:50:03 -0700 Subject: [PATCH 28/54] [GPU] Avoid crop buffer fusing when dynamic shape and squeeze/unsqueeze reshape mode (#25700) ### Details: - Avoid crop buffer fusing when dynamic shape and squeeze/unsqueeze reshape mode ### Tickets: - 146626 --- .../src/graph/graph_optimizer/prepare_buffer_fusing.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index 680445296eb606..17cc9e9f42d38a 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -484,10 +484,10 @@ bool crop_in_place_optimization::match(const program_node& node, if (node.is_dynamic() && (user->is_type() || user->is_type())) return false; if (user->is_type()) { - // runtime buffer fusing is only handled when there is only one reshape user - if (node.is_dynamic() && node.get_users().size() != 1) - return false; auto& reshape_node = user->as(); + // runtime buffer fusing is only handled when there is only one reshape user and reshape mode is base + if (node.is_dynamic() && (node.get_users().size() != 1 || reshape_node.get_primitive()->mode != reshape::reshape_mode::base)) + return false; if (can_reshape_be_optimized(reshape_node) && (!node.is_dynamic() || !reshape_node.is_runtime_propagatable_padding())) return false; From cc42cdf37d543841e52cbb072a42329708aa1901 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Fri, 26 Jul 2024 07:19:13 +0200 Subject: [PATCH 29/54] [DOCS] Add max_new_tokens to every generate call in GenAI guide (#25739) Porting: https://github.com/openvinotoolkit/openvino/pull/25705 --- .../llm_inference_guide/genai-guide.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst index 79c3471f3ab783..08efa7406e42b5 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst @@ -44,7 +44,7 @@ will not work with these instructions, make sure to import openvino_genai as ov_genai pipe = 
ov_genai.LLMPipeline(model_path, "CPU") - print(pipe.generate("The Sun is yellow because")) + print(pipe.generate("The Sun is yellow because", max_new_tokens=100)) .. tab-item:: C++ :sync: cpp @@ -57,7 +57,7 @@ will not work with these instructions, make sure to int main(int argc, char* argv[]) { std::string model_path = argv[1]; ov::genai::LLMPipeline pipe(model_path, "CPU"); - std::cout << pipe.generate("The Sun is yellow because"); + std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(100)); } The `LLMPipeline` is the main object used for decoding. You can construct it directly from the @@ -85,7 +85,7 @@ below, where a lambda function outputs words to the console immediately upon gen pipe = ov_genai.LLMPipeline(model_path, "CPU") streamer = lambda x: print(x, end='', flush=True) - pipe.generate("The Sun is yellow because", streamer=streamer) + pipe.generate("The Sun is yellow because", streamer=streamer, max_new_tokens=100) .. tab-item:: C++ @@ -104,7 +104,7 @@ below, where a lambda function outputs words to the console immediately upon gen // false means continue generation. return false; }; - pipe.generate("The Sun is yellow because", ov::genai::streamer(streamer)); + pipe.generate("The Sun is yellow because", ov::genai::streamer(streamer), ov::genai::max_new_tokens(100)); } You can also create your custom streamer for more sophisticated processing: @@ -132,7 +132,7 @@ You can also create your custom streamer for more sophisticated processing: # Decode tokens and process them. pipe = ov_genai.LLMPipeline(model_path, "CPU") - pipe.generate("The Sun is yellow because", streamer=CustomStreamer()) + pipe.generate("The Sun is yellow because", streamer=CustomStreamer(), max_new_tokens=100) .. 
tab-item:: C++ @@ -164,7 +164,7 @@ You can also create your custom streamer for more sophisticated processing: std::string model_path = argv[1]; ov::genai::LLMPipeline pipe(model_path, "CPU"); - pipe.generate("The Sun is yellow because", ov::genai::streamer(custom_streamer)); + pipe.generate("The Sun is yellow because", ov::genai::streamer(custom_streamer), ov::genai::max_new_tokens(100)); } Using GenAI in Chat Scenario From d13b3d9a46a9ec48e8b7f5740b307de228774003 Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Fri, 26 Jul 2024 06:39:24 +0100 Subject: [PATCH 30/54] [DOC] Quantization Scheme (#25295) ### Details: - *[DOC] Quantization Scheme* --------- Co-authored-by: Karol Blaszczak --- .../assets/images/quantization_scheme.svg | 3 +++ .../low-precision-transformations.rst | 1 + .../quantization-scheme.rst | 27 +++++++++++++++++++ 3 files changed, 31 insertions(+) create mode 100644 docs/articles_en/assets/images/quantization_scheme.svg create mode 100644 docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations/quantization-scheme.rst diff --git a/docs/articles_en/assets/images/quantization_scheme.svg b/docs/articles_en/assets/images/quantization_scheme.svg new file mode 100644 index 00000000000000..b58934ec08e57d --- /dev/null +++ b/docs/articles_en/assets/images/quantization_scheme.svg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d197730e090d582d7ae1f68d139564b845bba5eb9aa168437c2b80f53545e706 +size 100328 diff --git a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst index c8e041e5a367e9..5d922ef8bdc4e7 100644 --- a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst +++ 
b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations.rst @@ -12,6 +12,7 @@ OpenVINO™ Low Precision Transformations :caption: Low Precision Transformations :hidden: + Quantization Scheme Attributes Step 1. Prerequisites transformations Step 2. Markup transformations diff --git a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations/quantization-scheme.rst b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations/quantization-scheme.rst new file mode 100644 index 00000000000000..90d757c10668f3 --- /dev/null +++ b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/low-precision-transformations/quantization-scheme.rst @@ -0,0 +1,27 @@ +Quantization Scheme +============================== + + +.. meta:: + :description: Learn about quantization scheme. + +.. toctree:: + :maxdepth: 1 + :caption: Low Precision Transformations + +Key steps in the quantization scheme: + +* Low Precision Transformations: ``FakeQuantize`` decomposition to Quantize with a low precision output and Dequantize. For more details, refer to the :doc:`Quantize decomposition <../low-precision-transformations>` section. +* Low Precision Transformations: move Dequantize through operations. For more details, refer to the :doc:`Main transformations <./step3-main>` section. +* Plugin: fuse operations with Quantize and inference in low precision. + +Quantization scheme features: + +* Quantization operation is expressed through the ``FakeQuantize`` operation, which involves more than scale and shift. For more details, see: :doc:`FakeQuantize-1 <../../../../openvino-ir-format/operation-sets/operation-specs/quantization/fake-quantize-1>`. 
If the ``FakeQuantize`` input and output intervals are the same, ``FakeQuantize`` degenerates to ``Multiply``, ``Subtract`` and ``Convert`` (scale & shift). +* Dequantization operation is expressed through element-wise ``Convert``, ``Subtract`` and ``Multiply`` operations. ``Convert`` and ``Subtract`` are optional. These operations can be handled as typical element-wise operations, for example, fused or transformed to another. +* OpenVINO plugins fuse ``Dequantize`` and ``Quantize`` operations after a low precision operation and do not fuse ``Quantize`` before it. + +Here is a quantization scheme example for int8 quantization applied to a part of a model with two ``Convolution`` operations in CPU plugin. + +.. image:: ../../../../../assets/images/quantization_scheme.svg + :alt: Quantization scheme From ab73c77c5deb8a8544b5b0564d8f1b78e9b267e5 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Fri, 26 Jul 2024 08:50:56 +0200 Subject: [PATCH 31/54] [DOCS] Updating NNCF documentation (#25738) Porting: https://github.com/openvinotoolkit/openvino/pull/25667 --- .../weight-compression.rst | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst index a393a0925cba3c..da4f34b8806aea 100644 --- a/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst +++ b/docs/articles_en/openvino-workflow/model-optimization-guide/weight-compression.rst @@ -52,11 +52,12 @@ Compress Model Weights **8-bit weight quantization** method offers a balance between model size reduction and maintaining accuracy, which usually leads to significant performance improvements for Transformer-based models. Models with 8-bit compressed weights are performant on the -vast majority of supported CPU and GPU platforms. +vast majority of supported CPU and GPU platforms. 
By default, weights are compressed +asymmetrically to "INT8_ASYM" mode. -The code snippet below shows how to do 8-bit quantization of the model weights represented -in OpenVINO IR using NNCF: +The code snippet below shows how to do asymmetrical 8-bit quantization of the model weights +represented in OpenVINO IR using NNCF: .. tab-set:: @@ -72,7 +73,7 @@ Now, the model is ready for compilation and inference. It can be also saved into a compressed format, resulting in a smaller binary file. **4-bit weight quantization** method stands for an INT4-INT8 mixed-precision weight quantization, -where INT4 is considered as the primary precision and INT8 is the backup one. +where INT4 is considered as the primary precision and asymmetric INT8 is the backup one. It usually results in a smaller model size and lower inference latency, although the accuracy degradation could be higher, depending on the model. @@ -100,7 +101,7 @@ memory reduction, speed gain, and accuracy loss. - Memory Reduction - Latency Improvement - Accuracy Loss - * - INT8 + * - INT8 Asymmetric - Low - Medium - Low @@ -122,8 +123,8 @@ trade-offs after optimization: **Symmetric Compression** - ``INT4_SYM`` - INT4 Symmetric mode involves quantizing weights to an unsigned 4-bit integer - symmetrically with a fixed zero point of 8. This mode is faster than the INT8, making + INT4 Symmetric mode involves quantizing weights to a signed 4-bit integer + symmetrically without zero point. This mode is faster than the INT8_ASYM, making it ideal for situations where **speed and size reduction are prioritized over accuracy**. .. code-block:: python @@ -159,15 +160,15 @@ trade-offs after optimization: `Larger Group Size`: Results in faster inference and a smaller model, but might compromise accuracy. -* ``ratio`` controls the ratio between INT4 and INT8 compressed layers in the model. +* ``ratio`` controls the ratio between INT4 and INT8_ASYM compressed layers in the model. Ratio is a decimal between 0 and 1. 
For example, 0.8 means that 80% of layers will be - compressed to INT4, while the rest will be compressed to INT8 precision. The default + compressed to INT4, while the rest will be compressed to INT8_ASYM precision. The default value for ratio is 1. `Higher Ratio (more INT4)`: Reduces the model size and increase inference speed but might lead to higher accuracy degradation. - `Lower Ratio (more INT8)`: Maintains better accuracy but results in a larger model size + `Lower Ratio (more INT8_ASYM)`: Maintains better accuracy but results in a larger model size and potentially slower inference. In this example, 90% of the model's layers are quantized to INT4 asymmetrically with @@ -238,7 +239,7 @@ If the model comes from `Hugging Face `__ and is by Optimum, it may be easier to use the Optimum Intel API to perform weight compression. The compression type is specified when the model is loaded using the ``load_in_8bit=True`` or ``load_in_4bit=True`` parameter. The second example uses the Weight Compression API -from Optimum Intel instead of NNCF to compress the model to INT8. +from Optimum Intel instead of NNCF to compress the model to INT8_ASYM. .. tab-set:: @@ -359,7 +360,7 @@ score indicates a lower accuracy. It is measured on the - 5.01 - 10.3 * - databricks/dolly-v2-3b - - INT8 + - INT8_ASYM - 5.07 - 2.6 * - databricks/dolly-v2-3b @@ -371,7 +372,7 @@ score indicates a lower accuracy. It is measured on the - 4.25 - 24.8 * - facebook/opt-6.7b - - INT8 + - INT8_ASYM - 4.27 - 6.2 * - facebook/opt-6.7b @@ -383,7 +384,7 @@ score indicates a lower accuracy. It is measured on the - 3.28 - 25.1 * - meta-llama/Llama-2-7b-chat-hf - - INT8 + - INT8_ASYM - 3.29 - 6.3 * - meta-llama/Llama-2-7b-chat-hf @@ -395,7 +396,7 @@ score indicates a lower accuracy. It is measured on the - 4.15 - 25.6 * - togethercomputer/RedPajama-INCITE-7B-Instruct - - INT8 + - INT8_ASYM - 4.17 - 6.4 * - togethercomputer/RedPajama-INCITE-7B-Instruct @@ -407,7 +408,7 @@ score indicates a lower accuracy. 
It is measured on the - 2.92 - 48.5 * - meta-llama/Llama-2-13b-chat-hf - - INT8 + - INT8_ASYM - 2.91 - 12.1 * - meta-llama/Llama-2-13b-chat-hf From 18e0f5df0f006187e2a339dde6769c01b1faa26b Mon Sep 17 00:00:00 2001 From: Ivan Tikhonov Date: Fri, 26 Jul 2024 11:58:05 +0400 Subject: [PATCH 32/54] Fix concat axis handling in TSConcat transformation (#25723) ### Details: Fixed a dyn rank handling Fixed positive case ### Tickets: - *CVS-147730* --- .../transpose_sinking/ts_concat.cpp | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/common/transformations/src/transformations/transpose_sinking/ts_concat.cpp b/src/common/transformations/src/transformations/transpose_sinking/ts_concat.cpp index 8dbcf7ba285f5b..502d89cb205aeb 100644 --- a/src/common/transformations/src/transformations/transpose_sinking/ts_concat.cpp +++ b/src/common/transformations/src/transformations/transpose_sinking/ts_concat.cpp @@ -36,18 +36,21 @@ TSConcatForward::TSConcatForward() { return false; } - if (concat_node->get_output_partial_shape(0).is_dynamic()) { - return false; + auto concat_axis = concat_node->get_axis(); + if (concat_axis < 0) { + if (concat_node->get_output_partial_shape(0).rank().is_dynamic()) { + return false; + } + const auto rank = concat_node->get_output_partial_shape(0).rank().get_length(); + concat_axis = ov::util::normalize(concat_axis, rank); } + // todo: support dyn rank case bool updated = sink_forward::UpdateInputTransposes(main_node, transpose_info); if (!updated) { return false; } - const auto rank = concat_node->get_output_partial_shape(0).rank().get_length(); - const auto concat_axis = ov::util::normalize(concat_node->get_axis(), rank); - const auto transpose_axis_order = transpose_info.transpose_const->get_axis_vector_val(); const int64_t transposed_concat_axis = transpose_axis_order[concat_axis]; concat_node->set_axis(transposed_concat_axis); @@ -83,12 +86,19 @@ TSConcatBackward::TSConcatBackward() { } auto concat_node = 
as_type_ptr(main_node); - if (concat_node->get_output_partial_shape(0).is_dynamic()) { + if (!concat_node) { return false; } - const auto rank = concat_node->get_output_partial_shape(0).rank().get_length(); - auto concat_axis = ov::util::normalize(concat_node->get_axis(), rank); + auto concat_axis = concat_node->get_axis(); + if (concat_axis < 0) { + if (concat_node->get_output_partial_shape(0).rank().is_dynamic()) { + return false; + } + + const auto rank = concat_node->get_output_partial_shape(0).rank().get_length(); + concat_axis = ov::util::normalize(concat_axis, rank); + } const auto transpose_axis_order = transpose_const->get_axis_vector_val(); const auto reversed_transpose_axis_order = ReverseTransposeOrder(transpose_axis_order); From a3ed68a590bf9a6fd69da7a8b9cb5c85d72c4ecd Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Fri, 26 Jul 2024 13:18:56 +0400 Subject: [PATCH 33/54] [Snippets] Added single evaluation of Brgemm in Tail Loop by dynamic M (#25378) ### Details: *Previously Tail Loop with `Brgemm` by dynamic dimension `M` was processed with scalar increment. 
This PR extends the functionality: now this Tail Loop is evaluated once and subtensors of Brgemm are updated to get new `M` by `CPURuntimeConfigurator`* ### Tickets: - *[145859](https://jira.devtools.intel.com/browse/CVS-145859)* ### Prerequisites: - *https://github.com/openvinotoolkit/openvino/pull/25326* --- .../snippets/kernel_executor_table.hpp | 12 +- .../include/snippets/lowered/loop_info.hpp | 16 +- .../snippets/lowered/port_descriptor.hpp | 22 +-- .../include/snippets/runtime_configurator.hpp | 16 +- .../snippets/include/snippets/utils/utils.hpp | 30 ++-- src/common/snippets/src/lowered/loop_info.cpp | 15 +- .../snippets/src/lowered/loop_manager.cpp | 11 +- .../pass/compute_buffer_allocation_size.cpp | 2 +- .../pass/insert_specific_iterations.cpp | 3 +- .../pass/optimize_loop_single_evaluation.cpp | 40 +++-- .../src/lowered/pass/propagate_subtensors.cpp | 42 ++++- .../snippets/src/lowered/port_descriptor.cpp | 27 ++- src/common/snippets/src/op/reduce.cpp | 3 +- .../snippets/src/op/serialization_node.cpp | 3 +- src/common/snippets/src/op/subgraph.cpp | 2 +- .../snippets/src/pass/matmul_to_brgemm.cpp | 6 +- .../src/pass/softmax_decomposition.cpp | 2 +- .../snippets/src/runtime_configurator.cpp | 38 ++-- .../snippets/tests/include/lir_test_utils.hpp | 32 ---- .../snippets/tests/src/lir_test_utils.cpp | 26 +-- .../src/lowered/pass/buffer_allocation.cpp | 3 +- .../pass/extracted_loop_invariants.cpp | 168 +++++++++--------- .../snippets/cpu_runtime_configurator.cpp | 19 +- .../snippets/cpu_runtime_configurator.hpp | 6 +- .../snippets/x64/jit_loop_emitters.cpp | 9 +- .../snippets/x64/kernel_executors/brgemm.cpp | 95 ++++++---- .../snippets/x64/kernel_executors/brgemm.hpp | 5 +- .../src/emitters/tpp/x64/jit_tpp_emitter.cpp | 2 +- .../x64/pass/brgemm_to_brgemm_cpu.cpp | 2 +- .../x64/pass/lowered/brgemm_blocking.cpp | 56 ++++-- .../x64/pass/lowered/brgemm_blocking.hpp | 3 + .../x64/pass/lowered/cpu_iter_handlers.cpp | 14 ++
.../x64/pass/lowered/cpu_iter_handlers.hpp | 18 +- .../tpp/x64/pass/eltwise_to_eltwise_tpp.cpp | 9 +- .../x64/pass/lowered/set_tpp_leading_dim.cpp | 2 +- .../tpp/x64/pass/scalar_to_scalar_tpp.cpp | 4 +- .../snippets/matmul.cpp | 32 +++- .../x64/lowered/brgemm_blocking.cpp | 66 ++++--- .../x64/lowered/buffer_allocation.cpp | 8 +- 39 files changed, 521 insertions(+), 348 deletions(-) diff --git a/src/common/snippets/include/snippets/kernel_executor_table.hpp b/src/common/snippets/include/snippets/kernel_executor_table.hpp index 46f9cd04b923ba..af797e4c80422a 100644 --- a/src/common/snippets/include/snippets/kernel_executor_table.hpp +++ b/src/common/snippets/include/snippets/kernel_executor_table.hpp @@ -43,7 +43,7 @@ class KernelExecutorBase { * @brief Update current kernel config in accordance with the passed expression. Corresponding kernel is recompiled if necessary. * This method should be called to update KernelExecutor based on runtime info (e.g. shapes) available through expression ptr */ - virtual void update_by_expression(const lowered::ExpressionPtr& expr) = 0; + virtual void update_by_expression(const lowered::ExpressionPtr& expr, const lowered::LinearIRPtr& linear_ir) = 0; /** * @brief Replace current kernel config with the provided value. Corresponding kernel is recompiled if necessary. * This method should be called to restore a saved state of the executor, that was configured using update_by_expression(). 
@@ -70,8 +70,8 @@ class KernelExecutor : public KernelExecutorBase { explicit KernelExecutor(Conf c) : KernelExecutorBase(), m_config{std::move(c)} {} // Note: override when final is redundant, but needed to avoid warnings on some compilers - void update_by_expression(const lowered::ExpressionPtr& expr) override final { // NOLINT - update_config(expr, m_config); + void update_by_expression(const lowered::ExpressionPtr& expr, const lowered::LinearIRPtr& linear_ir) override final { // NOLINT + update_config(expr, linear_ir, m_config); OPENVINO_ASSERT(m_config.is_completed(), "Failed to update kernel config in update_by_expression"); update_kernel(m_config, m_kernel); OPENVINO_ASSERT(m_kernel, "Failed to compile kernel executor"); @@ -103,7 +103,7 @@ class KernelExecutor : public KernelExecutorBase { protected: /*** Updates stored kernel config based on runtime info from expression (e.g. new input shapes). */ - virtual void update_config(const lowered::ExpressionPtr& expr, Conf& config) const = 0; + virtual void update_config(const lowered::ExpressionPtr& expr, const lowered::LinearIRPtr& linear_ir, Conf& config) const = 0; /*** Updates stored kernel in accordance with the passed config. Recompilation of the kernel is * performed if necessary. 
*/ virtual void update_kernel(const Conf& c, std::shared_ptr& kernel) const = 0; @@ -130,9 +130,9 @@ class KernelExecutorTable { return m_table.at(expr); } /*** Updates every registered KernelExecutor in accordance with the corresponding expression */ - void update_state() const { + void update_state(const lowered::LinearIRPtr& linear_ir) const { for (const auto& record : m_table) - record.second->update_by_expression(record.first); + record.second->update_by_expression(record.first, linear_ir); } /*** Returns lambda function that contains current state of the table, and restores this state when called */ diff --git a/src/common/snippets/include/snippets/lowered/loop_info.hpp b/src/common/snippets/include/snippets/lowered/loop_info.hpp index e763f2244d76c6..6be47f49d17ae1 100644 --- a/src/common/snippets/include/snippets/lowered/loop_info.hpp +++ b/src/common/snippets/include/snippets/lowered/loop_info.hpp @@ -430,7 +430,8 @@ class ExpandedLoopInfo : public LoopInfo { ExpandedLoopInfo(size_t work_amount, size_t increment, const std::vector& entries, const std::vector& exits, std::vector ptr_increments, std::vector final_offsets, std::vector data_sizes, - SpecificLoopIterType type, std::shared_ptr unified_loop_info, bool is_wa_const = false); + SpecificLoopIterType type, std::shared_ptr unified_loop_info, bool is_wa_const = false, + bool evaluate_once = false); /** * @brief Clone LoopInfo with new expressions * @param expr_map map of new and old expressions @@ -474,7 +475,18 @@ class ExpandedLoopInfo : public LoopInfo { * @return const ref of `m_data_sizes` */ const std::vector& get_data_sizes() const; + /** + * @brief Returns True if the current Loop should be executed once + * Otherwise, returns False + * @return `m_evaluate_once` + */ + bool is_evaluate_once() const; + /** + * @brief Set value to `m_evaluate_once` + * @param value - new value of `m_evaluate_once` + */ + void set_evaluate_once(bool value); /** * @brief Update `m_ptr_increments` using copy
values from `new_values`. * The count of new values must be equal to the count of current increments. @@ -517,6 +529,8 @@ class ExpandedLoopInfo : public LoopInfo { const SpecificLoopIterType m_type = {}; std::shared_ptr m_unified_loop_info = {}; + + bool m_evaluate_once = false; }; using ExpandedLoopInfoPtr = std::shared_ptr; diff --git a/src/common/snippets/include/snippets/lowered/port_descriptor.hpp b/src/common/snippets/include/snippets/lowered/port_descriptor.hpp index 3fc429bec4df1e..2d5c72c06ef983 100644 --- a/src/common/snippets/include/snippets/lowered/port_descriptor.hpp +++ b/src/common/snippets/include/snippets/lowered/port_descriptor.hpp @@ -20,12 +20,6 @@ using PortDescriptorPtr = std::shared_ptr; class PortDescriptor { friend class LinearIRBuilder; public: - // The structure with service values for scheduling parameters - struct ServiceDimensions { - // The value for the subtensor that means that scheduling should be by full dimension - static size_t FULL_DIM; - }; - explicit PortDescriptor(const ov::Input& node, VectorDims subtensor_shape = {}, std::vector layout = {}); @@ -54,6 +48,9 @@ class PortDescriptor { void set_reg_type(RegType type) { m_reg.type = type; } void set_reg_idx(size_t idx) { m_reg.idx = idx; } + // Indexing starts from the end (rbegin() + idx) + void set_subtensor_dim(size_t idx, VectorDims::value_type value); + std::string serialize() const; bool empty() const { return m_layout.empty() && m_subtensor_shape.empty();} PortDescriptorPtr clone() const; @@ -87,6 +84,8 @@ class PortDescriptorUtils { public: static void set_port_descriptor_ptr(const ov::Input& n, const PortDescriptorPtr& desc); static void set_port_descriptor_ptr(const ov::Output& n, const PortDescriptorPtr& desc); + static void set_port_descriptor(const ov::Input& n, std::vector subtensor, std::vector layout = {}); + static void set_port_descriptor(const ov::Output& n, std::vector subtensor, std::vector layout = {}); static PortDescriptorPtr 
get_port_descriptor_ptr(const ov::Input& in); static PortDescriptorPtr get_port_descriptor_ptr(const ov::Input& out); @@ -116,17 +115,6 @@ class PortDescriptorVectorAttribute : public ov::RuntimeAttribute { std::vector outputs{}; }; -template -void set_port_desc(const T& port, std::vector subtensor) { - const auto& shape = port.get_shape(); - for (size_t i = 1; i <= std::min(subtensor.size(), shape.size()); i++) { - auto& dim = subtensor[subtensor.size() - i]; - if (dim != PortDescriptor::ServiceDimensions::FULL_DIM) - dim = std::min(dim, shape[shape.size() - i]); - } - PortDescriptorUtils::set_port_descriptor_ptr(port, std::make_shared(shape, subtensor)); -} - } // namespace lowered } // namespace snippets } // namespace ov diff --git a/src/common/snippets/include/snippets/runtime_configurator.hpp b/src/common/snippets/include/snippets/runtime_configurator.hpp index 059771d961df82..058eca59716d1b 100644 --- a/src/common/snippets/include/snippets/runtime_configurator.hpp +++ b/src/common/snippets/include/snippets/runtime_configurator.hpp @@ -61,7 +61,7 @@ class RuntimeConfigurator { * @param linear_ir LinearIR * @return updated config */ - const std::shared_ptr& get_updated_config(const std::shared_ptr& linear_ir); + const std::shared_ptr& get_updated_config(const lowered::LinearIRPtr& linear_ir); /*** Returns pointer to KernelExecutorTable owned by the config */ const std::shared_ptr& get_kernel_executor_table() const { return m_config->kernel_executor_table; } @@ -70,19 +70,19 @@ class RuntimeConfigurator { * @brief Update RuntimeConfig based on LinearIR * @param linear_ir LinearIR */ - virtual void update(const std::shared_ptr& linear_ir); + virtual void update(const lowered::LinearIRPtr& linear_ir); /** * @brief Allocate and intialize fields in RuntimeConfig and RuntimeConfigurator * @param linear_ir LinearIR */ - virtual void initialization(const std::shared_ptr& linear_ir); + virtual void initialization(const lowered::LinearIRPtr& linear_ir); /** * @brief 
Initializes input and data information of LinearIR: * descriptors (that contains shapes and layouts) and data_sizes * @param linear_ir LinearIR */ - void init_data_info(const std::shared_ptr& linear_ir); + void init_data_info(const lowered::LinearIRPtr& linear_ir); /** * @brief Initializes information of buffers: * - static buffer_scratchpad_size @@ -90,23 +90,23 @@ class RuntimeConfigurator { * - clusters with dynamic buffers (`m_dynamic_buffer_clusters`) for the quick access in `update()` * @param linear_ir LinearIR */ - void init_buffer_info(const std::shared_ptr& linear_ir); + void init_buffer_info(const lowered::LinearIRPtr& linear_ir); /** * @brief Initializes tensor rank of config * @param linear_ir LinearIR */ - virtual void init_tensor_rank(const std::shared_ptr& linear_ir) const; + virtual void init_tensor_rank(const lowered::LinearIRPtr& linear_ir) const; /** * @brief Update Loop informations in LinearIR: Unified and ExpandedLoopInfo * @param linear_ir LinearIR */ - void update_loop_info(const std::shared_ptr& linear_ir) const; + void update_loop_info(const lowered::LinearIRPtr& linear_ir) const; /** * @brief Update Buffer scratchpad size and offsets if needed * Note: `update_loop_info` must be called before * @param linear_ir LinearIR */ - void update_buffer_scratchpad_size(const std::shared_ptr& linear_ir) const; + void update_buffer_scratchpad_size(const lowered::LinearIRPtr& linear_ir) const; /** * @brief Calculate data offsets of LinearIR and update these values in RuntimeConfig */ diff --git a/src/common/snippets/include/snippets/utils/utils.hpp b/src/common/snippets/include/snippets/utils/utils.hpp index 33eebcffedf68b..869956b5274c60 100644 --- a/src/common/snippets/include/snippets/utils/utils.hpp +++ b/src/common/snippets/include/snippets/utils/utils.hpp @@ -21,6 +21,26 @@ namespace ov { namespace snippets { namespace utils { +/* --- Special values --- */ +template::value || std::is_same::value), bool>::type> +constexpr inline T 
get_dynamic_value() { + return std::numeric_limits::max(); +} +template::value || std::is_same::value), bool>::type> +constexpr inline bool is_dynamic_value(T value) { + return value == get_dynamic_value(); +} + +// This value means full dimension +// For example, for the subtensor it means that scheduling should be by full dimension +constexpr inline size_t get_full_dim_value() { + return get_dynamic_value() - 1; +} +constexpr inline bool is_full_dim_value(size_t value) { + return value == get_full_dim_value(); +} +/* ---------------------- */ + // Get non-scalar Constant count that will be created after FakeQuantize decomposition. // This count is needed to know exact count of non-scalar Constants during tokenization. auto get_non_scalar_constant_count_for_fq(const std::shared_ptr& fq) -> size_t; @@ -59,16 +79,6 @@ inline T div_up(const T a, const U b) { return static_cast((a + b - 1) / b); } -template::value || std::is_same::value), bool>::type> -constexpr inline T get_dynamic_value() { - return std::numeric_limits::max(); -} - -template::value || std::is_same::value), bool>::type> -constexpr inline bool is_dynamic_value(T value) { - return value == get_dynamic_value(); -} - inline bool is_dynamic_vdims(const VectorDims& shape) { return std::any_of(shape.cbegin(), shape.cend(), [](size_t v){ return is_dynamic_value(v); }); } diff --git a/src/common/snippets/src/lowered/loop_info.cpp b/src/common/snippets/src/lowered/loop_info.cpp index 6f14a52e750feb..d99788fad12946 100644 --- a/src/common/snippets/src/lowered/loop_info.cpp +++ b/src/common/snippets/src/lowered/loop_info.cpp @@ -373,10 +373,10 @@ void UnifiedLoopInfo::add_loop_ports(const std::vector& ports) { ExpandedLoopInfo::ExpandedLoopInfo(size_t work_amount, size_t increment, const std::vector& entries, const std::vector& exits, std::vector ptr_increments, std::vector final_offsets, std::vector data_sizes, - SpecificLoopIterType type, std::shared_ptr unified_loop_info, bool is_wa_const) + 
SpecificLoopIterType type, std::shared_ptr unified_loop_info, bool is_wa_const, bool evaluate_once) : LoopInfo(work_amount, increment, entries, exits, is_wa_const), m_ptr_increments(std::move(ptr_increments)), m_finalization_offsets(std::move(final_offsets)), - m_data_sizes(std::move(data_sizes)), m_type(type), m_unified_loop_info(std::move(unified_loop_info)) { + m_data_sizes(std::move(data_sizes)), m_type(type), m_unified_loop_info(std::move(unified_loop_info)), m_evaluate_once(evaluate_once) { validate(); } @@ -392,7 +392,8 @@ std::shared_ptr ExpandedLoopInfo::clone_with_new_expr(const Expression const auto& new_output_ports = clone_loop_ports(expr_map, m_output_ports); return std::make_shared(m_work_amount, m_increment, new_input_ports, new_output_ports, - m_ptr_increments, m_finalization_offsets, m_data_sizes, m_type, m_unified_loop_info, m_is_work_amount_const); + m_ptr_increments, m_finalization_offsets, m_data_sizes, m_type, + m_unified_loop_info, m_is_work_amount_const, m_evaluate_once); } bool ExpandedLoopInfo::is_dynamic() const { @@ -435,6 +436,14 @@ const std::vector& ExpandedLoopInfo::get_data_sizes() const { return m_data_sizes; } +bool ExpandedLoopInfo::is_evaluate_once() const { + return m_evaluate_once; +} + +void ExpandedLoopInfo::set_evaluate_once(bool value) { + m_evaluate_once = value; +} + void ExpandedLoopInfo::update_ptr_increments(const std::vector& new_values) { OPENVINO_ASSERT(new_values.size() == m_ptr_increments.size(), "Failed to update ptr_increments: incompatible counts"); m_ptr_increments.assign(new_values.cbegin(), new_values.end()); diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp index 3e07ec850927ab..09f8ccb94b9660 100644 --- a/src/common/snippets/src/lowered/loop_manager.cpp +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -160,7 +160,6 @@ void LoopManager::get_io_loop_ports(LinearIR::constExprIt loop_begin_pos, void 
LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, LinearIR::constExprIt loop_end_pos, size_t loop_depth, size_t vector_size) { - const auto FULL_DIM = PortDescriptor::ServiceDimensions::FULL_DIM; std::vector loop_input_ports, loop_output_ports; LoopManager::get_io_loop_ports(loop_begin_pos, loop_end_pos, loop_input_ports, loop_output_ports); @@ -178,8 +177,8 @@ void LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, "Failed to broadcast work amount in marking loop"); }; - auto is_outside_loop = [&FULL_DIM](const std::vector& subtensor) { - return std::all_of(subtensor.begin(), subtensor.end(), [&FULL_DIM](size_t lhs) { return lhs == FULL_DIM; }); + auto is_outside_loop = [](const std::vector& subtensor) { + return std::all_of(subtensor.begin(), subtensor.end(), utils::is_full_dim_value); }; std::vector loop_subtensor; @@ -192,7 +191,7 @@ void LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, subtensor[subtensor.size() - 1] = vector_size; } - const size_t resizing_value = is_outside_loop(subtensor) ? FULL_DIM : 1; + const size_t resizing_value = is_outside_loop(subtensor) ? 
utils::get_full_dim_value() : 1; while (subtensor.size() < loop_depth) subtensor.insert(subtensor.begin(), resizing_value); if (loop_subtensor.empty()) @@ -202,7 +201,7 @@ void LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, "Incorrect scheduling parameters for loop"); for (size_t dim_idx = 0; dim_idx < loop_depth; ++dim_idx) { - if (*(subtensor.rbegin() + dim_idx) != FULL_DIM) { + if (!utils::is_full_dim_value(*(subtensor.rbegin() + dim_idx))) { broadcast(loop_tensor, shape, dim_idx); } } @@ -211,7 +210,7 @@ void LoopManager::mark_loop(LinearIR::constExprIt loop_begin_pos, for (size_t dim_idx = 0; dim_idx < loop_depth; ++dim_idx) { OPENVINO_ASSERT(dim_idx < loop_subtensor.size(), "Incorrect indexes of Loop for markup"); const auto& subtensor_value = *(loop_subtensor.rbegin() + dim_idx); - if (subtensor_value == FULL_DIM) { + if (utils::is_full_dim_value(subtensor_value)) { continue; } diff --git a/src/common/snippets/src/lowered/pass/compute_buffer_allocation_size.cpp b/src/common/snippets/src/lowered/pass/compute_buffer_allocation_size.cpp index e4664800995db1..028cdde1088e60 100644 --- a/src/common/snippets/src/lowered/pass/compute_buffer_allocation_size.cpp +++ b/src/common/snippets/src/lowered/pass/compute_buffer_allocation_size.cpp @@ -60,7 +60,7 @@ size_t ComputeBufferAllocationSize::get_allocation_size(const LoopManagerPtr& lo const auto processing_rank = !processed_dim_idxs.empty() ? 
std::max(*processed_dim_idxs.rbegin(), subtensor.size()) : subtensor.size(); for (size_t i = 0; i < std::min(processing_rank, rank); ++i) { if (processed_dim_idxs.count(i) == 0) { - if (i < subtensor.size()) + if (i < subtensor.size() && !utils::is_full_dim_value(*(subtensor.rbegin() + i))) allocation_size = utils::dynamic_safe_mul(allocation_size, std::min(*(planar_shape.rbegin() + i), *(subtensor.rbegin() + i))); else allocation_size = utils::dynamic_safe_mul(allocation_size, *(planar_shape.rbegin() + i)); diff --git a/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp b/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp index 2ef872ba4ad262..dcff90015d28f2 100644 --- a/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp +++ b/src/common/snippets/src/lowered/pass/insert_specific_iterations.cpp @@ -167,6 +167,7 @@ bool InsertSpecificIterations::decompose(LinearIR& linear_ir, LinearIR::constExp if (is_decomposed_loop_needed(unified_loop_info, iter_type, remaining_work_amount)) { const auto work_amount = get_decomposed_loop_work_amount(unified_loop_info, iter_type, remaining_work_amount); const auto increment = get_decomposed_loop_increment(unified_loop_info, iter_type, remaining_work_amount); + const auto evaluate_once = !utils::is_dynamic_value(work_amount) && work_amount == increment; // Update remaining Loop work amount // Note: if work_amount is unknown and increment = 1, it means that a loop will iterate by whole work_amount if (!is_wa_dynamic || increment == 1) { @@ -199,7 +200,7 @@ bool InsertSpecificIterations::decompose(LinearIR& linear_ir, LinearIR::constExp const auto decomposed_loop_info = std::make_shared(work_amount, increment, decomposed_loop_entry_ports, decomposed_loop_exit_ports, decomposed_ptr_increments, decomposed_finalization_offsets, - decomposed_data_sizes, iter_type, unified_loop_info); + decomposed_data_sizes, iter_type, unified_loop_info, false, evaluate_once); 
init_decomposed_loop(linear_ir, decomposed_loop_begin_it, decomposed_loop_end_it, decomposed_loop_info, loop_id, decomposed_loop_end); decomposed = true; diff --git a/src/common/snippets/src/lowered/pass/optimize_loop_single_evaluation.cpp b/src/common/snippets/src/lowered/pass/optimize_loop_single_evaluation.cpp index c19bf7d65a2fef..c6255d90106e77 100644 --- a/src/common/snippets/src/lowered/pass/optimize_loop_single_evaluation.cpp +++ b/src/common/snippets/src/lowered/pass/optimize_loop_single_evaluation.cpp @@ -4,6 +4,7 @@ #include "snippets/lowered/pass/optimize_loop_single_evaluation.hpp" +#include "snippets/lowered/loop_manager.hpp" #include "snippets/lowered/linear_ir.hpp" #include "snippets/op/loop.hpp" #include "snippets/utils/utils.hpp" @@ -16,30 +17,31 @@ namespace pass { bool OptimizeLoopSingleEvaluation::run(lowered::LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::OptimizeLoopSingleEvaluation") + const auto& loop_manager = linear_ir.get_loop_manager(); + bool is_modified = false; for (auto expr_it = begin; expr_it != end; ++expr_it) { const auto& expr = *expr_it; if (auto loop_end = ov::as_type_ptr(expr->get_node())) { - // *1* solo vector/tail loop + empty outer loop - // => skip increments (both counter & ptr) : set evaluate_once flag - // *2* solo vector/tail loop + non-empty outer loop - // => skip counter increments but perform ptr increments : set evaluate_once, - // and perform pointer increments through finalization offsets - // *3* vector loop(s) + one tail loop - // => vector as usual, tail depends on outer loop, see *1* and *2* - if (loop_end->has_dynamic_params() || loop_end->get_work_amount() >= 2 * loop_end->get_increment()) - continue; - - auto new_finalization_offsets = loop_end->get_finalization_offsets(); - const auto& ptr_increments = loop_end->get_ptr_increments(); - const auto work_amount_incr = 
static_cast(loop_end->get_increment()); - for (size_t i = 0; i < new_finalization_offsets.size(); i++) { - new_finalization_offsets[i] += ptr_increments[i] * work_amount_incr; + const auto& loop_info = loop_manager->get_loop_info(loop_end->get_id()); + if (loop_info->is_evaluate_once()) { + auto new_finalization_offsets = loop_end->get_finalization_offsets(); + const auto& ptr_increments = loop_end->get_ptr_increments(); + const auto work_amount_incr = static_cast(loop_end->get_increment()); + for (size_t i = 0; i < new_finalization_offsets.size(); i++) { + const auto ptr_shift = utils::dynamic_safe_mul(ptr_increments[i], work_amount_incr); + new_finalization_offsets[i] = utils::dynamic_safe_add(new_finalization_offsets[i], ptr_shift); + } + loop_end->set_finalization_offsets(new_finalization_offsets); + loop_end->set_ptr_increments(std::vector(new_finalization_offsets.size(), 0)); + loop_end->set_evaluate_once(true); + + // Update the corresponding ExpandedLoopInfo + loop_info->update_ptr_increments(loop_end->get_ptr_increments()); + loop_info->update_finalization_offsets(loop_end->get_finalization_offsets()); + + is_modified = true; } - loop_end->set_finalization_offsets(new_finalization_offsets); - loop_end->set_ptr_increments(std::vector(new_finalization_offsets.size(), 0)); - loop_end->set_evaluate_once(true); - is_modified = true; } } return is_modified; diff --git a/src/common/snippets/src/lowered/pass/propagate_subtensors.cpp b/src/common/snippets/src/lowered/pass/propagate_subtensors.cpp index b58de6790c23a4..c89274a728c4c9 100644 --- a/src/common/snippets/src/lowered/pass/propagate_subtensors.cpp +++ b/src/common/snippets/src/lowered/pass/propagate_subtensors.cpp @@ -15,14 +15,43 @@ namespace snippets { namespace lowered { namespace pass { namespace { + +// The algorithm uses the following special values in subtensors/shapes: +// 1. Dynamic value in subtensor/shape : SIZE_MAX +// 2. Full dimension in subtensor : SIZE_MAX - 1 +// 3. 
Default value of `new_dim_value` : SIZE_MAX - 2 +// 4. `Forced` special dynamic value : SIZE_MAX - 3 +// +// We have to introduce `FORCED_DYNAMIC_VALUE` to distinguish `new_dim_value = DYNAMIC` +// from the real dynamic values in subtensors and shapes and force this value in subtensors. +// For example, there is Brgemm with the following info in the tail Loop: +// Input 0: shape [?, ?], existing subtensor [32, FULL_DIM] +// Input 1: shape [?, ?], existing subtensor [FULL_DIM, FULL_DIM] +// Output : shape [?, ?], existing subtensor [32, FULL_DIM] +// If the user wants to force `?` in the place of `32` in subtensors, the steps will be: +// 1. Set `?` to subtensor and shape of Input 0 : +// shape [?, ?] (shape has not been changed!), new subtensor [?, FULL_DIM] +// 2. Make shape inference of Brgemm and get Output: +// shape [?, ?] (shape has not been changed!), existing subtensor [32, FULL_DIM] +// 3. Update subtensor on output using shape: +// new_subtensor[i] = std::min(planar_shape[i], subtensor[i]); // i = 0: std::min(SIZE_MAX(?), 32) +// new subtensor [32, FULL_DIM] - has not been changed! But should be [?, FULL_DIM] +// Conclusion: we have to distinguish the forced dynamic value from existing dynamic values in shape and subtensor + +constexpr size_t NEW_DEFAULT_VALUE = SIZE_MAX - 2; +constexpr size_t FORCED_DYNAMIC_VALUE = SIZE_MAX - 3; + void propagate_updated_subtensor_through_loop(const LinearIR& linear_ir, const LoopInfoPtr& loop_info, LinearIR::container::const_iterator begin, LinearIR::container::const_iterator end, bool most_outer_loop, - const size_t new_dim_value = SIZE_MAX) { - OPENVINO_ASSERT(snippets::utils::implication(most_outer_loop, new_dim_value != SIZE_MAX), + size_t new_dim_value = NEW_DEFAULT_VALUE) { + // Marks the forced dynamic value + new_dim_value = utils::is_dynamic_value(new_dim_value) ?
FORCED_DYNAMIC_VALUE : new_dim_value; + OPENVINO_ASSERT(snippets::utils::implication(most_outer_loop, new_dim_value != NEW_DEFAULT_VALUE), "if the updated subtensor propagation was called for the outer loop, new_dim_value must not be equal to default value"); + std::map original_shapes; // First step: set new dim value to the corresponding input_ports' dimensions if (most_outer_loop) { @@ -32,9 +61,8 @@ void propagate_updated_subtensor_through_loop(const LinearIR& linear_ir, const auto& expr = port.expr_port->get_expr(); const auto& desc = port.expr_port->get_descriptor_ptr(); auto subtensor = desc->get_subtensor(); - if (port.dim_idx < subtensor.size()) { - *(subtensor.rbegin() + port.dim_idx) = new_dim_value; - desc->set_subtensor(subtensor); + if (port.dim_idx < desc->get_subtensor().size()) { + desc->set_subtensor_dim(port.dim_idx, new_dim_value); } const auto parent_desc = expr->get_input_port_connector(port.expr_port->get_index())->get_source().get_descriptor_ptr(); @@ -78,7 +106,9 @@ void propagate_updated_subtensor_through_loop(const LinearIR& linear_ir, const size_t subtensor_start = planar_dims.size() - subtensor.size(); VectorDims new_subtensor(planar_dims.begin() + subtensor_start, planar_dims.end()); for (size_t i = 0; i < new_subtensor.size(); ++i) { - new_subtensor[i] = std::min(new_subtensor[i], subtensor[i]); + // If user forces dynamic value to set in subtensor, set real dynamic dimension using `get_dynamic_value()` + new_subtensor[i] = new_subtensor[i] == FORCED_DYNAMIC_VALUE ? utils::get_dynamic_value() : + utils::is_full_dim_value(subtensor[i]) ? 
subtensor[i] : std::min(new_subtensor[i], subtensor[i]); } desc->set_subtensor(new_subtensor); } diff --git a/src/common/snippets/src/lowered/port_descriptor.cpp b/src/common/snippets/src/lowered/port_descriptor.cpp index 3280be29973b69..e5fd3638e831c8 100644 --- a/src/common/snippets/src/lowered/port_descriptor.cpp +++ b/src/common/snippets/src/lowered/port_descriptor.cpp @@ -9,8 +9,6 @@ namespace ov { namespace snippets { namespace lowered { -size_t PortDescriptor::ServiceDimensions::FULL_DIM = SIZE_MAX; - PortDescriptor::PortDescriptor(const ov::Input& in, VectorDims subtensor_shape, std::vector layout) : PortDescriptor(ov::Input(in.get_node(), in.get_index()), std::move(subtensor_shape), std::move(layout)) {} @@ -53,6 +51,11 @@ void PortDescriptor::set_shape(const VectorDims& tensor) { *m_tensor_shape = tensor; } +void PortDescriptor::set_subtensor_dim(size_t idx, VectorDims::value_type value) { + OPENVINO_ASSERT(idx < m_subtensor_shape.size(), "Failed to set subtensor value: idx should be less than size"); + *(m_subtensor_shape.rbegin() + idx) = value; +} + PortDescriptorPtr PortDescriptor::clone() const { auto desc = std::make_shared(*m_tensor_shape, m_subtensor_shape, m_layout); desc->set_reg(m_reg); @@ -130,6 +133,26 @@ void PortDescriptorUtils::set_port_descriptor_ptr(const ov::Output& ou } } +namespace { +template +void set_port_desc(const T& port, std::vector subtensor, std::vector layout) { + const auto& shape = port.get_shape(); + for (size_t i = 1; i <= std::min(subtensor.size(), shape.size()); i++) { + auto& dim = subtensor[subtensor.size() - i]; + if (!utils::is_full_dim_value(dim)) + dim = std::min(dim, shape[shape.size() - i]); + } + PortDescriptorUtils::set_port_descriptor_ptr(port, std::make_shared(shape, subtensor, layout)); +} +} // namespace + +void PortDescriptorUtils::set_port_descriptor(const ov::Input& in, std::vector subtensor, std::vector layout) { + set_port_desc(in, subtensor, layout); +} +void 
PortDescriptorUtils::set_port_descriptor(const ov::Output& in, std::vector subtensor, std::vector layout) { + set_port_desc(in, subtensor, layout); +} + PortDescriptorPtr PortDescriptorUtils::get_port_descriptor_ptr(const ov::Input& in) { return get_port_descriptor_ptr(ov::Input(in.get_node(), in.get_index())); } diff --git a/src/common/snippets/src/op/reduce.cpp b/src/common/snippets/src/op/reduce.cpp index 5717bfe1255300..b0b69e0bd7e84c 100644 --- a/src/common/snippets/src/op/reduce.cpp +++ b/src/common/snippets/src/op/reduce.cpp @@ -5,6 +5,7 @@ #include "snippets/op/reduce.hpp" #include "snippets/itt.hpp" +#include "snippets/utils/utils.hpp" #include "snippets/lowered/port_descriptor.hpp" namespace ov { @@ -33,7 +34,7 @@ void ReduceBase::compute_and_set_reduce_subtensors(const std::shared_ptr subtensor(reduce_rank, 1); for (size_t i = axis; i < reduce_rank; ++i) - subtensor[i] = lowered::PortDescriptor::ServiceDimensions::FULL_DIM; + subtensor[i] = utils::get_full_dim_value(); lowered::PortDescriptorUtils::set_port_descriptor_ptr(reduce->input(0), std::make_shared(reduce->input(0), subtensor)); lowered::PortDescriptorUtils::set_port_descriptor_ptr(reduce->output(0), std::make_shared(reduce->output(0), subtensor)); } diff --git a/src/common/snippets/src/op/serialization_node.cpp b/src/common/snippets/src/op/serialization_node.cpp index cb17e8a57ddf24..9864a1a12f94a5 100644 --- a/src/common/snippets/src/op/serialization_node.cpp +++ b/src/common/snippets/src/op/serialization_node.cpp @@ -49,7 +49,8 @@ bool SerializationNode::visit_attributes(AttributeVisitor &visitor) { std::stringstream ss; for (size_t i = 0; i < subtensor.size(); ++i) { const auto& v = subtensor[i]; - const auto v_str = (v == lowered::PortDescriptor::ServiceDimensions::FULL_DIM) ? "FULL_DIM" : std::to_string(v); + const auto v_str = utils::is_full_dim_value(v) ? "FULL_DIM" : + utils::is_dynamic_value(v) ? "?" : std::to_string(v); const auto del = i < subtensor.size() - 1 ? 
", " : ""; ss << v_str << del; } diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index a33d478ee3929d..4ede0b58a66cf0 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -552,7 +552,7 @@ snippets::Schedule Subgraph::generate(const void* compile_params) const { exec_table->replace_key_expression(expression_map.at(expr.get()), expr); // Some kernel executors might've been registered during code emission. // We need to update them, so appropriate kernels will be compiled. - exec_table->update_state(); + exec_table->update_state(m_linear_ir); return {std::move(lowering_result)}; } diff --git a/src/common/snippets/src/pass/matmul_to_brgemm.cpp b/src/common/snippets/src/pass/matmul_to_brgemm.cpp index 6eaf8424ff5a78..7268d4a7cc6a67 100644 --- a/src/common/snippets/src/pass/matmul_to_brgemm.cpp +++ b/src/common/snippets/src/pass/matmul_to_brgemm.cpp @@ -18,16 +18,12 @@ namespace snippets { namespace pass { void MatMulToBrgemm::init_ports(const std::shared_ptr& brgemm) const { - auto get_subtensor = []() { - return std::vector{ lowered::PortDescriptor::ServiceDimensions::FULL_DIM, lowered::PortDescriptor::ServiceDimensions::FULL_DIM }; - }; + const auto subtensor = std::vector(2, utils::get_full_dim_value()); for (const auto& input : brgemm->inputs()) { const auto& tensor = utils::pshape_to_vdims(input.get_partial_shape()); - const auto& subtensor = get_subtensor(); lowered::PortDescriptorUtils::set_port_descriptor_ptr(input, std::make_shared(tensor, subtensor)); } const auto& tensor = utils::pshape_to_vdims(brgemm->get_output_partial_shape(0)); - const auto& subtensor = get_subtensor(); lowered::PortDescriptorUtils::set_port_descriptor_ptr(brgemm->output(0), std::make_shared(tensor, subtensor)); } diff --git a/src/common/snippets/src/pass/softmax_decomposition.cpp b/src/common/snippets/src/pass/softmax_decomposition.cpp index 269d06c958dd39..34dc1c19c5d9d0 100644 --- 
a/src/common/snippets/src/pass/softmax_decomposition.cpp +++ b/src/common/snippets/src/pass/softmax_decomposition.cpp @@ -55,7 +55,7 @@ SoftmaxDecomposition::SoftmaxDecomposition() { OPENVINO_ASSERT(axis < rank, "Softmax has incorrect axis"); std::vector subtensor(rank, 1); for (size_t i = axis; i < rank; ++i) - subtensor[i] = PortDescriptor::ServiceDimensions::FULL_DIM; + subtensor[i] = utils::get_full_dim_value(); PortDescriptorUtils::set_port_descriptor_ptr(power->input(0), std::make_shared(power->input(0), subtensor)); PortDescriptorUtils::set_port_descriptor_ptr(power->output(0), std::make_shared(power->output(0), subtensor)); diff --git a/src/common/snippets/src/runtime_configurator.cpp b/src/common/snippets/src/runtime_configurator.cpp index c3db1864bf1135..6f8945649c2b94 100644 --- a/src/common/snippets/src/runtime_configurator.cpp +++ b/src/common/snippets/src/runtime_configurator.cpp @@ -35,7 +35,7 @@ RuntimeConfigurator::RuntimeConfigurator(std::shared_ptr c) : OPENVINO_ASSERT(m_config, "Runtime config is nullptr!"); } -const std::shared_ptr& RuntimeConfigurator::get_updated_config(const std::shared_ptr& linear_ir) { +const std::shared_ptr& RuntimeConfigurator::get_updated_config(const lowered::LinearIRPtr& linear_ir) { // First initialization if (m_io_num == 0) initialization(linear_ir); @@ -44,7 +44,7 @@ const std::shared_ptr& RuntimeConfigurator::get_updated_config(co return m_config; } -void RuntimeConfigurator::initialization(const std::shared_ptr& linear_ir) { +void RuntimeConfigurator::initialization(const lowered::LinearIRPtr& linear_ir) { init_data_info(linear_ir); init_tensor_rank(linear_ir); init_buffer_info(linear_ir); @@ -55,7 +55,7 @@ void RuntimeConfigurator::initialization(const std::shared_ptrtile_rank = linear_ir->get_config().m_loop_depth; } -void RuntimeConfigurator::update(const std::shared_ptr& linear_ir) { +void RuntimeConfigurator::update(const lowered::LinearIRPtr& linear_ir) { if (linear_ir->is_dynamic()) { 
update_loop_info(linear_ir); update_buffer_scratchpad_size(linear_ir); @@ -67,11 +67,11 @@ void RuntimeConfigurator::update(const std::shared_ptr& linea update_latest_shapes(); } -void RuntimeConfigurator::init_tensor_rank(const std::shared_ptr& linear_ir) const { +void RuntimeConfigurator::init_tensor_rank(const lowered::LinearIRPtr& linear_ir) const { m_config->tensor_rank = linear_ir->get_master_shape().size(); } -void RuntimeConfigurator::init_data_info(const std::shared_ptr& linear_ir) { +void RuntimeConfigurator::init_data_info(const lowered::LinearIRPtr& linear_ir) { const auto& parameters = linear_ir->get_parameters(); const auto& results = linear_ir->get_results(); m_in_num = parameters.size(); @@ -113,7 +113,7 @@ void RuntimeConfigurator::init_data_info(const std::shared_ptr& linear_ir) { +void RuntimeConfigurator::init_buffer_info(const lowered::LinearIRPtr& linear_ir) { std::map> dynamic_buffer_clusters, static_buffer_clusters; // All needed checks are in Validate pass @@ -143,7 +143,7 @@ void RuntimeConfigurator::init_buffer_info(const std::shared_ptr& linear_ir) const { +void RuntimeConfigurator::update_loop_info(const lowered::LinearIRPtr& linear_ir) const { // Initialized UnifiedLoopInfo struct CurrentUnifiedLoopInfo { size_t current_work_amount = 0; @@ -180,21 +180,27 @@ void RuntimeConfigurator::update_loop_info(const std::shared_ptrset_work_amount( - lowered::pass::InsertSpecificIterations::get_decomposed_loop_work_amount(current_unified_loop_info, decomposed_loop_type, current_work_amount)); + const auto work_amount = + lowered::pass::InsertSpecificIterations::get_decomposed_loop_work_amount(current_unified_loop_info, decomposed_loop_type, current_work_amount); + expanded_loop_info->set_work_amount(work_amount); // Update remaining Loop work amount - current_work_amount -= expanded_loop_info->get_work_amount(); - - expanded_loop_info->update_ptr_increments(ptr_increments); - if (current_work_amount > 0) { - 
expanded_loop_info->update_finalization_offsets(std::vector(finalization_offsets.size(), 0)); + current_work_amount -= work_amount; + + // Update only `finalization offsets`. `Ptr increments` are always zeroed in this case + auto updated_finalization_offsets = current_work_amount > 0 ? std::vector(finalization_offsets.size(), 0) : finalization_offsets; + if (expanded_loop_info->is_evaluate_once()) { + expanded_loop_info->set_increment(work_amount); + // work_amount is equal to increment in cases with `evaluate_once` + for (size_t i = 0; i < updated_finalization_offsets.size(); ++i) + updated_finalization_offsets[i] += ptr_increments[i] * work_amount; } else { - expanded_loop_info->update_finalization_offsets(finalization_offsets); + expanded_loop_info->update_ptr_increments(ptr_increments); } + expanded_loop_info->update_finalization_offsets(updated_finalization_offsets); } } -void RuntimeConfigurator::update_buffer_scratchpad_size(const std::shared_ptr& linear_ir) const { +void RuntimeConfigurator::update_buffer_scratchpad_size(const lowered::LinearIRPtr& linear_ir) const { const auto& loop_manager = linear_ir->get_loop_manager(); m_config->buffer_scratchpad_size = linear_ir->get_static_buffer_scratchpad_size(); diff --git a/src/common/snippets/tests/include/lir_test_utils.hpp b/src/common/snippets/tests/include/lir_test_utils.hpp index 2f687f6e1412d1..b653c86af8ab0b 100644 --- a/src/common/snippets/tests/include/lir_test_utils.hpp +++ b/src/common/snippets/tests/include/lir_test_utils.hpp @@ -44,38 +44,6 @@ void init_expr_descriptors(const ov::snippets::lowered::ExpressionPtr& expr, const std::vector& subtensors = {}, const std::vector& layouts = {}); -/** - * @brief Creates unified loop info based on provided entry and exit points, and adds it to the linear_ir's loops map - * @attention This helper wraps LoopManager::mark_loop method, but only for LoopInfo creation (whereas original - * mark_loop method also marks expressions with the corresponding loop info). 
- * @param linear_ir linear_ir in which loop info should be added - * @param entries entry points of loop - * @param exits exit points of loop - */ -void create_and_add_unified_loop_info(const std::shared_ptr& linear_ir, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - bool add_default_handlers = true); -/** - * @brief Creates unified loop info based on provided entry and exit points, and adds it to the linear_ir's loops map. - * Meanwhile set loop id to expr range [loop_begin_pos, loop_end_pos). - * @attention This helper wraps LoopManager::mark_loop method, which also marks expressions with the corresponding loop info - * @param linear_ir linear_ir in which loop info should be added - * @param loop_begin_pos begin expr postion in this loop - * @param loop_end_pos end expr postion in this loop - * @param entries entry points of loop - * @param exits exit points of loop - */ -void create_and_add_unified_loop_info(const std::shared_ptr& linear_ir, - ov::snippets::lowered::LinearIR::constExprIt loop_begin_pos, - ov::snippets::lowered::LinearIR::constExprIt loop_end_pos, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - bool add_default_handlers = true); } // namespace snippets } // namespace test } // namespace ov diff --git a/src/common/snippets/tests/src/lir_test_utils.cpp b/src/common/snippets/tests/src/lir_test_utils.cpp index 274480fcd84c85..c4f5047011cd08 100644 --- a/src/common/snippets/tests/src/lir_test_utils.cpp +++ b/src/common/snippets/tests/src/lir_test_utils.cpp @@ -39,9 +39,7 @@ void LoweredPassTestsF::TearDown() { } ov::snippets::VectorDims get_default_subtensor() { - static const VectorDims default_subtensor{PortDescriptor::ServiceDimensions::FULL_DIM, - PortDescriptor::ServiceDimensions::FULL_DIM}; - return default_subtensor; + return VectorDims(2, ov::snippets::utils::get_full_dim_value()); } void init_expr_descriptors(const 
ov::snippets::lowered::ExpressionPtr& expr, @@ -85,28 +83,6 @@ void init_expr_descriptors(const ov::snippets::lowered::ExpressionPtr& expr, } } -void create_and_add_unified_loop_info(const LinearIRPtr& linear_ir, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - bool set_default_handlers) { - // Equal begin and end iterators are set to avoid expressions marking with new loop id - create_and_add_unified_loop_info(linear_ir, linear_ir->begin(), linear_ir->begin(), work_amount, increment, entries, exits, set_default_handlers); -} - -void create_and_add_unified_loop_info(const LinearIRPtr& linear_ir, - ov::snippets::lowered::LinearIR::constExprIt loop_begin_pos, - ov::snippets::lowered::LinearIR::constExprIt loop_end_pos, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - bool set_default_handlers) { - const auto& loop_manager = linear_ir->get_loop_manager(); - loop_manager->mark_loop(loop_begin_pos, loop_end_pos, work_amount, increment, entries, exits, set_default_handlers); -} - } // namespace snippets } // namespace test } // namespace ov diff --git a/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp b/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp index e56a31a8e92a4c..4dc3f2dae7e867 100644 --- a/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp +++ b/src/common/snippets/tests/src/lowered/pass/buffer_allocation.cpp @@ -95,8 +95,7 @@ void BufferAllocationTest::Validate() { std::shared_ptr EltwiseBufferAllocationTest::GetModel() const { const auto subtensor_eltwise = std::vector{1, m_vector_size}; - const auto subtensor_buffer = std::vector{ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM, - ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM}; + const auto subtensor_buffer = std::vector(2, ov::snippets::utils::get_full_dim_value()); const auto parameter0 = 
std::make_shared(ov::element::f32, ov::PartialShape({1, 3, 100, 100})); const auto parameter1 = std::make_shared(ov::element::f32, ov::PartialShape({1, 3, 100, 100})); diff --git a/src/common/snippets/tests/src/lowered/pass/extracted_loop_invariants.cpp b/src/common/snippets/tests/src/lowered/pass/extracted_loop_invariants.cpp index c3f4f5ea7f6877..ee762f4bfca746 100644 --- a/src/common/snippets/tests/src/lowered/pass/extracted_loop_invariants.cpp +++ b/src/common/snippets/tests/src/lowered/pass/extracted_loop_invariants.cpp @@ -66,11 +66,11 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsWithParams) { auto result = linear_ir->push_node(sub.second); auto begin = multiply.first; auto end = result.first; - create_and_add_unified_loop_info(linear_ir, begin, end, 512, vector_size, - {LoopPort((*multiply.first)->get_input_port(0)), - LoopPort((*multiply.first)->get_input_port(1)), - LoopPort((*sub.first)->get_input_port(0))}, - {LoopPort((*sub.first)->get_output_port(0))}); + linear_ir->get_loop_manager()->mark_loop(begin, end, 512, vector_size, + std::vector{LoopPort((*multiply.first)->get_input_port(0)), + LoopPort((*multiply.first)->get_input_port(1)), + LoopPort((*sub.first)->get_input_port(0))}, + std::vector{LoopPort((*sub.first)->get_output_port(0))}); linear_ir->set_loop_depth(1); } { @@ -85,10 +85,10 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsWithParams) { auto result = linear_ir_ref->push_node(sub.second); auto begin = sub.first; auto end = result.first; - create_and_add_unified_loop_info(linear_ir_ref, begin, end, 512, vector_size, - {LoopPort((*sub.first)->get_input_port(0)), - LoopPort((*sub.first)->get_input_port(1))}, - {LoopPort((*sub.first)->get_output_port(0))}); + linear_ir_ref->get_loop_manager()->mark_loop(begin, end, 512, vector_size, + std::vector{LoopPort((*sub.first)->get_input_port(0)), + LoopPort((*sub.first)->get_input_port(1))}, + std::vector{LoopPort((*sub.first)->get_output_port(0))}); } } @@ -124,10 +124,10 @@ 
TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsWithScalar) { auto result = linear_ir->push_node(sub.second); auto begin = scalar.first; auto end = result.first; - create_and_add_unified_loop_info(linear_ir, begin, end, 512, vector_size, - {LoopPort((*multiply.first)->get_input_port(0)), - LoopPort((*sub.first)->get_input_port(0))}, - {LoopPort((*sub.first)->get_output_port(0))}); + linear_ir->get_loop_manager()->mark_loop(begin, end, 512, vector_size, + std::vector{LoopPort((*multiply.first)->get_input_port(0)), + LoopPort((*sub.first)->get_input_port(0))}, + std::vector{LoopPort((*sub.first)->get_output_port(0))}); linear_ir->set_loop_depth(1); } { @@ -142,10 +142,10 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsWithScalar) { auto result = linear_ir_ref->push_node(sub.second); auto begin = sub.first; auto end = result.first; - create_and_add_unified_loop_info(linear_ir_ref, begin, end, 512, vector_size, - {LoopPort((*sub.first)->get_input_port(0)), - LoopPort((*sub.first)->get_input_port(1))}, - {LoopPort((*sub.first)->get_output_port(0))}); + linear_ir_ref->get_loop_manager()->mark_loop(begin, end, 512, vector_size, + std::vector{LoopPort((*sub.first)->get_input_port(0)), + LoopPort((*sub.first)->get_input_port(1))}, + std::vector{LoopPort((*sub.first)->get_output_port(0))}); } } @@ -187,20 +187,20 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsOutputLoopUpdateNotNeed auto result1 = linear_ir->push_node(sub.second); auto begin = multiply.first; auto end = result1.first; - create_and_add_unified_loop_info(linear_ir, begin, end, 16, vector_size, - {LoopPort((*multiply.first)->get_input_port(0), true, 0), - LoopPort((*multiply.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(0), true, 0), - LoopPort((*sub.first)->get_input_port(0), true, 0)}, - {LoopPort((*add.first)->get_output_port(0), true, 0), - LoopPort((*sub.first)->get_output_port(0), true, 0)}); - create_and_add_unified_loop_info(linear_ir, 
begin, end, 3, 1, - {LoopPort((*multiply.first)->get_input_port(0), true, 1), - LoopPort((*multiply.first)->get_input_port(1), true, 1), - LoopPort((*add.first)->get_input_port(0), true, 1), - LoopPort((*sub.first)->get_input_port(0), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1), - LoopPort((*sub.first)->get_output_port(0), true, 1)}); + linear_ir->get_loop_manager()->mark_loop(begin, end, 16, vector_size, + std::vector{LoopPort((*multiply.first)->get_input_port(0), true, 0), + LoopPort((*multiply.first)->get_input_port(1), true, 0), + LoopPort((*add.first)->get_input_port(0), true, 0), + LoopPort((*sub.first)->get_input_port(0), true, 0)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 0), + LoopPort((*sub.first)->get_output_port(0), true, 0)}); + linear_ir->get_loop_manager()->mark_loop(begin, end, 3, 1, + std::vector{LoopPort((*multiply.first)->get_input_port(0), true, 1), + LoopPort((*multiply.first)->get_input_port(1), true, 1), + LoopPort((*add.first)->get_input_port(0), true, 1), + LoopPort((*sub.first)->get_input_port(0), true, 1)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 1), + LoopPort((*sub.first)->get_output_port(0), true, 1)}); linear_ir->set_loop_depth(2); } { @@ -218,21 +218,21 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsOutputLoopUpdateNotNeed auto result1 = linear_ir_ref->push_node(sub.second); auto begin_inner = add.first; auto end_inner = result1.first; - create_and_add_unified_loop_info(linear_ir_ref, begin_inner, end_inner, 16, vector_size, - {LoopPort((*add.first)->get_input_port(0), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0), - LoopPort((*sub.first)->get_input_port(0), true, 0)}, - {LoopPort((*add.first)->get_output_port(0), true, 0), - LoopPort((*sub.first)->get_output_port(0), true, 0)}); + linear_ir_ref->get_loop_manager()->mark_loop(begin_inner, end_inner, 16, vector_size, + std::vector{LoopPort((*add.first)->get_input_port(0), true, 0), + 
LoopPort((*add.first)->get_input_port(1), true, 0), + LoopPort((*sub.first)->get_input_port(0), true, 0)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 0), + LoopPort((*sub.first)->get_output_port(0), true, 0)}); auto begin_outer = multiply.first; auto end_outer = result1.first; - create_and_add_unified_loop_info(linear_ir_ref, begin_outer, end_outer, 3, 1, - {LoopPort((*multiply.first)->get_input_port(0), true, 1), - LoopPort((*multiply.first)->get_input_port(1), true, 1), - LoopPort((*add.first)->get_input_port(0), true, 1), - LoopPort((*sub.first)->get_input_port(0), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1), - LoopPort((*sub.first)->get_output_port(0), true, 1)}); + linear_ir_ref->get_loop_manager()->mark_loop(begin_outer, end_outer, 3, 1, + std::vector{LoopPort((*multiply.first)->get_input_port(0), true, 1), + LoopPort((*multiply.first)->get_input_port(1), true, 1), + LoopPort((*add.first)->get_input_port(0), true, 1), + LoopPort((*sub.first)->get_input_port(0), true, 1)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 1), + LoopPort((*sub.first)->get_output_port(0), true, 1)}); } } @@ -263,14 +263,14 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsFromInnermostToLoopOuts auto add = linear_ir->push_node(param_0.second, broadcastmove.second); init_expr_descriptors(*add.first, {subtensor, subtensor, subtensor}, {layout, layout, layout}); auto result = linear_ir->push_node(add.second); - create_and_add_unified_loop_info(linear_ir, broadcastmove.first, result.first, 3, 1, - {LoopPort((*broadcastmove.first)->get_input_port(0), true, 1), - LoopPort((*add.first)->get_input_port(0), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1)}); - create_and_add_unified_loop_info(linear_ir, broadcastmove.first, result.first, 512, vector_size, - {LoopPort((*broadcastmove.first)->get_input_port(0), true, 0), - LoopPort((*add.first)->get_input_port(0), true, 0)}, - 
{LoopPort((*add.first)->get_output_port(0), true, 0)}); + linear_ir->get_loop_manager()->mark_loop(broadcastmove.first, result.first, 3, 1, + std::vector{LoopPort((*broadcastmove.first)->get_input_port(0), true, 1), + LoopPort((*add.first)->get_input_port(0), true, 1)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 1)}); + linear_ir->get_loop_manager()->mark_loop(broadcastmove.first, result.first, 512, vector_size, + std::vector{LoopPort((*broadcastmove.first)->get_input_port(0), true, 0), + LoopPort((*add.first)->get_input_port(0), true, 0)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 0)}); linear_ir->set_loop_depth(2); } { @@ -281,14 +281,14 @@ TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsFromInnermostToLoopOuts auto add = linear_ir_ref->push_node(param_0.second, broadcastmove.second); init_expr_descriptors(*add.first, {subtensor, subtensor, subtensor}, {layout, layout, layout}); auto result = linear_ir_ref->push_node(add.second); - create_and_add_unified_loop_info(linear_ir_ref, add.first, result.first, 3, 1, - {LoopPort((*add.first)->get_input_port(0), true, 1), - LoopPort((*add.first)->get_input_port(1), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1)}); - create_and_add_unified_loop_info(linear_ir_ref, add.first, result.first, 512, vector_size, - {LoopPort((*add.first)->get_input_port(0), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*add.first)->get_output_port(0), true, 0)}); + linear_ir_ref->get_loop_manager()->mark_loop(add.first, result.first, 3, 1, + std::vector{LoopPort((*add.first)->get_input_port(0), true, 1), + LoopPort((*add.first)->get_input_port(1), true, 1)}, + std::vector{LoopPort((*add.first)->get_output_port(0), true, 1)}); + linear_ir_ref->get_loop_manager()->mark_loop(add.first, result.first, 512, vector_size, + std::vector{LoopPort((*add.first)->get_input_port(0), true, 0), + LoopPort((*add.first)->get_input_port(1), true, 0)}, + 
std::vector{LoopPort((*add.first)->get_output_port(0), true, 0)}); } } @@ -356,31 +356,31 @@ TEST_F(ExtractLoopInvariantsRemoveLoopsTest, ExtractedLoopInvariantsAllExprsInLo init_expr_descriptors(*multiply.first, {subtensor, subtensor, subtensor}, {layout, layout, layout}); auto result = linear_ir->push_node(multiply.second); // 3 inner loop - create_and_add_unified_loop_info(linear_ir, max.first, hmax.first, 1, vector_size, - {LoopPort((*max.first)->get_input_port(0), true, 0), - LoopPort((*max.first)->get_input_port(1), true, 0)}, - {LoopPort((*max.first)->get_output_port(0), true, 0)}); - create_and_add_unified_loop_info(linear_ir, sub.first, hsum.first, 1, vector_size, - {LoopPort((*sub.first)->get_input_port(0), true, 0), - LoopPort((*sub.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*exp.first)->get_output_port(0), true, 0), - LoopPort((*add.first)->get_output_port(0), true, 0)}); - create_and_add_unified_loop_info(linear_ir, multiply.first, result.first, 1, vector_size, - {LoopPort((*multiply.first)->get_input_port(0), true, 0), - LoopPort((*multiply.first)->get_input_port(1), true, 0)}, - {LoopPort((*multiply.first)->get_output_port(0), true, 0)}); + linear_ir->get_loop_manager()->mark_loop(max.first, hmax.first, 1, vector_size, + std::vector{LoopPort((*max.first)->get_input_port(0), true, 0), + LoopPort((*max.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*max.first)->get_output_port(0), true, 0)}); + linear_ir->get_loop_manager()->mark_loop(sub.first, hsum.first, 1, vector_size, + std::vector{LoopPort((*sub.first)->get_input_port(0), true, 0), + LoopPort((*sub.first)->get_input_port(1), true, 0), + LoopPort((*add.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*exp.first)->get_output_port(0), true, 0), + LoopPort((*add.first)->get_output_port(0), true, 0)}); + linear_ir->get_loop_manager()->mark_loop(multiply.first, result.first, 1, vector_size, + 
std::vector{LoopPort((*multiply.first)->get_input_port(0), true, 0), + LoopPort((*multiply.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*multiply.first)->get_output_port(0), true, 0)}); // outer loop info const auto loop_begin = std::make_shared(); auto loop_begin_expr = linear_ir->insert_node(loop_begin, std::vector{}, {}, false, max.first); const auto loop_end = std::make_shared(); std::vector loop_end_inputs{(*loop_begin_expr)->get_output_port_connector(0)}; auto loop_end_expr = linear_ir->insert_node(loop_end, loop_end_inputs, {}, false, result.first); - create_and_add_unified_loop_info(linear_ir, loop_begin_expr, result.first, 10, 1, - {LoopPort((*max.first)->get_input_port(0), true, 1), - LoopPort((*max.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*multiply.first)->get_output_port(0), true, 1)}); + linear_ir->get_loop_manager()->mark_loop(loop_begin_expr, result.first, 10, 1, + std::vector{LoopPort((*max.first)->get_input_port(0), true, 1), + LoopPort((*max.first)->get_input_port(1), true, 0), + LoopPort((*add.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*multiply.first)->get_output_port(0), true, 1)}); loop_end->set_id((*loop_end_expr)->get_loop_ids().back()); linear_ir->set_loop_depth(2); } @@ -409,11 +409,11 @@ TEST_F(ExtractLoopInvariantsRemoveLoopsTest, ExtractedLoopInvariantsAllExprsInLo const auto loop_end = std::make_shared(); std::vector loop_end_inputs{(*loop_begin_expr)->get_output_port_connector(0)}; auto loop_end_expr = linear_ir_ref->insert_node(loop_end, loop_end_inputs, {}, false, result.first); - create_and_add_unified_loop_info(linear_ir_ref, loop_begin_expr, result.first, 10, 1, - {LoopPort((*max.first)->get_input_port(0), true, 1), - LoopPort((*max.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*multiply.first)->get_output_port(0), true, 1)}); + 
linear_ir_ref->get_loop_manager()->mark_loop(loop_begin_expr, result.first, 10, 1, + std::vector{LoopPort((*max.first)->get_input_port(0), true, 1), + LoopPort((*max.first)->get_input_port(1), true, 0), + LoopPort((*add.first)->get_input_port(1), true, 0)}, + std::vector{LoopPort((*multiply.first)->get_output_port(0), true, 1)}); loop_end->set_id((*loop_end_expr)->get_loop_ids().back()); } } diff --git a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp index 1f6bd487032730..925a6d28697d41 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.cpp @@ -14,20 +14,27 @@ namespace intel_cpu { CPURuntimeConfigurator::CPURuntimeConfigurator() : ov::snippets::RuntimeConfigurator(std::make_shared()) { } -void CPURuntimeConfigurator::update(const std::shared_ptr& linear_ir) { - RuntimeConfigurator::update(linear_ir); - +void CPURuntimeConfigurator::update(const ov::snippets::lowered::LinearIRPtr& linear_ir) { if (linear_ir->is_dynamic()) { - get_kernel_executor_table()->update_state(); + update_loop_info(linear_ir); update_loop_args(linear_ir); + // Update KernelExecutor Table should be before `update_buffer_scratchpad_size` + // because `ComputeAllocationSize` depends on subtensors which are updated in the table + get_kernel_executor_table()->update_state(linear_ir); + update_buffer_scratchpad_size(linear_ir); } + + m_config->master_shape = linear_ir->get_master_shape(); + + update_data_offsets(); + update_latest_shapes(); } -void CPURuntimeConfigurator::init_tensor_rank(const std::shared_ptr& linear_ir) const { +void CPURuntimeConfigurator::init_tensor_rank(const ov::snippets::lowered::LinearIRPtr& linear_ir) const { m_config->tensor_rank = std::max(linear_ir->get_master_shape().size(), rank6D); } -void CPURuntimeConfigurator::update_loop_args(const std::shared_ptr& 
linear_ir) const { +void CPURuntimeConfigurator::update_loop_args(const ov::snippets::lowered::LinearIRPtr& linear_ir) const { const auto& cpu_config = ov::as_type_ptr(m_config); OPENVINO_ASSERT(cpu_config, "CPURuntimeConfigurator expects CPURuntimeConfig"); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp index 6b3a54652097ae..f1a21e5982aa1c 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/cpu_runtime_configurator.hpp @@ -29,17 +29,17 @@ class CPURuntimeConfigurator : public ov::snippets::RuntimeConfigurator { * @brief Update RuntimeConfig based on LinearIR * @param linear_ir LinearIR */ - void update(const std::shared_ptr& linear_ir) override; + void update(const ov::snippets::lowered::LinearIRPtr& linear_ir) override; /** * @brief Initializes tensor rank of config * @param linear_ir LinearIR */ - void init_tensor_rank(const std::shared_ptr& linear_ir) const override; + void init_tensor_rank(const ov::snippets::lowered::LinearIRPtr& linear_ir) const override; /** * @brief Calculate Loop parameters of Loop emitters and update these values in CPURuntimeConfig * @param linear_ir LinearIR */ - void update_loop_args(const std::shared_ptr& linear_ir) const; + void update_loop_args(const ov::snippets::lowered::LinearIRPtr& linear_ir) const; const size_t rank6D = 6; }; diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp index 6b99097872db37..cb6dfeb741109a 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_loop_emitters.cpp @@ -41,7 +41,8 @@ void jit_loop_begin_emitter::validate_arguments(const std::vector &in, c // Note: the only expected output is work amount register (communicated to 
jit_loop_end_emitter) OV_CPU_JIT_EMITTER_ASSERT(out.size() == 1, "Invalid outputs size: expected 1 got " + std::to_string(out.size())); OV_CPU_JIT_EMITTER_ASSERT(loop_begin_label != nullptr && loop_end_label != nullptr, "has not inited labels!"); - OV_CPU_JIT_EMITTER_ASSERT(implication(is_work_amount_dynamic, !evaluate_once), "with dynamic work_amount cannot evaluate once!"); + OV_CPU_JIT_EMITTER_ASSERT(!snippets::utils::is_dynamic_value(wa_increment) || evaluate_once, + "loop increment might be dynamic only if loop evaluates once!"); } void jit_loop_begin_emitter::emit_code(const std::vector &in, const std::vector &out, @@ -52,7 +53,8 @@ void jit_loop_begin_emitter::emit_code(const std::vector &in, const std: void jit_loop_begin_emitter::emit_impl(const std::vector& in, const std::vector& out) const { // If the loop evaulate once, we can skip loop begin code emission - if (evaluate_once) + // If work_amount is dynamic, we should get runtime `work_amount` - it might be `zero` and we should skip loop evaluation + if (evaluate_once && !is_work_amount_dynamic) return; Reg64 reg_work_amount = Reg64(static_cast(out.back())); @@ -124,7 +126,8 @@ void jit_loop_end_emitter::validate_arguments(const std::vector &in, con "Invalid finalization_offsets size: expected: ", io_size, " got ", finalization_offsets.size()); OV_CPU_JIT_EMITTER_ASSERT(data_sizes.size() == io_size, "Invalid data_sizes size: expected: ", io_size, " got ", data_sizes.size()); OV_CPU_JIT_EMITTER_ASSERT(loop_end_label != nullptr && loop_begin_label != nullptr, "has not inited labels!"); - OV_CPU_JIT_EMITTER_ASSERT(implication(are_ptr_shifts_dynamic, !evaluate_once), "with dynamic data pointer shifts cannot evaluate once!"); + OV_CPU_JIT_EMITTER_ASSERT(!snippets::utils::is_dynamic_value(wa_increment) || evaluate_once, + "loop increment might be dynamic only if loop evaluates once!"); } void jit_loop_end_emitter::emit_code(const std::vector &in, const std::vector &out, diff --git 
a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp index 6898fd18b587cd..e538c3baef28bb 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.cpp @@ -4,6 +4,8 @@ #include "brgemm.hpp" +#include "snippets/lowered/loop_manager.hpp" + #include #include "common/utils.hpp" #include "dnnl_extension_utils.h" @@ -18,7 +20,7 @@ using namespace dnnl::impl::cpu::x64; namespace { size_t init_hash(dnnl_data_type_t dt_in0, dnnl_data_type_t dt_in1, float beta, bool is_with_amx, - bool is_with_comp, dnnl::impl::cpu::x64::cpu_isa_t isa) { + bool is_with_comp, dnnl::impl::cpu::x64::cpu_isa_t isa) { size_t seed = 0; #define HASH(X) seed = hash_combine(seed, X) HASH(dt_in0); HASH(dt_in1); @@ -41,7 +43,7 @@ BrgemmKernelConfig::BrgemmKernelConfig(const element::Type& in0_dtype, const ele } bool BrgemmKernelConfig::is_completed() const { - return !utils::one_of(0, m_M, m_N, m_K, m_LDA, m_LDB, m_LDC); + return !utils::one_of(0, m_M, m_N, m_K, m_LDA, m_LDB, m_LDC) || is_empty(); } bool BrgemmKernelConfig::operator==(const BrgemmKernelConfig& rhs) const { @@ -54,11 +56,22 @@ bool BrgemmKernelConfig::operator==(const BrgemmKernelConfig& rhs) const { } void BrgemmKernelConfig::update(dnnl_dim_t M, dnnl_dim_t N, dnnl_dim_t K, dnnl_dim_t LDA, dnnl_dim_t LDB, dnnl_dim_t LDC) { - m_M = M; m_N = N; m_K = K; - m_LDA = LDA; m_LDB = LDB; m_LDC = LDC; + // If M is zero, it means that Brgemm won't be executed (in Loop with work_amount = 0, for example) + // To process this case, we have to make this Config as empty (nullify runtime parameters) + if (utils::one_of(0, M, N, K)) { + m_M = 0; m_N = 0; m_K = 0; + m_LDA = 0; m_LDB = 0; m_LDC = 0; + } else { + m_M = M; m_N = N; m_K = K; + m_LDA = LDA; m_LDB = LDB; m_LDC = LDC; + } m_hash = compute_hash(); } +bool BrgemmKernelConfig::is_empty() 
const { + return everyone_is(0, m_M, m_N, m_K, m_LDA, m_LDB, m_LDC); +} + BrgemmKernelConfig::operator amx_tile_config_t() const { amx_tile_config_t res; res.M = m_M; res.N = m_N; res.K = m_K; @@ -115,6 +128,12 @@ BrgemmKernelExecutor::BrgemmKernelExecutor(ov::intel_cpu::MultiCacheWeakPtr kern std::shared_ptr BrgemmKernelExecutor::compile_kernel(const BrgemmKernelConfig& config) const { + std::shared_ptr compiled_kernel = std::make_shared(); + + // Brgemm is not executable - nothing to compile + if (config.is_empty()) + return compiled_kernel; + cpu::x64::brgemm_t desc; auto status = brgemm_desc_init(&desc, config.get_isa(), cpu::x64::brgemm_strd, config.get_dt_in0(), config.get_dt_in1(), @@ -122,10 +141,8 @@ std::shared_ptr BrgemmKernelExecutor::compile_kernel(const config.get_beta(), config.get_LDA(), config.get_LDB(), config.get_LDC(), config.get_M(), config.get_N(), config.get_K(), nullptr); - - auto compiled_kernel = std::make_shared(); - OV_CPU_JIT_EMITTER_ASSERT(status == dnnl_success, "Cannot initialize brgemm descriptor due to invalid params"); + if (config.is_with_amx()) { status = brgemm_init_tiles(desc, compiled_kernel->palette); OV_CPU_JIT_EMITTER_ASSERT(status == dnnl_success, "Cannot initialize brgemm tiles due to invalid params"); @@ -138,31 +155,49 @@ std::shared_ptr BrgemmKernelExecutor::compile_kernel(const return compiled_kernel; } -void BrgemmKernelExecutor::update_config(const ov::snippets::lowered::ExpressionPtr& expr, BrgemmKernelConfig& config) const { - auto get_projected_input_subtensor = [](const snippets::lowered::PortDescriptorPtr& desc) { - // Note: for output shape you will need get_preordered_vdims() - auto shape = snippets::utils::get_planar_vdims(desc->get_shape(), desc->get_layout()); - auto subtensor = desc->get_subtensor(); - OV_CPU_JIT_EMITTER_ASSERT(subtensor.size() <= shape.size() && subtensor.size() == 2, - "Invalid subtensor + shape combination"); - auto shape_it = shape.rbegin(); - for (auto sub_it = subtensor.rbegin(); 
sub_it != subtensor.rend(); sub_it++, shape_it++) { - *sub_it = std::min(*sub_it, *shape_it); - } - return subtensor; - }; +void BrgemmKernelExecutor::update_config(const ov::snippets::lowered::ExpressionPtr& expr, + const ov::snippets::lowered::LinearIRPtr& linear_ir, + BrgemmKernelConfig& config) const { const auto& input_pds = expr->get_input_port_descriptors(); const auto& output_pds = expr->get_output_port_descriptors(); OV_CPU_JIT_EMITTER_ASSERT((input_pds.size() == 2 || input_pds.size() == 3) && output_pds.size() == 1, "Invalid number of in/out port descriptors"); - // Update runtime-defined config fields: - // Matrix A (first input) + + const auto in0_shape = snippets::utils::get_planar_vdims(input_pds[0]->get_shape(), input_pds[0]->get_layout()); + const auto in1_shape = snippets::utils::get_planar_vdims(input_pds[1]->get_shape(), input_pds[1]->get_layout()); + auto in0_subtensor = input_pds[0]->get_subtensor(); + auto in1_subtensor = input_pds[1]->get_subtensor(); + + auto M = *++in0_subtensor.rbegin(); + auto K = *in0_subtensor.rbegin(); + auto N = *in1_subtensor.rbegin(); + + if (ov::snippets::utils::is_full_dim_value(M)) { + M = *++in0_shape.rbegin(); + } else { + const auto& loop_ids = expr->get_loop_ids(); + OPENVINO_ASSERT(!loop_ids.empty(), "Loop by dimension M is missed"); + // TODO [146125]: Loop by M is first one in `loop_ids` + const auto& expanded_loop_info = linear_ir->get_loop_manager()->get_loop_info(loop_ids.front()); + M = expanded_loop_info->get_increment(); + input_pds[0]->set_subtensor_dim(1, M); + output_pds[0]->set_subtensor_dim(1, M); + } + + if (ov::snippets::utils::is_full_dim_value(K)) { + K = *in0_shape.rbegin(); + } else if (ov::snippets::utils::is_dynamic_value(K)) { + OPENVINO_THROW("Dynamic K is not supported"); + } + + if (ov::snippets::utils::is_full_dim_value(N)) { + N = *in1_shape.rbegin(); + } else if (ov::snippets::utils::is_dynamic_value(N)) { + OPENVINO_THROW("Dynamic N is not supported"); + } + const auto LDA = 
DIM_CAST(snippets::utils::get_dim_stride(expr->get_input_port(0))); - const auto& in0_subtensor = get_projected_input_subtensor(input_pds[0]); - const auto K = DIM_CAST(*in0_subtensor.rbegin()); - const auto M = DIM_CAST(*++in0_subtensor.rbegin()); - // Matrix B (second input) - // Non float input 1 => with data repacking + const auto LDC = DIM_CAST(snippets::utils::get_dim_stride(expr->get_output_port(0))); auto LDB = DIM_CAST(snippets::utils::get_dim_stride(expr->get_input_port(1))); const auto& brgemm_node = as_type_ptr(expr->get_node()); @@ -172,10 +207,8 @@ void BrgemmKernelExecutor::update_config(const ov::snippets::lowered::Expression OV_CPU_JIT_EMITTER_ASSERT(!repacking_buffer_shape.empty(), "Repacking buffer shape mustn't be empty"); LDB = DIM_CAST(repacking_buffer_shape.back()); } - const auto N = DIM_CAST(*get_projected_input_subtensor(input_pds[1]).rbegin()); - // Matrix C (output) - const auto LDC = DIM_CAST(snippets::utils::get_dim_stride(expr->get_output_port(0))); - config.update(M, N, K, LDA, LDB, LDC); + + config.update(DIM_CAST(M), DIM_CAST(N), DIM_CAST(K), LDA, LDB, LDC); } void BrgemmKernelExecutor::execute(const BrgemmKernelExecutor* executor, call_args* args) { diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp index b0dd9c465b66de..4dd52e21ca2dfd 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp @@ -24,6 +24,7 @@ struct BrgemmKernelConfig : public snippets::KernelExecutorBase::GenericConfig { return std::unique_ptr( new BrgemmKernelConfig(*this)); } void update(dnnl_dim_t M, dnnl_dim_t N, dnnl_dim_t K, dnnl_dim_t LDA, dnnl_dim_t LDB, dnnl_dim_t LDC); + bool is_empty() const; dnnl_data_type_t get_dt_in0() const { return m_static_params->dt_in0; } dnnl_data_type_t get_dt_in1() const { return 
m_static_params->dt_in1; } @@ -95,7 +96,9 @@ class BrgemmKernelExecutor : public CPUKernelExecutor compile_kernel(const BrgemmKernelConfig& c) const override; - void update_config(const ov::snippets::lowered::ExpressionPtr& expr, BrgemmKernelConfig& config) const override; + void update_config(const ov::snippets::lowered::ExpressionPtr& expr, + const ov::snippets::lowered::LinearIRPtr& linear_ir, + BrgemmKernelConfig& config) const override; }; #define GET_OFF_BRGEMM_ARGS(field) offsetof(BrgemmKernelExecutor::call_args, field) diff --git a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp index 91c95f0a478d3c..70ddbb3d79ee21 100644 --- a/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/tpp/x64/jit_tpp_emitter.cpp @@ -48,7 +48,7 @@ TppEmitter::TppEmitter(dnnl::impl::cpu::x64::jit_generator* h, io_port_descriptors.resize(num_kernel_args); // Note: this is needed mostly for Reduce operations, since they allow the last subternsor dim to be FULL_DIM; auto replace_full_dim = [](size_t dim, size_t replace_dim) { - if (dim == snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM) + if (ov::snippets::utils::is_full_dim_value(dim)) return replace_dim; return dim; }; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp index 3c9bfcc5ea064b..d71faef96923d0 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/brgemm_to_brgemm_cpu.cpp @@ -31,7 +31,7 @@ using namespace snippets::lowered; namespace { std::vector make_subtensor(const ov::Shape& tensor) { - return std::vector(std::min(tensor.size(), size_t(2)), PortDescriptor::ServiceDimensions::FULL_DIM); + return std::vector(std::min(tensor.size(), 
size_t(2)), ov::snippets::utils::get_full_dim_value()); } template void set_full_port_desc(const T& port) { diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp index 044a1f724e78c3..3c8e4caf00c9b0 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.cpp @@ -9,6 +9,7 @@ #include "snippets/lowered/linear_ir.hpp" #include "snippets/lowered/loop_manager.hpp" #include "snippets/lowered/pass/pass.hpp" +#include "snippets/lowered/pass/propagate_subtensors.hpp" #include "snippets/snippets_isa.hpp" #include "snippets/utils/utils.hpp" #include "transformations/snippets/x64/op/brgemm_cpu.hpp" @@ -56,6 +57,15 @@ LinearIR::constExprIt BrgemmBlocking::get_loop_begin_pos(LinearIR& linear_ir, co return loop_begin_it; } +snippets::lowered::SpecificIterationHandlers BrgemmBlocking::get_default_blocking_loop_handlers(size_t work_amount, size_t block_size) { + SpecificIterationHandlers handlers; + const auto tail_size = snippets::utils::is_dynamic_value(work_amount) ? snippets::utils::get_dynamic_value() : work_amount % block_size; + if (tail_size != 0) + handlers.register_pass(tail_size); + handlers.register_pass(); + return handlers; +} + bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::BrgemmBlocking") const auto& loop_manager = linear_ir.get_loop_manager(); @@ -107,16 +117,24 @@ bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, Linea const auto block_size_n = snippets::utils::is_dynamic_value(n) ? brgemm->get_n_block_size() : std::min(brgemm->get_n_block_size(), n); const auto block_size_k = snippets::utils::is_dynamic_value(k) ? 
brgemm->get_k_block_size() : std::min(brgemm->get_k_block_size(), k); - *++in_0_subtensor.rbegin() = block_size_m; - *++out_subtensor.rbegin() = block_size_m; - *in_1_subtensor.rbegin() = block_size_n; - *out_subtensor.rbegin() = block_size_n; - *in_0_subtensor.rbegin() = block_size_k; - *++in_1_subtensor.rbegin() = block_size_k; + const bool m_blocking = block_size_m != m; + const bool n_blocking = block_size_n != n; + const bool k_blocking = block_size_k != k; - brgemm_expr->get_input_port_descriptor(0)->set_subtensor(in_0_subtensor); - brgemm_expr->get_input_port_descriptor(1)->set_subtensor(in_1_subtensor); - brgemm_expr->get_output_port_descriptor(0)->set_subtensor(out_subtensor); + // If block_size is dynamic, it means that Brgemm will process full tensor: + // subtensor[i] = FULL_DIM as by default + if (!snippets::utils::is_dynamic_value(block_size_m) && m_blocking) { + brgemm_expr->get_input_port_descriptor(0)->set_subtensor_dim(1, block_size_m); + brgemm_expr->get_output_port_descriptor(0)->set_subtensor_dim(1, block_size_m); + } + if (!snippets::utils::is_dynamic_value(block_size_n) && n_blocking) { + brgemm_expr->get_input_port_descriptor(1)->set_subtensor_dim(0, block_size_n); + brgemm_expr->get_output_port_descriptor(0)->set_subtensor_dim(0, block_size_n); + } + if (!snippets::utils::is_dynamic_value(block_size_k) && k_blocking) { + brgemm_expr->get_input_port_descriptor(0)->set_subtensor_dim(0, block_size_k); + brgemm_expr->get_input_port_descriptor(1)->set_subtensor_dim(1, block_size_k); + } const bool need_brgemm_copy_b = brgemm_cpu && with_repacking(brgemm_cpu->get_type()); ov::snippets::lowered::ExpressionPtr copy_b_expr = nullptr; @@ -154,7 +172,9 @@ bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, Linea if (!include_repacking && brgemm_cpu && with_compensations(brgemm_cpu->get_type())) entries.emplace_back(brgemm_expr->get_input_port(2), false); const std::vector exits{LoopPort(brgemm_expr->get_output_port(0), true)}; - 
loop_manager->mark_loop(loop_begin_it, loop_end_it, m, block_size_m, 1, entries, exits); + + const auto id = loop_manager->mark_loop(loop_begin_it, loop_end_it, m, block_size_m, 1, entries, exits, false); + loop_manager->get_loop_info(id)->set_handlers(get_default_blocking_loop_handlers(m, block_size_m)); }; auto mark_n_blocking = [&]() { @@ -165,7 +185,9 @@ bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, Linea LoopPort(brgemm_expr->get_input_port(0), false), LoopPort(need_brgemm_copy_b ? copy_b_expr->get_input_port(0) : brgemm_expr->get_input_port(1), true)}; const std::vector exits{LoopPort(brgemm_expr->get_output_port(0), true)}; - loop_manager->mark_loop(loop_begin_it, loop_end_it, n, block_size_n, 0, entries, exits); + + const auto id = loop_manager->mark_loop(loop_begin_it, loop_end_it, n, block_size_n, 0, entries, exits, false); + loop_manager->get_loop_info(id)->set_handlers(get_default_blocking_loop_handlers(n, block_size_n)); }; auto mark_k_blocking = [&]() { @@ -176,14 +198,14 @@ bool BrgemmBlocking::run(LinearIR& linear_ir, LinearIR::constExprIt begin, Linea LoopPort(brgemm_expr->get_input_port(0), true, 0), LoopPort(need_brgemm_copy_b ? 
copy_b_expr->get_input_port(0) : brgemm_expr->get_input_port(1), true, 1)}; const std::vector exits{LoopPort(brgemm_expr->get_output_port(0), false)}; - const auto id = loop_manager->mark_loop(loop_begin_it, loop_end_it, k, block_size_k, entries, exits); - const auto& loop_info = loop_manager->get_loop_info(id); - loop_info->register_pass_to_handler(0.f); + + auto handlers = get_default_blocking_loop_handlers(k, block_size_k); + handlers.register_pass(0.f); + + const auto id = loop_manager->mark_loop(loop_begin_it, loop_end_it, k, block_size_k, entries, exits, false); + loop_manager->get_loop_info(id)->set_handlers(handlers); }; - const bool k_blocking = block_size_k != k; - const bool n_blocking = block_size_n != n; - const bool m_blocking = block_size_m != m; // It is not necessary to include copyB in loop by M if there are no blocking by KN const bool include_repacking_in_loop = k_blocking || n_blocking; diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.hpp index cdc2d05cffd1e5..4d29267f034fc9 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/brgemm_blocking.hpp @@ -5,6 +5,7 @@ #pragma once #include "snippets/lowered/pass/pass.hpp" +#include "snippets/lowered/specific_loop_iter_handlers.hpp" namespace ov { namespace intel_cpu { @@ -24,6 +25,8 @@ class BrgemmBlocking : public snippets::lowered::pass::RangedPass { snippets::lowered::LinearIR::constExprIt begin, snippets::lowered::LinearIR::constExprIt end) override; + static snippets::lowered::SpecificIterationHandlers get_default_blocking_loop_handlers(size_t work_amount, size_t block_size); + private: static snippets::lowered::LinearIR::constExprIt move_new_memory_buffer(snippets::lowered::LinearIR& linear_ir, const 
snippets::lowered::LinearIR::constExprIt& brgemm_it); diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.cpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.cpp index d5e96b2a7339ba..a8281ad1d02da6 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.cpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.cpp @@ -4,6 +4,7 @@ #include "cpu_iter_handlers.hpp" +#include "snippets/op/loop.hpp" #include "snippets/lowered/loop_manager.hpp" #include "transformations/snippets/x64/op/brgemm_cpu.hpp" @@ -34,6 +35,19 @@ std::shared_ptr SetBrgemmBeta::merge(const st return nullptr; return merged_pass; } + +bool SetEvaluateOnce::run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) { + const auto& loop_end = ov::as_type_ptr(end->get()->get_node()); + OPENVINO_ASSERT(loop_end, "SetEvaluateOnce expected LoopEnd node in iterator `end`."); + const auto& loop_info = linear_ir.get_loop_manager()->get_loop_info(loop_end->get_id()); + loop_info->set_evaluate_once(true); + return true; +} + +std::shared_ptr SetEvaluateOnce::merge(const std::shared_ptr& other) { + return !other || ov::is_type(other) ? 
std::make_shared() : nullptr; +} + } // namespace pass } // namespace intel_cpu } // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.hpp b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.hpp index 5da97e29796f70..24697c2f50f6a6 100644 --- a/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.hpp +++ b/src/plugins/intel_cpu/src/transformations/snippets/x64/pass/lowered/cpu_iter_handlers.hpp @@ -25,7 +25,23 @@ class SetBrgemmBeta : public snippets::lowered::pass::RangedPass { std::shared_ptr merge(const std::shared_ptr& other) override; private: - float m_beta; + float m_beta = 0; +}; + +/** + * @interface SetEvaluateOnce + * @brief The pass set `evaluate once = true` only to ExpandedLoopInfo which is mapped on LoopEnd in the passed iterator `end`. + * The pointer arithmetic should be updated in the separate optimization `OptimizeLoopSingleEvaluation` + * @ingroup snippets + */ +class SetEvaluateOnce : public snippets::lowered::pass::RangedPass { +public: + SetEvaluateOnce() = default; + OPENVINO_RTTI("SetEvaluateOnce", "RangedPass") + bool run(snippets::lowered::LinearIR& linear_ir, + snippets::lowered::LinearIR::constExprIt begin, + snippets::lowered::LinearIR::constExprIt end) override; + std::shared_ptr merge(const std::shared_ptr& other) override; }; } // namespace pass } // namespace intel_cpu diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp index b3c04fb7833db9..da83038f5455f8 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/eltwise_to_eltwise_tpp.cpp @@ -3,6 +3,7 @@ // #include "snippets/itt.hpp" +#include "snippets/utils/utils.hpp" #include 
"eltwise_to_eltwise_tpp.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" @@ -40,14 +41,12 @@ EltwiseToEltwiseTPP::EltwiseToEltwiseTPP() { OPENVINO_ASSERT(tpp_eltwise, "Failed to create TPP node"); const size_t M_block = 32; - const size_t N_block = ov::is_type(node) ? - snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM : - 64; + const size_t N_block = ov::is_type(node) ? ov::snippets::utils::get_full_dim_value() : 64; ov::replace_node_update_name(node, tpp_eltwise); for (size_t i = 0; i < node->get_input_size(); i++) - snippets::lowered::set_port_desc(tpp_eltwise->input(i), {M_block, N_block}); + ov::snippets::lowered::PortDescriptorUtils::set_port_descriptor(tpp_eltwise->input(i), {M_block, N_block}); - snippets::lowered::set_port_desc(tpp_eltwise->output(0), {M_block, N_block}); + ov::snippets::lowered::PortDescriptorUtils::set_port_descriptor(tpp_eltwise->output(0), {M_block, N_block}); return true; }; diff --git a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp index a420ed2cbfea22..4f38eddc2bde0f 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/lowered/set_tpp_leading_dim.cpp @@ -74,7 +74,7 @@ size_t get_leading_dim(ExpressionPort port, const snippets::lowered::LoopManager bool full_dim_substituted = false; for (size_t i = 1; i <= subtensor.size(); i++) { const auto idx = subtensor.size() - i; - if (subtensor[idx] == snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM) { + if (ov::snippets::utils::is_full_dim_value(subtensor[idx])) { // the reason that we don't support FULL_DIM substitution for an arbitrary layout is that // the layout and subtersor can (and usually do) have different ranks full_dim_substituted = true; diff --git 
a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp index 5ea5b135ba595a..0b9f41d47aa0da 100644 --- a/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp +++ b/src/plugins/intel_cpu/src/transformations/tpp/x64/pass/scalar_to_scalar_tpp.cpp @@ -42,9 +42,9 @@ ScalarToScalarTPP::ScalarToScalarTPP() { tpp_scalar->set_friendly_name(node->get_friendly_name()); ov::replace_node_update_name(node, tpp_scalar); const auto& out = tpp_scalar->output(0); - snippets::lowered::set_port_desc(out, {1}); + ov::snippets::lowered::PortDescriptorUtils::set_port_descriptor(out, {1}); for (const auto& in : out.get_target_inputs()) - snippets::lowered::set_port_desc(in, {1}); + ov::snippets::lowered::PortDescriptorUtils::set_port_descriptor(in, {1}); return true; }; diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp index 778bcba7a235a0..1089bdc3faffaa 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp @@ -66,11 +66,39 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, MatMul, std::vector> input_shapes_dynamic{ + // All dimensions are dynamic { {PartialShape{-1, -1, -1, -1}, {{2, 1, 32, 64}, {2, 2, 10, 20}, {2, 2, 100, 80}, - {2, 2, 10, 20}, {2, 1, 32, 64}}}, + {2, 2, 10, 20}, {2, 1, 32, 64}, {2, 3, 64, 55}}}, {PartialShape{-1, -1, -1, -1}, {{1, 3, 64, 128}, {2, 2, 20, 30}, {2, 2, 80, 120}, - {2, 2, 20, 30}, {1, 3, 64, 128}}} + {2, 2, 20, 30}, {1, 3, 64, 128}, {2, 3, 55, 128}}} + }, + // Only M dimension is dynamic + one one loop by M + { + {PartialShape{-1, 2, -1, 64}, {{2, 2, 64, 64}, {2, 2, 64, 64}, {2, 2, 35, 64}, + {2, 2, 120, 64}, {2, 2, 15, 64}, {2, 2, 35, 64}}}, + {PartialShape{-1, 2, 
64, 32}, {{2, 2, 64, 32}, {2, 2, 64, 32}, {1, 2, 64, 32}, + {1, 2, 64, 32}, {2, 2, 64, 32}, {1, 2, 64, 32}}} + }, + // Only M dimension is dynamic + all Loops (by M, N, K) + { + {PartialShape{2, 2, -1, 550}, {{2, 2, 64, 550}, {2, 2, 16, 550}, {2, 2, 35, 550}, + {2, 2, 16, 550}, {2, 2, 70, 550}, {2, 2, 64, 550}}}, + {PartialShape{2, 1, 550, 70}, {{2, 1, 550, 70}, {2, 1, 550, 70}, {2, 1, 550, 70}, + {2, 1, 550, 70}, {2, 1, 550, 70}, {2, 1, 550, 70}}} + }, + // Only K dimension is dynamic + { + {PartialShape{2, 2, 70, -1}, {{2, 2, 70, 128}, {2, 2, 70, 10}, {2, 2, 70, 33}, + {2, 2, 70, 35}, {2, 2, 70, 100}}}, + {PartialShape{2, 2, -1, 70}, {{2, 2, 128, 70}, {2, 2, 10, 70}, {2, 2, 33, 70}, + {2, 2, 35, 70}, {2, 2, 100, 70}}} + }, + // Only N dimension is dynamic + { + {PartialShape{}, {{2, 2, 65, 550}}}, + {PartialShape{2, 2, 550, -1}, {{2, 2, 550, 70}, {2, 2, 550, 12}, {2, 2, 550, 70}, + {2, 2, 550, 12}, {2, 2, 550, 10}}} }, }; diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp index ef0ffcd70e6c39..82cbcdfa2c21f3 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/brgemm_blocking.cpp @@ -7,6 +7,7 @@ #include "lir_test_utils.hpp" #include "openvino/opsets/opset10.hpp" #include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/loop_info.hpp" #include "snippets/snippets_isa.hpp" #include "transformations/snippets/x64/op/brgemm_copy_b.hpp" #include "transformations/snippets/x64/op/brgemm_cpu.hpp" @@ -22,6 +23,7 @@ using namespace ov::snippets; using BRGEMM_TYPE = intel_cpu::brgemm_utils::BRGEMM_TYPE; namespace { + void create_brgemm_loop_infos(const LinearIRPtr& linear_ir, const ExpressionPtr& brgemm_expr, size_t m = 0, size_t m_blk = 0, @@ -31,21 +33,30 @@ void create_brgemm_loop_infos(const 
LinearIRPtr& linear_ir, const bool n_block = k != 0 && k_blk != 0; const bool m_block = m != 0 && m_blk != 0; if (k_block) { - create_and_add_unified_loop_info(linear_ir, k, k_blk, - {LoopPort(brgemm_expr->get_input_port(0)), LoopPort(brgemm_expr->get_input_port(1), true, 1)}, - {LoopPort(brgemm_expr->get_output_port(0), false)}); - const auto& loop_info = linear_ir->get_loop_manager()->get_loop_info(0); + const auto loop_info = + std::make_shared(k, k_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0)), + LoopPort(brgemm_expr->get_input_port(1), true, 1)}, + std::vector{LoopPort(brgemm_expr->get_output_port(0), false)}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(k, k_block)); loop_info->register_pass_to_handler(0.f); + linear_ir->get_loop_manager()->add_loop_info(loop_info); } if (n_block) { - create_and_add_unified_loop_info(linear_ir, n, n_blk, - {LoopPort(brgemm_expr->get_input_port(0), false), LoopPort(brgemm_expr->get_input_port(1))}, - {LoopPort(brgemm_expr->get_output_port(0))}); + linear_ir->get_loop_manager()->add_loop_info( + std::make_shared(n, n_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0), false), + LoopPort(brgemm_expr->get_input_port(1))}, + std::vector{LoopPort(brgemm_expr->get_output_port(0))}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(n, n_block))); } if (m_block) { - create_and_add_unified_loop_info(linear_ir, m, m_blk, - {LoopPort(brgemm_expr->get_input_port(0), true, 1), LoopPort(brgemm_expr->get_input_port(1), false, 1)}, - {LoopPort(brgemm_expr->get_output_port(0), true, 1)}); + linear_ir->get_loop_manager()->add_loop_info( + std::make_shared(m, m_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0), true, 1), + LoopPort(brgemm_expr->get_input_port(1), false, 1)}, + std::vector{LoopPort(brgemm_expr->get_output_port(0), true, 1)}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(m, m_block))); } } @@ -59,22 +70,31 @@ void 
create_brgemm_with_copy_b_loop_infos(const LinearIRPtr& linear_ir, const bool n_block = k != 0 && k_blk != 0; const bool m_block = m != 0 && m_blk != 0; if (k_block) { - create_and_add_unified_loop_info(linear_ir, k, k_blk, - {LoopPort(brgemm_expr->get_input_port(0)), LoopPort(copy_b_expr->get_input_port(0), true, 1)}, - {LoopPort(brgemm_expr->get_output_port(0), false)}); - const auto& loop_info = linear_ir->get_loop_manager()->get_loop_info(0); + const auto loop_info = + std::make_shared(k, k_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0)), + LoopPort(copy_b_expr->get_input_port(0), true, 1)}, + std::vector{LoopPort(brgemm_expr->get_output_port(0), false)}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(k, k_block)); loop_info->register_pass_to_handler(0.f); + linear_ir->get_loop_manager()->add_loop_info(loop_info); } if (n_block) { - create_and_add_unified_loop_info(linear_ir, n, n_blk, - {LoopPort(brgemm_expr->get_input_port(0), false), LoopPort(copy_b_expr->get_input_port(0))}, - {LoopPort(brgemm_expr->get_output_port(0))}); + linear_ir->get_loop_manager()->add_loop_info( + std::make_shared(n, n_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0), false), + LoopPort(copy_b_expr->get_input_port(0))}, + std::vector{LoopPort(brgemm_expr->get_output_port(0))}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(n, n_block))); } if (m_block) { const auto& second_input_port = k_block || n_block ? 
copy_b_expr->get_input_port(0) : brgemm_expr->get_input_port(1); - create_and_add_unified_loop_info(linear_ir, m, m_blk, - {LoopPort(brgemm_expr->get_input_port(0), true, 1), LoopPort(second_input_port, false, 1)}, - {LoopPort(brgemm_expr->get_output_port(0), true, 1)}); + linear_ir->get_loop_manager()->add_loop_info( + std::make_shared(m, m_blk, + std::vector{LoopPort(brgemm_expr->get_input_port(0), true, 1), + LoopPort(second_input_port, false, 1)}, + std::vector{LoopPort(brgemm_expr->get_output_port(0), true, 1)}, + ov::intel_cpu::pass::BrgemmBlocking::get_default_blocking_loop_handlers(m, m_block))); } } } // namespace @@ -148,7 +168,8 @@ TEST_F(BrgemmBlockingTest, BlockingIsNotNeeded) { auto brgemm = linear_ir_ref->push_node(data_a.second, data_b.second, BRGEMM_TYPE::STAND_ALONE, 0, 0, 0, layout, layout, layout, m, k, n); brgemm.second->set_beta(0.f); - init_expr_descriptors(*brgemm.first, {{m, k}, {k, n}, {m, n}}); + const auto full_subtensor = VectorDims(2, ov::snippets::utils::get_full_dim_value()); + init_expr_descriptors(*brgemm.first, std::vector(3, full_subtensor)); auto result = linear_ir_ref->push_node(brgemm.second); } } @@ -201,6 +222,7 @@ TEST_F(BrgemmBlockingTest, WithDataRepackingOnlyByM) { const ov::PartialShape input_shape_b{1, 16, 64, 384}; const auto precision_a = ov::element::u8; const auto precision_b = ov::element::i8; + const auto full = ov::snippets::utils::get_full_dim_value(); { auto data_a = linear_ir->push_node(precision_a, input_shape_a); @@ -226,7 +248,7 @@ TEST_F(BrgemmBlockingTest, WithDataRepackingOnlyByM) { auto brgemm = linear_ir_ref->push_node(data_a.second, copy_b.second, BRGEMM_TYPE::REPACKING_ONLY, 0, 0, 0, VectorDims{}, VectorDims{}, VectorDims{}, m_blk, k, n, 0.f); const auto& brgemm_expr = *brgemm.first; - init_expr_descriptors(brgemm_expr, {{m_blk, k}, {k, n}, {m_blk, n}}); + init_expr_descriptors(brgemm_expr, {{m_blk, full}, {full, full}, {m_blk, full}}); create_brgemm_with_copy_b_loop_infos(linear_ir_ref, 
brgemm_expr, copy_b_expr, 384, m_blk); brgemm_expr->set_loop_ids({0}); auto result = linear_ir_ref->push_node(brgemm.second); diff --git a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/buffer_allocation.cpp b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/buffer_allocation.cpp index c618c9e0d86fb5..2abfde0b3bb431 100644 --- a/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/buffer_allocation.cpp +++ b/src/plugins/intel_cpu/tests/unit/snippets_transformations/x64/lowered/buffer_allocation.cpp @@ -138,8 +138,8 @@ class MHAFP32BufferAllocationTest : public BufferAllocationCPUTest { const size_t k_blk = 16; const size_t n_blk = 64; const auto subtensor_scalar = std::vector{1}; - const auto subtensor_power = std::vector{1, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM}; - const auto subtensor_full = std::vector(2, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM); + const auto subtensor_power = std::vector{1, ov::snippets::utils::get_full_dim_value()}; + const auto subtensor_full = std::vector(2, ov::snippets::utils::get_full_dim_value()); const auto parameter0 = std::make_shared(ov::element::f32, ov::PartialShape({1, 12, 128, 64})); const auto parameter1 = std::make_shared(ov::element::f32, ov::PartialShape({1, 128, 12, 64})); @@ -196,8 +196,8 @@ class MHABF16AMXBufferAllocationTest : public BufferAllocationCPUTest { const size_t k_blk = 16; const size_t n_blk = 64; const auto subtensor_scalar = std::vector{1}; - const auto subtensor_power = std::vector{1, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM}; - const auto subtensor_full = std::vector(2, ov::snippets::lowered::PortDescriptor::ServiceDimensions::FULL_DIM); + const auto subtensor_power = std::vector{1, ov::snippets::utils::get_full_dim_value()}; + const auto subtensor_full = std::vector(2, ov::snippets::utils::get_full_dim_value()); const auto parameter0 = 
std::make_shared(ov::element::bf16, ov::PartialShape({1, 12, 128, 64})); const auto parameter1 = std::make_shared(ov::element::bf16, ov::PartialShape({1, 128, 12, 64})); From 26319c0b7869acd365cebd876e327d5f18db9501 Mon Sep 17 00:00:00 2001 From: Ujjayant Kadian <118752727+ujjayant-kadian@users.noreply.github.com> Date: Fri, 26 Jul 2024 11:38:13 +0100 Subject: [PATCH 34/54] Single Image Test: Maintaining blob file name order in Single-Image-Test dump reference output (#25349) Cherrypicked changes from here PR: pull/11621. ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- .../intel_npu/tools/single-image-test/main.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_npu/tools/single-image-test/main.cpp b/src/plugins/intel_npu/tools/single-image-test/main.cpp index 44cbe246ae2ec3..e29b5025158373 100644 --- a/src/plugins/intel_npu/tools/single-image-test/main.cpp +++ b/src/plugins/intel_npu/tools/single-image-test/main.cpp @@ -2027,7 +2027,9 @@ static int runSingleImageTest() { LayoutMap outputLayouts; // Several metrics may require this // Load the reference data - for (const auto& [tensorName, tensor] : outputTensors) { + for (const auto& out : compiledModel.outputs()) { + const auto& tensorName = out.get_any_name(); + const auto& tensor = outputTensors.at(tensorName); const ov::element::Type& precision = tensor.get_element_type(); const ov::Shape& shape = tensor.get_shape(); @@ -2067,7 +2069,8 @@ static int runSingleImageTest() { outputInd = 0; // Dump the outputs obtained upon prediction - for (const auto& tensorEntry : outputTensors) { + for (const auto& out : compiledModel.outputs()) { + const auto& tensor = outputTensors.at(out.get_any_name()); std::ostringstream ostr; ostr << netFileName << "_kmb_out_" << outputInd << "_case_" << numberOfTestCase << ".blob"; const auto blobFileName = ostr.str(); @@ -2075,7 +2078,7 @@ static int runSingleImageTest() { std::cout << "Dump device output #" << outputInd 
<< "_case_" << numberOfTestCase << " to " << blobFileName << std::endl; - dumpTensor(tensorEntry.second, blobFileName); + dumpTensor(tensor, blobFileName); ++outputInd; } @@ -2174,13 +2177,14 @@ static int runSingleImageTest() { } } else { size_t outputInd = 0; - for (const auto& tensorEntry : outputTensors) { + for (const auto& out : compiledModel.outputs()) { + const auto& tensor = outputTensors.at(out.get_any_name()); std::ostringstream ostr; ostr << netFileName << "_ref_out_" << outputInd << "_case_" << numberOfTestCase << ".blob"; const auto blobFileName = ostr.str(); std::cout << "Dump reference output #" << outputInd << " to " << blobFileName << std::endl; - dumpTensor(tensorEntry.second, blobFileName); + dumpTensor(tensor, blobFileName); ++outputInd; } From d727c97d43db29fdc8eff4ce70f465662836fdd1 Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Fri, 26 Jul 2024 15:07:54 +0300 Subject: [PATCH 35/54] [NPU] Add documentation for the NPU remote tensor feature (#25689) ### Details: - *Add documentation for the NPU remote tensor feature* - *...* ### Tickets: - *EISW-131918* --------- Co-authored-by: Sebastian Golebiewski --- .../snippets/npu_remote_objects_creation.cpp | 67 +++++++++ .../npu-device.rst | 7 + .../remote-tensor-api-npu-plugin.rst | 137 ++++++++++++++++++ .../intel_npu/level_zero/level_zero.hpp | 2 +- .../runtime/intel_npu/remote_properties.hpp | 72 ++++++--- 5 files changed, 266 insertions(+), 19 deletions(-) create mode 100644 docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp create mode 100644 docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device/remote-tensor-api-npu-plugin.rst diff --git a/docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp b/docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp new file mode 100644 index 00000000000000..75eb50839ca117 --- /dev/null +++ b/docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp @@ -0,0 +1,67 @@ +// 
Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +int main() { + ov::Core core; + auto model = core.read_model("model.xml"); + auto input = model->get_parameters().at(0); + + auto compiled_model = core.compile_model(model, "NPU"); + auto npu_context = compiled_model.get_context().as(); + + auto in_element_type = input->get_element_type(); + auto in_shape = input->get_shape(); + + { + //! [default_context_from_core] + auto npu_context = core.get_default_context("NPU").as(); + // Extract raw level zero context handle from RemoteContext + void* context_handle = npu_context.get(); + //! [default_context_from_core] + } + + { + //! [default_context_from_model] + auto npu_context = compiled_model.get_context().as(); + // Extract raw level zero context handle from RemoteContext + void* context_handle = npu_context.get(); + //! [default_context_from_model] + } + + { + //! [wrap_nt_handle] + void* shared_buffer = nullptr; // create the NT handle + auto remote_tensor = npu_context.create_tensor(in_element_type, in_shape, shared_buffer); + //! [wrap_nt_handle] + } + + { + //! [wrap_dmabuf_fd] + int32_t fd_heap; // create the DMA-BUF System Heap file descriptor + auto remote_tensor = npu_context.create_tensor(in_element_type, in_shape, fd_heap); + //! [wrap_dmabuf_fd] + } + + { + //! [allocate_remote_level_zero_host] + auto remote_tensor = npu_context.create_l0_host_tensor(in_element_type, in_shape); + // Extract raw level zero pointer from remote tensor + void* level_zero_ptr = remote_tensor.get(); + //! [allocate_remote_level_zero_host] + } + + { + //! [allocate_level_zero_host] + auto tensor = npu_context.create_host_tensor(in_element_type, in_shape); + // Extract raw level zero pointer from remote tensor + void* level_zero_ptr = tensor.data(); + //! 
[allocate_level_zero_host] + } + + return 0; +} diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst index f701774d19e42e..7ac982e37f6716 100644 --- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst @@ -6,6 +6,13 @@ NPU Device a low-power processing device dedicated to running AI inference. +.. toctree:: + :maxdepth: 1 + :hidden: + + npu-device/remote-tensor-api-npu-plugin + + The Neural Processing Unit is a low-power hardware solution, introduced with the Intel® Core™ Ultra generation of CPUs (formerly known as Meteor Lake). It enables you to offload certain neural network computation tasks from other devices, diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device/remote-tensor-api-npu-plugin.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device/remote-tensor-api-npu-plugin.rst new file mode 100644 index 00000000000000..2e41f4f5616ff2 --- /dev/null +++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device/remote-tensor-api-npu-plugin.rst @@ -0,0 +1,137 @@ +.. {#openvino_docs_OV_UG_supported_plugins_NPU_RemoteTensor_API} + +Remote Tensor API of NPU Plugin +=============================== + + +.. meta:: + :description: The Remote Tensor API of NPU plugin in OpenVINO™ supports + interoperability with existing native APIs, such as + NT handle, or DMA-BUF System Heap. 
+ + +The NPU plugin implementation of the ``ov::RemoteContext`` and ``ov::RemoteTensor`` interfaces assists NPU +pipeline developers who need memory sharing with existing native APIs (for example, OpenCL, Vulkan, DirectX 12) +by exporting an NT handle on Windows, or DMA-BUF System Heap on Linux and passing that pointer as the +``shared_buffer`` member to the ``remote_tensor(..., shared_buffer)`` create function. These interfaces allow you +to avoid any memory copy overhead when plugging OpenVINO™ inference into an existing NPU pipeline. + +Scenario supported by the Remote Tensor API: + +* The NPU plugin context and memory objects can be constructed from low-level device, display, or memory handles and used to create the OpenVINO™ ``ov::CompiledModel`` or ``ov::Tensor`` objects. + +Class and function declarations for the API are defined in the following file: ``src/inference/include/openvino/runtime/intel_npu/level_zero/level_zero.hpp`` + +The most common way to enable the interaction of your application with the Remote Tensor API is to use user-side utility classes +and functions that consume or produce native handles directly. + +Context Sharing Between Application and NPU Plugin +################################################## + +NPU plugin classes that implement the ``ov::RemoteContext`` interface are responsible for context sharing. +Obtaining a context object is the first step in sharing pipeline objects. +The context object of the NPU plugin directly wraps Level Zero context, setting a scope for sharing the +``ov::RemoteTensor`` objects. The ``ov::RemoteContext`` object is retrieved from the NPU plugin. + +Once you have obtained the context, you can use it to create the ``ov::RemoteTensor`` objects. + +Getting RemoteContext from the Plugin ++++++++++++++++++++++++++++++++++++++ + +To request the current default context of the plugin, use one of the following methods: + +.. tab-set:: + + .. tab-item:: Get context from Core + :sync: get-context-core + + ..
doxygensnippet:: docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp + :language: cpp + :fragment: [default_context_from_core] + + .. tab-item:: Get context from compiled model + :sync: get-context-compiled-model + + .. doxygensnippet:: docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp + :language: cpp + :fragment: [default_context_from_model] + +Memory Sharing Between Application and NPU Plugin +################################################# + +The classes that implement the ``ov::RemoteTensor`` interface are the wrappers for native API +memory handles, which can be obtained from them at any time. + +To create a shared tensor from a native memory handle, use the dedicated ``create_tensor``, ``create_l0_host_tensor``, or ``create_host_tensor`` +methods of the ``ov::RemoteContext`` sub-classes. +``ov::intel_npu::level_zero::LevelZero`` has multiple overloaded methods that enable wrapping pre-allocated native handles with the ``ov::RemoteTensor`` +object or requesting the plugin to allocate specific device memory. +For more details, see the code snippets below: + + +.. tab-set:: + + .. tab-item:: Wrap native handle + :sync: wrap-native-handles + + .. tab-set:: + + .. tab-item:: NT handle + :sync: nthandle + + .. doxygensnippet:: docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp + :language: cpp + :fragment: [wrap_nt_handle] + + .. tab-item:: DMA-BUF System Heap file descriptor + :sync: dma-buf + + .. doxygensnippet:: docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp + :language: cpp + :fragment: [wrap_dmabuf_fd] + + .. tab-item:: Allocate device memory + :sync: allocate-device-memory + + .. tab-set:: + + .. tab-item:: Remote Tensor - Level Zero host memory + :sync: remote-level-zero-host-memory + + .. doxygensnippet:: docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp + :language: cpp + :fragment: [allocate_remote_level_zero_host] + + ..
tab-item:: Tensor - Level Zero host memory + :sync: level-zero-host-memory + + .. doxygensnippet:: docs/articles_en/assets/snippets/npu_remote_objects_creation.cpp + :language: cpp + :fragment: [allocate_level_zero_host] + + +Limitations ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +* Allocation of the NT handle or DMA-BUF System Heap file descriptor is done manually. + +Low-Level Methods for RemoteContext and RemoteTensor Creation +############################################################# + +The high-level wrappers mentioned above bring a direct dependency on native APIs to your program. +If you want to avoid the dependency, you still can directly use the ``ov::Core::create_context()``, +``ov::RemoteContext::create_tensor()``, and ``ov::RemoteContext::get_params()`` methods. +On this level, native handles are re-interpreted as void pointers and all arguments are passed +using ``ov::AnyMap`` containers that are filled with the ``std::string, ov::Any`` pairs. +Two types of map entries are possible: a descriptor and a container. +The descriptor sets the expected structure and possible parameter values of the map. + +For possible low-level properties and their description, refer to the header file: +`remote_properties.hpp `__. + +Additional Resources +#################### + +* `ov::Core `__ +* `ov::RemoteTensor `__ + diff --git a/src/inference/include/openvino/runtime/intel_npu/level_zero/level_zero.hpp b/src/inference/include/openvino/runtime/intel_npu/level_zero/level_zero.hpp index 7bb1d4bf1d3905..3709891a5e3000 100644 --- a/src/inference/include/openvino/runtime/intel_npu/level_zero/level_zero.hpp +++ b/src/inference/include/openvino/runtime/intel_npu/level_zero/level_zero.hpp @@ -34,7 +34,7 @@ namespace level_zero { * @brief This class represents an abstraction for NPU plugin remote tensor * which can be shared with user-supplied LevelZero buffer. 
* The plugin object derived from this class can be obtained with ZeroContext::create_tensor() call. - * @note User can obtain LevelZero buffer handle from this class. + * @note User can obtain Level Zero buffer handle from this class. * @ingroup ov_runtime_level_zero_npu_cpp_api */ class ZeroBufferTensor : public RemoteTensor { diff --git a/src/inference/include/openvino/runtime/intel_npu/remote_properties.hpp b/src/inference/include/openvino/runtime/intel_npu/remote_properties.hpp index f058b5ece45971..0d29d44b6ff170 100644 --- a/src/inference/include/openvino/runtime/intel_npu/remote_properties.hpp +++ b/src/inference/include/openvino/runtime/intel_npu/remote_properties.hpp @@ -19,24 +19,16 @@ using npu_handle_param = void*; /** * @brief Enum to define the type of the shared memory buffer + * @ingroup ov_runtime_level_zero_npu_cpp_api */ enum class MemType { - L0_INTERNAL_BUF = 0, //!< Internal L0 buffer type allocated by plugin + L0_INTERNAL_BUF = 0, //!< Internal Level Zero buffer type allocated by plugin SHARED_BUF = 1, //!< Shared buffer }; -/** - * @brief Enum to define the type of the tensor - */ -enum class TensorType { - INPUT = 0, //!< Tensor is only used as input - OUTPUT = 1, //!< Tensor is only used as output - BINDED = 2 //!< Tensor could be used as input and output -}; - /** @cond INTERNAL */ -inline std::ostream& operator<<(std::ostream& os, const MemType& share_mem_type) { - switch (share_mem_type) { +inline std::ostream& operator<<(std::ostream& os, const MemType& mem_type) { + switch (mem_type) { case MemType::L0_INTERNAL_BUF: return os << "L0_INTERNAL_BUF"; case MemType::SHARED_BUF: @@ -46,13 +38,13 @@ inline std::ostream& operator<<(std::ostream& os, const MemType& share_mem_type) } } -inline std::istream& operator>>(std::istream& is, MemType& share_mem_type) { +inline std::istream& operator>>(std::istream& is, MemType& mem_type) { std::string str; is >> str; if (str == "L0_INTERNAL_BUF") { - share_mem_type = MemType::L0_INTERNAL_BUF; + 
mem_type = MemType::L0_INTERNAL_BUF; } else if (str == "SHARED_BUF") { - share_mem_type = MemType::SHARED_BUF; + mem_type = MemType::SHARED_BUF; } else { OPENVINO_THROW("Unsupported memory type: ", str); } @@ -63,24 +55,68 @@ inline std::istream& operator>>(std::istream& is, MemType& share_mem_type) { /** * @brief This key identifies type of internal shared memory * in a shared memory tensor parameter map. + * @ingroup ov_runtime_level_zero_npu_cpp_api */ static constexpr Property mem_type{"MEM_TYPE"}; /** * @brief This key identifies memory handle * in a shared memory tensor parameter map + * @ingroup ov_runtime_level_zero_npu_cpp_api */ static constexpr Property mem_handle{"MEM_HANDLE"}; /** * @brief This key identifies LevelZero context handle - * in a shared context or shared memory tensor parameter map + * in a shared context parameter map + * @ingroup ov_runtime_level_zero_npu_cpp_api */ static constexpr Property l0_context{"L0_CONTEXT"}; /** - * @brief This key identifies type of the tensor - * in a shared memory tensor parameter map. 
+ * @brief Enum to define the type of the tensor + * @ingroup ov_runtime_level_zero_npu_cpp_api + */ +enum class TensorType { + INPUT = 0, //!< Tensor is only used as input + OUTPUT = 1, //!< Tensor is only used as output + BINDED = 2 //!< Tensor could be used as input and output +}; + +/** @cond INTERNAL */ +inline std::ostream& operator<<(std::ostream& os, const TensorType& tensor_type) { + switch (tensor_type) { + case TensorType::INPUT: + return os << "INPUT"; + case TensorType::OUTPUT: + return os << "OUTPUT"; + case TensorType::BINDED: + return os << "BINDED"; + default: + OPENVINO_THROW("Unsupported tensor type"); + } +} + +inline std::istream& operator>>(std::istream& is, TensorType& tensor_type) { + std::string str; + is >> str; + if (str == "INPUT") { + tensor_type = TensorType::INPUT; + } else if (str == "OUTPUT") { + tensor_type = TensorType::OUTPUT; + } else if (str == "BINDED") { + tensor_type = TensorType::BINDED; + } else { + OPENVINO_THROW("Unsupported tensor type: ", str); + } + return is; +} +/** @endcond */ + +/** + * @brief This key sets the type of the internal Level Zero buffer + * allocated by the plugin in a shared memory tensor parameter map. 
+ * @ingroup ov_runtime_level_zero_npu_cpp_api */ static constexpr Property tensor_type{"TENSOR_TYPE"}; From 88cefef73a431b586473f56eecd30585844c1f90 Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Fri, 26 Jul 2024 13:17:26 +0100 Subject: [PATCH 36/54] [CI] [GHA] Add `502: Bad Gateway` to errors to look for in workflow rerunner (#25599) ### Tickets: - *146254* --- .github/scripts/workflow_rerun/errors_to_look_for.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/scripts/workflow_rerun/errors_to_look_for.json b/.github/scripts/workflow_rerun/errors_to_look_for.json index 51e8106944ca9c..3d59bb9a1e569f 100644 --- a/.github/scripts/workflow_rerun/errors_to_look_for.json +++ b/.github/scripts/workflow_rerun/errors_to_look_for.json @@ -58,5 +58,9 @@ { "error_text": "status_string: \"Timeout was reached\"", "ticket": 142653 + }, + { + "error_text": "ERROR 502: Bad Gateway", + "ticket": 146254 } ] \ No newline at end of file From b0b36a6c28b4046f817025f427b24b6f30848951 Mon Sep 17 00:00:00 2001 From: Artyom Anokhov Date: Fri, 26 Jul 2024 14:20:40 +0200 Subject: [PATCH 37/54] NPU CMakeLists.txt: Build NPU internal tools only if tests are enabled (#25749) ### Details: - Build NPU internal tools only if tests are enabled --- src/plugins/intel_npu/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/CMakeLists.txt b/src/plugins/intel_npu/CMakeLists.txt index ce06b2542ac31c..6267fcfb288d38 100644 --- a/src/plugins/intel_npu/CMakeLists.txt +++ b/src/plugins/intel_npu/CMakeLists.txt @@ -30,8 +30,8 @@ add_subdirectory(src) if(ENABLE_TESTS) add_subdirectory(tests) + add_subdirectory(tools) endif() -add_subdirectory(tools) ov_cpack_add_component(${NPU_INTERNAL_COMPONENT} HIDDEN) From 4a5bd43723eeaff934d58e188c23629b65189778 Mon Sep 17 00:00:00 2001 From: Prakash Date: Fri, 26 Jul 2024 19:56:10 +0530 Subject: [PATCH 38/54] [OV JS] Add optical-character-recognition sample notebook (#25191) ### Details: - 
*added code in node notebook* - *updated the samples list in readme* ### Workarounds which still needs to be worked upon - couldn't find the js equivalent method for `cv2.getTextSize()` (opencv python method) which is used for getting the height and width of the crop text the opencv-wasm package does not have this api , in the current implementation I have written a custom function `getTextSize` which uses canvas to get the width and height of the text - `text-recognition-resnet-fc` model IR was larger in size it was around 355MB hence I did not included it in my PR Please provide Feedback @Aliczi @vishniakov-nikolai With Regards Prakash --------- Co-authored-by: Vishniakov Nikolai --- samples/js/node/README.md | 1 + .../optical-character-recognition.nnb | 314 ++++++++++++++++++ 2 files changed, 315 insertions(+) create mode 100644 samples/js/node/notebooks/optical-character-recognition.nnb diff --git a/samples/js/node/README.md b/samples/js/node/README.md index 59fb381f460abc..7375219ccf2c0a 100644 --- a/samples/js/node/README.md +++ b/samples/js/node/README.md @@ -26,6 +26,7 @@ VSCode extension to run these notebook samples - hello-detection.nnb - question-answering.nnb - pose-estimation.nnb + - optical-character-recognition.nnb ## Live Sample diff --git a/samples/js/node/notebooks/optical-character-recognition.nnb b/samples/js/node/notebooks/optical-character-recognition.nnb new file mode 100644 index 00000000000000..b7e8e109ff857f --- /dev/null +++ b/samples/js/node/notebooks/optical-character-recognition.nnb @@ -0,0 +1,314 @@ +{ + "cells": [ + { + "language": "markdown", + "source": [ + "# Optical Character Recognition with OpenVINO™" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "#### This tutorial demonstrates how to perform optical character recognition (OCR) with OpenVINO models" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "# Imports" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": 
[ + "const fs = require(\"node:fs\");\nconst path = require(\"node:path\");\nconst { createCanvas, Image, ImageData } = require(\"canvas\");\nconst { addon: ov } = require(\"openvino-node\");\nconst { display } = require(\"node-kernel\");\nconst { cv } = require(\"opencv-wasm\");\nconst {\n transform,\n getImageData,\n displayArrayAsImage,\n downloadFile,\n arrayToImageData,\n getImageBuffer,\n argMax,\n setShape,\n} = require(\"../helpers.js\");\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "# Download Models" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "// Initializing Images, Models\nconst baseArtifactsDir = '../../assets/models';\nconst detBaseURL = 'https://storage.openvinotoolkit.org/repositories/open_model_zoo/2022.3/models_bin/1/horizontal-text-detection-0001/FP32/';\nconst recBaseURL = 'https://storage.openvinotoolkit.org/repositories/open_model_zoo/public/text-recognition-resnet-fc/';\nconst detectionModelName = 'horizontal-text-detection-0001';\nconst textRecModelName = 'text-recognition-resnet-fc';\n\nconst detModelXMLName = `${detectionModelName}.xml`;\nconst detModelBINName = `${detectionModelName}.bin`;\n\nconst detModelXMLPath = `${baseArtifactsDir}/${detModelXMLName}`;\nconst detModelBINPath = `${baseArtifactsDir}/${detModelBINName}`;\n\nconst recModelXMLName = `${textRecModelName}.xml`;\nconst recModelBINName = `${textRecModelName}.bin`;\n\nconst recModelXMLPath = `${baseArtifactsDir}/${textRecModelName}.xml`;\nconst recModelBINPath = `${baseArtifactsDir}/${textRecModelName}.bin`;\n\nawait downloadFile(\n detBaseURL + detModelXMLName,\n detModelXMLName,\n baseArtifactsDir\n);\n\nawait downloadFile(\n detBaseURL + detModelBINName,\n detModelBINName,\n baseArtifactsDir\n);\n\nawait downloadFile(\n recBaseURL + recModelXMLName,\n recModelXMLName,\n baseArtifactsDir\n);\n\nawait downloadFile(\n recBaseURL + recModelBINName,\n recModelBINName,\n baseArtifactsDir\n);\n" + ], + "outputs": [ + { + 
"items": [ + { + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/horizontal-text-detection-0001.xml'", + "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/horizontal-text-detection-0001.bin'", + "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/text-recognition-resnet-fc.xml'", + "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/models/text-recognition-resnet-fc.bin'", + "" + ] + } + ] + } + ] + }, + { + "language": "markdown", + "source": [ + "# Dowload Image" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "const baseImagesDir = '../../assets/images';\nconst imgUrl = 'https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/intel_rnb.jpg';\nconst imgName = 'intel_rnb.jpg';\nawait downloadFile(imgUrl, imgName, baseImagesDir);\n" + ], + "outputs": [ + { + "items": [ + { + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "File successfully stored at '/home/prakash/OpenSource-Repos/ocr-node-sample/assets/images/intel_rnb.jpg'", + "" + ] + } + ] + } + ] + }, + { + "language": "markdown", + "source": [ + "# Load a Detection Model" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "// Initialize OpenVINO core and load the detection model\nconst core = new ov.Core();\nconst detModel = await core.readModel(detModelXMLPath);\nconst detCompiledModel = await core.compileModel(detModel, 'AUTO');\nconst detInputLayer = detCompiledModel.input(0);\nconst detOutputLayer = detCompiledModel.output('boxes');\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "# Prepare Image for Inference" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "const imageData = await getImageData(`${baseImagesDir}/intel_rnb.jpg`);\nconst 
inputImageMat = cv.matFromImageData(imageData);\nconst displayImageMat = inputImageMat.clone();\n\n// Resize the image to meet network input size\nconst [B, C, H, W] = detInputLayer.shape;\nconst resizedImage = new cv.Mat();\ncv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_RGBA2RGB);\ncv.cvtColor(inputImageMat, inputImageMat, cv.COLOR_BGR2RGB);\ncv.resize(inputImageMat, resizedImage, new cv.Size(W, H));\n\n// Prepare input tensor\nconst inputImage = transform(resizedImage.data,\n { width: W, height: H },\n [0, 1, 2]);\nconst tensorData = new Float32Array(inputImage);\nconst tensor = new ov.Tensor(\n ov.element.f32,\n detInputLayer.shape,\n tensorData\n);\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "## Define Post-Processing Functions" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "// Function to extract bounding boxes from the model output\nfunction extractBoundingBoxes(output) {\n console.log(`Output shape: ${output.getData()}`);\n const { data: boxes } = output;\n const foldingCoefficient = 5;\n const numberOfBoxes = boxes.length / foldingCoefficient;\n\n return setShape(boxes, [numberOfBoxes, foldingCoefficient]);\n}\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "# Do Inference" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "// Create infer request\nconst detInferRequest = detCompiledModel.createInferRequest();\n\nconst detResult = await detInferRequest.inferAsync([tensor]);\nconst boundingBoxesArray = extractBoundingBoxes(detResult[detOutputLayer]);\n\n// Show original image\ndisplayArrayAsImage(\n displayImageMat.data,\n displayImageMat.cols,\n displayImageMat.rows,\n display\n);\n" + ], + "outputs": [ + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + } + ] + }, + { + "language": "markdown", + "source": [ + "# Load Text Recognition Model" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "// Loading 
the text recognition model\nconst recModel = await core.readModel(recModelXMLPath);\nconst recModelCompiled = await core.compileModel(recModel, 'AUTO');\nconst recInputLayer = recModelCompiled.input(0);\nconst recOutputLayer = recModelCompiled.output(0);\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "# Define Post-Processing Functions" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "// Function to calculate the ratios for the image\nfunction calculateRatios(originalImage, resizedImage) {\n const realY = originalImage.rows;\n const realX = originalImage.cols;\n const resizedY = resizedImage.rows;\n const resizedX = resizedImage.cols;\n const ratioX = realX / resizedX;\n const ratioY = realY / resizedY;\n\n return { ratioX, ratioY };\n}\n\n// Function to convert the image to grayscale\nfunction convertToGrayscale(originalImage) {\n const grayscaleImage = new cv.Mat();\n cv.cvtColor(originalImage, grayscaleImage, cv.COLOR_BGR2GRAY);\n\n return grayscaleImage;\n}\n\n\n// Function to adjust bounding box coordinates by a given ratio\nfunction multiplyByRatio(ratioX, ratioY, box) {\n const scaleShape = (shape, idx) => idx % 2\n ? 
Math.max(shape * ratioY, 10)\n : shape * ratioX;\n\n return box.map(scaleShape);\n}\n\n\n// Function to resize and convert a crop to the recognition model input format\nfunction resizeAndConvertCropToModelInput(crop, netShape) {\n const [netWidth, netHeight] = netShape;\n\n // Resize the crop to the network's input shape\n const tempImg = new cv.Mat();\n cv.resize(crop, tempImg, new cv.Size(netWidth, netHeight));\n\n // Create the reshaped buffer\n const reshapedBuffer = new Uint8Array(netHeight * netWidth);\n let index = 0;\n\n for (let i = 0; i < netHeight; i++) {\n for (let j = 0; j < netWidth; j++) {\n reshapedBuffer[index++] = tempImg.ucharPtr(i, j)[0];\n }\n }\n\n // Clean up\n tempImg.delete();\n\n return reshapedBuffer;\n}\n\n// Function to extract recognition results from the model output\nfunction extractRecognitionResults(output) {\n const outputData = output.getData();\n const outputShape = output.getShape();\n const [batchSize, height, width] = outputShape;\n\n return setShape(outputData, [height, width]);\n}\n\n// Function to parse annotations from the recognition results\nfunction parseAnnotations(recognitionResults) {\n const letters = \"~0123456789abcdefghijklmnopqrstuvwxyz\";\n const annotation = [];\n\n for (const row of recognitionResults) {\n const letterIndex = argMax(row);\n const parsedLetter = letters[letterIndex];\n\n // Stop if end character is encountered\n if (parsedLetter === letters[0]) break;\n annotation.push(parsedLetter);\n }\n\n return annotation.join('');\n}\n\n// Function to crop the image based on the bounding box coordinates\nfunction cropImage(originalImage, xMin, yMin, xMax, yMax) {\n xMin = Math.max(0, xMin);\n yMin = Math.max(0, yMin);\n xMax = Math.min(originalImage.cols, xMax);\n yMax = Math.min(originalImage.rows, yMax);\n if (xMin >= xMax || yMin >= yMax) {\n throw new Error('Invalid crop coordinates');\n }\n const roi = originalImage.roi(\n new cv.Rect(xMin, yMin, xMax - xMin, yMax - yMin)\n );\n const cropped = new 
cv.Mat();\n roi.copyTo(cropped);\n roi.delete();\n\n return cropped;\n}\n\n// Function to log the bounding boxes with annotations\nfunction printSortedAnnotations(boxesWithAnnotations) {\n /* Sort the boxes with annotations based\n on their position in the input image */\n const sortedAnnotations = boxesWithAnnotations\n .sort((a, b) => {\n const [aXMin, aYMin] = a.box;\n const [bXMin, bYMin] = b.box;\n\n return (aYMin - bYMin) || (aXMin - bXMin);\n })\n .map(item => item.annotation);\n\n console.log('Sorted Annotations:', sortedAnnotations);\n}\n\n// Get Text size\nfunction getTextSize(text, fontFace, fontScale) {\n const canvas = createCanvas(200, 200);\n const ctx = canvas.getContext('2d');\n const adjustedFontScale = fontScale * 35;\n ctx.font = `${adjustedFontScale}px ${fontFace}`;\n const metrics = ctx.measureText(text);\n const width = metrics.width;\n const height =\n metrics.actualBoundingBoxAscent +\n metrics.actualBoundingBoxDescent;\n\n return { width, height };\n}\n\n/* The convertResultToImage function visualizes object detection\n results on an image by drawing bounding boxes around detected\n objects and optionally adding labels to them. 
*/\nfunction convertResultToImage(\n bgrImage,\n resizedImage,\n boxesWithAnnotations,\n options,\n) {\n const defaultOptions = { threshold: 0.3, confLabels: true };\n const { threshold, confLabels } = Object.assign(defaultOptions, options);\n\n const colors = {\n red: [255, 0, 0, 255],\n green: [0, 255, 0, 255],\n white: [255, 255, 255, 255]\n };\n const [realY, realX] = [bgrImage.rows, bgrImage.cols];\n const [resizedY, resizedX] = [resizedImage.rows, resizedImage.cols];\n const [ratioX, ratioY] = [realX / resizedX, realY / resizedY];\n\n const rgbImage = new cv.Mat();\n cv.cvtColor(bgrImage, rgbImage, cv.COLOR_BGR2RGB);\n\n boxesWithAnnotations.forEach(({ box, annotation }) => {\n const conf = box[box.length - 1];\n\n if (conf < threshold) return;\n\n const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, box);\n\n cv.rectangle(\n rgbImage,\n new cv.Point(xMin, yMin),\n new cv.Point(xMax, yMax),\n colors.green,\n 3\n );\n\n if (!confLabels) return;\n\n const text = `${annotation}`;\n const fontScale = 0.8;\n const thickness = 1;\n const { width: textW, height: textH } = getTextSize(text, 'Arial', fontScale);\n const imageCopy = rgbImage.clone();\n\n cv.rectangle(\n imageCopy,\n new cv.Point(xMin, yMin - textH - 10),\n new cv.Point(xMin + textW, yMin - 10),\n colors.white,\n cv.FILLED\n );\n cv.addWeighted(imageCopy, 0.4, rgbImage, 0.6, 0, rgbImage);\n cv.putText(\n rgbImage,\n text,\n new cv.Point(xMin, yMin - 10),\n cv.FONT_HERSHEY_SIMPLEX,\n fontScale,\n colors.red,\n thickness,\n cv.LINE_AA\n );\n\n imageCopy.delete();\n\n });\n\n return rgbImage;\n}\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "# Async Inference Helper Function" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "async function inferAsyncProcess(\n tensor,\n recModelCompiled,\n recOutputLayer,\n i,\n annotations,\n) {\n // Create infer request\n const inferRequest = recModelCompiled.createInferRequest();\n\n // Define the 
completion callback function\n function completionCallback(outputTensor, i, annotations) {\n const recognitionResults = extractRecognitionResults(outputTensor);\n const annotation = parseAnnotations(recognitionResults);\n annotations.push(annotation);\n }\n\n // Start inference in asynchronous mode\n try {\n const result = await inferRequest.inferAsync([tensor]);\n completionCallback(result[recOutputLayer], i, annotations);\n }catch (error) {\n console.error('Error during inference:', error);\n }\n}\n" + ], + "outputs": [] + }, + { + "language": "markdown", + "source": [ + "### Do Inference and Show Detected Text Boxes and OCR Results for the Image\n" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "// Process each bounding box and run inference on the recognition model\nconst [batchSize, channels, height, width] = recInputLayer.shape;\n// Calculate ratios\nconst {\n ratioX,\n ratioY,\n} = calculateRatios(inputImageMat, resizedImage);\n\n// Convert image to grayscale\nconst grayscaleImage = convertToGrayscale(inputImageMat);\n\nconst annotations = [];\nconst croppedImages = [];\n\n\nfor (let i = 0; i < boundingBoxesArray.length; i++) {\n const crop = boundingBoxesArray[i];\n const [xMin, yMin, xMax, yMax] = multiplyByRatio(ratioX, ratioY, crop).map(Math.floor);\n const cropRect = new cv.Rect(xMin, yMin, xMax - xMin, yMax - yMin);\n const croppedImage = grayscaleImage.roi(cropRect);\n\n try {\n const preprocessedCrop = resizeAndConvertCropToModelInput(croppedImage, [width, height]);\n const tensorData = new Float32Array(preprocessedCrop);\n const tensor = new ov.Tensor(\n ov.element.f32,\n Int32Array.from(recInputLayer.shape),\n tensorData\n );\n\n await inferAsyncProcess(\n tensor,\n recModelCompiled,\n recOutputLayer,\n i,\n annotations\n );\n\n croppedImages.push(\n cropImage(inputImageMat, xMin, yMin, xMax, yMax)\n );\n } catch (error) {\n console.error('Error during preprocessing:', error);\n }\n\n 
croppedImage.delete();\n}\n\ngrayscaleImage.delete();\n\nconst boxesWithAnnotations = boundingBoxesArray.map((box, index) => ({\n box,\n annotation: annotations[index]\n}));\n\nconst resultImage = convertResultToImage(\n inputImageMat,\n resizedImage,\n boxesWithAnnotations,\n { threshold: 0.3, confLabels: true }\n);\n\ndisplayArrayAsImage(\n resultImage.data,\n resultImage.cols,\n resultImage.rows,\n display\n);\n\ncroppedImages.forEach((croppedImage) => {\n displayArrayAsImage(\n croppedImage.data,\n croppedImage.cols,\n croppedImage.rows,\n display\n );\n});\n" + ], + "outputs": [ + { + "items": [ + { + "mime": "application/vnd.code.notebook.stdout", + "value": [ + "Annotation for box 0: building", + "Cropped Image Size: 159 x 40", + "Annotation for box 1: noyce", + "Original Image Size: 690 x 517", + "Cropping Coordinates: (256, 50) to (377, 88)", + "Cropped Image Size: 121 x 38", + "Cropping Coordinates: (604, 205) to (653, 228)", + "Cropped Image Size: 49 x 23", + "Cropped Image Size: 26 x 32", + "Cropped Image Size: 31 x 23", + "Text: noyce, Width: 74.716796875, Height: 21", + "Text: 2200, Width: 62.2890625, Height: 19", + "Text: robert, Width: 73.14453125, Height: 20", + "" + ] + } + ] + }, + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + }, + { + "items": [ + { + "mime": "image/jpeg", + "value": "" + } + ] + } + ] + }, + { + "language": "markdown", + "source": [ + "### Print Annotations in Plain Text Format" + ], + "outputs": [] + }, + { + "language": "typescript", + "source": [ + "printSortedAnnotations(boxesWithAnnotations);\n" + ], + "outputs": [ + { + "items": [ + { + "mime": 
"application/vnd.code.notebook.stdout", + "value": [ + "Sorted Annotations: [ 'robert', 'n', 'noyce', 'building', '2200', 'center' ]", + "" + ] + } + ] + } + ] + } + ] +} \ No newline at end of file From 3056b53056d6319666f3fc250bebefb0c4b1a91e Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Sat, 27 Jul 2024 01:05:27 +0100 Subject: [PATCH 39/54] [LPT] Quantized LSTMSequence & GRUSequence extended support (#25654) ### Details: - *Low Precision Transformations: Quantized LSTMSequence & GRUSequence extended support* ### Tickets: - Current implementation for: *CVS-146067* - Will be changed in feature request: *CVS-147588* --- .../include/low_precision/broadcast.hpp | 30 +++ .../include/low_precision/recurrent_cell.hpp | 5 +- .../src/broadcast.cpp | 77 +++++++ .../src/layer_transformation.cpp | 1 + .../src/low_precision.cpp | 2 + .../src/markup_precisions.cpp | 5 + .../src/recurrent_cell.cpp | 205 +++++++++++++----- .../tests/broadcast_transformation.cpp | 197 +++++++++++++++++ .../recurrent_cell_transformation.cpp | 4 +- .../recurrent_cell_transformation.cpp | 4 +- .../recurrent_cell_transformation.hpp | 1 + .../recurrent_cell_transformation.cpp | 14 +- .../include/ov_lpt_models/broadcast.hpp | 29 +++ .../include/ov_lpt_models/recurrent_cell.hpp | 8 +- .../ov_lpt_models/src/broadcast.cpp | 62 ++++++ .../ov_lpt_models/src/recurrent_cell.cpp | 40 +++- 16 files changed, 608 insertions(+), 76 deletions(-) create mode 100644 src/common/low_precision_transformations/include/low_precision/broadcast.hpp create mode 100644 src/common/low_precision_transformations/src/broadcast.cpp create mode 100644 src/common/low_precision_transformations/tests/broadcast_transformation.cpp create mode 100644 src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/broadcast.hpp create mode 100644 src/tests/ov_helpers/ov_lpt_models/src/broadcast.cpp diff --git a/src/common/low_precision_transformations/include/low_precision/broadcast.hpp 
b/src/common/low_precision_transformations/include/low_precision/broadcast.hpp new file mode 100644 index 00000000000000..39ba4052535c29 --- /dev/null +++ b/src/common/low_precision_transformations/include/low_precision/broadcast.hpp @@ -0,0 +1,30 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "transparent_base_transformation.hpp" + +namespace ov { +namespace pass { +namespace low_precision { + +/** + * @ingroup ov_transformation_common_api + * @brief BroadcastTransformation propagates dequantization operations through Broadcast operation. + * + * For more details about the transformation, refer to + * [BroadcastTransformation](@ref openvino_docs_OV_UG_lpt_BroadcastTransformation) page + * in the OpenVINO Developer Guide. + */ +class LP_TRANSFORMATIONS_API BroadcastTransformation : public TransparentBaseTransformation { +public: + OPENVINO_RTTI("BroadcastTransformation", "0"); + BroadcastTransformation(const Params& params = Params()); + bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; +}; + +} // namespace low_precision +} // namespace pass +} // namespace ov diff --git a/src/common/low_precision_transformations/include/low_precision/recurrent_cell.hpp b/src/common/low_precision_transformations/include/low_precision/recurrent_cell.hpp index 8a305db307c612..22aaf3281c2b94 100644 --- a/src/common/low_precision_transformations/include/low_precision/recurrent_cell.hpp +++ b/src/common/low_precision_transformations/include/low_precision/recurrent_cell.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2022-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -23,6 +23,9 @@ class LP_TRANSFORMATIONS_API RecurrentCellTransformation : public LayerTransform static std::shared_ptr wrap_fake_quantize(const std::shared_ptr parameter); static std::shared_ptr wrap_quantization(const std::shared_ptr parameter); static 
std::shared_ptr wrap_dequantization(const std::shared_ptr parameter, const bool with_subtract); + +private: + void propagate(TransformationContext& context, const std::shared_ptr node); }; } // namespace low_precision diff --git a/src/common/low_precision_transformations/src/broadcast.cpp b/src/common/low_precision_transformations/src/broadcast.cpp new file mode 100644 index 00000000000000..5e78ca0ef50996 --- /dev/null +++ b/src/common/low_precision_transformations/src/broadcast.cpp @@ -0,0 +1,77 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/broadcast.hpp" + +#include + +#include "openvino/opsets/opset1.hpp" +#include "openvino/opsets/opset3.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "low_precision/network_helper.hpp" + +#include "itt.hpp" + +using namespace ov::pass::low_precision; + +BroadcastTransformation::BroadcastTransformation(const Params& params) : TransparentBaseTransformation(params) { + MATCHER_SCOPE(BroadcastTransformation); + auto broadcast1 = pattern::wrap_type({ + pattern::wrap_type(), + ov::pass::pattern::any_input(), + ov::pass::pattern::any_input() }); + + auto broadcast3 = pattern::wrap_type({ + pattern::wrap_type(), + ov::pass::pattern::any_input(), + ov::pass::pattern::any_input() }); + + const auto matcher = std::make_shared(ov::OutputVector{ broadcast1, broadcast3 }); + + ov::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, matcher_name); + this->register_matcher(m, callback); +} + +bool BroadcastTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const { + if (!LayerTransformation::canBeTransformed(context, layer)) { + return false; + } + + const auto& dequantization = 
NetworkHelper::getDequantization(layer, defaultPrecisions); + if (dequantization.empty()) { + return false; + } + + if (dequantization.isPerTensor()) { + return true; + } + + const auto& inputShape = layer->get_input_partial_shape(0); + if (inputShape.rank().is_dynamic() || inputShape[dequantization.channelDimIndex].is_dynamic()) { + return false; + } + + const auto targetShapeConstant = ov::as_type_ptr(layer->get_input_node_shared_ptr(1)); + const auto& targetShape = targetShapeConstant->cast_vector(); + if (targetShape[dequantization.channelDimIndex] != inputShape[dequantization.channelDimIndex].get_length()) { + return false; + } + + const auto axesMappingConstant = ov::as_type_ptr(layer->get_input_node_shared_ptr(2)); + const auto& axesMapping = axesMappingConstant->cast_vector(); + if (static_cast(axesMapping[dequantization.channelDimIndex]) != dequantization.channelDimIndex) { + return false; + } + + return true; +} diff --git a/src/common/low_precision_transformations/src/layer_transformation.cpp b/src/common/low_precision_transformations/src/layer_transformation.cpp index a4c0133c5813c3..4ec573c0f2a6ea 100644 --- a/src/common/low_precision_transformations/src/layer_transformation.cpp +++ b/src/common/low_precision_transformations/src/layer_transformation.cpp @@ -401,6 +401,7 @@ std::shared_ptr LayerTransformation::moveDequantizationAfter( const FakeQuantizeDequantization& dequantization, const bool updateOutputPrecision, const bool moveSubtract) const { + OPENVINO_ASSERT(!dequantization.empty()); const auto result = ov::pass::low_precision::NetworkHelper::moveDequantizationAfter(operation, dequantization, updateOutputPrecision, diff --git a/src/common/low_precision_transformations/src/low_precision.cpp b/src/common/low_precision_transformations/src/low_precision.cpp index bba12f7e389be8..6435f47d12ffec 100644 --- a/src/common/low_precision_transformations/src/low_precision.cpp +++ b/src/common/low_precision_transformations/src/low_precision.cpp @@ -44,6 
+44,7 @@ #include "low_precision/assign_and_read_value.hpp" #include "low_precision/avg_pool.hpp" #include "low_precision/batch_to_space.hpp" +#include "low_precision/broadcast.hpp" #include "low_precision/clamp.hpp" #include "low_precision/convolution.hpp" #include "low_precision/convolution_backprop_data.hpp" @@ -240,6 +241,7 @@ bool ov::pass::low_precision::LowPrecision::run_on_model(const std::shared_ptr() }, // TODO: there are conditions { name() }, + { name() }, + { name() }, { name() }, { name() }, { name() }, @@ -192,6 +195,8 @@ bool ov::pass::low_precision::MarkupPrecisions::isSupported(const std::shared_pt { name() }, { name() }, { name() }, + { name() }, + { name() }, { name() }, { name() }, // ? diff --git a/src/common/low_precision_transformations/src/recurrent_cell.cpp b/src/common/low_precision_transformations/src/recurrent_cell.cpp index 7fd40cf2071a0f..cec96044502596 100644 --- a/src/common/low_precision_transformations/src/recurrent_cell.cpp +++ b/src/common/low_precision_transformations/src/recurrent_cell.cpp @@ -1,17 +1,19 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2022-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "low_precision/recurrent_cell.hpp" -#include "openvino/pass/pattern/op/wrap_type.hpp" -#include "openvino/opsets/opset1.hpp" - #include + #include "openvino/core/node.hpp" #include "openvino/opsets/opset1.hpp" +#include "openvino/opsets/opset2.hpp" +#include "openvino/opsets/opset3.hpp" #include "openvino/opsets/opset5.hpp" +#include "openvino/opsets/opset12.hpp" #include "openvino/pass/pattern/op/or.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" #include "low_precision/network_helper.hpp" #include "low_precision/rt_info/disable_cleanup_attribute.hpp" @@ -21,50 +23,14 @@ namespace pass { namespace low_precision { RecurrentCellTransformation::RecurrentCellTransformation(const Params& params) : LayerTransformation(params) { - const auto X = ov::pass::pattern::any_input(); - const 
auto H = ov::pass::pattern::any_input(); const auto C = ov::pass::pattern::any_input(); const auto S = ov::pass::pattern::any_input(); - const auto W = ov::pass::pattern::wrap_type(); - const auto R = ov::pass::pattern::wrap_type(); const auto B = ov::pass::pattern::wrap_type(); - const auto H_as_const = ov::pass::pattern::wrap_type(); - - const auto fq_X = wrap_fake_quantize(X); - const auto fq_H = wrap_fake_quantize(H); - const auto fq_W = wrap_fake_quantize(W); - const auto fq_R = wrap_fake_quantize(R); - - const auto dequantization_X = wrap_dequantization(ov::pass::pattern::any_input(), true); - const auto dequantization_H = wrap_dequantization(ov::pass::pattern::any_input(), true); - const auto dequantization_W = wrap_dequantization(ov::pass::pattern::any_input(), true); - const auto dequantization_R = wrap_dequantization(ov::pass::pattern::any_input(), true); - - const auto dequantization_without_subtract_X = wrap_dequantization(ov::pass::pattern::any_input(), false); - const auto dequantization_without_subtract_H = wrap_dequantization(ov::pass::pattern::any_input(), false); - const auto dequantization_without_subtract_W = wrap_dequantization(ov::pass::pattern::any_input(), false); - const auto dequantization_without_subtract_R = wrap_dequantization(ov::pass::pattern::any_input(), false); - - auto X_in = std::make_shared( - OutputVector{ - fq_X, dequantization_X, dequantization_without_subtract_X - }); - - auto H_in = std::make_shared( - OutputVector{ - H_as_const, fq_H, dequantization_H, dequantization_without_subtract_H - }); - - auto W_in = std::make_shared( - OutputVector{ - fq_W, dequantization_W, dequantization_without_subtract_W - }); - - auto R_in = std::make_shared( - OutputVector{ - fq_R, dequantization_R, dequantization_without_subtract_R - }); + auto X_in = ov::pass::pattern::any_input(); + auto H_in = ov::pass::pattern::any_input(); + auto W_in = ov::pass::pattern::any_input(); + auto R_in = ov::pass::pattern::any_input(); const auto lstm_seq = 
ov::pass::pattern::wrap_type( {X_in, H_in, C, S, W_in, R_in, B}); @@ -91,8 +57,134 @@ RecurrentCellTransformation::RecurrentCellTransformation(const Params& params) : this->register_matcher(m, callback); } +namespace { + +std::shared_ptr find_fake_quantize_upper(const std::shared_ptr& parent) { + if (auto fq = as_type_ptr(parent)) { + return fq; + } + + if (!NetworkHelper::isPrecisionPreserved(parent)) { + return nullptr; + } + + return find_fake_quantize_upper(parent->get_input_node_shared_ptr(0)); +} + +template +std::string name() { + return Operation::get_type_info_static().name; +} + +bool isSupportedForPerChannelQuantization(const std::shared_ptr& node) { + static const std::unordered_set supportedForPerChannelQuantization = { + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() } + }; + + return supportedForPerChannelQuantization.find(node->get_type_name()) != supportedForPerChannelQuantization.end(); +} + +std::vector> get_supported_precisions(std::shared_ptr lstm) { + // pair fields: + // 0 - input number, + // 1 - input type, `element::undefined` - any precision + if (is_type(lstm)) { + return std::vector>{ {0, element::u8}, { 1, element::u8 }, { 4, element::undefined }, { 5, element::undefined } }; + } else if (is_type(lstm)) { + return std::vector>{ {0, element::u8}, { 1, element::u8 }, { 3, element::undefined }, { 4, element::undefined } }; + } + + OPENVINO_THROW("unsupported operation type: ", lstm->get_type_name()); +} + +} // namespace + +void RecurrentCellTransformation::propagate(TransformationContext& context, const std::shared_ptr node) { + if (!isSupportedForPerChannelQuantization(node)) { + return; + } + + const auto& normalized_node = NetworkHelper::separateInStandaloneBranch(node, defaultPrecisions); + auto dequantization = 
NetworkHelper::getDequantization(node, defaultPrecisions); + if (dequantization.empty()) { + return; + } + const auto& new_node = moveDequantizationAfter(context, normalized_node, dequantization); + + const auto& new_dequantization = NetworkHelper::getDequantizationBelow(new_node); + if (new_dequantization.empty()) { + return; + } + + for (auto output : new_dequantization.multiply->outputs()) { + for (auto input : output.get_target_inputs()) { + auto child = input.get_node()->shared_from_this(); + propagate(context, child); + } + } +} + bool RecurrentCellTransformation::transform(TransformationContext& context, ov::pass::pattern::Matcher& m) { const auto lstm = m.get_match_root(); + const auto inputs = get_supported_precisions(lstm); + for (const auto& input : inputs) { + const auto& parent = lstm->get_input_node_shared_ptr(input.first); + if (!isSupportedForPerChannelQuantization(parent)) { + continue; + } + + const auto& fq = find_fake_quantize_upper(parent); + if (fq != nullptr) { + const auto& quantizationDetails = QuantizationDetails::getDetails(fq); + if ((quantizationDetails.inputLowValues.size() != 1) || (quantizationDetails.inputHighValues.size() != 1) || + (quantizationDetails.outputLowValues.size() != 1) || (quantizationDetails.outputHighValues.size() != 1)) { + continue; + } + + const auto& precisionsAttribute = getAttributeFromOutput(fq); + const auto& precisions = precisionsAttribute.empty() ? 
+ defaultPrecisions : + precisionsAttribute.as().value(); + const auto& dataPrecision = getDataPrecision(fq, quantizationDetails, precisions); + if (dataPrecision.empty() || ((input.second != element::undefined) && (dataPrecision.precision != input.second))) { + return false; + } + + auto result = NetworkHelper::decomposeFakeQuantize( + fq, + dataPrecision.precision, + dataPrecision.min, + dataPrecision.max, + dataPrecision.hasZeroPoint, + updatePrecisions); + auto multiply = std::get<1>(result); + + for (const auto& output : multiply->outputs()) { + for (const auto& input : output.get_target_inputs()) { + const auto input_node = input.get_node(); + propagate(context, input_node->shared_from_this()); + } + } + } + } + if (!canBeTransformed(context, lstm)) { return false; } @@ -154,18 +246,21 @@ bool RecurrentCellTransformation::transform(TransformationContext& context, ov:: } bool RecurrentCellTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr lstm) const { - std::shared_ptr W, R; - - if (is_type(lstm)) { - W = lstm->get_input_node_shared_ptr(4); - R = lstm->get_input_node_shared_ptr(5); - } else if (is_type(lstm)) { - W = lstm->get_input_node_shared_ptr(3); - R = lstm->get_input_node_shared_ptr(4); - } else { - return false; - } + const auto inputs = get_supported_precisions(lstm); + for (const auto& index : inputs) { + const auto& input = lstm->get_input_node_ptr(index.first); + if (as_type(input) || as_type(input)) { + continue; + } + const auto dequantization = NetworkHelper::getDequantization(lstm, defaultPrecisions, index.first); + if (dequantization.empty()) { + continue; + } + if ((index.second != element::undefined) && (dequantization.data.get_element_type() != index.second)) { + return false; + } + } return true; } diff --git a/src/common/low_precision_transformations/tests/broadcast_transformation.cpp b/src/common/low_precision_transformations/tests/broadcast_transformation.cpp new file mode 100644 index 
00000000000000..7745f38143d440 --- /dev/null +++ b/src/common/low_precision_transformations/tests/broadcast_transformation.cpp @@ -0,0 +1,197 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layer_transformation.hpp" + +#include +#include + +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "low_precision/broadcast.hpp" +#include "ov_lpt_models/broadcast.hpp" +#include "simple_low_precision_transformer.hpp" + +namespace { +using namespace ov::pass; +using namespace ov::builder::subgraph; +using namespace ov::opset1; +using namespace ov; + +class BroadcastTransformationTestValues { +public: + class Pattern { + public: + ov::element::Type precisionBeforeDequantization; + ov::builder::subgraph::DequantizationOperations dequantizationBefore; + ov::builder::subgraph::DequantizationOperations dequantizationAfter; + }; + + TestTransformationParams params; + Shape tagetShape; + Shape axesMapping; + Pattern actual; + Pattern expected; +}; + +typedef std::tuple< + ov::PartialShape, + bool, + BroadcastTransformationTestValues> BroadcastTransformationParams; + +class BroadcastTransformation : public LayerTransformation, public testing::WithParamInterface { +public: + void SetUp() override { + const ov::PartialShape inputShape = std::get<0>(GetParam()); + const bool v1 = std::get<1>(GetParam()); + const BroadcastTransformationTestValues testValues = std::get<2>(GetParam()); + + // batch update support + auto tagetShape = testValues.tagetShape; + tagetShape[0] = inputShape[0].get_length(); + + actualFunction = BroadcastFunction::get( + v1, + inputShape, + testValues.actual.precisionBeforeDequantization, + testValues.actual.dequantizationBefore, + tagetShape, + testValues.axesMapping, + testValues.actual.dequantizationAfter); + + SimpleLowPrecisionTransformer transform; + transform.add(testValues.params); + transform.transform(actualFunction); + + referenceFunction = BroadcastFunction::get( + v1, + inputShape, 
+ testValues.expected.precisionBeforeDequantization, + testValues.expected.dequantizationBefore, + tagetShape, + testValues.axesMapping, + testValues.expected.dequantizationAfter); + } + + static std::string getTestCaseName(testing::TestParamInfo obj) { + const ov::PartialShape inputShape = std::get<0>(obj.param); + const bool v1 = std::get<1>(obj.param); + const BroadcastTransformationTestValues testValues = std::get<2>(obj.param); + + std::ostringstream result; + result << + v1 << "_" << + inputShape << "_" << + testValues.tagetShape << "_" << + testValues.axesMapping << "_" << + testValues.actual.precisionBeforeDequantization << "_" << + testValues.actual.dequantizationBefore << "_" << + testValues.actual.dequantizationAfter << "_" << + testValues.expected.precisionBeforeDequantization << "_" << + testValues.expected.dequantizationBefore << "_" << + testValues.expected.dequantizationAfter; + return result.str(); + } +}; + +TEST_P(BroadcastTransformation, CompareFunctions) { + actualFunction->validate_nodes_and_infer_types(); + + auto res = compare_functions(actualFunction, referenceFunction, true); + ASSERT_TRUE(res.first) << res.second; + + ASSERT_TRUE(LayerTransformation::allNamesAreUnique(actualFunction)) << "Not all names are unique"; +} + +namespace hw_broadcast { +const std::vector inputShapes = { + { 1, 3, 1, 1 }, + { 4, 3, 1, 1 }, +}; + +const std::vector testValues = { + { + LayerTransformation::createParamsU8I8(), + { 1, 3, 9, 9}, + { 0, 1, 2, 3 }, + { + ov::element::u8, + {{ov::element::f32}, {0.1f}, {0.2f}}, + {{}, {}, {}}, + }, + { + ov::element::u8, + {{}, {}, {}}, + {{ov::element::f32}, {0.1f}, {0.2f}} + } + }, + { + LayerTransformation::createParamsU8I8(), + { 1, 3, 9, 9 }, + { 0, 1, 2, 3 }, + { + ov::element::u8, + { + {ov::element::f32}, + {{0.1f, 0.2f, 0.3f}}, + {{0.4f, 0.5f, 0.6f}} + } + }, + { + ov::element::u8, + { {}, {}, {}}, + { + {ov::element::f32}, + {{0.1f, 0.2f, 0.3f}}, + {{0.4f, 0.5f, 0.6f}} + } + } + } +}; + 
+INSTANTIATE_TEST_SUITE_P( + smoke_LPT, + BroadcastTransformation, + ::testing::Combine( + ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn({ true, false }), + ::testing::ValuesIn(testValues)), + BroadcastTransformation::getTestCaseName); +} // hw_broadcast + +namespace chw_broadcast { +const std::vector inputShapes = { + { 1, 1, 1, 1 } +}; + +const std::vector testValues = { + { + LayerTransformation::createParamsU8I8(), + { 1, 9, 9, 9}, + { 0, 1, 2, 3 }, + { + ov::element::u8, + {{ov::element::f32}, {0.1f}, {0.2f}}, + {{}, {}, {}}, + }, + { + ov::element::u8, + {{}, {}, {}}, + {{ov::element::f32}, {0.1f}, {0.2f}} + } + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_LPT, + BroadcastTransformation, + ::testing::Combine( + ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn({ true, false }), + ::testing::ValuesIn(testValues)), + BroadcastTransformation::getTestCaseName); +} // chw_broadcast + +} // namespace diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp index ae5c19559e5a7b..066d81d1f37f36 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2022-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -92,6 +92,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, RecurrentCellTransformation, ::testing::ValuesIn(weights_shapes), ::testing::Values(ov::test::utils::DEVICE_CPU), ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn({ true, false }), ::testing::ValuesIn(params)), RecurrentCellTransformation::getTestCaseName); } // namespace 
testValues1 @@ -171,6 +172,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_LPT, RecurrentCellTransformation, ::testing::ValuesIn(weights_shapes), ::testing::Values(ov::test::utils::DEVICE_CPU), ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn({ true, false }), ::testing::ValuesIn(params)), RecurrentCellTransformation::getTestCaseName); } // namespace testValues2 diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp index afda5292e69c60..85f8d79e7ace31 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/low_precision_transformations/recurrent_cell_transformation.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2022-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -94,6 +94,7 @@ INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_LPT, RecurrentCellTransformation, ::testing::ValuesIn(weights_shapes), ::testing::Values(ov::test::utils::DEVICE_GPU), ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn({ true, false }), ::testing::ValuesIn(params)), RecurrentCellTransformation::getTestCaseName); } // namespace testValues1 @@ -174,6 +175,7 @@ INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_LPT, RecurrentCellTransformation, ::testing::ValuesIn(weights_shapes), ::testing::Values(ov::test::utils::DEVICE_GPU), ::testing::ValuesIn(trasformationParamValues), + ::testing::ValuesIn({ true, false }), ::testing::ValuesIn(params)), RecurrentCellTransformation::getTestCaseName); } // namespace testValues2 diff --git a/src/tests/functional/plugin/shared/include/low_precision_transformations/recurrent_cell_transformation.hpp 
b/src/tests/functional/plugin/shared/include/low_precision_transformations/recurrent_cell_transformation.hpp index d0452c9da1b638..82a8795698bb36 100644 --- a/src/tests/functional/plugin/shared/include/low_precision_transformations/recurrent_cell_transformation.hpp +++ b/src/tests/functional/plugin/shared/include/low_precision_transformations/recurrent_cell_transformation.hpp @@ -42,6 +42,7 @@ typedef std::tuple< std::vector, std::string, ov::pass::low_precision::LayerTransformation::Params, + bool, // use precision transparent operations RecurrentCellTransformationParam >RecurrentCellTransformationParams; diff --git a/src/tests/functional/plugin/shared/src/low_precision_transformations/recurrent_cell_transformation.cpp b/src/tests/functional/plugin/shared/src/low_precision_transformations/recurrent_cell_transformation.cpp index e94663bf2b8596..692a00877c3368 100644 --- a/src/tests/functional/plugin/shared/src/low_precision_transformations/recurrent_cell_transformation.cpp +++ b/src/tests/functional/plugin/shared/src/low_precision_transformations/recurrent_cell_transformation.cpp @@ -21,14 +21,16 @@ std::string RecurrentCellTransformation::getTestCaseName(testing::TestParamInfo< std::string targetDevice; RecurrentCellTransformationParam param; ov::pass::low_precision::LayerTransformation::Params params; - std::tie(netPrecision, activationsShape, weightsShape, targetDevice, params, param) = obj.param; + bool addPrecisionTransparentOperations; + std::tie(netPrecision, activationsShape, weightsShape, targetDevice, params, addPrecisionTransparentOperations, param) = obj.param; std::ostringstream result; result << get_test_case_name_by_params(netPrecision, activationsShape[0], targetDevice, params) << "FQ_X_" << param.fakeQuantize_X << "_" << "DQ_X_" << param.dequantization_X << "_" << "FQ_W_" << param.fakeQuantize_W << "_" << - "DQ_W_" << param.dequantization_W; + "DQ_W_" << param.dequantization_W << "_" << + "PTO" << addPrecisionTransparentOperations; return 
result.str(); } @@ -37,9 +39,10 @@ void RecurrentCellTransformation::SetUp() { std::vector activations_shapes; std::vector weights_shapes; RecurrentCellTransformationParam param; + bool addPrecisionTransparentOperations; ov::pass::low_precision::LayerTransformation::Params params; - std::tie(precision, activations_shapes, weights_shapes, targetDevice, params, param) = this->GetParam(); + std::tie(precision, activations_shapes, weights_shapes, targetDevice, params, addPrecisionTransparentOperations, param) = this->GetParam(); init_input_shapes(activations_shapes); @@ -64,13 +67,14 @@ void RecurrentCellTransformation::SetUp() { param.dequantization_H, param.dequantization_W, param.dequantization_R - }); + }, + addPrecisionTransparentOperations); } void RecurrentCellTransformation::run() { LayerTransformation::run(); - const auto params = std::get<5>(GetParam()); + const auto params = std::get<6>(GetParam()); const auto actualPrecision = get_runtime_precision_by_type(params.layerName); auto expectedPrecision = params.expectedKernelType; if (expectedPrecision == "FP32" && std::get<0>(GetParam()) == ov::element::f16) { diff --git a/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/broadcast.hpp b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/broadcast.hpp new file mode 100644 index 00000000000000..4384fecd089ea6 --- /dev/null +++ b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/broadcast.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include "low_precision/layer_transformation.hpp" +#include "ov_lpt_models/common/dequantization_operations.hpp" + +namespace ov { +namespace builder { +namespace subgraph { + +class BroadcastFunction { +public: + static std::shared_ptr get( + const bool v1, + const ov::PartialShape& inputShape, + const ov::element::Type precisionBeforeDequantization, + const ov::builder::subgraph::DequantizationOperations& 
dequantizationBefore, + const Shape& tagetShape, + const Shape& axesMapping, + const ov::builder::subgraph::DequantizationOperations& dequantizationAfter); +}; + +} // namespace subgraph +} // namespace builder +} // namespace ov diff --git a/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/recurrent_cell.hpp b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/recurrent_cell.hpp index da98410c55d13c..57ffdedc4c0eb6 100644 --- a/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/recurrent_cell.hpp +++ b/src/tests/ov_helpers/ov_lpt_models/include/ov_lpt_models/recurrent_cell.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2022-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -25,7 +25,8 @@ class RecurrentCellFunction { const RNNType type, const std::vector& fqOnDatas, const std::vector& converts, - const std::vector& dequantizations); + const std::vector& dequantizations, + const bool addPrecisionTransparentOperations = false); }; std::shared_ptr makeQuantizationAndDequantization(const std::shared_ptr input, @@ -33,7 +34,8 @@ std::shared_ptr makeQuantizationAndDequantization(const std::shared_ptr +std::shared_ptr make_broadcast(const std::shared_ptr& parent, const Shape& tagetShape, const Shape& axesMapping) { + return std::make_shared( + parent, + std::make_shared(ov::element::i32, Shape{ tagetShape.size() }, tagetShape), + std::make_shared(ov::element::i32, Shape{ axesMapping.size() }, axesMapping)); +} +} // namespace + +std::shared_ptr BroadcastFunction::get( + const bool v1, + const ov::PartialShape& inputShape, + const ov::element::Type precisionBeforeDequantization, + const ov::builder::subgraph::DequantizationOperations& dequantizationBefore, + const Shape& tagetShape, + const Shape& axesMapping, + const ov::builder::subgraph::DequantizationOperations& dequantizationAfter) { + const auto input = std::make_shared(precisionBeforeDequantization, inputShape); + std::shared_ptr parent = input; 
+ + if (!dequantizationBefore.empty()) { + parent = makeDequantization(parent, dequantizationBefore); + } + + parent = v1 ? + make_broadcast(parent, tagetShape, axesMapping) : + make_broadcast(parent, tagetShape, axesMapping); + parent->set_friendly_name("broadcast"); + + if (!dequantizationAfter.empty()) { + parent = makeDequantization(parent, dequantizationAfter); + } + + const std::shared_ptr result = std::make_shared(parent); + + const std::shared_ptr function = std::make_shared( + ov::ResultVector{ result }, + std::vector> { input }, + "BroadcastTransformation"); + return function; +} + +} // namespace subgraph +} // namespace builder +} // namespace ov diff --git a/src/tests/ov_helpers/ov_lpt_models/src/recurrent_cell.cpp b/src/tests/ov_helpers/ov_lpt_models/src/recurrent_cell.cpp index 7be3fca1217403..7a3537c91f3824 100644 --- a/src/tests/ov_helpers/ov_lpt_models/src/recurrent_cell.cpp +++ b/src/tests/ov_helpers/ov_lpt_models/src/recurrent_cell.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2022-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -30,7 +30,8 @@ std::shared_ptr RecurrentCellFunction::get( const RNNType type, const std::vector& fqOnDatas, const std::vector& converts, - const std::vector& dequantizations) { + const std::vector& dequantizations, + const bool addPrecisionTransparentOperations) { auto X = std::make_shared(inputPrecision, inputActivationsShapes[0]); X->set_friendly_name("X"); std::shared_ptr parent_X = makeQuantizationAndDequantization(X, @@ -46,7 +47,8 @@ std::shared_ptr RecurrentCellFunction::get( H->get_friendly_name(), fqOnDatas[1], converts[1], - dequantizations[1]); + dequantizations[1], + addPrecisionTransparentOperations); auto C = std::make_shared(inputPrecision, inputActivationsShapes[2]); C->set_friendly_name("C"); @@ -58,7 +60,8 @@ std::shared_ptr RecurrentCellFunction::get( W->get_friendly_name(), fqOnDatas[2], converts[2], - dequantizations[2]); + dequantizations[2], + 
addPrecisionTransparentOperations); auto R = ov::opset1::Constant::create(fqOnDatas[2].empty() ? ov::element::i8 : inputPrecision, inputWeightsShapes[1], {1}); @@ -127,12 +130,20 @@ std::shared_ptr makeQuantizationAndDequantization(const std::shared_ptr parent; - if (fqOnData.empty()) { - parent = input; - } else { - std::shared_ptr fakeQuantize1 = makeFakeQuantizeTypeRelaxed(input, inputPrecision, fqOnData); + const DequantizationOperations& dequantization, + const bool addPrecisionTransparentOperations) { + std::shared_ptr parent = input; + if (addPrecisionTransparentOperations) { + auto shape = input->get_output_shape(0); + std::swap(shape[shape.size() - 2], shape[shape.size() - 1]); + parent = std::make_shared( + parent, + std::make_shared(element::u32, Shape({ shape.size() }), shape), + true); + } + + if (!fqOnData.empty()) { + std::shared_ptr fakeQuantize1 = makeFakeQuantizeTypeRelaxed(parent, inputPrecision, fqOnData); fakeQuantize1->set_friendly_name("fakeQuantize_" + friendly_name); parent = fakeQuantize1; } @@ -142,6 +153,15 @@ std::shared_ptr makeQuantizationAndDequantization(const std::shared_ptrget_output_shape(0); + parent = std::make_shared( + parent, + std::make_shared(element::u32, Shape({ shape.size() }), shape), + true); + } + return parent; } From 2ff7bfc287d1bf04f321ce3074c3cc796c1b50b5 Mon Sep 17 00:00:00 2001 From: Tingqian Li Date: Mon, 29 Jul 2024 13:05:13 +0800 Subject: [PATCH 40/54] [CPU] fix brgemm with weight fp16 on Sierra Forest platform (#25686) ### Details: - fix a bug introduced during f16 weight-compression introduction which causes FP16 accuracy issue on Sierra Forest platform - oneDNN fork PR: https://github.com/openvinotoolkit/oneDNN/pull/257 ### Tickets: - *CVS-147164* --- src/plugins/intel_cpu/thirdparty/onednn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/thirdparty/onednn b/src/plugins/intel_cpu/thirdparty/onednn index f0f8defe2dff50..f1cf31a2fa0979 160000 --- 
a/src/plugins/intel_cpu/thirdparty/onednn +++ b/src/plugins/intel_cpu/thirdparty/onednn @@ -1 +1 @@ -Subproject commit f0f8defe2dff5058391f2a66e775e20b5de33b08 +Subproject commit f1cf31a2fa097932b8d74e88bf4bd941382504e4 From 8f0d89d08bb34f6dade8015d7e119c48462d129b Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Mon, 29 Jul 2024 05:05:24 +0000 Subject: [PATCH 41/54] [GPU] Fixed fc tile size for better perf for small N size (4096) kernels (#25759) ### Details: - [GPU] Fixed fc tile size for better perf for small N size (4096) kernels in MTL - To get 3-5% gain on 2nd token latency on MTL ![image](https://github.com/user-attachments/assets/9da816ab-b104-4dcb-b8d2-e816068589fb) ### Tickets: - *ticket-id* --- .../kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 07d81dce5e3f23..1613afec063eb1 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -310,7 +310,7 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params, if (!params.is_shape_agnostic && batch == 1) { // Tuning for Meteor Lake size_t min_num_threads = params.engineInfo.computeUnitsCount * simd; - if (output_f / 2 < min_num_threads && params.weights.GetLayout() == WeightsLayout::os_is_yx_osv32_isv2) { + if (output_f / 2 <= min_num_threads && params.weights.GetLayout() == WeightsLayout::os_is_yx_osv32_isv2) { GPU_DEBUG_TRACE_DETAIL << "FC bf tiled: Set ofm_tile 1. 
(output_f : " << output_f << ", computeUnitsCount : " << params.engineInfo.computeUnitsCount << " min_num_threads : " << min_num_threads << ")" << std::endl; From 8b9bcfb9e76785120c5c5919303ef0dc998e1b67 Mon Sep 17 00:00:00 2001 From: Aleksandr Voron Date: Mon, 29 Jul 2024 10:29:28 +0200 Subject: [PATCH 42/54] [CI][ARM] Enable multi-isa build in linux_arm64.yml (#25761) ### Details: - Multi-isa ARM build was disabled in GHA because of ACL issue that prevents FP16 kernel usage. This issue is fixed now and we may switch back to multi-isa build in CI. ### Tickets: - *ticket-id* --- .github/workflows/linux_arm64.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index feb0ffee81f8f0..7439599be35663 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -182,7 +182,7 @@ jobs: -DCMAKE_COMPILE_WARNING_AS_ERROR=ON \ -DCMAKE_CXX_COMPILER_LAUNCHER=${{ env.CMAKE_CXX_COMPILER_LAUNCHER }} \ -DCMAKE_C_COMPILER_LAUNCHER=${{ env.CMAKE_C_COMPILER_LAUNCHER }} \ - -DOV_CPU_AARCH64_USE_MULTI_ISA=OFF \ + -DOV_CPU_AARCH64_USE_MULTI_ISA=ON \ -S ${OPENVINO_REPO} \ -B ${BUILD_DIR} From e9dac4136af157a380eeb7053dab9c603f21e8f0 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Mon, 29 Jul 2024 10:47:02 +0200 Subject: [PATCH 43/54] [core] Enable `tensor::copy_to` to support destination with different shape (#25640) ### Details: - Relax `copy_to` function to allow copy to tensor with different shape (set new shape if required - Clean-up ITensor internals ### Tickets: - CVS-124077 --- src/core/src/runtime/itensor.cpp | 82 +++++++++++-------------------- src/core/tests/ov_tensor_test.cpp | 12 +++++ 2 files changed, 41 insertions(+), 53 deletions(-) diff --git a/src/core/src/runtime/itensor.cpp b/src/core/src/runtime/itensor.cpp index 203297c671d401..b1b517426b9f67 100644 --- a/src/core/src/runtime/itensor.cpp +++ b/src/core/src/runtime/itensor.cpp @@ -16,6 +16,21 @@ 
namespace ov { +namespace { +Strides default_byte_strides(const Shape& shape, const element::Type& et) { + auto strides = Strides(shape.size()); + if (!strides.empty()) { + strides.back() = et.size(); + std::transform(shape.crbegin(), + shape.crend() - 1, + strides.rbegin(), + strides.rbegin() + 1, + std::multiplies()); + } + return strides; +} +} // namespace + ITensor::~ITensor() = default; size_t ITensor::get_size() const { @@ -31,31 +46,13 @@ bool ITensor::is_continuous() const { // OpenVINO doesn't support strides for lp types return true; } - const auto& shape = get_shape(); - const auto& type = get_element_type(); - std::vector strides(shape.size()); - if (!shape.empty()) { - strides[shape.size() - 1] = 1; - } - auto size = shape.size(); - for (size_t i = 1; i < size; i++) { - strides[size - i - 1] = strides[size - i] * shape[size - i]; - } - - ov::Strides byte_strides(strides.size()); - for (size_t i = 0; i < strides.size(); ++i) - byte_strides[i] = strides[i] * type.size(); - return byte_strides == get_strides(); + return default_byte_strides(get_shape(), get_element_type()) == get_strides(); } void ITensor::copy_to(const std::shared_ptr& dst) const { const auto& is_scalar = [](const ov::Shape& shape) { return shape.empty() || (shape.size() == 1 && shape[0] == 1); }; - const auto shapes_equal = [is_scalar](const ov::Shape& src, const ov::Shape& dst) { - // WA for scalar tensors to copy {1} to {} or otherwise - return src == dst || (is_scalar(src) && is_scalar(dst)); - }; OPENVINO_ASSERT(dst, "Destination tensor was not initialized."); OPENVINO_ASSERT(!dynamic_cast(this), "Default copy to doesn't support copy from remote tensor."); @@ -68,16 +65,11 @@ void ITensor::copy_to(const std::shared_ptr& dst) const { dst->get_element_type(), ")"); - if (dst->get_shape() == ov::Shape{0}) - dst->set_shape(get_shape()); - - OPENVINO_ASSERT(shapes_equal(get_shape(), dst->get_shape()), - "Tensor shapes are not equal. 
(src: ", - get_shape(), - " != dst: ", - dst->get_shape(), - ")"); const auto& shape = get_shape(); + if (shape != dst->get_shape()) { + dst->set_shape(shape); + } + auto* src_data = static_cast(data()); auto* dst_data = static_cast(dst->data()); ov::Strides src_strides{get_byte_size()}; @@ -86,25 +78,15 @@ void ITensor::copy_to(const std::shared_ptr& dst) const { ov::Shape max_pos{1}; if (get_element_type().bitwidth() < 8 || (get_strides() == dst->get_strides() && is_continuous()) || - (is_scalar(get_shape()) && is_scalar(dst->get_shape()))) { + (is_scalar(shape) && is_scalar(dst->get_shape()))) { // OpenVINO doesn't support strides for LP types // or both tensors have default strides // Strides and positions already initialized } else { // Tensors have default strides const auto& type = get_element_type(); - std::vector strides(shape.size()); - if (!shape.empty()) { - strides[shape.size() - 1] = 1; - } - auto size = shape.size(); - for (size_t i = 1; i < size; i++) { - strides[size - i - 1] = strides[size - i] * shape[size - i]; - } - - ov::Strides default_strides(strides.size()); - for (size_t i = 0; i < strides.size(); ++i) - default_strides[i] = strides[i] * type.size(); + const auto shape_rank = shape.size(); + const auto default_strides = default_byte_strides(shape, type); src_strides = get_strides(); dst_strides = dst->get_strides(); @@ -113,8 +95,7 @@ void ITensor::copy_to(const std::shared_ptr& dst) const { // Calculate src and dst shapes bool found_step = false; - for (size_t i = 0; i < shape.size(); i++) { - size_t inverted_idx = shape.size() - i - 1; + for (size_t inverted_idx = shape_rank - 1; inverted_idx < shape_rank; --inverted_idx) { if (!found_step) { if (default_strides[inverted_idx] == src_strides[inverted_idx] && src_strides[inverted_idx] == dst_strides[inverted_idx]) { @@ -134,7 +115,7 @@ void ITensor::copy_to(const std::shared_ptr& dst) const { if (strides_size < default_strides.size()) { strides = default_strides[strides_size]; - dim = 
get_shape()[strides_size]; + dim = shape[strides_size]; } src_str[strides_size] = strides; dst_str[strides_size] = strides; @@ -151,13 +132,8 @@ void ITensor::copy_to(const std::shared_ptr& dst) const { dst_strides = std::move(dst_str); } - const auto update_index = [](const ov::Shape& pos, const ov::Shape& shape, const ov::Strides& strides) { - size_t offset = 0; - - for (size_t i = 0; i < pos.size(); i++) { - offset += pos[i] * strides[i]; - } - return offset; + const auto update_index = [](const ov::Shape& pos, const ov::Strides& strides) { + return std::inner_product(pos.begin(), pos.end(), strides.begin(), static_cast(0)); }; using copy_function_def = std::function; @@ -190,8 +166,8 @@ void ITensor::copy_to(const std::shared_ptr& dst) const { else finish = true; } - src_idx = update_index(cur_pos, max_pos, src_strides); - dst_idx = update_index(cur_pos, max_pos, dst_strides); + src_idx = update_index(cur_pos, src_strides); + dst_idx = update_index(cur_pos, dst_strides); } } diff --git a/src/core/tests/ov_tensor_test.cpp b/src/core/tests/ov_tensor_test.cpp index a6832f2bb5aff9..8e610196b6e4a1 100644 --- a/src/core/tests/ov_tensor_test.cpp +++ b/src/core/tests/ov_tensor_test.cpp @@ -936,6 +936,18 @@ INSTANTIATE_TEST_SUITE_P(copy_tests, TestParams { ov::Shape{}, {}, {1}, {} + }, + TestParams{ + ov::Shape{3,2,2}, {}, + ov::Shape{5}, {} + }, + TestParams{ + ov::Shape{3,2,2}, ov::Strides{64,16,8}, + ov::Shape{5,2}, {} + }, + TestParams{ + ov::Shape{3,2,2}, ov::Strides{64,16,8}, + ov::Shape{3,4,3}, ov::Strides{128,24,8} } ))); From f6aa1ad29ac8eaafef759e7040fb0822c81a947e Mon Sep 17 00:00:00 2001 From: Wenjing Kang Date: Mon, 29 Jul 2024 16:50:40 +0800 Subject: [PATCH 44/54] Fix Ninja gen onecore toolchain (#25592) ### Details: - To fix the following the error: ``` 03:59:53 Run Build Command(s):C:/PROGRA~1/CMake/bin/ninja.exe cmTC_2bf60 && ninja: error: build.ninja:47: bad $-escape (literal $ must be written as $$) 03:59:53 FLAGS = /I"$(UniversalCRT_IncludePath)" 
/I"$(UniversalCRT_IncludePath)... 03:59:53 ^ near here 03:59:53 CMake will not be able to correctly generate this project. ``` ### Tickets: - *E-130818* Signed-off-by: Kang Wenjing --- cmake/toolchains/onecoreuap.toolchain.cmake | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cmake/toolchains/onecoreuap.toolchain.cmake b/cmake/toolchains/onecoreuap.toolchain.cmake index af4285ee124117..b9c71254e09c15 100644 --- a/cmake/toolchains/onecoreuap.toolchain.cmake +++ b/cmake/toolchains/onecoreuap.toolchain.cmake @@ -50,8 +50,12 @@ endif() unset(_onecoreuap_arch) # compile flags +if(CMAKE_GENERATOR MATCHES "Ninja") + set(includes "/I\"\$\$\(UniversalCRT_IncludePath\)\"") +else() + set(includes "/I\"\$\(UniversalCRT_IncludePath\)\"") +endif() -set(includes "/I\"\$\(UniversalCRT_IncludePath\)\"") set(CMAKE_C_FLAGS_INIT "${CMAKE_C_FLAGS_INIT} ${includes}") set(CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS_INIT} ${includes}") unset(includes) From 2127da81fa76f575fbc9e5a502a58c684feb2dab Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Mon, 29 Jul 2024 10:58:18 +0200 Subject: [PATCH 45/54] [PyPI] include tbb headers to wheel (#25398) ### Details: - Extend dev package by including tbb cmake configuration and headers ### Tickets: - *135444* --- src/bindings/python/wheel/setup.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/bindings/python/wheel/setup.py b/src/bindings/python/wheel/setup.py index 25a51027a9082c..095b9579f4b354 100644 --- a/src/bindings/python/wheel/setup.py +++ b/src/bindings/python/wheel/setup.py @@ -193,6 +193,13 @@ "install_dir": "runtime", "binary_dir": OPENVINO_BINARY_DIR, "source_dir": OPENVINO_SOURCE_DIR + }, + "tbb_dev": { + "name": "tbb_dev", + "prefix": f"{BUILD_BASE}/libs.tbb.dev", + "install_dir": "runtime/3rdparty/tbb", + "binary_dir": OPENVINO_BINARY_DIR, + "source_dir": OPENVINO_SOURCE_DIR } } @@ -470,6 +477,8 @@ def copy_package_data(self, src_dirs): os.makedirs(package_dir, 
exist_ok=True) package_clibs_dir = os.path.join(PACKAGE_DIR, WHEEL_LIBS_INSTALL_DIR) os.makedirs(package_clibs_dir, exist_ok=True) + package_cmake_dir = os.path.join(package_dir, "cmake") + os.makedirs(package_cmake_dir, exist_ok=True) replacements = { # change the path where the libraries are installed (runtime/lib/intel64/Release -> openvino/libs) @@ -491,15 +500,20 @@ def copy_package_data(self, src_dirs): move(file_path, dst_file) self.announce(f"Move {file_path} to {dst_file}", level=3) + # collect all cmake files in one directory + for file_path in Path(src).rglob("*.cmake"): + file_name = os.path.basename(file_path) + if file_path.is_file(): + dst_file = os.path.join(package_cmake_dir, file_name) + self.announce(f"Move {file_path} to {dst_file}", level=3) + move(file_path, dst_file) + self.announce("Patch cmake configurations", level=3) + replace_strings_in_file(dst_file, replacements) + if os.path.isdir(src) and os.listdir(src): # copy the rest of the files to the package directly shutil.copytree(src, dst, dirs_exist_ok=True) - # patch cmake configurations - for file_path in Path(dst).rglob("*.cmake"): - if file_path.is_file(): - replace_strings_in_file(file_path, replacements) - def copy_file(src, dst, verbose=False, dry_run=False): """Custom file copy.""" From 573ed07349f3e267f17a8e1af964fff40d27a1b9 Mon Sep 17 00:00:00 2001 From: M Date: Mon, 29 Jul 2024 02:01:47 -0700 Subject: [PATCH 46/54] [GSOC][CPU][ARM] Add NEON vector instructions for single query attention (#25348) ### Details: - This PR aims to add NEON vector extension instructions to mha single query. 
--- .../src/nodes/kernels/scaled_attn/common.hpp | 2 + .../kernels/scaled_attn/mha_single_token.cpp | 126 ++++++++++++++++++ 2 files changed, 128 insertions(+) diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp index bd05801c139dc8..34c00a527d2ce7 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/common.hpp @@ -20,9 +20,11 @@ namespace XARCH { // avx512/avx2 register length in byte static constexpr size_t vec_len_avx512 = 64lu; static constexpr size_t vec_len_avx2 = 32lu; +static constexpr size_t vec_len_neon = 16lu; // avx512/avx2 register length in float static constexpr size_t vec_len_f32_avx512 = vec_len_avx512 / sizeof(float); static constexpr size_t vec_len_f32_avx2 = vec_len_avx2 / sizeof(float); +static constexpr size_t vec_len_f32_neon = vec_len_neon / sizeof(float); #ifdef HAVE_AVX512F inline __m512 cvt_bf16_to_fp32(const __m256i src) { diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp index e4648ece365e9a..5177f4013319e6 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/mha_single_token.cpp @@ -13,12 +13,17 @@ # include #endif + #include "openvino/core/type/bfloat16.hpp" #include "openvino/core/parallel.hpp" #include "mha_single_token.hpp" #include "common.hpp" #include "softmax_kernel.hpp" +#if defined(OPENVINO_ARCH_ARM64) +# include +#endif + namespace ov { namespace Extensions { namespace Cpu { @@ -53,6 +58,13 @@ void cvt_copy(TA* dst, TB* src, size_t n) { auto vb = mm256_uni_loadu_ps(src + i); mm256_uni_storeu_ps(dst + i, vb); } +#elif defined(OPENVINO_ARCH_ARM64) + int vec_len_f32_neon = 4; + auto _dst = reinterpret_cast(dst); + for (; i + vec_len_f32_neon <= n; i += 
vec_len_f32_neon) { + float32x4_t vb1 = vld1q_f32(src + i); + vst1q_f32(_dst + i, vb1); + } #endif for (; i < n; i++) { dst[i] = src[i]; @@ -78,6 +90,15 @@ static void attn_acc_value(float* out, float weight, T* v, size_t S, float* scal v_out = _mm256_fmadd_ps(attn_w_vec_fp32, v_value, v_out); mm256_uni_storeu_ps(out + i, v_out); } +#elif defined(OPENVINO_ARCH_ARM64) + float32x4_t attn_w_vec_fp32 = vdupq_n_f32(weight); + auto _v = reinterpret_cast(v); + for (; i + vec_len_f32_neon <= S; i += vec_len_f32_neon) { + float32x4_t v_value = vld1q_f32(_v + i); + float32x4_t v_out = vld1q_f32(out + i); + v_out = vmlaq_f32(v_out, attn_w_vec_fp32, v_value); + vst1q_f32(out + i, v_out); + } #endif for (; i < S; i++) { out[i] += weight * v[i]; @@ -308,6 +329,47 @@ static float sum_q_head(T* a, size_t n) { vsum0 = _mm256_add_ps(vsum0, vsum2); hsum(vsum0); sum = _mm256_cvtss_f32(vsum0); +#elif defined(OPENVINO_ARCH_ARM64) + size_t vec_len_f32_neon = 4; + float32x4_t vsum0 = vdupq_n_f32(0.0f); + float32x4_t vsum1 = vdupq_n_f32(0.0f); + float32x4_t vsum2 = vdupq_n_f32(0.0f); + float32x4_t vsum3 = vdupq_n_f32(0.0f); + + for (; i + 4 * vec_len_f32_neon <= n; i += vec_len_f32_neon * 4) { + float32x4_t va0 = vld1q_f32(a + i); + float32x4_t va1 = vld1q_f32(a + i + vec_len_f32_neon); + float32x4_t va2 = vld1q_f32(a + i + vec_len_f32_neon * 2); + float32x4_t va3 = vld1q_f32(a + i + vec_len_f32_neon * 3); + + vsum0 = vaddq_f32(va0, vsum0); + vsum1 = vaddq_f32(va1, vsum1); + vsum2 = vaddq_f32(va2, vsum2); + vsum3 = vaddq_f32(va3, vsum3); + } + if (i + 2 * vec_len_f32_neon <= n) { + float32x4_t va0 = vld1q_f32(a + i); + float32x4_t va1 = vld1q_f32(a + i + vec_len_f32_neon); + + vsum0 = vaddq_f32(va0, vsum0); + vsum1 = vaddq_f32(va1, vsum1); + i += 2 * vec_len_f32_neon; + } + if (i + vec_len_f32_neon <= n) { + float32x4_t va0 = vld1q_f32(a + i); + vsum0 = vaddq_f32(va0, vsum0); + i += vec_len_f32_neon; + } + + vsum0 = vaddq_f32(vsum0, vsum1); + vsum2 = vaddq_f32(vsum2, vsum3); + vsum0 = 
vaddq_f32(vsum0, vsum2); + + float32x2_t sum_low = vget_low_f32(vsum0); + float32x2_t sum_high = vget_high_f32(vsum0); + sum_low = vadd_f32(sum_low, sum_high); + sum_low = vpadd_f32(sum_low, sum_low); + sum = vget_lane_f32(sum_low, 0); #endif for (; i < n; i++) { @@ -406,7 +468,59 @@ static float dot_product(TA* a, TB* b, size_t n, float* scale, float* zp, float* vsum0 = _mm256_add_ps(vsum0, vsum2); hsum(vsum0); sum = _mm256_cvtss_f32(vsum0); + +#elif defined(OPENVINO_ARCH_ARM64) + float32x4_t vsum0 = vdupq_n_f32(0.0f); + float32x4_t vsum1 = vdupq_n_f32(0.0f); + float32x4_t vsum2 = vdupq_n_f32(0.0f); + float32x4_t vsum3 = vdupq_n_f32(0.0f); + + auto _a = reinterpret_cast(a); + auto _b = reinterpret_cast(b); + + for (; i + 4 * vec_len_f32_neon <= n; i += vec_len_f32_neon * 4) { + float32x4_t va0 = vld1q_f32(_a + i); + float32x4_t va1 = vld1q_f32(_a + i + vec_len_f32_neon); + float32x4_t va2 = vld1q_f32(_a + i + vec_len_f32_neon * 2); + float32x4_t va3 = vld1q_f32(_a + i + vec_len_f32_neon * 3); + + float32x4_t vb0 = vld1q_f32(_b + i); + float32x4_t vb1 = vld1q_f32(_b + i + vec_len_f32_neon); + float32x4_t vb2 = vld1q_f32(_b + i + vec_len_f32_neon * 2); + float32x4_t vb3 = vld1q_f32(_b + i + vec_len_f32_neon * 3); + + vsum0 = vmlaq_f32(vsum0, va0, vb0); + vsum1 = vmlaq_f32(vsum1, va1, vb1); + vsum2 = vmlaq_f32(vsum2, va2, vb2); + vsum3 = vmlaq_f32(vsum3, va3, vb3); + } + if (i + 2 * vec_len_f32_neon <= n) { + float32x4_t va0 = vld1q_f32(_a + i); + float32x4_t va1 = vld1q_f32(_a + i + vec_len_f32_neon); + + float32x4_t vb0 = vld1q_f32(_b + i); + float32x4_t vb1 = vld1q_f32(_b + i + vec_len_f32_neon); + + vsum0 = vmlaq_f32(vsum0, va0, vb0); + vsum1 = vmlaq_f32(vsum1, va1, vb1); + i += 2 * vec_len_f32_neon; + } + if (i + vec_len_f32_neon <= n) { + float32x4_t va0 = vld1q_f32(_a + i); + float32x4_t vb0 = vld1q_f32(_b + i); + vsum0 = vmlaq_f32(vsum0, va0, vb0); + i += vec_len_f32_neon; + } + + vsum0 = vaddq_f32(vsum0, vsum1); + vsum2 = vaddq_f32(vsum2, vsum3); + vsum0 = 
vaddq_f32(vsum0, vsum2); + + float32x2_t temp_sum = vadd_f32(vget_low_f32(vsum0), vget_high_f32(vsum0)); + temp_sum = vpadd_f32(temp_sum, temp_sum); + sum = vget_lane_f32(temp_sum, 0); #endif + for (; i < n; i++) { sum += a[i] * b[i]; } @@ -593,6 +707,18 @@ static void attn_reduce(T* dst, float* temp, size_t M, size_t S, size_t temp_str } mm256_uni_storeu_ps(dst + i, result_vec_fp32); } +#elif defined(OPENVINO_ARCH_ARM64) + auto _dst = reinterpret_cast(dst); + for (; i + vec_len_f32_neon <= S; i += vec_len_f32_neon) { + auto* src = temp + i; + auto result_vec_fp32 = vdupq_n_f32(0.0f); + for (size_t m = 0; m < M; m++) { + auto o_vec_fp32 = vld1q_f32(src); + result_vec_fp32 = vaddq_f32(result_vec_fp32, o_vec_fp32); + src += temp_stride; + } + vst1q_f32(_dst + i, result_vec_fp32); + } #endif for (; i < S; i++) { auto* src = temp + i; From 9cd8ec5455c618427178a0a427ee9107fa83359f Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Mon, 29 Jul 2024 12:21:24 +0300 Subject: [PATCH 47/54] Add some ITT traces for better visibility (#25750) ### Details: - *Add some ITT traces for better visibility over memcpy vs update mutable command list features* ### Tickets: - ** --- .../src/backend/src/zero_infer_request.cpp | 18 ++++++++++++++++-- .../src/backend/src/zero_pipeline.cpp | 4 ++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index cf1253dd14e713..fdda6083d0bc6d 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -338,9 +338,11 @@ void ZeroInferRequest::create_pipeline() { } void ZeroInferRequest::set_tensor_data(std::shared_ptr tensor, const std::string& name, bool isParameter) { + OV_ITT_TASK_CHAIN(ZERO_SET_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_tensor_data"); bool setTensorData = false; bool levelZeroTensorCreatedLocally 
= true; + OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "check_data_allocation"); ze_memory_allocation_properties_t desc = {}; desc.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES; auto res = zeMemGetAllocProperties(_initStructs->getContext(), tensor->data(), &desc, nullptr); @@ -367,6 +369,7 @@ void ZeroInferRequest::set_tensor_data(std::shared_ptr tensor, cons // tensor if ((_tensorsData.find(name) != _tensorsData.end()) && !_tensorsData.at(name).levelZeroTensorCreatedLocally) { _logger.debug("ZeroInferRequest::set_tensor_data - create locally L0 tensor"); + OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "allocate tensor"); allocate_tensor(name, isParameter ? _metadata.parameters.at(name) : _metadata.results.at(name), @@ -393,6 +396,7 @@ void ZeroInferRequest::set_tensor_data(std::shared_ptr tensor, cons desc = _executor->outputs_desc_map().at(name); } + OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "updateCommandList"); _pipeline->updateCommandList(_tensorsData[name], desc.idx, _batchSize); } } @@ -401,6 +405,8 @@ void ZeroInferRequest::set_tensor_data(std::shared_ptr tensor, cons void ZeroInferRequest::set_remote_tensor_data(std::shared_ptr tensor, const std::string& name, bool isParameter) { + OV_ITT_TASK_CHAIN(ZERO_SET_REMOTE_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_remote_tensor_data"); + auto l0_context = reinterpret_cast( extract_object(tensor->get_context()->get_property(), ov::intel_npu::l0_context)); if (_initStructs->getContext() != l0_context) { @@ -425,11 +431,13 @@ void ZeroInferRequest::set_remote_tensor_data(std::shared_ptr desc = _executor->outputs_desc_map().at(name); } + OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "updateCommandList"); _pipeline->updateCommandList(_tensorsData[name], desc.idx, _batchSize); } } void ZeroInferRequest::set_tensor(const ov::Output& port, const ov::SoPtr& tensor) { + OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "set_tensor"); try { check_tensor(port, tensor); } catch (const ov::Exception& ex) { @@ -456,6 +464,7 @@ void 
ZeroInferRequest::set_tensor(const ov::Output& port, const } ov::SoPtr ZeroInferRequest::get_tensor(const ov::Output& port) const { + OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "get_tensor"); const std::string& nodeFriendlyName = port.get_node()->get_friendly_name(); if (_allTensors.find(nodeFriendlyName) != _allTensors.end()) { @@ -484,10 +493,11 @@ void ZeroInferRequest::infer() { void ZeroInferRequest::infer_async() { _logger.debug("InferRequest::infer_async started"); - OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "infer_async"); + OV_ITT_TASK_CHAIN(ZERO_INFER, itt::domains::LevelZeroBackend, "infer_async", "start"); _executor->mutexLock(); if (!_pipelineIsCreated) { + OV_ITT_TASK_NEXT(ZERO_INFER, "create_pipeline"); create_pipeline(); _pipelineIsCreated = true; @@ -522,18 +532,20 @@ void ZeroInferRequest::infer_async() { } _logger.info("Tensor is not allocated in the current Level Zero context"); + OV_ITT_TASK_NEXT(ZERO_INFER, "memcpy"); std::memcpy(copyData, data, inputTensor->get_byte_size()); } } } + OV_ITT_TASK_NEXT(ZERO_INFER, "push"); for (size_t i = 0; i < _batchSize; i++) { _pipeline->push(i); } } void ZeroInferRequest::get_result() { - OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "get_result"); + OV_ITT_TASK_CHAIN(ZERO_RESULT, itt::domains::LevelZeroBackend, "get_result", "pull"); for (size_t i = 0; i < _batchSize; i++) { _pipeline->pull(i); @@ -577,11 +589,13 @@ void ZeroInferRequest::get_result() { } _logger.info("Tensor is not allocated in the current Level Zero context"); + OV_ITT_TASK_NEXT(ZERO_RESULT, "memcpy"); std::memcpy(data, copyData, outputTensor->get_byte_size()); } } } + OV_ITT_TASK_NEXT(ZERO_RESULT, "reset"); for (size_t i = 0; i < _batchSize; i++) { _pipeline->reset(i); } diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index f98e84a34a0a46..d90dbec4e51ef4 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ 
b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -275,6 +275,10 @@ struct IntegratedPipeline final : public Pipeline { }; void updateCommandList(const TensorData& tensors_data, uint32_t index, size_t batch_size) override { + OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PULL, + itt::domains::LevelZeroBackend, + "IntegratedPipeline", + "updateCommandList"); for (size_t i = 0; i < batch_size; i++) { _command_lists.at(i)->updateMutableCommandList( index, From db913ce081360a06e5807a2ef9088311e9197468 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 14:13:10 +0400 Subject: [PATCH 48/54] Bump reviewdog/action-shellcheck from 1.23.0 to 1.26.0 (#25562) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [reviewdog/action-shellcheck](https://github.com/reviewdog/action-shellcheck) from 1.23.0 to 1.26.0.
Release notes

Sourced from reviewdog/action-shellcheck's releases.

Release v1.26.0

What's Changed

Full Changelog: https://github.com/reviewdog/action-shellcheck/compare/v1.25.0...v1.26.0

Release v1.25.0

What's Changed

Full Changelog: https://github.com/reviewdog/action-shellcheck/compare/v1.24.0...v1.25.0

Release v1.24.0

What's Changed

Full Changelog: https://github.com/reviewdog/action-shellcheck/compare/v1.23.0...v1.24.0

Commits
  • d99499e Merge pull request #65 from reviewdog/depup/reviewdog/reviewdog
  • e1b792e chore(deps): update reviewdog/reviewdog to 0.20.1
  • 6af7780 chore(deps): update reviewdog/reviewdog to 0.20.0 (#64)
  • 628ce85 Merge pull request #63 from reviewdog/depup/reviewdog/reviewdog
  • 54d0ff0 chore(deps): update reviewdog/reviewdog to 0.19.0
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=reviewdog/action-shellcheck&package-manager=github_actions&previous-version=1.23.0&new-version=1.26.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/code_style.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index 223a7418156e43..de99cdb245916e 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -60,7 +60,7 @@ jobs: # always provide suggestions even for skipped scripts in ov_shellcheck tagret - name: ShellCheck action if: always() - uses: reviewdog/action-shellcheck@52f34f737a16c65b8caa8c51ae1b23036afe5685 # v1.23.0 + uses: reviewdog/action-shellcheck@d99499e855260c9c56f7a1d066933b57326e9e7c # v1.26.0 with: level: style reporter: github-pr-review From 327e3b9f1e067836ad1913e90020d2c579338827 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 14:13:17 +0400 Subject: [PATCH 49/54] Bump actions/download-artifact from 4.1.7 to 4.1.8 (#25422) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 4.1.7 to 4.1.8.
Release notes

Sourced from actions/download-artifact's releases.

v4.1.8

What's Changed

Full Changelog: https://github.com/actions/download-artifact/compare/v4...v4.1.8

Commits
  • fa0a91b Merge pull request #341 from actions/robherley/bump-pkgs
  • b54d088 Update @​actions/artifact version, bump dependencies
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/download-artifact&package-manager=github_actions&previous-version=4.1.7&new-version=4.1.8)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/fedora.yml | 2 +- .github/workflows/job_cpu_functional_tests.yml | 4 ++-- .github/workflows/job_cxx_unit_tests.yml | 4 ++-- .github/workflows/job_debian_packages.yml | 2 +- .github/workflows/job_gpu_tests.yml | 4 ++-- .github/workflows/job_onnx_models_tests.yml | 4 ++-- .github/workflows/job_onnx_runtime.yml | 2 +- .github/workflows/job_openvino_js.yml | 2 +- .github/workflows/job_python_unit_tests.yml | 4 ++-- .github/workflows/job_pytorch_models_tests.yml | 6 +++--- .github/workflows/job_samples_tests.yml | 4 ++-- .../workflows/job_tensorflow_layer_tests.yml | 6 +++--- .../workflows/job_tensorflow_models_tests.yml | 6 +++--- .github/workflows/job_tokenizers.yml | 2 +- .github/workflows/linux.yml | 8 ++++---- .../linux_conditional_compilation.yml | 2 +- .github/workflows/linux_sanitizers.yml | 4 ++-- .github/workflows/windows.yml | 18 +++++++++--------- .../windows_conditional_compilation.yml | 4 ++-- 19 files changed, 44 insertions(+), 44 deletions(-) diff --git a/.github/workflows/fedora.yml b/.github/workflows/fedora.yml index 02cd0abf018319..addaf24e24a98c 100644 --- a/.github/workflows/fedora.yml +++ b/.github/workflows/fedora.yml @@ -234,7 +234,7 @@ jobs: steps: - name: Download OpenVINO RPM packages - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_rpm_packages path: ${{ env.RPM_PACKAGES_DIR }} diff --git a/.github/workflows/job_cpu_functional_tests.yml b/.github/workflows/job_cpu_functional_tests.yml index 04fa0c8860ab66..08422a50c11849 100644 --- a/.github/workflows/job_cpu_functional_tests.yml +++ b/.github/workflows/job_cpu_functional_tests.yml @@ -33,13 +33,13 @@ jobs: PARALLEL_TEST_CACHE: ${{ github.workspace }}/install/tests/test_cache.lst steps: - name: Download OpenVINO package 
- uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} diff --git a/.github/workflows/job_cxx_unit_tests.yml b/.github/workflows/job_cxx_unit_tests.yml index 92c12dfcd71251..0ca6f332204f20 100644 --- a/.github/workflows/job_cxx_unit_tests.yml +++ b/.github/workflows/job_cxx_unit_tests.yml @@ -35,13 +35,13 @@ jobs: INSTALL_TEST_DIR: ${{ github.workspace }}/install/tests steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} diff --git a/.github/workflows/job_debian_packages.yml b/.github/workflows/job_debian_packages.yml index a7547a2483dd16..a8f2731563f779 100644 --- a/.github/workflows/job_debian_packages.yml +++ b/.github/workflows/job_debian_packages.yml @@ -33,7 +33,7 @@ jobs: run: echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - name: Download OpenVINO debian packages - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_debian_packages path: ${{ env.DEBIAN_PACKAGES_DIR }} diff --git 
a/.github/workflows/job_gpu_tests.yml b/.github/workflows/job_gpu_tests.yml index 7a5af97cdcde49..8c3a40f5c038a3 100644 --- a/.github/workflows/job_gpu_tests.yml +++ b/.github/workflows/job_gpu_tests.yml @@ -38,13 +38,13 @@ jobs: GTEST_PARALLEL_SCRIPT: ${{ github.workspace }}/gtest_parallel.py steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: 'openvino_package' path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: 'openvino_tests' path: ${{ env.INSTALL_TEST_DIR }} diff --git a/.github/workflows/job_onnx_models_tests.yml b/.github/workflows/job_onnx_models_tests.yml index 3fac0998d88ced..19bf3b23482b89 100644 --- a/.github/workflows/job_onnx_models_tests.yml +++ b/.github/workflows/job_onnx_models_tests.yml @@ -38,13 +38,13 @@ jobs: if: ${{ github.event_name != 'merge_group' }} steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} diff --git a/.github/workflows/job_onnx_runtime.yml b/.github/workflows/job_onnx_runtime.yml index ae0f21bf58ab37..b7da6d827d542d 100644 --- a/.github/workflows/job_onnx_runtime.yml +++ b/.github/workflows/job_onnx_runtime.yml @@ -43,7 +43,7 @@ jobs: ONNX_RUNTIME_BUILD_DIR: ${{ github.workspace 
}}/onnxruntime/build steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} diff --git a/.github/workflows/job_openvino_js.yml b/.github/workflows/job_openvino_js.yml index 25e29dd3f3f9c3..880726bd0d5878 100644 --- a/.github/workflows/job_openvino_js.yml +++ b/.github/workflows/job_openvino_js.yml @@ -45,7 +45,7 @@ jobs: echo "OPENVINO_JS_LIBS_DIR=$GITHUB_WORKSPACE/openvino/src/bindings/js/node/bin" >> "$GITHUB_ENV" - name: Download OpenVINO JS package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_js_package path: ${{ env.OPENVINO_JS_LIBS_DIR }} diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index 4c7a14e891b49e..c2f709e9783516 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -41,13 +41,13 @@ jobs: steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index f0a01847da0be3..386fdbe3fdebf2 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -49,19 +49,19 @@ 
jobs: fi - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tokenizers extension - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tokenizers_wheel path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} diff --git a/.github/workflows/job_samples_tests.yml b/.github/workflows/job_samples_tests.yml index e453210d58b13b..2fce9965e36b6c 100644 --- a/.github/workflows/job_samples_tests.yml +++ b/.github/workflows/job_samples_tests.yml @@ -34,13 +34,13 @@ jobs: BUILD_DIR: ${{ github.workspace }}/build steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index 168d9bf61308d7..243c3ed12a292f 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ b/.github/workflows/job_tensorflow_layer_tests.yml @@ -44,19 +44,19 @@ jobs: LAYER_TESTS_INSTALL_DIR: ${{ github.workspace 
}}/install/tests/layer_tests steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} - name: Download OpenVINO tokenizers extension - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tokenizers_wheel path: ${{ env.INSTALL_DIR }} diff --git a/.github/workflows/job_tensorflow_models_tests.yml b/.github/workflows/job_tensorflow_models_tests.yml index 1a452c94db0ace..7cb8c79fea61fa 100644 --- a/.github/workflows/job_tensorflow_models_tests.yml +++ b/.github/workflows/job_tensorflow_models_tests.yml @@ -37,19 +37,19 @@ jobs: NUMBER_OF_REPLICAS: 2 steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tokenizers extension - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tokenizers_wheel path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} diff --git 
a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml index 9cf1acc05e7220..a682f513c44a5d 100644 --- a/.github/workflows/job_tokenizers.yml +++ b/.github/workflows/job_tokenizers.yml @@ -73,7 +73,7 @@ jobs: ref: 'master' - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index dce369b6fe4dd9..9f13703832601f 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -397,13 +397,13 @@ jobs: # - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} @@ -649,13 +649,13 @@ jobs: steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO Developer package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_developer_package path: ${{ env.INSTALL_DIR }} diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index cdae9c77af9b40..bee3d492358c53 100644 --- 
a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -293,7 +293,7 @@ jobs: ref: 'master' - name: Download selective build statistics package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_selective_build_stat path: ${{ env.SELECTIVE_BUILD_STAT_DIR }} diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index 6f089f205d3b1d..394b065fa67a47 100644 --- a/.github/workflows/linux_sanitizers.yml +++ b/.github/workflows/linux_sanitizers.yml @@ -230,13 +230,13 @@ jobs: run: echo 'Acquire::Retries "10";' > /etc/apt/apt.conf.d/80-retries - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: ${{ format('openvino_package_{0}', matrix.SANITIZER) }} path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: ${{ format('openvino_tests_{0}', matrix.SANITIZER) }} path: ${{ env.INSTALL_TEST_DIR }} diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 24fd5946cbcf2c..d3b052876fe428 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -284,13 +284,13 @@ jobs: steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + 
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} @@ -378,7 +378,7 @@ jobs: path: 'openvino' - name: Download OpenVINO js package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_js_package path: ${{ env.OPENVINO_JS_LIBS_DIR }} @@ -449,13 +449,13 @@ jobs: steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} @@ -637,13 +637,13 @@ jobs: steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} @@ -837,13 +837,13 @@ jobs: if: fromJSON(needs.smart_ci.outputs.affected_components).CPU.test steps: - name: Download OpenVINO package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_package path: ${{ env.INSTALL_DIR }} - name: Download OpenVINO tests package - uses: 
actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index 977f9aee91bcce..80e4d9a5338e71 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml @@ -292,7 +292,7 @@ jobs: ref: 'master' - name: Download selective build statistics package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_selective_build_stat path: ${{ env.SELECTIVE_BUILD_STAT_DIR }} @@ -355,7 +355,7 @@ jobs: steps: - name: Download OpenVINO tests package - uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 with: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} From 65efb849191a8fc06d809c2dcc5771571998ddea Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 10:14:11 +0000 Subject: [PATCH 50/54] Bump reviewdog/action-suggester from 1.15.0 to 1.17.0 (#25561) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [reviewdog/action-suggester](https://github.com/reviewdog/action-suggester) from 1.15.0 to 1.17.0.
Release notes

Sourced from reviewdog/action-suggester's releases.

Release v1.17.0

What's Changed

Full Changelog: https://github.com/reviewdog/action-suggester/compare/v1.16.0...v1.17.0

Release v1.16.0

What's Changed

Full Changelog: https://github.com/reviewdog/action-suggester/compare/v1.15.0...v1.16.0

Commits
  • 63b8f8c Merge pull request #61 from reviewdog/depup/reviewdog/reviewdog
  • bc3e032 chore(deps): update reviewdog/reviewdog to 0.20.1
  • 951f8ea Merge pull request #60 from reviewdog/depup/reviewdog/reviewdog
  • 38674b1 chore(deps): update reviewdog/reviewdog to 0.19.0
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=reviewdog/action-suggester&package-manager=github_actions&previous-version=1.15.0&new-version=1.17.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/code_style.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index de99cdb245916e..f43e60538a6089 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -31,7 +31,7 @@ jobs: - name: suggester / clang-format if: startsWith(github.event_name, 'pull_request') - uses: reviewdog/action-suggester@a1d57ff096639094e0ba35ef3039e79316364796 # v1.15.0 + uses: reviewdog/action-suggester@63b8f8cc21dfa052ac44436e65ed31edcffcb6c1 # v1.17.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} level: warning From ca26c7e8d817504ecbf0fbbb869efa8c38fb3c39 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 10:17:07 +0000 Subject: [PATCH 51/54] Bump actions/upload-artifact from 4.3.3 to 4.3.4 (#25424) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.3 to 4.3.4.
Release notes

Sourced from actions/upload-artifact's releases.

v4.3.4

What's Changed

Full Changelog: https://github.com/actions/upload-artifact/compare/v4.3.3...v4.3.4

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/upload-artifact&package-manager=github_actions&previous-version=4.3.3&new-version=4.3.4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/android_arm64.yml | 2 +- .github/workflows/build_doc.yml | 6 +++--- .github/workflows/coverity.yml | 4 ++-- .github/workflows/fedora.yml | 8 ++++---- .github/workflows/job_cpu_functional_tests.yml | 2 +- .github/workflows/job_cxx_unit_tests.yml | 2 +- .github/workflows/job_gpu_tests.yml | 2 +- .github/workflows/job_python_unit_tests.yml | 2 +- .github/workflows/job_pytorch_models_tests.yml | 2 +- .github/workflows/job_tensorflow_layer_tests.yml | 2 +- .../workflows/job_tensorflow_models_tests.yml | 2 +- .github/workflows/job_tokenizers.yml | 2 +- .github/workflows/linux.yml | 16 ++++++++-------- .github/workflows/linux_arm64.yml | 12 ++++++------ .../workflows/linux_conditional_compilation.yml | 8 ++++---- .github/workflows/linux_sanitizers.yml | 6 +++--- .github/workflows/mac.yml | 6 +++--- .github/workflows/mac_arm64.yml | 6 +++--- .github/workflows/py_checks.yml | 6 +++--- .github/workflows/windows.yml | 12 ++++++------ .../windows_conditional_compilation.yml | 6 +++--- 21 files changed, 57 insertions(+), 57 deletions(-) diff --git a/.github/workflows/android_arm64.yml b/.github/workflows/android_arm64.yml index de5b6c0011e34d..25081433f48f10 100644 --- a/.github/workflows/android_arm64.yml +++ b/.github/workflows/android_arm64.yml @@ -175,7 +175,7 @@ jobs: # Upload build logs # - name: Upload build logs - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: build_logs diff --git a/.github/workflows/build_doc.yml b/.github/workflows/build_doc.yml index 66d4c8067edea9..b590d093207e39 100644 --- a/.github/workflows/build_doc.yml +++ b/.github/workflows/build_doc.yml @@ -72,13 +72,13 @@ jobs: echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV - name: 'Upload sphinx.log' - uses: 
actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: sphinx_build_log_${{ env.PR_NUMBER }}.log path: build/docs/sphinx.log - name: 'Upload docs html' - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_docs_html_${{ env.PR_NUMBER }}.zip path: build/docs/openvino_docs_html.zip @@ -95,7 +95,7 @@ jobs: - name: 'Upload test results' if: failure() - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_docs_pytest path: build/docs/_artifacts/ diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index ef0fd80e7813cf..da790552c239c1 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -139,7 +139,7 @@ jobs: run: ${COVERITY_TOOL_DIR}/cov-analysis*/bin/cov-configure -c ${COVERITY_TOOL_DIR}/cov-analysis-linux64-2023.6.2/config/coverity_config.xml -lscc text - name: Upload Coverity build log - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: coverity_logs @@ -147,7 +147,7 @@ jobs: if-no-files-found: 'error' - name: Upload Coverity build archive - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: coverity_archive diff --git a/.github/workflows/fedora.yml b/.github/workflows/fedora.yml index addaf24e24a98c..5833c1d2000fa7 100644 --- a/.github/workflows/fedora.yml +++ b/.github/workflows/fedora.yml @@ -189,7 +189,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs 
- uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: build_logs @@ -198,7 +198,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -206,7 +206,7 @@ jobs: - name: Upload openvino RPM packages if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_rpm_packages path: ${{ env.BUILD_DIR }}/*.rpm @@ -214,7 +214,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz diff --git a/.github/workflows/job_cpu_functional_tests.yml b/.github/workflows/job_cpu_functional_tests.yml index 08422a50c11849..986c2c42315371 100644 --- a/.github/workflows/job_cpu_functional_tests.yml +++ b/.github/workflows/job_cpu_functional_tests.yml @@ -108,7 +108,7 @@ jobs: key: ${{ runner.os }}-${{ runner.arch }}-tests-functional-cpu-stamp-${{ github.sha }} - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: test-results-functional-cpu diff --git a/.github/workflows/job_cxx_unit_tests.yml b/.github/workflows/job_cxx_unit_tests.yml index 0ca6f332204f20..29c656f416ecbc 100644 --- a/.github/workflows/job_cxx_unit_tests.yml +++ 
b/.github/workflows/job_cxx_unit_tests.yml @@ -255,7 +255,7 @@ jobs: ${INSTALL_TEST_DIR}/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OVHeteroFuncTests.xml --gtest_filter="*smoke*" - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-cpp diff --git a/.github/workflows/job_gpu_tests.yml b/.github/workflows/job_gpu_tests.yml index 8c3a40f5c038a3..5d9fb1172e62cb 100644 --- a/.github/workflows/job_gpu_tests.yml +++ b/.github/workflows/job_gpu_tests.yml @@ -128,7 +128,7 @@ jobs: - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: test-results-${{ inputs.test_type }}-${{ inputs.device }} diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index c2f709e9783516..e1bd58fb781d69 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -306,7 +306,7 @@ jobs: --ignore=${INSTALL_TEST_DIR}/pyopenvino/tests/test_utils/test_utils.py - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-python diff --git a/.github/workflows/job_pytorch_models_tests.yml b/.github/workflows/job_pytorch_models_tests.yml index 386fdbe3fdebf2..b910d9242647b1 100644 --- a/.github/workflows/job_pytorch_models_tests.yml +++ b/.github/workflows/job_pytorch_models_tests.yml @@ -180,7 +180,7 @@ jobs: df -h - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: 
actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-torch-models-${{ inputs.model_scope == 'precommit' }} diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index 243c3ed12a292f..9c2392093ab446 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ b/.github/workflows/job_tensorflow_layer_tests.yml @@ -158,7 +158,7 @@ jobs: TEST_PRECISION: FP16 - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-python-tf-layers diff --git a/.github/workflows/job_tensorflow_models_tests.yml b/.github/workflows/job_tensorflow_models_tests.yml index 7cb8c79fea61fa..ab8163139e4a2b 100644 --- a/.github/workflows/job_tensorflow_models_tests.yml +++ b/.github/workflows/job_tensorflow_models_tests.yml @@ -114,7 +114,7 @@ jobs: TEST_DEVICE: CPU - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-tensorflow-models-${{ inputs.model_scope }} diff --git a/.github/workflows/job_tokenizers.yml b/.github/workflows/job_tokenizers.yml index a682f513c44a5d..e1ef48b14ee7d9 100644 --- a/.github/workflows/job_tokenizers.yml +++ b/.github/workflows/job_tokenizers.yml @@ -137,7 +137,7 @@ jobs: - name: Upload openvino tokenizers wheel if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tokenizers_wheel path: ${{ env.EXTENSION_BUILD_DIR }}/*.whl diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 
9f13703832601f..80ad7ffa92c4f1 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -269,7 +269,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: build_logs @@ -278,7 +278,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -286,7 +286,7 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} @@ -294,7 +294,7 @@ jobs: - name: Upload openvino developer package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_developer_package path: ${{ env.BUILD_DIR }}/openvino_developer_package.tar.gz @@ -302,7 +302,7 @@ jobs: - name: Upload openvino debian packages if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_debian_packages path: ${{ env.BUILD_DIR }}/*.deb @@ -310,7 +310,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: 
name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -462,7 +462,7 @@ jobs: - name: Upload Conformance Artifacts if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: conformance_artifacts_${{ matrix.TEST_TYPE }}-${{ env.TEST_DEVICE }} path: ${{ env.CONFORMANCE_ARTIFACTS_DIR }}/conformance_artifacts.tar.gz @@ -488,7 +488,7 @@ jobs: - name: Upload Conformance Artifacts if: ${{ matrix.TEST_TYPE == 'API' }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: conformance_artifacts_${{ matrix.TEST_TYPE }}-TEMPLATE path: ${{ env.CONFORMANCE_ARTIFACTS_DIR }}/conformance_artifacts.tar.gz diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 7439599be35663..d38eda93e7d2b8 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -262,7 +262,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: build_logs @@ -271,7 +271,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -279,7 +279,7 @@ jobs: - name: Upload openvino developer package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_developer_package path: ${{ 
env.BUILD_DIR }}/openvino_developer_package.tar.gz @@ -287,7 +287,7 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} @@ -295,7 +295,7 @@ jobs: - name: Upload openvino debian packages if: ${{ 'false' }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_debian_packages path: ${{ env.BUILD_DIR }}/*.deb @@ -303,7 +303,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index bee3d492358c53..cfccad5fe23e12 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -220,7 +220,7 @@ jobs: # Upload build artifacts and logs # - name: Upload build logs - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: always() with: name: build_logs @@ -229,7 +229,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -237,7 +237,7 @@ jobs: - name: Upload selective build 
statistics package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_selective_build_stat path: ${{ env.BUILD_DIR }}/openvino_selective_build_stat.tar.gz @@ -245,7 +245,7 @@ jobs: - name: Upload OpenVINO tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz diff --git a/.github/workflows/linux_sanitizers.yml b/.github/workflows/linux_sanitizers.yml index 394b065fa67a47..5227eb3eacdac9 100644 --- a/.github/workflows/linux_sanitizers.yml +++ b/.github/workflows/linux_sanitizers.yml @@ -186,7 +186,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package_${{ matrix.SANITIZER }} path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -194,7 +194,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests_${{ matrix.SANITIZER }} path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -462,7 +462,7 @@ jobs: ${INSTALL_TEST_DIR}/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OVHeteroFuncTests.xml --gtest_filter="*smoke*" - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-cpp diff --git 
a/.github/workflows/mac.yml b/.github/workflows/mac.yml index 32f5474d14ce76..3880f8333f18c5 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -204,7 +204,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -212,7 +212,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.tar.gz @@ -220,7 +220,7 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index 26eb440eb87cb2..8386f54719b02c 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -204,7 +204,7 @@ jobs: - name: Upload openvino package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz @@ -212,7 +212,7 @@ jobs: - name: Upload openvino tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR 
}}/openvino_tests.tar.gz @@ -220,7 +220,7 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} diff --git a/.github/workflows/py_checks.yml b/.github/workflows/py_checks.yml index 3a9b23ea2685db..7bd7fe3d840222 100644 --- a/.github/workflows/py_checks.yml +++ b/.github/workflows/py_checks.yml @@ -49,7 +49,7 @@ jobs: git diff > samples_diff.diff working-directory: samples/python - - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: failure() with: name: samples_diff @@ -67,7 +67,7 @@ jobs: git diff > pyopenvino_diff.diff working-directory: src/bindings/python/src/openvino - - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: failure() with: name: pyopenvino_diff @@ -85,7 +85,7 @@ jobs: git diff > wheel_diff.diff working-directory: src/bindings/python/wheel - - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: failure() with: name: wheel_diff diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index d3b052876fe428..26ed3615ea973a 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -234,14 +234,14 @@ jobs: # - name: Upload openvino package - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.zip 
if-no-files-found: 'error' - name: Upload openvino tests package - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.zip @@ -249,7 +249,7 @@ jobs: - name: Upload openvino js package if: fromJSON(needs.smart_ci.outputs.affected_components).JS_API - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_js_package path: ${{ env.INSTALL_DIR_JS }} @@ -607,7 +607,7 @@ jobs: run: python3 -m pytest -s ${{ env.INSTALL_TEST_DIR }}/ovc/unit_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-OpenVinoConversion.xml - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-python @@ -813,7 +813,7 @@ jobs: ${{ env.INSTALL_TEST_DIR }}/ov_hetero_func_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-OVHeteroFuncTests.xml --gtest_filter="*smoke*" - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-cpp @@ -897,7 +897,7 @@ jobs: key: ${{ runner.os }}-tests-functional-cpu-stamp-${{ github.sha }} - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-functional-cpu diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index 
80e4d9a5338e71..963a6edb37a56a 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml @@ -247,7 +247,7 @@ jobs: - name: Upload selective build statistics package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_selective_build_stat path: ${{ env.BUILD_DIR }}/openvino_selective_build_stat.zip @@ -255,7 +255,7 @@ jobs: - name: Upload OpenVINO tests package if: ${{ always() }} - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 with: name: openvino_tests path: ${{ env.BUILD_DIR }}/openvino_tests.zip @@ -397,7 +397,7 @@ jobs: timeout-minutes: 60 - name: Upload Test Results - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3 + uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4 if: ${{ !cancelled() }} with: name: test-results-functional-cpu From 3b01c5e477c390e83b316fd03c94818614ed564e Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Mon, 29 Jul 2024 14:22:25 +0200 Subject: [PATCH 52/54] [PY] Limit setuptools version (#25769) ### Details: https://github.com/pypa/setuptools/issues/4519 ### Tickets: - *ticket-id* --- src/bindings/python/constraints.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bindings/python/constraints.txt b/src/bindings/python/constraints.txt index 38e1770a5c2989..49ebd8d4f87716 100644 --- a/src/bindings/python/constraints.txt +++ b/src/bindings/python/constraints.txt @@ -10,7 +10,7 @@ pytest-timeout==2.2.0 # Python bindings py>=1.9.0 pygments>=2.8.1 -setuptools>=65.6.1 +setuptools>=65.6.1,<72 sympy>=1.10 wheel>=0.38.1 patchelf<=0.17.2.1 From adadfb9953ce234899a8a50963136c023d46eb6e Mon Sep 17 00:00:00 2001 From: Alina Kladieva 
Date: Mon, 29 Jul 2024 16:22:01 +0200 Subject: [PATCH 53/54] Add MPL-2.0 to the list of allowed licenses (#25775) ### Details: By request of Jacek Pawlak --- .github/dependency_review.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/dependency_review.yml b/.github/dependency_review.yml index 11639f4d2d4b22..5636a441501fc8 100644 --- a/.github/dependency_review.yml +++ b/.github/dependency_review.yml @@ -13,6 +13,7 @@ allow-licenses: - '0BSD' - 'Python-2.0' - 'LGPL-3.0' + - 'MPL-2.0' fail-on-scopes: - 'runtime' - 'development' From 843c3c3a6fc7352968e7013de40ddbd8247d271c Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Mon, 29 Jul 2024 16:44:01 +0200 Subject: [PATCH 54/54] Added OVC to github labels and updated code owners. (#25729) ### Details: - Added OVC to github labels and updated code owners. ### Tickets: - 144781 --- .github/CODEOWNERS | 6 +++--- .github/components.yml | 19 +++++++++++++++++++ .github/github_org_control/config.json | 1 + .github/labeler.yml | 4 +++- 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 352049ffbd9211..b1882c395c2873 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -97,7 +97,7 @@ # QA Tests: /tests/ @openvinotoolkit/openvino-tests-maintainers -/tests/layer_tests/ @openvinotoolkit/openvino-tests-maintainers @openvinotoolkit/openvino-mo-maintainers +/tests/layer_tests/ @openvinotoolkit/openvino-tests-maintainers @openvinotoolkit/openvino-ovc-maintainers /tests/layer_tests/pytorch_tests/ @openvinotoolkit/openvino-pytorch-frontend-maintainers /tests/layer_tests/tensorflow_tests @openvinotoolkit/openvino-tf-frontend-maintainers /tests/layer_tests/jax_tests @openvinotoolkit/openvino-tf-frontend-maintainers @openvinotoolkit/openvino-jax-frontend-maintainers @@ -110,7 +110,7 @@ /tools/legacy/ @openvinotoolkit/openvino-samples-maintainers /tools/openvino_dev/ @openvinotoolkit/openvino-tools-maintainers 
@openvinotoolkit/openvino-ie-python-api-maintainers /tools/mo/ @openvinotoolkit/openvino-mo-maintainers -/tools/ovc/ @openvinotoolkit/openvino-mo-maintainers +/tools/ovc/ @openvinotoolkit/openvino-ovc-maintainers /thirdparty/open_model_zoo/ @openvinotoolkit/omz-maintainers # Documentation @@ -118,7 +118,7 @@ /docs/CMakeLists.txt @openvinotoolkit/openvino-ie-maintainers /**/*.md @openvinotoolkit/openvino-docs-maintainers /**/*.svg @openvinotoolkit/openvino-docs-maintainers -/docs/MO_DG/ @openvinotoolkit/openvino-docs-maintainers @openvinotoolkit/openvino-mo-maintainers +/docs/MO_DG/ @openvinotoolkit/openvino-docs-maintainers @openvinotoolkit/openvino-ovc-maintainers /docs/OV_Runtime_UG/ @openvinotoolkit/openvino-docs-maintainers @openvinotoolkit/openvino-ie-maintainers /docs/IE_PLUGIN_DG/ @openvinotoolkit/openvino-docs-maintainers @openvinotoolkit/openvino-ie-maintainers /docs/Extensibility_UG/ @openvinotoolkit/openvino-docs-maintainers @openvinotoolkit/openvino-ie-maintainers diff --git a/.github/components.yml b/.github/components.yml index 9c5d7c0089c9ca..8de51a2ced3343 100644 --- a/.github/components.yml +++ b/.github/components.yml @@ -111,6 +111,7 @@ IR_FE: ONNX_FE: revalidate: - MO + - OVC - ONNX_RT build: - CPU @@ -119,6 +120,7 @@ ONNX_FE: PDPD_FE: revalidate: - MO + - OVC build: - CPU - Python_API @@ -126,6 +128,7 @@ PDPD_FE: TF_FE: revalidate: - MO + - OVC build: - CPU - Python_API @@ -134,6 +137,7 @@ TF_FE: TFL_FE: revalidate: - MO + - OVC build: - CPU - Python_API @@ -141,6 +145,7 @@ TFL_FE: PyTorch_FE: revalidate: - MO + - OVC build: - CPU - Python_API @@ -148,6 +153,7 @@ PyTorch_FE: JAX_FE: revalidate: - MO + - OVC build: - CPU - Python_API @@ -165,6 +171,7 @@ Python_API: revalidate: - samples - MO + - OVC - tools - TF_FE build: @@ -207,6 +214,18 @@ IE_Tests: build: - IR_FE +OVC: + revalidate: + - PyTorch_FE + - TF_FE + - TFL_FE + - ONNX_FE + - PDPD_FE + - JAX_FE + build: + - Python_API + - TOKENIZERS # TF_FE tests depends on tokenizers build + MO: 
revalidate: - PyTorch_FE diff --git a/.github/github_org_control/config.json b/.github/github_org_control/config.json index 717403f27d13ea..7fc23b7888c170 100644 --- a/.github/github_org_control/config.json +++ b/.github/github_org_control/config.json @@ -37,6 +37,7 @@ "openvino-onnx-frontend-maintainers": "category: ONNX FE", "openvino-ie-tests-maintainers": "category: IE Tests", "openvino-mo-maintainers": "category: MO", + "openvino-ovc-maintainers": "category: OVC", "openvino-ngraph-maintainers": "category: Core", "openvino-scripts-maintainers": "category: build", "openvino-tests-maintainers": "category: IE Tests", diff --git a/.github/labeler.yml b/.github/labeler.yml index 64a8661cf1e2e8..49aeac7325aa4f 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -97,8 +97,10 @@ 'category: MO': - 'tools/mo/**/*' -- 'tools/ovc/**/*' - 'tests/layer_tests/mo_python_api_tests/**/*' + +'category: OVC': +- 'tools/ovc/**/*' - 'tests/layer_tests/ovc_python_api_tests/**/*' 'category: ONNX FE':