From 36b62da080ca3334c0270c607c2b62e7850621fd Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Mon, 8 Jul 2024 07:05:19 +0200 Subject: [PATCH 01/19] [Inference] Resolve static analysis issues (#25315) ### Details: - Move instead of copy - Avoid throw from destructor ### Tickets: - CVS-145054 --- .../src/dev/threading/cpu_streams_executor.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/inference/src/dev/threading/cpu_streams_executor.cpp b/src/inference/src/dev/threading/cpu_streams_executor.cpp index 2c9905bee9d0da..6cfbeeed305a4a 100644 --- a/src/inference/src/dev/threading/cpu_streams_executor.cpp +++ b/src/inference/src/dev/threading/cpu_streams_executor.cpp @@ -114,7 +114,12 @@ struct CPUStreamsExecutor::Impl { } #if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO if (_impl->_config.get_name().find("StreamsExecutor") == std::string::npos) { - set_cpu_used(_cpu_ids, NOT_USED); + try { + set_cpu_used(_cpu_ids, NOT_USED); + } catch (const ov::Exception&) { + // Destructor should not throw - catch needed for static analysis. + // CPU::CPU() won't throw here as cpu_info() is called from Stream constructor. + } } if (nullptr != _observer) { _observer->observe(false); @@ -294,8 +299,7 @@ struct CPUStreamsExecutor::Impl { // if not, then create ThreadTracker for it auto iter = t_stream_count_map.find((void*)this); if (iter == t_stream_count_map.end()) { - auto new_tracker_ptr = item.first->fetch(); - t_stream_count_map[(void*)this] = new_tracker_ptr; + t_stream_count_map[(void*)this] = item.first->fetch(); } return item.second; } @@ -350,7 +354,7 @@ struct CPUStreamsExecutor::Impl { std::min(streams_num, numaNodes.size()), std::back_inserter(_usedNumaNodes)); } else { - _usedNumaNodes = numaNodes; + _usedNumaNodes = std::move(numaNodes); } if (sub_streams_num > 0) { _subTaskThread.assign(sub_streams_num, std::make_shared()); From 92744e3a35ed8eb0302cecfffea1716be2b4cdcd Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Mon, 8 Jul 2024 10:12:02 +0400 Subject: [PATCH 02/19] [GPU] Reduce the number of set_arguments() and update_impl() calls (#25401) ### Details: - Previously, any node that had an `optimized_out` node in its dependencies, which in turn had an input `dynamic` node in their dependencies (something like: `dynamic_node -> optimized_out_node -> current_node`), could be marked as mutable due to the `is_mutable_input()` check in the `network::allocate_primitive_instance()` call. This forced `set_arguments()` call at every iteration (because of `has_mutable_input()`), despite the fact that the actual buffers remained the same. 
This change narrows the `is_mutable_input()` check to only actual mutable nodes, moving the optimized_out node dynamic dependencies check to runtime, thus allowing us to avoid unnecessary `set_arguments()` calls - Additionally, this change allows us to determine a little earlier whether `update_impl()` is really needed, reducing some code execution inside the `update_impl()` function in cases where `use_async_compilation()` returns false for the current primitive --- .../src/graph/include/primitive_inst.h | 2 +- src/plugins/intel_gpu/src/graph/network.cpp | 2 +- .../intel_gpu/src/graph/primitive_inst.cpp | 35 ++++++++++++------- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h index 124cdf62bc7811..d47f45e3bfc1f4 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h @@ -398,7 +398,7 @@ class primitive_inst { void fill_shape_info_data(const layout& runtime_layout, const layout& node_layout, int32_t* shape_info_ptr, size_t& offset); bool use_async_compilation(); // if primitive_inst doesn't replace impl to new impl(static impl with opt kerenl or dynamic impl), return false - bool update_impl(); + bool update_impl(bool use_async_compilation); event::ptr realloc_if_needed(); cldnn::network::ptr get_unfused_subgraph(); diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp index 83cba0887de8c3..7b207fd62b7665 100644 --- a/src/plugins/intel_gpu/src/graph/network.cpp +++ b/src/plugins/intel_gpu/src/graph/network.cpp @@ -1470,7 +1470,7 @@ void network::allocate_primitive_instance(program_node const& node) { return true; } if (dep_node->can_be_optimized()) { - if (is_mutable_input(*dep_node) || dep_node->is_dynamic()) { + if (is_mutable_input(*dep_node)) { return true; } } diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index d63efb0fa77688..e26b4a536e91df 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -714,7 +714,7 @@ event::ptr primitive_inst::realloc_if_needed() { GPU_DEBUG_TRACE_DETAIL << id() << ": Update impl with new output padding" << std::endl; set_shape_change(); _impl_params->output_layouts[0] = present_layout; - update_impl(); + update_impl(use_async_compilation()); } GPU_DEBUG_TRACE_DETAIL << id() << ": Update variable " << variable.get_name() << "'s memory with allocated kv cache output: " @@ -858,7 +858,7 @@ void primitive_inst::update_shape_info_tensor(const kernel_impl_params& params) } } -bool primitive_inst::update_impl() { +bool primitive_inst::update_impl(bool use_async_compilation) { OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle("update_impl: " + id())); GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::update_implementation); auto prev_impl_str = _impl != nullptr ? 
_impl->get_kernel_name() : "nullptr"; @@ -906,10 +906,9 @@ bool primitive_inst::update_impl() { const auto is_current_impl_dynamic = _impl && _impl->is_dynamic(); const auto& prog = get_network().get_program(); auto& cache = prog->get_implementations_cache(); - const bool async_compilation = use_async_compilation(); std::shared_ptr cached_impl = nullptr; { - if (async_compilation) + if (use_async_compilation) cached_impl = cache.get(updated_params); if (cached_impl) { @@ -926,7 +925,7 @@ bool primitive_inst::update_impl() { } if (!cached_impl) { if (_dynamic_impl || is_current_impl_dynamic) { - if (async_compilation) { + if (use_async_compilation) { auto& compilation_context = prog->get_compilation_context(); compilation_context.push_task(updated_params, [this, &compilation_context, updated_params]() { if (compilation_context.is_stopped()) @@ -1511,8 +1510,9 @@ event::ptr primitive_inst::execute(const std::vector& events) { // Try update impl if current impl is dynamic because opt kernel may be added to impl cache through async compilation. // Only try update weight and realloc when impl is updated. - if (shape_changed() || !_impl || (!shape_changed() && _impl->is_dynamic())) { - if (update_impl()) { + const bool can_use_async_compilation = use_async_compilation(); + if (shape_changed() || !_impl || (!shape_changed() && _impl->is_dynamic() && can_use_async_compilation)) { + if (update_impl(can_use_async_compilation)) { need_args_update = true; auto ev = update_weights(); if (ev) @@ -1529,14 +1529,23 @@ event::ptr primitive_inst::execute(const std::vector& events) { update_shape_done_by_other = false; // reset OPENVINO_ASSERT(_impl != nullptr, "[GPU] Implementation is nullptr for ", primitive_id, " primitive"); - // Dynamic insts may reallocate its' output buffer, so we need to update kernel's args respectively - bool has_dynamic_dependencies_insts = std::any_of(_deps.begin(), _deps.end(), - [](const std::pair& dep) { - return dep.first->mem_changed(); - }); + std::function has_dynamic_dependencies_insts = + [&has_dynamic_dependencies_insts](const cldnn::primitive_inst* prim_inst) { + for (auto& dep : prim_inst->_deps) { + const cldnn::primitive_inst* dep_inst = dep.first; + if (dep_inst->mem_changed()) { + return true; + } else if (dep_inst->can_be_optimized()) { + if (has_dynamic_dependencies_insts(dep_inst)) { + return true; + } + } + } + return false; + }; // Output buffer may be changed under the following conditions, so we need to set args to kernel on each iteration - if ((is_dynamic() && need_args_update) || has_mutable_input() || is_output() || has_dynamic_dependencies_insts) { + if ((is_dynamic() && need_args_update) || has_mutable_input() || is_output() || has_dynamic_dependencies_insts(this)) { set_arguments(); } on_execute(); From 53cf2e662c1b9c55c0722cd4f3430deddca8f843 Mon Sep 17 00:00:00 2001 From: hyunback kim Date: Mon, 8 Jul 2024 15:12:46 +0900 Subject: [PATCH 03/19] [GPU] Stable Diffusion failure using sdpa_kernel_opt. (#25419) Fix idx error. 
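In brief (as inferred from the diff below): `kernels_type` holds kernel-type enum values, while `kd.kernels` is a densely packed array, so indexing `kd.kernels` by the enum value can point at the wrong entry (or past the end) when the selected kernel types do not start at zero or are not contiguous. A minimal sketch of the corrected indexing, simplified from the patch (surrounding setup such as dispatch data and JIT creation is omitted):

```cpp
// kernels_type may contain a subset of KernelsTypes enum values (e.g. it may
// include FINALIZATION without starting at 0), so the enum value is not a
// valid position in kd.kernels. Keep a separate sequential counter instead.
size_t kd_kernels_idx = 0;
for (const auto& kernel_idx : kernels_type) {
    auto& kernel = kd.kernels[kd_kernels_idx++];  // dense position, not enum value
    // ... configure this kernel (entry point, arguments, work sizes) ...
}
```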
### Tickets: - *ticket-id* Signed-off-by: hyunback --- .../src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp index 8eb9dea65ba23d..750695fc80c9cb 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp @@ -199,6 +199,8 @@ KernelsData SDPAKernelOpt::GetKernelsData(const Params& params) const { kd.needs_sub_kernels_sync = true; GetUpdateDispatchDataFunc(kd); + + size_t kd_kernels_idx = 0; for (const auto& kernel_idx : kernels_type) { auto dispatch_data = SetDefault(prim_params, kernel_idx); auto kernel_name = GetKernelName(kernelName, static_cast(kernel_idx), prim_params.indirect_axis != -1); @@ -206,7 +208,7 @@ KernelsData SDPAKernelOpt::GetKernelsData(const Params& params) const { auto jit_constants = GetJitConstants(prim_params, kernel_idx); auto jit = CreateJit(kernel_name, jit_constants, entry_point); - auto& kernel = kd.kernels[kernel_idx]; + auto& kernel = kd.kernels[kd_kernels_idx++]; auto inputs_num = kernel_idx == KernelsTypes::FINALIZATION ? 0 : static_cast(prim_params.inputs.size()); From 21070948c952fa75bec136e9b0c0ee1dc71ea584 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Mon, 8 Jul 2024 10:01:27 +0200 Subject: [PATCH 04/19] [core] Improve speed of Constant::cast_vector for f16, bf16 -> f32 (#25248) ### Details: - Constant use Convert implementation from core reference instead standard approach. - Remove value range validation when destination type can hold all values from constant e.g i8 -> i16, f16 -> f32. ### Tickets: - CVS-142211 --- src/core/include/openvino/op/constant.hpp | 263 ++++++------------- src/core/src/op/constant.cpp | 299 ++++++++++++---------- 2 files changed, 237 insertions(+), 325 deletions(-) diff --git a/src/core/include/openvino/op/constant.hpp b/src/core/include/openvino/op/constant.hpp index 6f7500e69a3a95..ea06efb911e293 100644 --- a/src/core/include/openvino/op/constant.hpp +++ b/src/core/include/openvino/op/constant.hpp @@ -296,10 +296,7 @@ class OPENVINO_API Constant : public Op { std::vector rc; using Type_t = element::Type_t; - const auto num_elements_in_constant = shape_size(m_shape); - const auto num_elements_to_cast = - (num_elements < 0 ? 
num_elements_in_constant - : std::min(static_cast(num_elements), num_elements_in_constant)); + const auto num_elements_to_cast = get_num_elements_to_cast(num_elements); rc.reserve(num_elements_to_cast); switch (m_element_type) { @@ -415,17 +412,8 @@ class OPENVINO_API Constant : public Op { private: Constant(bool memset_allocation, const element::Type& type, const Shape& shape); - template < - element::Type_t Type, - class OUT_T, - typename std::enable_if::value>::type* = nullptr> - void cast_vector(std::vector& output_vector, size_t num_elements) const { - // this function is workaround for waring during windows building - // build complains for vector creation based on iterators - // which point on different type than destination vector::value_type - using IN_T = fundamental_type_for; - auto first = get_data_ptr(); - std::transform(first, first + num_elements, std::back_inserter(output_vector), [](IN_T c) { + size_t get_num_elements_to_cast(const int64_t n) const; + #ifdef __clang__ # pragma clang diagnostic push # ifdef __has_warning @@ -444,22 +432,21 @@ class OPENVINO_API Constant : public Op { # pragma warning(disable : 4018) # pragma warning(disable : 4804) #endif - if (!std::is_same::value) { - OPENVINO_ASSERT(!std::numeric_limits::is_signed || std::numeric_limits::lowest() <= c, - "Cannot cast vector from ", - Type, - " constant to ", - element::from(), - ". Some values are outside the range. Example: ", - c); - OPENVINO_ASSERT(std::numeric_limits::max() >= c, - "Cannot cast vector from ", - Type, - " constant to ", - element::from(), - ". Some values are outside the range. Example: ", - c); - } + template ::value && + !std::is_same::value>::type* = nullptr> + static bool in_type_range(const ConstantT v) { + return std::numeric_limits::lowest() <= v && v <= std::numeric_limits::max(); + } + + template ::value && !std::is_same::value>::type* = + nullptr> + static bool in_type_range(const ConstantT v) { + return v <= std::numeric_limits::max(); + } #if defined(__clang__) # pragma clang diagnostic pop #elif defined(__GNUC__) @@ -467,6 +454,27 @@ class OPENVINO_API Constant : public Op { #elif defined(_MSC_VER) # pragma warning(pop) #endif + + template ::value>::type* = nullptr> + static constexpr bool in_type_range(const ConstantT) { + return true; + } + + template < + element::Type_t Type, + class OUT_T, + typename std::enable_if::value>::type* = nullptr> + void cast_vector(std::vector& output_vector, size_t num_elements) const { + using InputT = ov::fundamental_type_for; + auto first = get_data_ptr(); + std::transform(first, first + num_elements, std::back_inserter(output_vector), [](const InputT c) { + OPENVINO_ASSERT(in_type_range(c), + "Cannot cast vector from ", + element::from(), + " constant to ", + element::from(), + ". Some values are outside the range. 
Example: ", + c); return static_cast(c); }); } @@ -514,39 +522,8 @@ class OPENVINO_API Constant : public Op { typename std::enable_if::value>::type* = nullptr> void fill_data(const T& value) { using StorageDataType = ov::fundamental_type_for; -#ifdef __clang__ -# pragma clang diagnostic push -# ifdef __has_warning -# if __has_warning("-Wimplicit-const-int-float-conversion") -# pragma clang diagnostic ignored "-Wimplicit-const-int-float-conversion" -# elif __has_warning("-Wimplicit-int-float-conversion") -# pragma clang diagnostic ignored "-Wimplicit-int-float-conversion" -# endif -# endif -#elif defined(__GNUC__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wsign-compare" -# pragma GCC diagnostic ignored "-Wbool-compare" -#elif defined(_MSC_VER) -# pragma warning(push) -# pragma warning(disable : 4018) -# pragma warning(disable : 4804) -#endif - if (!std::is_same::value) { - OPENVINO_ASSERT( - !std::numeric_limits::is_signed || std::numeric_limits::lowest() <= value, - "Cannot fill constant data. Values is outside the range."); - OPENVINO_ASSERT(std::numeric_limits::max() >= value, - "Cannot fill constant data. Values is outside the range."); - } -#if defined(__clang__) -# pragma clang diagnostic pop -#elif defined(__GNUC__) -# pragma GCC diagnostic pop -#elif defined(_MSC_VER) -# pragma warning(pop) -#endif - + OPENVINO_ASSERT(in_type_range(value), + "Cannot fill constant data. Values is outside the range."); const auto size = shape_size(m_shape); const auto v = static_cast(value); std::fill_n(get_data_ptr_nc(), size, v); @@ -1020,131 +997,6 @@ CONSTANT_FILL_DATA_SPECIALIZATION(f4e2m1, double) #undef CONSTANT_FILL_DATA_SPECIALIZATION -#define CONSTANT_CAST_VECTOR_SPECIALIZATION(ET, DST_TYPE) \ - template <> \ - OPENVINO_API void Constant::cast_lp_vector(std::vector & output_vector, \ - size_t num_elements) const; - -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, bool) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, signed char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, unsigned char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, short) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, unsigned short) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, int) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, unsigned int) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, unsigned long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, long long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, unsigned long long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, float16) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, bfloat16) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, float) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u1, double) - -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, bool) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, signed char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, unsigned char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, short) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, unsigned short) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, int) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, unsigned int) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, unsigned long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, long long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, unsigned long long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, float16) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, bfloat16) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, float) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u2, double) - 
-CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, bool) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, signed char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, unsigned char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, short) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, unsigned short) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, int) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, unsigned int) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, unsigned long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, long long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, unsigned long long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, float16) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, bfloat16) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, float) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u3, double) - -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, bool) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, signed char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, unsigned char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, short) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, unsigned short) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, int) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, unsigned int) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, unsigned long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, long long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, unsigned long long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, float16) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, bfloat16) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, float) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u4, double) - -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, bool) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, signed char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, unsigned char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, short) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, unsigned short) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, int) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, unsigned int) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, unsigned long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, long long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, unsigned long long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, float16) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, bfloat16) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, float) -CONSTANT_CAST_VECTOR_SPECIALIZATION(u6, double) - -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, bool) -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, signed char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, unsigned char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, short) -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, unsigned short) -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, int) -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, unsigned int) -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, unsigned long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, long long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, unsigned long long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, float16) -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, bfloat16) -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, float) -CONSTANT_CAST_VECTOR_SPECIALIZATION(i4, double) - -CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, bool) -CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, signed char) -CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, unsigned char) 
-CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, short) -CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, unsigned short) -CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, int) -CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, unsigned int) -CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, unsigned long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, long long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, unsigned long long) -CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, float16) -CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, bfloat16) -CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, float) -CONSTANT_CAST_VECTOR_SPECIALIZATION(f4e2m1, double) -#undef CONSTANT_CAST_VECTOR_SPECIALIZATION - #define CONSTANT_WRITE_BUFFER_SPECIALIZATION(ET, SRC_TYPE) \ template <> \ OPENVINO_API void Constant::write_lp_buffer(const std::vector& source); @@ -1311,6 +1163,39 @@ CONSTANT_WRITE_BUFFER_SPECIALIZATION(f4e2m1, double) #undef CONSTANT_WRITE_BUFFER_SPECIALIZATION +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const; + } // namespace v0 } // namespace op } // namespace ov diff --git a/src/core/src/op/constant.cpp b/src/core/src/op/constant.cpp index e84cad9341b9c9..a507e272f6f5e8 100644 --- a/src/core/src/op/constant.cpp +++ b/src/core/src/op/constant.cpp @@ -25,6 +25,10 @@ namespace ov { namespace op { +#define SUPPORTED_ET \ + boolean, bf16, f16, f32, f64, i4, i8, i16, i32, i64, u1, u2, u3, u4, u6, u8, u16, u32, u64, nf4, f8e4m3, f8e5m2, \ + f4e2m1, f8e8m0 + template TContainer convert_values_to(std::vector&& values, const Shape& shape) { auto out = TContainer(shape_size(shape)); @@ -69,6 +73,47 @@ std::vector from_string_vector(const std::vector& str_values) { return values; } +#ifdef __clang__ +# pragma clang diagnostic push +# ifdef __has_warning +# if __has_warning("-Wimplicit-const-int-float-conversion") +# pragma clang diagnostic ignored "-Wimplicit-const-int-float-conversion" +# elif __has_warning("-Wimplicit-int-float-conversion") +# pragma clang diagnostic ignored "-Wimplicit-int-float-conversion" +# endif +# endif +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wsign-compare" 
+# pragma GCC diagnostic ignored "-Wbool-compare" +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4018) +# pragma warning(disable : 4804) +#endif +template < + class U, + class ConstantT, + typename std::enable_if::value && !std::is_same::value>::type* = nullptr> +static bool in_type_range(const ConstantT v) { + return std::numeric_limits::lowest() <= v && v <= std::numeric_limits::max(); +} + +template < + class U, + class ConstantT, + typename std::enable_if::value && !std::is_same::value>::type* = nullptr> +static bool in_type_range(const ConstantT v) { + return v <= std::numeric_limits::max(); +} +#if defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#elif defined(_MSC_VER) +# pragma warning(pop) +#endif + template ::type* = nullptr> fundamental_type_for convert_if_in_element_range(const U& value) { using T = fundamental_type_for; @@ -136,14 +181,6 @@ void fill_buffer(void* buffer, const Shape& shape, const T& value) { std::fill_n(element::iterator(buffer), shape_size(shape), convert_if_in_element_range(value)); } -template -void cast_buffer(const void* buffer, size_t num_elements, std::vector& output) { - const auto first = element::iterator(buffer); - using StorageType = fundamental_type_for; - - std::transform(first, first + num_elements, std::back_inserter(output), reference::detail::convert); -} - template void write_buffer(const std::vector& source, void* buffer) { std::transform(source.begin(), source.end(), element::iterator(buffer), convert_if_in_element_range); @@ -599,6 +636,11 @@ const Strides& Constant::get_strides() const { return m_byte_strides; } +size_t Constant::get_num_elements_to_cast(const int64_t n) const { + auto num_elements_in_shape = shape_size(m_shape); + return (n < 0 ? 
num_elements_in_shape : std::min(static_cast(n), num_elements_in_shape)); +} + template <> Constant::LPBuffer::LPBuffer(void* ptr) : iter{std::make_shared(reinterpret_cast*>(ptr))} {} @@ -927,134 +969,6 @@ CONSTANT_FILL_DATA(f4e2m1, double) #undef CONSTANT_FILL_DATA -#define CONSTANT_CAST_VECTOR(ET, DST_TYPE) \ - template <> \ - void Constant::cast_lp_vector(std::vector & output_vector, \ - size_t num_elements) const { \ - ov::op::cast_buffer(get_data_ptr(), num_elements, output_vector); \ - } - -CONSTANT_CAST_VECTOR(u1, bool) -CONSTANT_CAST_VECTOR(u1, char) -CONSTANT_CAST_VECTOR(u1, signed char) -CONSTANT_CAST_VECTOR(u1, unsigned char) -CONSTANT_CAST_VECTOR(u1, short) -CONSTANT_CAST_VECTOR(u1, unsigned short) -CONSTANT_CAST_VECTOR(u1, int) -CONSTANT_CAST_VECTOR(u1, unsigned int) -CONSTANT_CAST_VECTOR(u1, long) -CONSTANT_CAST_VECTOR(u1, unsigned long) -CONSTANT_CAST_VECTOR(u1, long long) -CONSTANT_CAST_VECTOR(u1, unsigned long long) -CONSTANT_CAST_VECTOR(u1, float16) -CONSTANT_CAST_VECTOR(u1, bfloat16) -CONSTANT_CAST_VECTOR(u1, float) -CONSTANT_CAST_VECTOR(u1, double) - -CONSTANT_CAST_VECTOR(u2, bool) -CONSTANT_CAST_VECTOR(u2, char) -CONSTANT_CAST_VECTOR(u2, signed char) -CONSTANT_CAST_VECTOR(u2, unsigned char) -CONSTANT_CAST_VECTOR(u2, short) -CONSTANT_CAST_VECTOR(u2, unsigned short) -CONSTANT_CAST_VECTOR(u2, int) -CONSTANT_CAST_VECTOR(u2, unsigned int) -CONSTANT_CAST_VECTOR(u2, long) -CONSTANT_CAST_VECTOR(u2, unsigned long) -CONSTANT_CAST_VECTOR(u2, long long) -CONSTANT_CAST_VECTOR(u2, unsigned long long) -CONSTANT_CAST_VECTOR(u2, float16) -CONSTANT_CAST_VECTOR(u2, bfloat16) -CONSTANT_CAST_VECTOR(u2, float) -CONSTANT_CAST_VECTOR(u2, double) - -CONSTANT_CAST_VECTOR(u3, bool) -CONSTANT_CAST_VECTOR(u3, char) -CONSTANT_CAST_VECTOR(u3, signed char) -CONSTANT_CAST_VECTOR(u3, unsigned char) -CONSTANT_CAST_VECTOR(u3, short) -CONSTANT_CAST_VECTOR(u3, unsigned short) -CONSTANT_CAST_VECTOR(u3, int) -CONSTANT_CAST_VECTOR(u3, unsigned int) -CONSTANT_CAST_VECTOR(u3, long) -CONSTANT_CAST_VECTOR(u3, unsigned long) -CONSTANT_CAST_VECTOR(u3, long long) -CONSTANT_CAST_VECTOR(u3, unsigned long long) -CONSTANT_CAST_VECTOR(u3, float16) -CONSTANT_CAST_VECTOR(u3, bfloat16) -CONSTANT_CAST_VECTOR(u3, float) -CONSTANT_CAST_VECTOR(u3, double) - -CONSTANT_CAST_VECTOR(u4, bool) -CONSTANT_CAST_VECTOR(u4, char) -CONSTANT_CAST_VECTOR(u4, signed char) -CONSTANT_CAST_VECTOR(u4, unsigned char) -CONSTANT_CAST_VECTOR(u4, short) -CONSTANT_CAST_VECTOR(u4, unsigned short) -CONSTANT_CAST_VECTOR(u4, int) -CONSTANT_CAST_VECTOR(u4, unsigned int) -CONSTANT_CAST_VECTOR(u4, long) -CONSTANT_CAST_VECTOR(u4, unsigned long) -CONSTANT_CAST_VECTOR(u4, long long) -CONSTANT_CAST_VECTOR(u4, unsigned long long) -CONSTANT_CAST_VECTOR(u4, float16) -CONSTANT_CAST_VECTOR(u4, bfloat16) -CONSTANT_CAST_VECTOR(u4, float) -CONSTANT_CAST_VECTOR(u4, double) - -CONSTANT_CAST_VECTOR(u6, bool) -CONSTANT_CAST_VECTOR(u6, char) -CONSTANT_CAST_VECTOR(u6, signed char) -CONSTANT_CAST_VECTOR(u6, unsigned char) -CONSTANT_CAST_VECTOR(u6, short) -CONSTANT_CAST_VECTOR(u6, unsigned short) -CONSTANT_CAST_VECTOR(u6, int) -CONSTANT_CAST_VECTOR(u6, unsigned int) -CONSTANT_CAST_VECTOR(u6, long) -CONSTANT_CAST_VECTOR(u6, unsigned long) -CONSTANT_CAST_VECTOR(u6, long long) -CONSTANT_CAST_VECTOR(u6, unsigned long long) -CONSTANT_CAST_VECTOR(u6, float16) -CONSTANT_CAST_VECTOR(u6, bfloat16) -CONSTANT_CAST_VECTOR(u6, float) -CONSTANT_CAST_VECTOR(u6, double) - -CONSTANT_CAST_VECTOR(i4, bool) -CONSTANT_CAST_VECTOR(i4, char) -CONSTANT_CAST_VECTOR(i4, signed char) 
-CONSTANT_CAST_VECTOR(i4, unsigned char) -CONSTANT_CAST_VECTOR(i4, short) -CONSTANT_CAST_VECTOR(i4, unsigned short) -CONSTANT_CAST_VECTOR(i4, int) -CONSTANT_CAST_VECTOR(i4, unsigned int) -CONSTANT_CAST_VECTOR(i4, long) -CONSTANT_CAST_VECTOR(i4, unsigned long) -CONSTANT_CAST_VECTOR(i4, long long) -CONSTANT_CAST_VECTOR(i4, unsigned long long) -CONSTANT_CAST_VECTOR(i4, float16) -CONSTANT_CAST_VECTOR(i4, bfloat16) -CONSTANT_CAST_VECTOR(i4, float) -CONSTANT_CAST_VECTOR(i4, double) - -CONSTANT_CAST_VECTOR(f4e2m1, bool) -CONSTANT_CAST_VECTOR(f4e2m1, char) -CONSTANT_CAST_VECTOR(f4e2m1, signed char) -CONSTANT_CAST_VECTOR(f4e2m1, unsigned char) -CONSTANT_CAST_VECTOR(f4e2m1, short) -CONSTANT_CAST_VECTOR(f4e2m1, unsigned short) -CONSTANT_CAST_VECTOR(f4e2m1, int) -CONSTANT_CAST_VECTOR(f4e2m1, unsigned int) -CONSTANT_CAST_VECTOR(f4e2m1, long) -CONSTANT_CAST_VECTOR(f4e2m1, unsigned long) -CONSTANT_CAST_VECTOR(f4e2m1, long long) -CONSTANT_CAST_VECTOR(f4e2m1, unsigned long long) -CONSTANT_CAST_VECTOR(f4e2m1, float16) -CONSTANT_CAST_VECTOR(f4e2m1, bfloat16) -CONSTANT_CAST_VECTOR(f4e2m1, float) -CONSTANT_CAST_VECTOR(f4e2m1, double) - -#undef CONSTANT_CAST_VECTOR - #define CONSTANT_WRITE_BUFFER(ET, SRC_TYPE) \ template <> \ void Constant::write_lp_buffer(const std::vector& source) { \ @@ -1223,6 +1137,119 @@ CONSTANT_WRITE_BUFFER(f4e2m1, double) #undef CONSTANT_WRITE_BUFFER +template +struct Validate : element::NoAction { + using element::NoAction::visit; + + template >::value>::type* = nullptr> + static result_type visit(const InputIt src, const size_t n) { + auto first = element::iterator(src); + auto last = first + n; + using T = ov::fundamental_type_for; + auto not_valid_value = std::find_if_not(first, last, &in_type_range); + OPENVINO_ASSERT(not_valid_value == last, + "Cannot cast from ", + ET, + " constant to ", + element::from(), + ". Some values are outside the range. 
Example: ", + *not_valid_value); + } + + template >::value>::type* = nullptr> + static result_type visit(const InputIt, const size_t) {} +}; + +template +struct Convert : element::NotSupported { + using element::NotSupported::visit; + + template ::type* = nullptr> + static result_type visit(const InputIt src, OutputIt dst, const size_t n) { + auto first = element::iterator(src); + reference::convert(first, dst, n); + } + + template ::type* = nullptr> + [[noreturn]] static result_type visit(const InputIt, OutputIt, const size_t) { + OPENVINO_THROW("'cast_vector' does not support casting Constant of type ", + ET, + " into std::vector of ", + element::from()); + } +}; + +template <> +struct Convert : element::NotSupported { + using element::NotSupported::visit; + + template ::type* = nullptr> + static result_type visit(InputIt src, OutputIt dst, const size_t n) { + auto first = element::iterator(src); + using T = ov::fundamental_type_for; + std::transform(first, first + n, dst, [](const T v) { + return static_cast(v); + }); + } + + template ::type* = nullptr> + [[noreturn]] static result_type visit(InputIt, OutputIt, const size_t) { + OPENVINO_THROW("'cast_vector' does not support casting Constant of type ", ET, " into std::vector of boolean"); + } +}; + +#define CONSTANT_CAST_VECTOR(DTYPE, ET_REQ_VALIDATION) \ + template <> \ + OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const { \ + std::vector output(get_num_elements_to_cast(num_elements)); \ + using namespace ov::element; \ + IfTypeOf::apply>(m_element_type, get_data_ptr(), output.size()); \ + IfTypeOf::apply>(m_element_type, get_data_ptr(), output.data(), output.size()); \ + return output; \ + } + +template <> +OPENVINO_API std::vector Constant::cast_vector(int64_t num_elements) const { + std::vector output(get_num_elements_to_cast(num_elements)); + using namespace ov::element; + IfTypeOf::apply>(m_element_type, get_data_ptr(), output.begin(), output.size()); + return output; +} + +CONSTANT_CAST_VECTOR(char, OV_PP_ET_LIST(bf16, f16, f32, f64, i8, i16, i32, i64, u16, u32, u64, f8e8m0, f8e4m3, f8e5m2)) +CONSTANT_CAST_VECTOR(signed char, OV_PP_ET_LIST(bf16, f16, i16, i32, i64, u8, u16, u32, u64, f8e8m0, f8e4m3, f8e5m2)) +CONSTANT_CAST_VECTOR(unsigned char, + OV_PP_ET_LIST(bf16, f16, f32, f64, i8, i16, i32, i64, u16, u32, u64, f8e8m0, f8e4m3, f8e5m2)) +CONSTANT_CAST_VECTOR(short, OV_PP_ET_LIST(bf16, f16, i32, i64, u16, u32, u64, f8e8m0, f8e5m2)) +CONSTANT_CAST_VECTOR(unsigned short, + OV_PP_ET_LIST(bf16, f16, f32, f64, i8, i16, i32, i64, u32, u64, f8e8m0, f8e4m3, f8e5m2)) +CONSTANT_CAST_VECTOR(int, OV_PP_ET_LIST(bf16, f16, i64, u32, u64, f8e8m0)) +CONSTANT_CAST_VECTOR(unsigned int, OV_PP_ET_LIST(bf16, f16, f32, f64, i8, i16, i32, i64, u64, f8e8m0, f8e4m3, f8e5m2)) +CONSTANT_CAST_VECTOR(long, OV_PP_ET_LIST(bf16, f16, u32, u64)) +CONSTANT_CAST_VECTOR(unsigned long, OV_PP_ET_LIST(bf16, f16, f32, f64, i8, i16, i32, i64, f8e8m0, f8e4m3, f8e5m2)) +CONSTANT_CAST_VECTOR(long long, OV_PP_ET_LIST(bf16, f16, u64)) +CONSTANT_CAST_VECTOR(unsigned long long, OV_PP_ET_LIST(bf16, f16, f32, f64, i8, i16, i32, i64, f8e8m0, f8e4m3, f8e5m2)) +CONSTANT_CAST_VECTOR(float16, OV_PP_ET_LIST(bf16, i16, i32, u8, u16, u32, u64)) +CONSTANT_CAST_VECTOR(bfloat16, OV_PP_ET_LIST(f32, f64, i64, u32, u64)) +CONSTANT_CAST_VECTOR(float, OV_PP_ET_LIST(f64, i64, u32, u64)) +CONSTANT_CAST_VECTOR(double, OV_PP_ET_LIST()) + } // namespace v0 } // namespace op } // namespace ov From 182946beaaa44190a1cf5fe3d76c7dd1cf381805 Mon Sep 17 00:00:00 2001 From: 
Tatiana Savina Date: Mon, 8 Jul 2024 10:59:51 +0200 Subject: [PATCH 05/19] [DOCS] Remove outdated content (#25408) ### Details: - *item1* - *...* ### Tickets: - CVS-145220 --- .../assets/images/quantized_convolution.png | 3 -- .../assets/images/quantized_model_example.png | 3 -- .../advanced-guides/quantized-models.rst | 4 +-- .../low-precision-model-representation.rst | 35 ------------------- 4 files changed, 1 insertion(+), 44 deletions(-) delete mode 100644 docs/articles_en/assets/images/quantized_convolution.png delete mode 100644 docs/articles_en/assets/images/quantized_model_example.png delete mode 100644 docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/quantized-models/low-precision-model-representation.rst diff --git a/docs/articles_en/assets/images/quantized_convolution.png b/docs/articles_en/assets/images/quantized_convolution.png deleted file mode 100644 index 6ccb89816065a2..00000000000000 --- a/docs/articles_en/assets/images/quantized_convolution.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:71365e85be040eb01ed524e568b332d9bb6222c760686c54db4e754f587082c2 -size 31032 diff --git a/docs/articles_en/assets/images/quantized_model_example.png b/docs/articles_en/assets/images/quantized_model_example.png deleted file mode 100644 index d9a037779a756c..00000000000000 --- a/docs/articles_en/assets/images/quantized_model_example.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3f68e826cfac63d8e6f8d77aa5b7fc61957a872dfb09b38695fb481044a6ddd5 -size 48327 diff --git a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/quantized-models.rst b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/quantized-models.rst index fadd846af973dc..d44dc20d1e0b5a 100644 --- a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/quantized-models.rst +++ b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/quantized-models.rst @@ -7,7 +7,6 @@ Quantized models compute and restrictions :maxdepth: 1 :hidden: - quantized-models/low-precision-model-representation .. meta:: :description: Learn about the support for quantized models with different @@ -16,8 +15,7 @@ Quantized models compute and restrictions One of the feature of OpenVINO is the support of quantized models with different precisions: INT8, INT4, etc. However, it is up to the plugin to define what exact precisions are supported by the particular HW. -All quantized models which can be expressed in IR have a unified representation by means of *FakeQuantize* operation. -For more details about low-precision model representation please refer to this :doc:`document `. + Interpreting FakeQuantize at runtime #################################### diff --git a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/quantized-models/low-precision-model-representation.rst b/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/quantized-models/low-precision-model-representation.rst deleted file mode 100644 index abecc2cfa8f580..00000000000000 --- a/docs/articles_en/documentation/openvino-extensibility/openvino-plugin-library/advanced-guides/quantized-models/low-precision-model-representation.rst +++ /dev/null @@ -1,35 +0,0 @@ -.. 
{#openvino_docs_ie_plugin_dg_lp_representation} - -Representation of low-precision models -====================================== - -The goal of this document is to describe how optimized models are represented in OpenVINO Intermediate Representation (IR) and provide guidance -on interpretation rules for such models at runtime. - -Currently, there are two groups of optimization methods that can influence on the IR after applying them to the full-precision model: - -- **Sparsity**. It is represented by zeros inside the weights and this is up to the hardware plugin how to interpret these zeros - (use weights as is or apply special compression algorithms and sparse arithmetic). No additional mask is provided with the model. -- **Quantization**. The rest of this document is dedicated to the representation of quantized models. - -Representation of quantized models -################################### - -The OpenVINO Toolkit represents all the quantized models using the so-called FakeQuantize operation (see the description in -:doc:`this document <../../../../openvino-ir-format/operation-sets/operation-specs/quantization/fake-quantize-1>`). This operation is very expressive and allows mapping values from -arbitrary input and output ranges. The whole idea behind that is quite simple: we project (discretize) the input values to the low-precision -data type using affine transformation (with clamp and rounding) and then reproject discrete values back to the original range and data type. -It can be considered as an emulation of the quantization process which happens at runtime. -In order to be able to execute a particular DL operation in low-precision all its inputs should be quantized i.e. should have FakeQuantize -between operation and data blobs. The figure below shows an example of quantized Convolution which contains two FakeQuantize nodes: one for -weights and one for activations (bias is quantized using the same parameters). - -.. image:: ../../../../../assets/images/quantized_convolution.png - - -Starting from OpenVINO 2020.2 release all the quantized models are represented in the compressed form. It means that the weights -of low-precision operations are converted into the target precision (e.g. INT8). It helps to substantially reduce the model size. -The rest of the parameters can be represented in FLOAT32 or FLOAT16 precision depending on the input full-precision model used in -the quantization process. Fig. 2 below shows an example of the part of the compressed IR. - -.. 
image:: ../../../../../assets/images/quantized_model_example.png From dd50c29a8898820148d4bc1ef4f5f0fc45956e7c Mon Sep 17 00:00:00 2001 From: Andrzej Kopytko Date: Mon, 8 Jul 2024 11:11:08 +0200 Subject: [PATCH 06/19] [DOCS] Port-to-master-for-Revert-column-visibility-in-datatable (#25429) Port for https://github.com/openvinotoolkit/openvino/pull/25427/files --- .../supported-models.rst | 16 -- .../generative-ai-performance.rst | 49 +--- .../_static/css/jquery.dataTables.min.css | 1 - .../_static/css/openVinoDataTables.css | 224 ++++++++++++------ .../_static/download/llm_models.csv | 44 ++-- .../_static/js/jquery.dataTables.min.js | 4 - .../_static/js/openVinoDataTables.js | 29 ++- docs/sphinx_setup/_templates/layout.html | 4 +- 8 files changed, 197 insertions(+), 174 deletions(-) delete mode 100644 docs/sphinx_setup/_static/css/jquery.dataTables.min.css delete mode 100644 docs/sphinx_setup/_static/js/jquery.dataTables.min.js diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst index 968c5b6571bca9..5e47ae7d6ca751 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-models.rst @@ -10,22 +10,6 @@ HuggingFace). This list is not comprehensive and only includes models tested by .. raw:: html -
- - - .. csv-table:: diff --git a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst index 73c9f255ff3dbb..e7a4d69d13baac 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks/generative-ai-performance.rst @@ -7,54 +7,11 @@ Intel® Core™ Ultra processor family and AI PCs. The table below lists the key performance indicators for a selection of Large Language Models, running on an Intel® Core™ Ultra 7-165H based system, on built-in GPUs. -For complete information on the system config, see: -`Hardware Platforms [PDF] `__ .. raw:: html -

- - - - - - - - .. csv-table:: @@ -64,8 +21,4 @@ For complete information on the system config, see: :file: ../../_static/download/llm_models.csv -This page is regularly updated to help you identify the best-performing LLMs on the -Intel® Core™ Ultra processor family and AI PCs. - -For complete information on the system config, see: -`Hardware Platforms [PDF] `__ \ No newline at end of file +For complete information on the system config, see: `Hardware Platforms [PDF] `__ \ No newline at end of file diff --git a/docs/sphinx_setup/_static/css/jquery.dataTables.min.css b/docs/sphinx_setup/_static/css/jquery.dataTables.min.css deleted file mode 100644 index 671093e42a20dd..00000000000000 --- a/docs/sphinx_setup/_static/css/jquery.dataTables.min.css +++ /dev/null @@ -1 +0,0 @@ -:root{--dt-row-selected: 13, 110, 253;--dt-row-selected-text: 255, 255, 255;--dt-row-selected-link: 9, 10, 11}table.dataTable td.dt-control{text-align:center;cursor:pointer}table.dataTable td.dt-control:before{height:1em;width:1em;margin-top:-9px;display:inline-block;color:white;border:.15em solid white;border-radius:1em;box-shadow:0 0 .2em #444;box-sizing:content-box;text-align:center;text-indent:0 !important;font-family:"Courier New",Courier,monospace;line-height:1em;content:"+";background-color:#31b131}table.dataTable tr.dt-hasChild td.dt-control:before{content:"-";background-color:#d33333}table.dataTable thead>tr>th.sorting,table.dataTable thead>tr>th.sorting_asc,table.dataTable thead>tr>th.sorting_desc,table.dataTable thead>tr>th.sorting_asc_disabled,table.dataTable thead>tr>th.sorting_desc_disabled,table.dataTable thead>tr>td.sorting,table.dataTable thead>tr>td.sorting_asc,table.dataTable thead>tr>td.sorting_desc,table.dataTable thead>tr>td.sorting_asc_disabled,table.dataTable thead>tr>td.sorting_desc_disabled{cursor:pointer;position:relative;padding-left:26px}table.dataTable thead>tr>th.sorting:before,table.dataTable thead>tr>th.sorting:after,table.dataTable thead>tr>th.sorting_asc:before,table.dataTable thead>tr>th.sorting_asc:after,table.dataTable thead>tr>th.sorting_desc:before,table.dataTable thead>tr>th.sorting_desc:after,table.dataTable thead>tr>th.sorting_asc_disabled:before,table.dataTable thead>tr>th.sorting_asc_disabled:after,table.dataTable thead>tr>th.sorting_desc_disabled:before,table.dataTable thead>tr>th.sorting_desc_disabled:after,table.dataTable thead>tr>td.sorting:before,table.dataTable thead>tr>td.sorting:after,table.dataTable thead>tr>td.sorting_asc:before,table.dataTable thead>tr>td.sorting_asc:after,table.dataTable thead>tr>td.sorting_desc:before,table.dataTable thead>tr>td.sorting_desc:after,table.dataTable thead>tr>td.sorting_asc_disabled:before,table.dataTable thead>tr>td.sorting_asc_disabled:after,table.dataTable thead>tr>td.sorting_desc_disabled:before,table.dataTable thead>tr>td.sorting_desc_disabled:after{position:absolute;display:block;opacity:.125;left:10px;line-height:9px;font-size:.8em}table.dataTable thead>tr>th.sorting:before,table.dataTable thead>tr>th.sorting_asc:before,table.dataTable thead>tr>th.sorting_desc:before,table.dataTable thead>tr>th.sorting_asc_disabled:before,table.dataTable thead>tr>th.sorting_desc_disabled:before,table.dataTable thead>tr>td.sorting:before,table.dataTable thead>tr>td.sorting_asc:before,table.dataTable thead>tr>td.sorting_desc:before,table.dataTable thead>tr>td.sorting_asc_disabled:before,table.dataTable thead>tr>td.sorting_desc_disabled:before{bottom:50%;content:"▲";content:"▲"/""}table.dataTable thead>tr>th.sorting:after,table.dataTable 
thead>tr>th.sorting_asc:after,table.dataTable thead>tr>th.sorting_desc:after,table.dataTable thead>tr>th.sorting_asc_disabled:after,table.dataTable thead>tr>th.sorting_desc_disabled:after,table.dataTable thead>tr>td.sorting:after,table.dataTable thead>tr>td.sorting_asc:after,table.dataTable thead>tr>td.sorting_desc:after,table.dataTable thead>tr>td.sorting_asc_disabled:after,table.dataTable thead>tr>td.sorting_desc_disabled:after{top:50%;content:"▼";content:"▼"/""}table.dataTable thead>tr>th.sorting_asc:before,table.dataTable thead>tr>th.sorting_desc:after,table.dataTable thead>tr>td.sorting_asc:before,table.dataTable thead>tr>td.sorting_desc:after{opacity:.6}table.dataTable thead>tr>th.sorting_desc_disabled:after,table.dataTable thead>tr>th.sorting_asc_disabled:before,table.dataTable thead>tr>td.sorting_desc_disabled:after,table.dataTable thead>tr>td.sorting_asc_disabled:before{display:none}table.dataTable thead>tr>th:active,table.dataTable thead>tr>td:active{outline:none}div.dataTables_scrollBody>table.dataTable>thead>tr>th:before,div.dataTables_scrollBody>table.dataTable>thead>tr>th:after,div.dataTables_scrollBody>table.dataTable>thead>tr>td:before,div.dataTables_scrollBody>table.dataTable>thead>tr>td:after{display:none}div.dataTables_processing{position:absolute;top:50%;left:50%;width:200px;margin-left:-100px;margin-top:-26px;text-align:center;padding:2px}div.dataTables_processing>div:last-child{position:relative;width:80px;height:15px;margin:1em auto}div.dataTables_processing>div:last-child>div{position:absolute;top:0;width:13px;height:13px;border-radius:50%;background:rgb(13, 110, 253);background:rgb(var(--dt-row-selected));animation-timing-function:cubic-bezier(0, 1, 1, 0)}div.dataTables_processing>div:last-child>div:nth-child(1){left:8px;animation:datatables-loader-1 .6s infinite}div.dataTables_processing>div:last-child>div:nth-child(2){left:8px;animation:datatables-loader-2 .6s infinite}div.dataTables_processing>div:last-child>div:nth-child(3){left:32px;animation:datatables-loader-2 .6s infinite}div.dataTables_processing>div:last-child>div:nth-child(4){left:56px;animation:datatables-loader-3 .6s infinite}@keyframes datatables-loader-1{0%{transform:scale(0)}100%{transform:scale(1)}}@keyframes datatables-loader-3{0%{transform:scale(1)}100%{transform:scale(0)}}@keyframes datatables-loader-2{0%{transform:translate(0, 0)}100%{transform:translate(24px, 0)}}table.dataTable.nowrap th,table.dataTable.nowrap td{white-space:nowrap}table.dataTable th.dt-left,table.dataTable td.dt-left{text-align:left}table.dataTable th.dt-center,table.dataTable td.dt-center,table.dataTable td.dataTables_empty{text-align:center}table.dataTable th.dt-right,table.dataTable td.dt-right{text-align:right}table.dataTable th.dt-justify,table.dataTable td.dt-justify{text-align:justify}table.dataTable th.dt-nowrap,table.dataTable td.dt-nowrap{white-space:nowrap}table.dataTable thead th,table.dataTable thead td,table.dataTable tfoot th,table.dataTable tfoot td{text-align:left}table.dataTable thead th.dt-head-left,table.dataTable thead td.dt-head-left,table.dataTable tfoot th.dt-head-left,table.dataTable tfoot td.dt-head-left{text-align:left}table.dataTable thead th.dt-head-center,table.dataTable thead td.dt-head-center,table.dataTable tfoot th.dt-head-center,table.dataTable tfoot td.dt-head-center{text-align:center}table.dataTable thead th.dt-head-right,table.dataTable thead td.dt-head-right,table.dataTable tfoot th.dt-head-right,table.dataTable tfoot td.dt-head-right{text-align:right}table.dataTable thead 
th.dt-head-justify,table.dataTable thead td.dt-head-justify,table.dataTable tfoot th.dt-head-justify,table.dataTable tfoot td.dt-head-justify{text-align:justify}table.dataTable thead th.dt-head-nowrap,table.dataTable thead td.dt-head-nowrap,table.dataTable tfoot th.dt-head-nowrap,table.dataTable tfoot td.dt-head-nowrap{white-space:nowrap}table.dataTable tbody th.dt-body-left,table.dataTable tbody td.dt-body-left{text-align:left}table.dataTable tbody th.dt-body-center,table.dataTable tbody td.dt-body-center{text-align:center}table.dataTable tbody th.dt-body-right,table.dataTable tbody td.dt-body-right{text-align:right}table.dataTable tbody th.dt-body-justify,table.dataTable tbody td.dt-body-justify{text-align:justify}table.dataTable tbody th.dt-body-nowrap,table.dataTable tbody td.dt-body-nowrap{white-space:nowrap}table.dataTable{width:100%;margin:0 auto;clear:both;border-collapse:separate;border-spacing:0}table.dataTable thead th,table.dataTable tfoot th{font-weight:bold}table.dataTable thead th,table.dataTable thead td{padding:10px;border-bottom:1px solid rgba(0, 0, 0, 0.3)}table.dataTable thead th:active,table.dataTable thead td:active{outline:none}table.dataTable tfoot th,table.dataTable tfoot td{padding:10px 10px 6px 10px;border-top:1px solid rgba(0, 0, 0, 0.3)}table.dataTable tbody tr{background-color:transparent}table.dataTable tbody tr.selected>*{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.9);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected), 0.9);color:rgb(255, 255, 255);color:rgb(var(--dt-row-selected-text))}table.dataTable tbody tr.selected a{color:rgb(9, 10, 11);color:rgb(var(--dt-row-selected-link))}table.dataTable tbody th,table.dataTable tbody td{padding:8px 10px}table.dataTable.row-border tbody th,table.dataTable.row-border tbody td,table.dataTable.display tbody th,table.dataTable.display tbody td{border-top:1px solid rgba(0, 0, 0, 0.15)}table.dataTable.row-border tbody tr:first-child th,table.dataTable.row-border tbody tr:first-child td,table.dataTable.display tbody tr:first-child th,table.dataTable.display tbody tr:first-child td{border-top:none}table.dataTable.cell-border tbody th,table.dataTable.cell-border tbody td{border-top:1px solid rgba(0, 0, 0, 0.15);border-right:1px solid rgba(0, 0, 0, 0.15)}table.dataTable.cell-border tbody tr th:first-child,table.dataTable.cell-border tbody tr td:first-child{border-left:1px solid rgba(0, 0, 0, 0.15)}table.dataTable.cell-border tbody tr:first-child th,table.dataTable.cell-border tbody tr:first-child td{border-top:none}table.dataTable.stripe>tbody>tr.odd>*,table.dataTable.display>tbody>tr.odd>*{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.023)}table.dataTable.stripe>tbody>tr.odd.selected>*,table.dataTable.display>tbody>tr.odd.selected>*{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.923);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected, 0.923))}table.dataTable.hover>tbody>tr:hover>*,table.dataTable.display>tbody>tr:hover>*{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.035)}table.dataTable.hover>tbody>tr.selected:hover>*,table.dataTable.display>tbody>tr.selected:hover>*{box-shadow:inset 0 0 0 9999px #0d6efd !important;box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected, 1)) !important}table.dataTable.order-column>tbody tr>.sorting_1,table.dataTable.order-column>tbody tr>.sorting_2,table.dataTable.order-column>tbody tr>.sorting_3,table.dataTable.display>tbody tr>.sorting_1,table.dataTable.display>tbody tr>.sorting_2,table.dataTable.display>tbody tr>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(0, 0, 
0, 0.019)}table.dataTable.order-column>tbody tr.selected>.sorting_1,table.dataTable.order-column>tbody tr.selected>.sorting_2,table.dataTable.order-column>tbody tr.selected>.sorting_3,table.dataTable.display>tbody tr.selected>.sorting_1,table.dataTable.display>tbody tr.selected>.sorting_2,table.dataTable.display>tbody tr.selected>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.919);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected, 0.919))}table.dataTable.display>tbody>tr.odd>.sorting_1,table.dataTable.order-column.stripe>tbody>tr.odd>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.054)}table.dataTable.display>tbody>tr.odd>.sorting_2,table.dataTable.order-column.stripe>tbody>tr.odd>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.047)}table.dataTable.display>tbody>tr.odd>.sorting_3,table.dataTable.order-column.stripe>tbody>tr.odd>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.039)}table.dataTable.display>tbody>tr.odd.selected>.sorting_1,table.dataTable.order-column.stripe>tbody>tr.odd.selected>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.954);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected, 0.954))}table.dataTable.display>tbody>tr.odd.selected>.sorting_2,table.dataTable.order-column.stripe>tbody>tr.odd.selected>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.947);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected, 0.947))}table.dataTable.display>tbody>tr.odd.selected>.sorting_3,table.dataTable.order-column.stripe>tbody>tr.odd.selected>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.939);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected, 0.939))}table.dataTable.display>tbody>tr.even>.sorting_1,table.dataTable.order-column.stripe>tbody>tr.even>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.019)}table.dataTable.display>tbody>tr.even>.sorting_2,table.dataTable.order-column.stripe>tbody>tr.even>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.011)}table.dataTable.display>tbody>tr.even>.sorting_3,table.dataTable.order-column.stripe>tbody>tr.even>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.003)}table.dataTable.display>tbody>tr.even.selected>.sorting_1,table.dataTable.order-column.stripe>tbody>tr.even.selected>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.919);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected, 0.919))}table.dataTable.display>tbody>tr.even.selected>.sorting_2,table.dataTable.order-column.stripe>tbody>tr.even.selected>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.911);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected, 0.911))}table.dataTable.display>tbody>tr.even.selected>.sorting_3,table.dataTable.order-column.stripe>tbody>tr.even.selected>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.903);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected, 0.903))}table.dataTable.display tbody tr:hover>.sorting_1,table.dataTable.order-column.hover tbody tr:hover>.sorting_1{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.082)}table.dataTable.display tbody tr:hover>.sorting_2,table.dataTable.order-column.hover tbody tr:hover>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.074)}table.dataTable.display tbody tr:hover>.sorting_3,table.dataTable.order-column.hover tbody tr:hover>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(0, 0, 0, 0.062)}table.dataTable.display tbody tr:hover.selected>.sorting_1,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_1{box-shadow:inset 0 0 
0 9999px rgba(13, 110, 253, 0.982);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected, 0.982))}table.dataTable.display tbody tr:hover.selected>.sorting_2,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_2{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.974);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected, 0.974))}table.dataTable.display tbody tr:hover.selected>.sorting_3,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_3{box-shadow:inset 0 0 0 9999px rgba(13, 110, 253, 0.962);box-shadow:inset 0 0 0 9999px rgba(var(--dt-row-selected, 0.962))}table.dataTable.no-footer{border-bottom:1px solid rgba(0, 0, 0, 0.3)}table.dataTable.compact thead th,table.dataTable.compact thead td,table.dataTable.compact tfoot th,table.dataTable.compact tfoot td,table.dataTable.compact tbody th,table.dataTable.compact tbody td{padding:4px}table.dataTable th,table.dataTable td{box-sizing:content-box}.dataTables_wrapper{position:relative;clear:both}.dataTables_wrapper .dataTables_length{float:left}.dataTables_wrapper .dataTables_length select{border:1px solid #aaa;border-radius:3px;padding:5px;background-color:transparent;padding:4px}.dataTables_wrapper .dataTables_filter{float:right;text-align:right}.dataTables_wrapper .dataTables_filter input{border:1px solid #aaa;border-radius:3px;padding:5px;background-color:transparent;margin-left:3px}.dataTables_wrapper .dataTables_info{clear:both;float:left;padding-top:.755em}.dataTables_wrapper .dataTables_paginate{float:right;text-align:right;padding-top:.25em}.dataTables_wrapper .dataTables_paginate .paginate_button{box-sizing:border-box;display:inline-block;min-width:1.5em;padding:.5em 1em;margin-left:2px;text-align:center;text-decoration:none !important;cursor:pointer;color:inherit !important;border:1px solid transparent;border-radius:2px;background:transparent}.dataTables_wrapper .dataTables_paginate .paginate_button.current,.dataTables_wrapper .dataTables_paginate .paginate_button.current:hover{color:inherit !important;border:1px solid rgba(0, 0, 0, 0.3);background-color:rgba(230, 230, 230, 0.1);background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, rgba(230, 230, 230, 0.1)), color-stop(100%, rgba(0, 0, 0, 0.1)));background:-webkit-linear-gradient(top, rgba(230, 230, 230, 0.1) 0%, rgba(0, 0, 0, 0.1) 100%);background:-moz-linear-gradient(top, rgba(230, 230, 230, 0.1) 0%, rgba(0, 0, 0, 0.1) 100%);background:-ms-linear-gradient(top, rgba(230, 230, 230, 0.1) 0%, rgba(0, 0, 0, 0.1) 100%);background:-o-linear-gradient(top, rgba(230, 230, 230, 0.1) 0%, rgba(0, 0, 0, 0.1) 100%);background:linear-gradient(to bottom, rgba(230, 230, 230, 0.1) 0%, rgba(0, 0, 0, 0.1) 100%)}.dataTables_wrapper .dataTables_paginate .paginate_button.disabled,.dataTables_wrapper .dataTables_paginate .paginate_button.disabled:hover,.dataTables_wrapper .dataTables_paginate .paginate_button.disabled:active{cursor:default;color:#666 !important;border:1px solid transparent;background:transparent;box-shadow:none}.dataTables_wrapper .dataTables_paginate .paginate_button:hover{color:white !important;border:1px solid #111;background-color:#585858;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, #585858), color-stop(100%, #111));background:-webkit-linear-gradient(top, #585858 0%, #111 100%);background:-moz-linear-gradient(top, #585858 0%, #111 100%);background:-ms-linear-gradient(top, #585858 0%, #111 100%);background:-o-linear-gradient(top, #585858 0%, #111 100%);background:linear-gradient(to bottom, #585858 0%, 
#111 100%)}.dataTables_wrapper .dataTables_paginate .paginate_button:active{outline:none;background-color:#2b2b2b;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, #2b2b2b), color-stop(100%, #0c0c0c));background:-webkit-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-moz-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-ms-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-o-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:linear-gradient(to bottom, #2b2b2b 0%, #0c0c0c 100%);box-shadow:inset 0 0 3px #111}.dataTables_wrapper .dataTables_paginate .ellipsis{padding:0 1em}.dataTables_wrapper .dataTables_length,.dataTables_wrapper .dataTables_filter,.dataTables_wrapper .dataTables_info,.dataTables_wrapper .dataTables_processing,.dataTables_wrapper .dataTables_paginate{color:inherit}.dataTables_wrapper .dataTables_scroll{clear:both}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody{-webkit-overflow-scrolling:touch}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>th,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>td,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>th,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>td{vertical-align:middle}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>th>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>td>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>th>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>td>div.dataTables_sizing{height:0;overflow:hidden;margin:0 !important;padding:0 !important}.dataTables_wrapper.no-footer .dataTables_scrollBody{border-bottom:1px solid rgba(0, 0, 0, 0.3)}.dataTables_wrapper.no-footer div.dataTables_scrollHead table.dataTable,.dataTables_wrapper.no-footer div.dataTables_scrollBody>table{border-bottom:none}.dataTables_wrapper:after{visibility:hidden;display:block;content:"";clear:both;height:0}@media screen and (max-width: 767px){.dataTables_wrapper .dataTables_info,.dataTables_wrapper .dataTables_paginate{float:none;text-align:center}.dataTables_wrapper .dataTables_paginate{margin-top:.5em}}@media screen and (max-width: 640px){.dataTables_wrapper .dataTables_length,.dataTables_wrapper .dataTables_filter{float:none;text-align:center}.dataTables_wrapper .dataTables_filter{margin-top:.5em}} diff --git a/docs/sphinx_setup/_static/css/openVinoDataTables.css b/docs/sphinx_setup/_static/css/openVinoDataTables.css index 569732b62398c8..36e9bc84d6ce0e 100644 --- a/docs/sphinx_setup/_static/css/openVinoDataTables.css +++ b/docs/sphinx_setup/_static/css/openVinoDataTables.css @@ -1,8 +1,100 @@ -table.dataTable.stripe tbody tr.odd, -table.dataTable.display tbody tr.odd { +div.dt-buttons>.dt-button, div.dt-buttons>div.dt-button-split .dt-button { + border-radius: 0px !important; + background-color: white !important; + border: 1px solid #aaa !important; + background:none !important; +} + +div.dt-container .dt-paging .dt-paging-button:hover { + color: white !important; + border: 1px solid #aaa; + background:none !important; + background-color: var(--bttn-act-bg-hover) !important +} + +div.dt-buttons>.dt-button:hover, div.dt-buttons>div.dt-button-split .dt-button:hover { + background-color: var(--bttn-act-bg-hover) !important; + color: white !important; +} + 
+table.dataTable thead th { + font-weight: 400 !important; +} + +table.dataTable { + table-layout: fixed; + width: 100%; +} + +th span p { + margin-bottom: .15rem !important; + white-space: nowrap; + text-overflow: ellipsis; + overflow: hidden; + + + @supports (-webkit-line-clamp: 2) { + overflow: hidden; + text-overflow: ellipsis; + white-space: initial; + display: -webkit-box; + -webkit-line-clamp: 2; + -webkit-box-orient: vertical; + } +} + +table.dataTable.stripe>tbody>tr:nth-child(odd)>*, table.dataTable.display>tbody>tr:nth-child(odd)>* { + box-shadow: inset 0 0 0 9999px #D7E2F6 !important; background: var(--sd-color-info-bg) !important; } +table.dataTable>thead>tr>th, table.dataTable>thead>tr>td { + border-bottom: 0 !important; +} + +div.dt-container .dt-search input { + border-radius: 0px !important +} + +div.dt-button-collection { + background-color: white; + border-radius: 0px !important; + box-shadow: 3px 4px 10px 1px rgba(0, 0, 0, 0.3); + box-sizing: border-box; +} + +div.dt-button-collection .dt-button-active:after { + position: absolute; + top: 50%; + margin-top: -10px; + right: 1em; + display: inline-block; + content: "✓"; + color: inherit; +} + +div.dt-button-collection { + position: absolute; + top: 0; + left: 0; + width: 300px !important; + margin-top: 3px; + margin-bottom: 3px; + padding: .75em 0; + border: 1px solid rgba(0, 0, 0, 0.4); + background-color: white; + overflow: hidden; + z-index: 2002; + border-radius: 5px; + box-shadow: 3px 4px 10px 1px rgba(0, 0, 0, 0.3); + box-sizing: border-box; +} + +button, input, optgroup, select, textarea { + font-size:small; +} + + .dataTables_wrapper .dataTables_paginate .paginate_button:hover { background: var(--bttn-act-bg-hover) !important; color: white !important; @@ -10,15 +102,16 @@ table.dataTable.display tbody tr.odd { border-radius: 0px !important; border: 1px !important } +div.dt-container .dt-search input { + border-radius: 0px !important +} -.dataTables_wrapper .dataTables_paginate .paginate_button.current { - background-color: var(--bttn-act-bg-active) !important; - border-color: var(--bttn-act-bg-active) !important; +div.dt-button-collection { + background-color: white; border-radius: 0px !important; - color: white !important; - border: 1px !important + box-shadow: 3px 4px 10px 1px rgba(0, 0, 0, 0.3); + box-sizing: border-box; } - .dataTables_wrapper .dataTables_paginate .paginate_button.disabled { color: #666 !important; background-color: transparent !important; @@ -26,88 +119,77 @@ table.dataTable.display tbody tr.odd { border: 1px !important; } -tbody, -td, -tfoot, -th, -thead, -tr { - border: 0 !important; -} - -.dataTables_filter input { - border-radius: 0px !important; -} - -.dataTables_length select { - border-radius: 0px !important; +div.dt-button-collection .dt-button-active:after { + position: absolute; + top: 50%; + margin-top: -10px; + right: 1em; + display: inline-block; + content: "✓"; + color: inherit; } .column-container { display: inline-block; position: relative; padding-left: 20px; - margin-bottom: 12px; - cursor: pointer; - -webkit-user-select: none; - -moz-user-select: none; - -ms-user-select: none; - user-select: none; -} -/* Hide the browser's default checkbox */ -.column-container input { - opacity: 0; - cursor: pointer; - height: 0; - width: 0; } -/* Create a custom checkbox */ -.checkmark { - top: 5px; - left: 0; - height: 15px; - width: 15px; - border: #0054AE 2px solid; +div.dt-button-collection { + width: 300px !important; + margin-top: 3px; + margin-bottom: 3px; + padding: .75em 0; + 
border: 1px solid rgba(0, 0, 0, 0.4); + background-color: white; + overflow: hidden; + z-index: 2002; + border-radius: 0px; + box-shadow: 3px 4px 10px 1px rgba(0, 0, 0, 0.3); + box-sizing: border-box; } -/* On mouse-over, add a grey background color */ -.column-container:hover input~.checkmark { - background-color: #ccc; +button, input, optgroup, select, textarea { + font-size:small; } -/* When the checkbox is checked, add a blue background */ -.column-container input:checked~.checkmark { - background-color: #0054AE; +table.dataTable thead>tr>th.dt-orderable-asc span.dt-column-order, table.dataTable thead>tr>th.dt-orderable-desc span.dt-column-order, table.dataTable thead>tr>th.dt-ordering-asc span.dt-column-order, table.dataTable thead>tr>th.dt-ordering-desc span.dt-column-order, table.dataTable thead>tr>td.dt-orderable-asc span.dt-column-order, table.dataTable thead>tr>td.dt-orderable-desc span.dt-column-order, table.dataTable thead>tr>td.dt-ordering-asc span.dt-column-order, table.dataTable thead>tr>td.dt-ordering-desc span.dt-column-order { + left: 5px !important; } -.column-container input:disabled~.checkmark { - background: #d3d3d3; - border: 2px solid #8C8C8C; +table.dataTable thead>tr>th.dt-orderable-asc, table.dataTable thead>tr>th.dt-orderable-desc, table.dataTable thead>tr>th.dt-ordering-asc, table.dataTable thead>tr>th.dt-ordering-desc, table.dataTable thead>tr>td.dt-orderable-asc, table.dataTable thead>tr>td.dt-orderable-desc, table.dataTable thead>tr>td.dt-ordering-asc, table.dataTable thead>tr>td.dt-ordering-desc { + padding-right: 2px !important; } -/* Create the checkmark/indicator (hidden when not checked) */ -.checkmark:after { - content: ""; - position: absolute; - display: none; +table.dataTable thead>tr>th.dt-orderable-asc, table.dataTable thead>tr>th.dt-orderable-desc, table.dataTable thead>tr>th.dt-ordering-asc, table.dataTable thead>tr>th.dt-ordering-desc, table.dataTable thead>tr>td.dt-orderable-asc, table.dataTable thead>tr>td.dt-orderable-desc, table.dataTable thead>tr>td.dt-ordering-asc, table.dataTable thead>tr>td.dt-ordering-desc { + padding-left: 20px !important; + text-align:left !important; } -/* Show the checkmark when checked */ -.column-container input:checked~.checkmark:after { - display: block; +div.dt-container .dt-paging .dt-paging-button { + box-sizing: border-box; + display: inline-block; + min-width: 1.5em; + padding: .5em 1em; + margin-left: 2px; + text-align: center; + text-decoration: none !important; + cursor: pointer; + color: inherit !important; + border: 1px solid transparent; + border-radius: 0px !important; + background: transparent; } -/* Style the checkmark/indicator */ -.column-container .checkmark:after { - left: 3px; - top: 0; - width: 5px; - height: 9px; - border: solid white; - border-width: 0 2px 2px 0; - -webkit-transform: rotate(45deg); - -ms-transform: rotate(45deg); - transform: rotate(45deg); +div.dt-container .dt-paging .dt-paging-button.current, div.dt-container .dt-paging .dt-paging-button.current:hover { + background: none !important; + background-color: var(--bttn-act-bg-active) !important; + border-color: var(--bttn-act-bg-active) !important; + border-radius: 0px !important; + color: white !important; + border: 1px !important +} +table.dataTable thead>tr>th.dt-orderable-asc span.dt-column-order:before, table.dataTable thead>tr>th.dt-orderable-asc span.dt-column-order:after, table.dataTable thead>tr>th.dt-orderable-desc span.dt-column-order:before, table.dataTable thead>tr>th.dt-orderable-desc 
span.dt-column-order:after, table.dataTable thead>tr>th.dt-ordering-asc span.dt-column-order:before, table.dataTable thead>tr>th.dt-ordering-asc span.dt-column-order:after, table.dataTable thead>tr>th.dt-ordering-desc span.dt-column-order:before, table.dataTable thead>tr>th.dt-ordering-desc span.dt-column-order:after, table.dataTable thead>tr>td.dt-orderable-asc span.dt-column-order:before, table.dataTable thead>tr>td.dt-orderable-asc span.dt-column-order:after, table.dataTable thead>tr>td.dt-orderable-desc span.dt-column-order:before, table.dataTable thead>tr>td.dt-orderable-desc span.dt-column-order:after, table.dataTable thead>tr>td.dt-ordering-asc span.dt-column-order:before, table.dataTable thead>tr>td.dt-ordering-asc span.dt-column-order:after, table.dataTable thead>tr>td.dt-ordering-desc span.dt-column-order:before, table.dataTable thead>tr>td.dt-ordering-desc span.dt-column-order:after { + padding-bottom: 2px !important; } \ No newline at end of file diff --git a/docs/sphinx_setup/_static/download/llm_models.csv b/docs/sphinx_setup/_static/download/llm_models.csv index 28ac9f2c79ec83..2ff93f503a6d3b 100644 --- a/docs/sphinx_setup/_static/download/llm_models.csv +++ b/docs/sphinx_setup/_static/download/llm_models.csv @@ -1,22 +1,22 @@ -Model name:,"Throughput: (tokens/sec. 2nd token)",1st token latency (msec),Max_RSS_memory used. (MB),Input tokens:,Output tokens:,Model Precision:,Beam:,Batch size:,Framework:, -OPT-2.7b,20.2,2757,7084,937,128,INT4,1,1,PT, -Phi-3-mini-4k-instruct,19.9,2776,7028,1062,128,INT4,1,1,PT, -Orca-mini-3b,19.2,2966,7032,1024,128,INT4,1,1,PT, -Phi-2,17.8,2162,7032,1024,128,INT4,1,1,PT, -Stable-Zephyr-3b-dpo,17.0,1791,7007,946,128,INT4,1,1,PT, -ChatGLM3-6b,16.5,3569,6741,1024,128,INT4,1,1,PT, -Dolly-v2-3b,15.8,6891,6731,1024,128,INT4,1,1,PT, -Stablelm-3b-4e1t,15.7,2051,7018,1024,128,INT4,1,1,PT, -Red-Pajama-Incite-Chat-3b-V1,14.8,6582,7028,1020,128,INT4,1,1,PT, -Falcon-7b-instruct,14.5,4552,7033,1049,128,INT4,1,1,PT, -Codegen25-7b,13.3,3982,6732,1024,128,INT4,1,1,PT, -GPT-j-6b,13.2,7213,6882,1024,128,INT4,1,1,PT, -Stablelm-7b,12.8,6339,7013,1020,128,INT4,1,1,PT, -Llama-3-8b,12.8,4356,6953,1024,128,INT4,1,1,PT, -Llama-2-7b-chat,12.3,4205,6906,1024,128,INT4,1,1,PT, -Llama-7b,11.7,4315,6927,1024,128,INT4,1,1,PT, -Mistral-7b-v0.1,10.5,4462,7242,1007,128,INT4,1,1,PT, -Zephyr-7b-beta,10.5,4500,7039,1024,128,INT4,1,1,PT, -Qwen1.5-7b-chat,9.9,4318,7034,1024,128,INT4,1,1,PT, -Baichuan2-7b-chat,9.8,4668,6724,1024,128,INT4,1,1,PT, -Qwen-7b-chat,9.0,5141,6996,1024,128,INT4,1,1,PT, \ No newline at end of file +Model name,"Throughput: (tokens/sec. 2nd token)",1st token latency (msec),Max RSS memory used. 
(MB),Input tokens,Output tokens,Model Precision,Beam,Batch size,Framework +OPT-2.7b,20.2,2757,7084,937,128,INT4,1,1,PT +Phi-3-mini-4k-instruct,19.9,2776,7028,1062,128,INT4,1,1,PT +Orca-mini-3b,19.2,2966,7032,1024,128,INT4,1,1,PT +Phi-2,17.8,2162,7032,1024,128,INT4,1,1,PT +Stable-Zephyr-3b-dpo,17.0,1791,7007,946,128,INT4,1,1,PT +ChatGLM3-6b,16.5,3569,6741,1024,128,INT4,1,1,PT +Dolly-v2-3b,15.8,6891,6731,1024,128,INT4,1,1,PT +Stablelm-3b-4e1t,15.7,2051,7018,1024,128,INT4,1,1,PT +Red-Pajama-Incite-Chat-3b-V1,14.8,6582,7028,1020,128,INT4,1,1,PT +Falcon-7b-instruct,14.5,4552,7033,1049,128,INT4,1,1,PT +Codegen25-7b,13.3,3982,6732,1024,128,INT4,1,1,PT +GPT-j-6b,13.2,7213,6882,1024,128,INT4,1,1,PT +Stablelm-7b,12.8,6339,7013,1020,128,INT4,1,1,PT +Llama-3-8b,12.8,4356,6953,1024,128,INT4,1,1,PT +Llama-2-7b-chat,12.3,4205,6906,1024,128,INT4,1,1,PT +Llama-7b,11.7,4315,6927,1024,128,INT4,1,1,PT +Mistral-7b-v0.1,10.5,4462,7242,1007,128,INT4,1,1,PT +Zephyr-7b-beta,10.5,4500,7039,1024,128,INT4,1,1,PT +Qwen1.5-7b-chat,9.9,4318,7034,1024,128,INT4,1,1,PT +Baichuan2-7b-chat,9.8,4668,6724,1024,128,INT4,1,1,PT +Qwen-7b-chat,9.0,5141,6996,1024,128,INT4,1,1,PT \ No newline at end of file diff --git a/docs/sphinx_setup/_static/js/jquery.dataTables.min.js b/docs/sphinx_setup/_static/js/jquery.dataTables.min.js deleted file mode 100644 index c89263c6d5a02f..00000000000000 --- a/docs/sphinx_setup/_static/js/jquery.dataTables.min.js +++ /dev/null @@ -1,4 +0,0 @@ -/*! DataTables 1.13.4 - * ©2008-2023 SpryMedia Ltd - datatables.net/license - */ -!function(n){"use strict";var a;"function"==typeof define&&define.amd?define(["jquery"],function(t){return n(t,window,document)}):"object"==typeof exports?(a=require("jquery"),"undefined"!=typeof window?module.exports=function(t,e){return t=t||window,e=e||a(t),n(e,t,t.document)}:n(a,window,window.document)):window.DataTable=n(jQuery,window,document)}(function(P,j,y,N){"use strict";function d(t){var e=parseInt(t,10);return!isNaN(e)&&isFinite(t)?e:null}function l(t,e,n){var a=typeof t,r="string"==a;return"number"==a||"bigint"==a||!!h(t)||(e&&r&&(t=G(t,e)),n&&r&&(t=t.replace(q,"")),!isNaN(parseFloat(t))&&isFinite(t))}function a(t,e,n){var a;return!!h(t)||(h(a=t)||"string"==typeof a)&&!!l(t.replace(V,""),e,n)||null}function m(t,e,n,a){var r=[],o=0,i=e.length;if(a!==N)for(;o").appendTo(l)),h.nTHead=n[0],l.children("tbody")),n=(0===a.length&&(a=P("").insertAfter(n)),h.nTBody=a[0],l.children("tfoot"));if(0===(n=0===n.length&&0").appendTo(l):n).length||0===n.children().length?l.addClass(p.sNoFooter):0/g,X=/^\d{2,4}[\.\/\-]\d{1,2}[\.\/\-]\d{1,2}([T ]{1}\d{1,2}[:\.]\d{2}([\.:]\d{2})?)?$/,J=new RegExp("(\\"+["/",".","*","+","?","|","(",")","[","]","{","}","\\","$","^","-"].join("|\\")+")","g"),q=/['\u00A0,$£€¥%\u2009\u202F\u20BD\u20a9\u20BArfkɃΞ]/gi,h=function(t){return!t||!0===t||"-"===t},G=function(t,e){return c[e]||(c[e]=new RegExp(Ot(e),"g")),"string"==typeof t&&"."!==e?t.replace(/\./g,"").replace(c[e],"."):t},H=function(t,e,n){var a=[],r=0,o=t.length;if(n!==N)for(;r").css({position:"fixed",top:0,left:-1*P(j).scrollLeft(),height:1,width:1,overflow:"hidden"}).append(P("
").css({position:"absolute",top:1,left:1,width:100,overflow:"scroll"}).append(P("
").css({width:"100%",height:10}))).appendTo("body")).children()).children(),e.barWidth=a[0].offsetWidth-a[0].clientWidth,e.bScrollOversize=100===r[0].offsetWidth&&100!==a[0].clientWidth,e.bScrollbarLeft=1!==Math.round(r.offset().left),e.bBounding=!!n[0].getBoundingClientRect().width,n.remove()),P.extend(t.oBrowser,w.__browser),t.oScroll.iBarWidth=w.__browser.barWidth}function et(t,e,n,a,r,o){var i,l=a,s=!1;for(n!==N&&(i=n,s=!0);l!==r;)t.hasOwnProperty(l)&&(i=s?e(i,t[l],l,t):t[l],s=!0,l+=o);return i}function nt(t,e){var n=w.defaults.column,a=t.aoColumns.length,n=P.extend({},w.models.oColumn,n,{nTh:e||y.createElement("th"),sTitle:n.sTitle||(e?e.innerHTML:""),aDataSort:n.aDataSort||[a],mData:n.mData||a,idx:a}),n=(t.aoColumns.push(n),t.aoPreSearchCols);n[a]=P.extend({},w.models.oSearch,n[a]),at(t,a,P(e).data())}function at(t,e,n){function a(t){return"string"==typeof t&&-1!==t.indexOf("@")}var e=t.aoColumns[e],r=t.oClasses,o=P(e.nTh),i=(!e.sWidthOrig&&(e.sWidthOrig=o.attr("width")||null,u=(o.attr("style")||"").match(/width:\s*(\d+[pxem%]+)/))&&(e.sWidthOrig=u[1]),n!==N&&null!==n&&(Q(n),C(w.defaults.column,n,!0),n.mDataProp===N||n.mData||(n.mData=n.mDataProp),n.sType&&(e._sManualType=n.sType),n.className&&!n.sClass&&(n.sClass=n.className),n.sClass&&o.addClass(n.sClass),u=e.sClass,P.extend(e,n),F(e,n,"sWidth","sWidthOrig"),u!==e.sClass&&(e.sClass=u+" "+e.sClass),n.iDataSort!==N&&(e.aDataSort=[n.iDataSort]),F(e,n,"aDataSort")),e.mData),l=A(i),s=e.mRender?A(e.mRender):null,u=(e._bAttrSrc=P.isPlainObject(i)&&(a(i.sort)||a(i.type)||a(i.filter)),e._setter=null,e.fnGetData=function(t,e,n){var a=l(t,e,N,n);return s&&e?s(a,e,t,n):a},e.fnSetData=function(t,e,n){return b(i)(t,e,n)},"number"==typeof i||e._isArrayHost||(t._rowReadObject=!0),t.oFeatures.bSort||(e.bSortable=!1,o.addClass(r.sSortableNone)),-1!==P.inArray("asc",e.asSorting)),n=-1!==P.inArray("desc",e.asSorting);e.bSortable&&(u||n)?u&&!n?(e.sSortingClass=r.sSortableAsc,e.sSortingClassJUI=r.sSortJUIAscAllowed):!u&&n?(e.sSortingClass=r.sSortableDesc,e.sSortingClassJUI=r.sSortJUIDescAllowed):(e.sSortingClass=r.sSortable,e.sSortingClassJUI=r.sSortJUI):(e.sSortingClass=r.sSortableNone,e.sSortingClassJUI="")}function O(t){if(!1!==t.oFeatures.bAutoWidth){var e=t.aoColumns;ee(t);for(var n=0,a=e.length;ne&&t[r]--;-1!=a&&n===N&&t.splice(a,1)}function bt(n,a,t,e){function r(t,e){for(;t.childNodes.length;)t.removeChild(t.firstChild);t.innerHTML=S(n,a,e,"display")}var o,i,l=n.aoData[a];if("dom"!==t&&(t&&"auto"!==t||"dom"!==l.src)){var s=l.anCells;if(s)if(e!==N)r(s[e],e);else for(o=0,i=s.length;o").appendTo(r)),c=0,f=s.length;c=s.fnRecordsDisplay()?0:l,s.iInitDisplayStart=-1);var n=R(t,"aoPreDrawCallback","preDraw",[t]);if(-1!==P.inArray(!1,n))D(t,!1);else{var a=[],r=0,o=t.asStripeClasses,i=o.length,l=t.oLanguage,s="ssp"==E(t),u=t.aiDisplay,n=t._iDisplayStart,c=t.fnDisplayEnd();if(t.bDrawing=!0,t.bDeferLoading)t.bDeferLoading=!1,t.iDraw++,D(t,!1);else if(s){if(!t.bDestroying&&!e)return void xt(t)}else t.iDraw++;if(0!==u.length)for(var f=s?t.aoData.length:c,d=s?0:n;d",{class:i?o[0]:""}).append(P("",{valign:"top",colSpan:T(t),class:t.oClasses.sRowEmpty}).html(e))[0]}R(t,"aoHeaderCallback","header",[P(t.nTHead).children("tr")[0],ht(t),n,c,u]),R(t,"aoFooterCallback","footer",[P(t.nTFoot).children("tr")[0],ht(t),n,c,u]);s=P(t.nTBody);s.children().detach(),s.append(P(a)),R(t,"aoDrawCallback","draw",[t]),t.bSorted=!1,t.bFiltered=!1,t.bDrawing=!1}}function u(t,e){var 
n=t.oFeatures,a=n.bSort,n=n.bFilter;a&&ie(t),n?Rt(t,t.oPreviousSearch):t.aiDisplay=t.aiDisplayMaster.slice(),!0!==e&&(t._iDisplayStart=0),t._drawHold=e,v(t),t._drawHold=!1}function _t(t){for(var e,n,a,r,o,i,l,s=t.oClasses,u=P(t.nTable),u=P("
").insertBefore(u),c=t.oFeatures,f=P("
",{id:t.sTableId+"_wrapper",class:s.sWrapper+(t.nTFoot?"":" "+s.sNoFooter)}),d=(t.nHolding=u[0],t.nTableWrapper=f[0],t.nTableReinsertBefore=t.nTable.nextSibling,t.sDom.split("")),h=0;h")[0],"'"==(r=d[h+1])||'"'==r){for(o="",i=2;d[h+i]!=r;)o+=d[h+i],i++;"H"==o?o=s.sJUIHeader:"F"==o&&(o=s.sJUIFooter),-1!=o.indexOf(".")?(l=o.split("."),a.id=l[0].substr(1,l[0].length-1),a.className=l[1]):"#"==o.charAt(0)?a.id=o.substr(1,o.length-1):a.className=o,h+=i}f.append(a),f=P(a)}else if(">"==n)f=f.parent();else if("l"==n&&c.bPaginate&&c.bLengthChange)e=$t(t);else if("f"==n&&c.bFilter)e=Lt(t);else if("r"==n&&c.bProcessing)e=Zt(t);else if("t"==n)e=Kt(t);else if("i"==n&&c.bInfo)e=Ut(t);else if("p"==n&&c.bPaginate)e=zt(t);else if(0!==w.ext.feature.length)for(var p=w.ext.feature,g=0,b=p.length;g',s=(s=r.sSearch).match(/_INPUT_/)?s.replace("_INPUT_",l):s+l,l=P("
",{id:i.f?null:a+"_filter",class:t.sFilter}).append(P("
").addClass(t.sLength);return a.aanFeatures.l||(c[0].id=e+"_length"),c.children().append(a.oLanguage.sLengthMenu.replace("_MENU_",l[0].outerHTML)),P("select",c).val(a._iDisplayLength).on("change.DT",function(t){Gt(a,P(this).val()),v(a)}),P(a.nTable).on("length.dt.DT",function(t,e,n){a===e&&P("select",c).val(n)}),c[0]}function zt(t){function c(t){v(t)}var e=t.sPaginationType,f=w.ext.pager[e],d="function"==typeof f,e=P("
").addClass(t.oClasses.sPaging+e)[0],h=t.aanFeatures;return d||f.fnInit(t,e,c),h.p||(e.id=t.sTableId+"_paginate",t.aoDrawCallback.push({fn:function(t){if(d)for(var e=t._iDisplayStart,n=t._iDisplayLength,a=t.fnRecordsDisplay(),r=-1===n,o=r?0:Math.ceil(e/n),i=r?1:Math.ceil(a/n),l=f(o,i),s=0,u=h.p.length;s",{id:t.aanFeatures.r?null:t.sTableId+"_processing",class:t.oClasses.sProcessing,role:"status"}).html(t.oLanguage.sProcessing).append("
").insertBefore(t.nTable)[0]}function D(t,e){t.oFeatures.bProcessing&&P(t.aanFeatures.r).css("display",e?"block":"none"),R(t,null,"processing",[t,e])}function Kt(t){var e,n,a,r,o,i,l,s,u,c,f,d,h=P(t.nTable),p=t.oScroll;return""===p.sX&&""===p.sY?t.nTable:(e=p.sX,n=p.sY,a=t.oClasses,o=(r=h.children("caption")).length?r[0]._captionSide:null,s=P(h[0].cloneNode(!1)),i=P(h[0].cloneNode(!1)),u=function(t){return t?M(t):null},(l=h.children("tfoot")).length||(l=null),s=P(f="
",{class:a.sScrollWrapper}).append(P(f,{class:a.sScrollHead}).css({overflow:"hidden",position:"relative",border:0,width:e?u(e):"100%"}).append(P(f,{class:a.sScrollHeadInner}).css({"box-sizing":"content-box",width:p.sXInner||"100%"}).append(s.removeAttr("id").css("margin-left",0).append("top"===o?r:null).append(h.children("thead"))))).append(P(f,{class:a.sScrollBody}).css({position:"relative",overflow:"auto",width:u(e)}).append(h)),l&&s.append(P(f,{class:a.sScrollFoot}).css({overflow:"hidden",border:0,width:e?u(e):"100%"}).append(P(f,{class:a.sScrollFootInner}).append(i.removeAttr("id").css("margin-left",0).append("bottom"===o?r:null).append(h.children("tfoot"))))),u=s.children(),c=u[0],f=u[1],d=l?u[2]:null,e&&P(f).on("scroll.DT",function(t){var e=this.scrollLeft;c.scrollLeft=e,l&&(d.scrollLeft=e)}),P(f).css("max-height",n),p.bCollapse||P(f).css("height",n),t.nScrollHead=c,t.nScrollBody=f,t.nScrollFoot=d,t.aoDrawCallback.push({fn:Qt,sName:"scrolling"}),s[0])}function Qt(n){function t(t){(t=t.style).paddingTop="0",t.paddingBottom="0",t.borderTopWidth="0",t.borderBottomWidth="0",t.height=0}var e,a,r,o,i,l=n.oScroll,s=l.sX,u=l.sXInner,c=l.sY,l=l.iBarWidth,f=P(n.nScrollHead),d=f[0].style,h=f.children("div"),p=h[0].style,h=h.children("table"),g=n.nScrollBody,b=P(g),m=g.style,S=P(n.nScrollFoot).children("div"),v=S.children("table"),y=P(n.nTHead),D=P(n.nTable),_=D[0],w=_.style,C=n.nTFoot?P(n.nTFoot):null,T=n.oBrowser,x=T.bScrollOversize,A=(H(n.aoColumns,"nTh"),[]),I=[],F=[],L=[],R=g.scrollHeight>g.clientHeight;n.scrollBarVis!==R&&n.scrollBarVis!==N?(n.scrollBarVis=R,O(n)):(n.scrollBarVis=R,D.children("thead, tfoot").remove(),C&&(R=C.clone().prependTo(D),i=C.find("tr"),a=R.find("tr"),R.find("[id]").removeAttr("id")),R=y.clone().prependTo(D),y=y.find("tr"),e=R.find("tr"),R.find("th, td").removeAttr("tabindex"),R.find("[id]").removeAttr("id"),s||(m.width="100%",f[0].style.width="100%"),P.each(Ct(n,R),function(t,e){r=rt(n,t),e.style.width=n.aoColumns[r].sWidth}),C&&k(function(t){t.style.width=""},a),f=D.outerWidth(),""===s?(w.width="100%",x&&(D.find("tbody").height()>g.offsetHeight||"scroll"==b.css("overflow-y"))&&(w.width=M(D.outerWidth()-l)),f=D.outerWidth()):""!==u&&(w.width=M(u),f=D.outerWidth()),k(t,e),k(function(t){var e=j.getComputedStyle?j.getComputedStyle(t).width:M(P(t).width());F.push(t.innerHTML),A.push(e)},e),k(function(t,e){t.style.width=A[e]},y),P(e).css("height",0),C&&(k(t,a),k(function(t){L.push(t.innerHTML),I.push(M(P(t).css("width")))},a),k(function(t,e){t.style.width=I[e]},i),P(a).height(0)),k(function(t,e){t.innerHTML='
'+F[e]+"
",t.childNodes[0].style.height="0",t.childNodes[0].style.overflow="hidden",t.style.width=A[e]},e),C&&k(function(t,e){t.innerHTML='
'+L[e]+"
",t.childNodes[0].style.height="0",t.childNodes[0].style.overflow="hidden",t.style.width=I[e]},a),Math.round(D.outerWidth())g.offsetHeight||"scroll"==b.css("overflow-y")?f+l:f,x&&(g.scrollHeight>g.offsetHeight||"scroll"==b.css("overflow-y"))&&(w.width=M(o-l)),""!==s&&""===u||W(n,1,"Possible column misalignment",6)):o="100%",m.width=M(o),d.width=M(o),C&&(n.nScrollFoot.style.width=M(o)),c||x&&(m.height=M(_.offsetHeight+l)),R=D.outerWidth(),h[0].style.width=M(R),p.width=M(R),y=D.height()>g.clientHeight||"scroll"==b.css("overflow-y"),p[i="padding"+(T.bScrollbarLeft?"Left":"Right")]=y?l+"px":"0px",C&&(v[0].style.width=M(R),S[0].style.width=M(R),S[0].style[i]=y?l+"px":"0px"),D.children("colgroup").insertBefore(D.children("thead")),b.trigger("scroll"),!n.bSorted&&!n.bFiltered||n._drawHold||(g.scrollTop=0))}function k(t,e,n){for(var a,r,o=0,i=0,l=e.length;i/g;function ee(t){var e,n,a=t.nTable,r=t.aoColumns,o=t.oScroll,i=o.sY,l=o.sX,o=o.sXInner,s=r.length,u=it(t,"bVisible"),c=P("th",t.nTHead),f=a.getAttribute("width"),d=a.parentNode,h=!1,p=t.oBrowser,g=p.bScrollOversize,b=a.style.width;for(b&&-1!==b.indexOf("%")&&(f=b),D=0;D").appendTo(b.find("tbody")));for(b.find("thead, tfoot").remove(),b.append(P(t.nTHead).clone()).append(P(t.nTFoot).clone()),b.find("tfoot th, tfoot td").css("width",""),c=Ct(t,b.find("thead")[0]),D=0;D").css({width:e.sWidthOrig,margin:0,padding:0,border:0,height:1}));if(t.aoData.length)for(D=0;D").css(l||i?{position:"absolute",top:0,left:0,height:1,right:0,overflow:"hidden"}:{}).append(b).appendTo(d),y=(l&&o?b.width(o):l?(b.css("width","auto"),b.removeAttr("width"),b.width()").css("width",M(t)).appendTo(e||y.body))[0].offsetWidth,t.remove(),e):0}function re(t,e){var n,a=oe(t,e);return a<0?null:(n=t.aoData[a]).nTr?n.anCells[e]:P("").html(S(t,a,e,"display"))[0]}function oe(t,e){for(var n,a=-1,r=-1,o=0,i=t.aoData.length;oa&&(a=n.length,r=o);return r}function M(t){return null===t?"0px":"number"==typeof t?t<0?"0px":t+"px":t.match(/\d$/)?t+"px":t}function I(t){function e(t){t.length&&!Array.isArray(t[0])?h.push(t):P.merge(h,t)}var n,a,r,o,i,l,s,u=[],c=t.aoColumns,f=t.aaSortingFixed,d=P.isPlainObject(f),h=[];for(Array.isArray(f)&&e(f),d&&f.pre&&e(f.pre),e(t.aaSorting),d&&f.post&&e(f.post),n=0;n/g,""),u=i.nTh;u.removeAttribute("aria-sort"),i=i.bSortable?s+("asc"===(0=o.length?[0,e[1]]:e)})),t.search!==N&&P.extend(n.oPreviousSearch,Bt(t.search)),t.columns){for(a=0,r=t.columns.length;a").addClass(e),P("td",n).addClass(e).html(t)[0].colSpan=T(o),l.push(n[0]))}var l=[];i(e,n),t._details&&t._details.detach(),t._details=P(l),t._detailsShow&&t._details.insertAfter(t.nTr)}function xe(t,e){var n=t.context;if(n.length&&t.length){var a=n[0].aoData[t[0]];if(a._details){(a._detailsShow=e)?(a._details.insertAfter(a.nTr),P(a.nTr).addClass("dt-hasChild")):(a._details.detach(),P(a.nTr).removeClass("dt-hasChild")),R(n[0],null,"childRow",[e,t.row(t[0])]);var s=n[0],r=new B(s),a=".dt.DT_details",e="draw"+a,t="column-sizing"+a,a="destroy"+a,u=s.aoData;if(r.off(e+" "+t+" "+a),H(u,"_details").length>0){r.on(e,function(t,e){if(s!==e)return;r.rows({page:"current"}).eq(0).each(function(t){var e=u[t];if(e._detailsShow)e._details.insertAfter(e.nTr)})});r.on(t,function(t,e,n,a){if(s!==e)return;var r,o=T(e);for(var i=0,l=u.length;it?new B(e[t],this[t]):null},filter:function(t){var e=[];if(o.filter)e=o.filter.call(this,t,this);else for(var n=0,a=this.length;n").appendTo(t);p(u,n)}else{switch(g=null,b=n,a=c.iTabIndex,n){case"ellipsis":t.append('');break;case"first":g=S.sFirst,0===d&&(a=-1,b+=" 
"+o);break;case"previous":g=S.sPrevious,0===d&&(a=-1,b+=" "+o);break;case"next":g=S.sNext,0!==h&&d!==h-1||(a=-1,b+=" "+o);break;case"last":g=S.sLast,0!==h&&d!==h-1||(a=-1,b+=" "+o);break;default:g=c.fnFormatNumber(n+1),b=d===n?m.sPageButtonActive:""}null!==g&&(u=c.oInit.pagingTag||"a",r=-1!==b.indexOf(o),me(P("<"+u+">",{class:m.sPageButton+" "+b,"aria-controls":c.sTableId,"aria-disabled":r?"true":null,"aria-label":v[n],"aria-role":"link","aria-current":b===m.sPageButtonActive?"page":null,"data-dt-idx":n,tabindex:a,id:0===f&&"string"==typeof n?c.sTableId+"_"+n:null}).html(g).appendTo(t),{action:n},i))}}var g,b,n,m=c.oClasses,S=c.oLanguage.oPaginate,v=c.oLanguage.oAria.paginate||{};try{n=P(t).find(y.activeElement).data("dt-idx")}catch(t){}p(P(t).empty(),e),n!==N&&P(t).find("[data-dt-idx="+n+"]").trigger("focus")}}}),P.extend(w.ext.type.detect,[function(t,e){e=e.oLanguage.sDecimal;return l(t,e)?"num"+e:null},function(t,e){var n;return(!t||t instanceof Date||X.test(t))&&(null!==(n=Date.parse(t))&&!isNaN(n)||h(t))?"date":null},function(t,e){e=e.oLanguage.sDecimal;return l(t,e,!0)?"num-fmt"+e:null},function(t,e){e=e.oLanguage.sDecimal;return a(t,e)?"html-num"+e:null},function(t,e){e=e.oLanguage.sDecimal;return a(t,e,!0)?"html-num-fmt"+e:null},function(t,e){return h(t)||"string"==typeof t&&-1!==t.indexOf("<")?"html":null}]),P.extend(w.ext.type.search,{html:function(t){return h(t)?t:"string"==typeof t?t.replace(U," ").replace(V,""):""},string:function(t){return!h(t)&&"string"==typeof t?t.replace(U," "):t}});function ke(t,e,n,a){var r;return 0===t||t&&"-"!==t?"number"==(r=typeof t)||"bigint"==r?t:+(t=(t=e?G(t,e):t).replace&&(n&&(t=t.replace(n,"")),a)?t.replace(a,""):t):-1/0}function Me(n){P.each({num:function(t){return ke(t,n)},"num-fmt":function(t){return ke(t,n,q)},"html-num":function(t){return ke(t,n,V)},"html-num-fmt":function(t){return ke(t,n,V,q)}},function(t,e){p.type.order[t+n+"-pre"]=e,t.match(/^html\-/)&&(p.type.search[t+n]=p.type.search.html)})}P.extend(p.type.order,{"date-pre":function(t){t=Date.parse(t);return isNaN(t)?-1/0:t},"html-pre":function(t){return h(t)?"":t.replace?t.replace(/<.*?>/g,"").toLowerCase():t+""},"string-pre":function(t){return h(t)?"":"string"==typeof t?t.toLowerCase():t.toString?t.toString():""},"string-asc":function(t,e){return t").addClass(l.sSortJUIWrapper).append(o.contents()).append(P("").addClass(l.sSortIcon+" "+i.sSortingClassJUI)).appendTo(o),P(r.nTable).on("order.dt.DT",function(t,e,n,a){r===e&&(e=i.idx,o.removeClass(l.sSortAsc+" "+l.sSortDesc).addClass("asc"==a[e]?l.sSortAsc:"desc"==a[e]?l.sSortDesc:i.sSortingClass),o.find("span."+l.sSortIcon).removeClass(l.sSortJUIAsc+" "+l.sSortJUIDesc+" "+l.sSortJUI+" "+l.sSortJUIAscAllowed+" "+l.sSortJUIDescAllowed).addClass("asc"==a[e]?l.sSortJUIAsc:"desc"==a[e]?l.sSortJUIDesc:i.sSortingClassJUI))})}}});function We(t){return"string"==typeof(t=Array.isArray(t)?t.join(","):t)?t.replace(/&/g,"&").replace(//g,">").replace(/"/g,"""):t}function Ee(t,e,n,a,r){return j.moment?t[e](r):j.luxon?t[n](r):a?t[a](r):t}var Be=!1;function Ue(t,e,n){var a;if(j.moment){if(!(a=j.moment.utc(t,e,n,!0)).isValid())return null}else if(j.luxon){if(!(a=e&&"string"==typeof t?j.luxon.DateTime.fromFormat(t,e):j.luxon.DateTime.fromISO(t)).isValid)return null;a.setLocale(n)}else e?(Be||alert("DataTables warning: Formatted date without Moment.js or Luxon - https://datatables.net/tn/17"),Be=!0):a=new Date(t);return a}function Ve(s){return 
function(a,r,o,i){0===arguments.length?(o="en",a=r=null):1===arguments.length?(o="en",r=a,a=null):2===arguments.length&&(o=r,r=a,a=null);var l="datetime-"+r;return w.ext.type.order[l]||(w.ext.type.detect.unshift(function(t){return t===l&&l}),w.ext.type.order[l+"-asc"]=function(t,e){t=t.valueOf(),e=e.valueOf();return t===e?0:t { - if (el.checked) { - table.columns([el.getAttribute('data-column')]).visible(false, true); + ] + } } - - el.addEventListener('click', function (e) { - let columnIdx = e.target.getAttribute('data-column'); - let column = table.column(columnIdx); - column.visible(!column.visible()); - }); }); }); \ No newline at end of file diff --git a/docs/sphinx_setup/_templates/layout.html b/docs/sphinx_setup/_templates/layout.html index ffd61b3dff31f8..4c3775403f2afd 100644 --- a/docs/sphinx_setup/_templates/layout.html +++ b/docs/sphinx_setup/_templates/layout.html @@ -11,7 +11,7 @@ - + @@ -27,6 +27,6 @@ {% block docs_navbar %} {{ super() }} {% include 'baner.html' %} - + {% endblock %} From 5e38ce7743274208a364fc6c5e24c2a943efd499 Mon Sep 17 00:00:00 2001 From: Karol Blaszczak Date: Mon, 8 Jul 2024 11:49:47 +0200 Subject: [PATCH 07/19] [DOCS] minor tweaks in model conversion (#25409) --- .../openvino-workflow/model-preparation.rst | 14 ++-- .../model-preparation/convert-model-to-ir.rst | 82 +++++++++++-------- 2 files changed, 56 insertions(+), 40 deletions(-) diff --git a/docs/articles_en/openvino-workflow/model-preparation.rst b/docs/articles_en/openvino-workflow/model-preparation.rst index f4b4b6787590ad..c6c7eaeb17fb31 100644 --- a/docs/articles_en/openvino-workflow/model-preparation.rst +++ b/docs/articles_en/openvino-workflow/model-preparation.rst @@ -1,5 +1,3 @@ -.. {#openvino_docs_model_processing_introduction} - Model Preparation ================= @@ -66,15 +64,15 @@ The easiest way to obtain a model is to download it from an online database, suc For PyTorch models, `Python API <#convert-a-model-with-python-convert-model>`__ is the only conversion option. -Model States +Different model representations ############################################## -There are three states a model in OpenVINO can be: saved on disk, loaded but not compiled -(``ov.Model``) or loaded and compiled (``ov.CompiledModel``). +A model in OpenVINO can be represented in three ways: saved on disk, loaded but not compiled +(``ov.Model``), and loaded and compiled (``ov.CompiledModel``). | **Saved on disk** -| A model in this state consists of one or more files that fully represent the neural - network. A model can be stored in different ways. For example: +| One or more files saved on a drive, fully representing the neural network. + Different model formats are stored in different ways, for example: | OpenVINO IR: pair of .xml and .bin files | ONNX: .onnx file | TensorFlow: directory with a .pb file and two subfolders or just a .pb file @@ -88,7 +86,7 @@ There are three states a model in OpenVINO can be: saved on disk, loaded but not applying quantization or even adding preprocessing steps before compiling the model. | **Loaded and compiled** -| This state is achieved when one or more devices are specified for a model object to +| This representation is achieved when one or more devices are specified for a model object to run on (``ov.CompiledModel``), allowing device optimizations to be made and enabling inference. 
diff --git a/docs/articles_en/openvino-workflow/model-preparation/convert-model-to-ir.rst b/docs/articles_en/openvino-workflow/model-preparation/convert-model-to-ir.rst index be67f581173309..171422f932ea5b 100644 --- a/docs/articles_en/openvino-workflow/model-preparation/convert-model-to-ir.rst +++ b/docs/articles_en/openvino-workflow/model-preparation/convert-model-to-ir.rst @@ -1,6 +1,3 @@ -.. {#openvino_docs_OV_Converter_UG_prepare_model_convert_model_Convert_Model_IR} - - Convert to OpenVINO IR ============================================= @@ -18,14 +15,9 @@ Convert to OpenVINO IR Convert from PaddlePaddle -:doc:`IR (Intermediate Representation) <../../documentation/openvino-ir-format>` is -OpenVINO own format consisting of ``.xml`` and ``.bin`` files. -Convert the model into OpenVINO IR for `better performance <#ir-conversion-benefits>`__. - -Convert Models -############################################## -Here are code examples of how to use these methods with different model formats: +:doc:`OpenVINO IR <../../documentation/openvino-ir-format>` is the proprietary model format +used by OpenVINO, typically obtained by converting models of supported frameworks: .. tab-set:: @@ -75,7 +67,11 @@ Here are code examples of how to use these methods with different model formats: * The ``convert_model()`` method: - When you use the ``convert_model()`` method, you have more control and you can specify additional adjustments for ``ov.Model``. The ``read_model()`` and ``compile_model()`` methods are easier to use, however, they do not have such capabilities. With ``ov.Model`` you can choose to optimize, compile and run inference on it or serialize it into a file for subsequent use. + When you use the ``convert_model()`` method, you have more control and you can + specify additional adjustments for ``ov.Model``. The ``read_model()`` and + ``compile_model()`` methods are easier to use, however, they do not have such + capabilities. With ``ov.Model`` you can choose to optimize, compile and run + inference on it or serialize it into a file for subsequent use. .. dropdown:: List of supported formats: @@ -175,7 +171,8 @@ Here are code examples of how to use these methods with different model formats: .. tab-item:: CLI :sync: cli - You can use ``ovc`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. + You can use ``ovc`` command-line tool to convert a model to IR. The obtained IR can + then be read by ``read_model()`` and inferred. .. code-block:: sh @@ -194,7 +191,11 @@ Here are code examples of how to use these methods with different model formats: * The ``convert_model()`` method: - When you use the ``convert_model()`` method, you have more control and you can specify additional adjustments for ``ov.Model``. The ``read_model()`` and ``compile_model()`` methods are easier to use, however, they do not have such capabilities. With ``ov.Model`` you can choose to optimize, compile and run inference on it or serialize it into a file for subsequent use. + When you use the ``convert_model()`` method, you have more control and you can + specify additional adjustments for ``ov.Model``. The ``read_model()`` and + ``compile_model()`` methods are easier to use, however, they do not have such + capabilities. With ``ov.Model`` you can choose to optimize, compile and run + inference on it or serialize it into a file for subsequent use. .. 
dropdown:: List of supported formats: @@ -294,7 +295,8 @@ Here are code examples of how to use these methods with different model formats: * The ``convert_model()`` method: - You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. + You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can + then be read by ``read_model()`` and inferred. .. dropdown:: List of supported formats: @@ -319,7 +321,11 @@ Here are code examples of how to use these methods with different model formats: * The ``convert_model()`` method: - When you use the ``convert_model()`` method, you have more control and you can specify additional adjustments for ``ov.Model``. The ``read_model()`` and ``compile_model()`` methods are easier to use, however, they do not have such capabilities. With ``ov.Model`` you can choose to optimize, compile and run inference on it or serialize it into a file for subsequent use. + When you use the ``convert_model()`` method, you have more control and you can + specify additional adjustments for ``ov.Model``. The ``read_model()`` and + ``compile_model()`` methods are easier to use, however, they do not have such + capabilities. With ``ov.Model`` you can choose to optimize, compile and run + inference on it or serialize it into a file for subsequent use. .. dropdown:: List of supported formats: @@ -416,7 +422,8 @@ Here are code examples of how to use these methods with different model formats: * The ``convert_model()`` method: - You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. + You can use ``mo`` command-line tool to convert a model to IR. The obtained IR + can then be read by ``read_model()`` and inferred. .. dropdown:: List of supported formats: @@ -441,7 +448,11 @@ Here are code examples of how to use these methods with different model formats: * The ``convert_model()`` method: - When you use the ``convert_model()`` method, you have more control and you can specify additional adjustments for ``ov.Model``. The ``read_model()`` and ``compile_model()`` methods are easier to use, however, they do not have such capabilities. With ``ov.Model`` you can choose to optimize, compile and run inference on it or serialize it into a file for subsequent use. + When you use the ``convert_model()`` method, you have more control and you can + specify additional adjustments for ``ov.Model``. The ``read_model()`` and + ``compile_model()`` methods are easier to use, however, they do not have such + capabilities. With ``ov.Model`` you can choose to optimize, compile and run + inference on it or serialize it into a file for subsequent use. .. dropdown:: List of supported formats: @@ -545,7 +556,8 @@ Here are code examples of how to use these methods with different model formats: * The ``convert_model()`` method: - You can use ``mo`` command-line tool to convert a model to IR. The obtained IR can then be read by ``read_model()`` and inferred. + You can use ``mo`` command-line tool to convert a model to IR. The obtained IR + can then be read by ``read_model()`` and inferred. .. dropdown:: List of supported formats: @@ -561,32 +573,38 @@ Here are code examples of how to use these methods with different model formats: :doc:`article `. 
-* :doc:`How to convert PyTorch ` -* :doc:`How to convert ONNX ` -* :doc:`How to convert TensorFlow ` -* :doc:`How to convert TensorFlow Lite ` -* :doc:`How to convert PaddlePaddle ` -To choose the best workflow for your application, read the :doc:`Model Preparation section <../model-preparation>`. +These are basic examples, for detailed conversion instructions, see the individual guides on +:doc:`PyTorch `, :doc:`ONNX `, +:doc:`TensorFlow `, :doc:`TensorFlow Lite `, +and :doc:`PaddlePaddle `. Refer to the list of all supported conversion options in :doc:`Conversion Parameters `. IR Conversion Benefits ################################################ - | **Saving to IR to improve first inference latency** -| When first inference latency matters, rather than convert the framework model each time it is loaded, which may take some time depending on its size, it is better to do it once. Save the model as an OpenVINO IR with ``save_model`` and then load it with ``read_model`` as needed. This should improve the time it takes the model to make the first inference as it avoids the conversion step. +| When first inference latency matters, rather than convert the framework model each time it + is loaded, which may take some time depending on its size, it is better to do it once. Save + the model as an OpenVINO IR with ``save_model`` and then load it with ``read_model`` as + needed. This should improve the time it takes the model to make the first inference as it + avoids the conversion step. | **Saving to IR in FP16 to save space** -| Save storage space, even more so if FP16 is used as it may cut the size by about 50%, especially useful for large models, like Llama2-7B. +| Save storage space, even more so if FP16 is used as it may cut the size by about 50%, + especially useful for large models, like Llama2-7B. | **Saving to IR to avoid large dependencies in inference code** -| Frameworks such as TensorFlow and PyTorch tend to be large dependencies (multiple gigabytes), and not all inference environments have enough space to hold them. -| Converting models to OpenVINO IR allows them to be used in an environment where OpenVINO is the only dependency, so much less disk space is needed. -| Loading and compiling with OpenVINO directly usually takes less runtime memory than loading the model in the source framework and then converting and compiling it. - -An example showing how to take advantage of OpenVINO IR, saving a model in OpenVINO IR once, using it many times, is shown below: +| Frameworks such as TensorFlow and PyTorch tend to be large dependencies for applications + running inference (multiple gigabytes). Converting models to OpenVINO IR removes this + dependency, as OpenVINO can run its inference with no additional components. + This way, much less disk space is needed, while loading and compiling usually takes less + runtime memory than loading the model in the source framework and then converting + and compiling it. + +Here is an example of how to benefit from OpenVINO IR, saving a model once and running it +multiple times: .. 
code-block:: py From 0fea4c86f1c23ab16bdd1c9d90bd6cc28e8bd6fe Mon Sep 17 00:00:00 2001 From: Georgy Krivoruchko Date: Mon, 8 Jul 2024 14:13:50 +0400 Subject: [PATCH 08/19] [TF] Layer tests has a GPU mismatch (#25426) ### Details: - Skipping test on GPU with FP16 precision due to sporadic misalignment in a results between original framework and OV ### Tickets: - 137495 --- .../layer_tests/tensorflow_tests/test_tf_ApproximateEqual.py | 4 +++- tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ApproximateEqual.py b/tests/layer_tests/tensorflow_tests/test_tf_ApproximateEqual.py index 44fba61327f4cc..da4089cc27893a 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_ApproximateEqual.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_ApproximateEqual.py @@ -40,6 +40,8 @@ def create_approximate_equal_net(self, input1_shape, input2_shape): @pytest.mark.nightly def test_approximate_equal_basic(self, params, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): + if ie_device == 'GPU' and precision == 'FP16': + pytest.skip("Accuracy mismatch on GPU") self._test(*self.create_approximate_equal_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, - use_legacy_frontend=use_legacy_frontend) \ No newline at end of file + use_legacy_frontend=use_legacy_frontend) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py b/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py index 5c2f122a1602a4..20619c47acf7d0 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_BinaryOps.py @@ -114,5 +114,7 @@ def test_binary_op(self, x_shape, y_shape, ie_device, precision, ir_version, tem pytest.skip("GPU does not support Bitwise ops. For Mod and Pow it has inference mismatch") if op_type in ['Mod', 'FloorDiv', 'FloorMod']: pytest.skip("Inference mismatch for Mod and FloorDiv") + if ie_device == 'GPU' and precision == 'FP16' and op_type in ['Equal', 'NotEqual', 'Greater', 'GreaterEqual', 'Less', 'LessEqual']: + pytest.skip("Accuracy mismatch on GPU") self._test(*self.create_add_placeholder_const_net(x_shape=x_shape, y_shape=y_shape, op_type=op_type), ie_device, precision, ir_version, temp_dir=temp_dir, use_legacy_frontend=use_legacy_frontend) From a349dc82f9ac3bca06db3d052ecf37ab1de8b042 Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Mon, 8 Jul 2024 12:14:00 +0200 Subject: [PATCH 09/19] [Core] Resolve static analysis issues (#25394) ### Details: - Added shape ranks' value assertion. ### Tickets: - CVS-145096 --- .../include/convolution_shape_inference_util.hpp | 11 +++++------ .../deformable_convolution_shape_inference.hpp | 3 ++- .../include/extract_image_patches_shape_inference.hpp | 8 ++++++-- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/core/shape_inference/include/convolution_shape_inference_util.hpp b/src/core/shape_inference/include/convolution_shape_inference_util.hpp index edd5f875da76b0..1b049278003a29 100644 --- a/src/core/shape_inference/include/convolution_shape_inference_util.hpp +++ b/src/core/shape_inference/include/convolution_shape_inference_util.hpp @@ -20,13 +20,12 @@ constexpr size_t spatial_dim_offset = 2; /** * @brief Get num of spatial form convolution operator. * - * Tries get value from operator member if is not deduced (has -1 value) then tries evaluate it from input shapes. 
+ * Tries to get value from operator member, if not deduced (has -1 value) then tries evaluate it from input shapes. * - * @tparam TConv Convolution type (this function must be a friend of TConv to access private member). - * @tparam TShape Shape type. - * @param op Pointer to convolution operator. - * @param data_shape Input data shape. - * @param flter_shape Input filter shape. + * @tparam TShape Shape type. + * @param data_shape Input data shape. + * @param filter_shape Input filter shape. + * @param filter_non_spatial_dims_count Number of non spatial dimensions in filter input * @return Value of spatial dimension number or infinite bound (-1) if cannot evaluate. */ template diff --git a/src/core/shape_inference/include/deformable_convolution_shape_inference.hpp b/src/core/shape_inference/include/deformable_convolution_shape_inference.hpp index efd70c4dd561ca..0e0a7943d31e32 100644 --- a/src/core/shape_inference/include/deformable_convolution_shape_inference.hpp +++ b/src/core/shape_inference/include/deformable_convolution_shape_inference.hpp @@ -18,7 +18,8 @@ size_t calculate_num_spatial(const util::DeformableConvolutionBase* op, const st auto num_spatial = util::num_spatial_from_shapes(input_shapes[0], input_shapes[2], non_spatial_count); if (num_spatial == convolution::num_spatial_undefined && input_shapes[1].rank().is_static()) { - num_spatial = input_shapes[1].size() - non_spatial_count; + constexpr size_t offsets_shape_rank = 4; + num_spatial = offsets_shape_rank - non_spatial_count; } return num_spatial; diff --git a/src/core/shape_inference/include/extract_image_patches_shape_inference.hpp b/src/core/shape_inference/include/extract_image_patches_shape_inference.hpp index 26a561b5399820..f527358d793c2c 100644 --- a/src/core/shape_inference/include/extract_image_patches_shape_inference.hpp +++ b/src/core/shape_inference/include/extract_image_patches_shape_inference.hpp @@ -17,13 +17,17 @@ std::vector shape_infer(const ExtractImagePatches* op, const std::vecto using TDim = typename T::value_type; constexpr size_t num_spatial_dim = 2; + constexpr size_t input_shape_static_rank = 4; constexpr auto is_zero = cmp::Less(1); const auto& input_shape = input_shapes[0]; auto output_shapes = std::vector(1); auto& output_shape = output_shapes[0]; - NODE_SHAPE_INFER_CHECK(op, input_shapes, input_shape.rank().compatible(4), "input tensor must be 4D tensor."); + NODE_SHAPE_INFER_CHECK(op, + input_shapes, + input_shape.rank().compatible(input_shape_static_rank), + "input tensor must be 4D tensor."); const auto& sizes = op->get_sizes(); NODE_VALIDATION_CHECK(op, @@ -53,7 +57,7 @@ std::vector shape_infer(const ExtractImagePatches* op, const std::vecto "Attribute padding should be in either valid or same_lower or same_upper."); if (input_shape.rank().is_static()) { - const auto num_non_spatial_dims = input_shape.size() - num_spatial_dim; + constexpr auto num_non_spatial_dims = input_shape_static_rank - num_spatial_dim; auto out_it = std::copy_n(input_shape.begin(), num_non_spatial_dims, std::back_inserter(output_shape)); output_shape[1] *= From d0faddd1121b996e99e68ecef600d93aa2b70b18 Mon Sep 17 00:00:00 2001 From: Kelvin Choi Date: Mon, 8 Jul 2024 20:30:37 +0900 Subject: [PATCH 10/19] [GPU] Reinterpret from 1 dim mem to 0 dim mem instead of allocating 0 bytes layout to OpenCL (#25296) ### Details: - *Requesting 0 bytes memory allocation to OpenCL returns error -61. 
Plugin should not request 0 bytes memory allocation.* ### Tickets: - *143586* - *143335* - *142909* --- .../include/intel_gpu/plugin/common_utils.hpp | 21 ++++ src/plugins/intel_gpu/src/graph/loop.cpp | 16 +-- .../intel_gpu/src/graph/primitive_inst.cpp | 4 +- .../tests/unit/test_cases/loop_gpu_test.cpp | 106 ++++++++++++++++++ 4 files changed, 138 insertions(+), 9 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp index 80fac9d10d9f5b..75e4ffd779036f 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/common_utils.hpp @@ -6,6 +6,7 @@ #include #include +#include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/runtime/layout.hpp" #include "intel_gpu/runtime/memory.hpp" #include "intel_gpu/runtime/optionals.hpp" @@ -111,6 +112,26 @@ inline ov::Shape predict_shape(const std::string& name, const cldnn::layout layo return layout.get_shape(); } +inline cldnn::memory::ptr allocate_memory_evenif_zero_bytes(cldnn::engine& _engine, + const cldnn::layout& layout, + cldnn::allocation_type type, + bool reset = true) { + if (layout.bytes_count() == 0) { + auto non_zero_layout = cldnn::layout({1}, layout.data_type, layout.format); + auto res = _engine.allocate_memory(non_zero_layout, type, false); + return _engine.reinterpret_buffer(*res, layout); + } else { + return _engine.allocate_memory(layout, type, reset); + } +} + +inline cldnn::memory::ptr allocate_memory_evenif_zero_bytes(cldnn::engine& _engine, + const cldnn::layout& layout, + bool reset = true) { + cldnn::allocation_type type = _engine.get_lockable_preferred_memory_allocation_type(layout.format.is_image_2d()); + return allocate_memory_evenif_zero_bytes(_engine, layout, type, reset); +} + /// WA: Force exit. Any opencl api call can be hang after CL_OUT_OF_RESOURCES. inline void ForceExit() { std::cerr << "[GPU] force exit.\n" diff --git a/src/plugins/intel_gpu/src/graph/loop.cpp b/src/plugins/intel_gpu/src/graph/loop.cpp index 8a1b29b9c3409f..08944f43a4287d 100644 --- a/src/plugins/intel_gpu/src/graph/loop.cpp +++ b/src/plugins/intel_gpu/src/graph/loop.cpp @@ -7,6 +7,7 @@ #include "mutable_data_inst.h" #include "json_object.h" #include "primitive_type_base.h" +#include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/primitives/data.hpp" #include "intel_gpu/primitives/mutable_data.hpp" #include "intel_gpu/runtime/error_handler.hpp" @@ -319,7 +320,7 @@ void loop_inst::update_backedge_mapped_memory() { // generally, shouldn't go this way, but... 
auto output_prim = body_network->get_primitive(back_edge.from); layout output_layout = output_prim->output_memory().get_layout(); - backedge_mem = body_network->get_engine().allocate_memory(output_layout, 0); + backedge_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), output_layout, false); } } else { auto external_id = output_mapping.front()->external_id; @@ -397,7 +398,7 @@ loop_inst::concatenated_memory_mapping::ptr loop_inst::create_concat_memory_map( << sliced_layout.get_partial_shape().to_string() << " to " << updated_sliced_layout.to_string() << std::endl; sliced_layout.set_partial_shape(updated_sliced_layout); - inter_mem_ptr = engine.allocate_memory(sliced_layout); + inter_mem_ptr = ov::intel_gpu::allocate_memory_evenif_zero_bytes(engine, sliced_layout); intern_prim->set_output_layout(sliced_layout, internal_id.idx); } @@ -408,7 +409,7 @@ loop_inst::concatenated_memory_mapping::ptr loop_inst::create_concat_memory_map( sliced_mems.reserve(num_iterations); sliced_mems.push_back(inter_mem_ptr); for (int j=1; j < num_iterations; ++j) { - memory::ptr sliced_mem = engine.allocate_memory(sliced_layout); + memory::ptr sliced_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(engine, sliced_layout); sliced_mems.push_back(sliced_mem); } } @@ -500,7 +501,7 @@ void loop_inst::preprocess_input_memory(const int64_t num_iterations) { // if internal input memory is in backedge, allocate new memory. // Because internal input memory's data will be updated through backedge process. if (iter != _back_edges.end()) { - internal_input_memory = body_network->get_engine().allocate_memory(memory->get_layout(), false); + internal_input_memory = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), memory->get_layout(), false); internal_input_memory->copy_from(body_network->get_stream(), *memory); GPU_DEBUG_LOG << "Input memory of internal node(" << internal_id.to_string() << ") is set to new memory(" << internal_input_memory << ", " << internal_input_memory->get_layout().to_short_string() @@ -723,7 +724,7 @@ void loop_inst::postprocess_output_memory(bool is_dynamic, int64_t current_itera } else { if (!output_allocated || shape_changed()) { auto concat_layout = _impl_params->get_output_layout(external_id.idx); - auto concat_mem = _network.get_engine().allocate_memory(concat_layout, false); + auto concat_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(_network.get_engine(), concat_layout, false); external_outputs[external_id.idx] = concat_mem; auto iter = std::find_if(concatenated_output_mem_mappings.begin(), concatenated_output_mem_mappings.end(), @@ -1082,7 +1083,8 @@ std::vector loop_inst::handle_buffers_for_next_iteration(const loop_ // Check backedge_to shape needs to be updated by initial_mem OPENVINO_ASSERT(mapping.initial_mem != nullptr, "initial_mem should not be null"); if (!mapping.initial_mem->get_layout().identical(to_mem->get_layout())) { - to_mem = body_network->get_engine().allocate_memory(mapping.initial_mem->get_layout(), false); + to_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), mapping.initial_mem->get_layout(), false); + body_network->set_input_data(to_id, to_mem); ev = to_mem->copy_from(body_network->get_stream(), *(mapping.initial_mem)); GPU_DEBUG_LOG << iter << ") [SINGLE] Backedge_to node(" << to_id << ") is set to new memory(" @@ -1104,7 +1106,7 @@ std::vector loop_inst::handle_buffers_for_next_iteration(const loop_ // Check backedge_to shape needs to be updated by backedge_from 
if (!from_mem->get_layout().identical(to_mem->get_layout())) { - to_mem = body_network->get_engine().allocate_memory(from_mem->get_layout(), false); + to_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(body_network->get_engine(), from_mem->get_layout(), false); GPU_DEBUG_LOG << iter << ") [SINGLE] Backedge_to node(" << to_id << ") is set to new memory(" << to_mem << ", " << to_mem->get_layout().to_short_string() << ") because of shape update from backedge_from()" << from_id diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index e26b4a536e91df..460428477281ea 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -2037,11 +2037,11 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, if ((_node.is_output() && is_reorder_weights) || (!_node.is_output() && _node.is_type())) reset = false; GPU_DEBUG_LOG << "[" << _node.id() << ": constant]" << std::endl; - return _engine.allocate_memory(layout, alloc_type, reset); + return ov::intel_gpu::allocate_memory_evenif_zero_bytes(_engine, layout, alloc_type, reset); } } else if (!_node.can_share_buffer() || _node.can_be_optimized() || _node.is_output()) { GPU_DEBUG_LOG << "[" << _node.id() << ": output]" << std::endl; - return _engine.allocate_memory(layout, alloc_type, reset); + return ov::intel_gpu::allocate_memory_evenif_zero_bytes(_engine, layout, alloc_type, reset); } else { return get_memory_from_pool(_engine, net_id, diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp index d071c0f3416581..df403b4001e2c2 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/loop_gpu_test.cpp @@ -7,6 +7,7 @@ #include #include #include +#include "intel_gpu/plugin/common_utils.hpp" #include #include "intel_gpu/primitives/eltwise.hpp" #include @@ -1212,3 +1213,108 @@ TEST(loop_gpu, support_loop_w_dynamic_input_update_primitive_id) { std::vector(), 2, 3); } + +template +void test_loop_gpu_zero_bytes_layout(bool is_caching_test) +{ + auto& engine = get_test_engine(); + + // shape for zero bytes layout + auto trip_count_mem = ov::intel_gpu::allocate_memory_evenif_zero_bytes(engine, { cldnn::layout{ ov::PartialShape({0}), data_types::i32, format::bfyx } }); + + auto input_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 4, 5 } }); + auto operand_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 4, 5 } }); + auto initial_condition_mem = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 1, 1 } }); + auto num_iteration_mem = engine.allocate_memory({ data_types::i32, format::bfyx, { 1, 1, 1, 1 } }); + + std::vector input_data{ + 1.0f, 2.0f, -15.f, 3.0f, 4.0f, -15.f, 5.0f, 6.0f, -15.f, 7.0f, + -15.f, 0.0f, 0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f, 1.5f, 5.2f + }; + std::vector eltwise_operand { + 1.f, -2.f, 3.f, -4.f, 3.0f, -2.0f, 1.f, -2.f, 3.0f, -4.0f, + 3.f, -2.f, 1.f, -2.f, 3.5f, -4.5f, 5.f, -4.f, 3.5f, -2.2f + }; + int trip_count = 8; + int initial_condition = 1; + + // initialize input buffers + set_values(input_mem, input_data); + set_values(operand_mem, eltwise_operand); + set_values(trip_count_mem, { trip_count }); + set_values(initial_condition_mem, {initial_condition}); + + topology body( + input_layout("input", input_mem->get_layout()), + data("eltwise_operand", operand_mem), + eltwise("eltwise", 
input_info("input"), input_info("eltwise_operand"), eltwise_mode::sum) + ); + + std::vector input_primitive_maps { loop::io_primitive_map("input", "input") }; + std::vector output_primitive_maps { loop::io_primitive_map("loop", "eltwise") }; + std::vector back_edges { loop::backedge_mapping("eltwise", "input") }; + + auto body_program = build_program(engine, body, "", output_primitive_maps, back_edges); + + topology topology( + input_layout("input", input_mem->get_layout()), + input_layout("trip_count", trip_count_mem->get_layout()), + input_layout("initial_condition", initial_condition_mem->get_layout()), + mutable_data("num_iteration", num_iteration_mem), + loop("loop", { input_info("num_iteration"), input_info("trip_count"), input_info("initial_condition"), input_info("input") }, body_program, + "trip_count", "initial_condition", "num_iteration", + input_primitive_maps, output_primitive_maps, back_edges, 8) + ); + + cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test); + + network->set_input_data("input", input_mem); + network->set_input_data("trip_count", trip_count_mem); + network->set_input_data("initial_condition", initial_condition_mem); + + auto outputs = network->execute(); + ASSERT_EQ(outputs.size(), 1); + auto output = outputs.begin()->second.get_memory(); + auto output_layout = output->get_layout(); + + ASSERT_EQ(output_layout.batch(), 1); + ASSERT_EQ(output_layout.feature(), 1); + ASSERT_EQ(output_layout.spatial(0), 4); + ASSERT_EQ(output_layout.spatial(1), 5); + + // value check + { + mem_lock output_ptr{ output, get_test_stream() }; + ASSERT_EQ(output_ptr.size(), input_data.size()); + for (size_t i = 0, iend = input_data.size(); i < iend; ++i) { + ASSERT_FLOAT_EQ(output_ptr[i], input_data[i] + eltwise_operand[i] * trip_count); + } + } + + // allocate new output memory + layout loop_l = network->get_output_memory("loop")->get_layout(); + auto output_mem = engine.allocate_memory(loop_l); + network->set_output_memory("loop", output_mem); + + //one more execute + set_values(input_mem, input_data); + set_values(operand_mem, eltwise_operand); + set_values(trip_count_mem, { trip_count }); + set_values(initial_condition_mem, { initial_condition }); + outputs = network->execute(); + + // check everything once again + ASSERT_EQ(outputs.size(), 1); + auto output2 = outputs.begin()->second.get_memory(); + { + mem_lock output_ptr2{ output2, get_test_stream() }; + ASSERT_EQ(output_ptr2.size(), input_data.size()); + for (size_t i = 0, iend = input_data.size(); i < iend; ++i) { + ASSERT_FLOAT_EQ(output_ptr2[i], input_data[i] + eltwise_operand[i] * trip_count); + } + } +} + +TEST(loop_gpu, zero_bytes_layout) { + test_loop_gpu_zero_bytes_layout(false); +} From 6598aeb80e0bbc01698eb880a314dbb2e8b96ce8 Mon Sep 17 00:00:00 2001 From: Dmitry Matveev Date: Mon, 8 Jul 2024 13:37:27 +0100 Subject: [PATCH 11/19] NPU: Updates to the "W" extension (#25154) ### Details: - Pipeline the parameter access with execution - Avoid several issues in debug capabilities - Also includes various stability and Coverity fixes ### Tickets: - n/a --------- Co-authored-by: Alexey Smirnov Co-authored-by: Ujjayant Kadian <118752727+ujjayant-kadian@users.noreply.github.com> --- .../al/include/intel_npu/al/config/config.hpp | 2 +- .../src/plugin/npuw/accuracy/comparator.cpp | 2 +- .../plugin/npuw/base_sync_infer_request.cpp | 43 +++ .../plugin/npuw/base_sync_infer_request.hpp | 17 +- .../src/plugin/npuw/compiled_model.cpp | 48 ++- 
.../src/plugin/npuw/compiled_model.hpp | 4 +- .../plugin/npuw/just_sync_infer_request.cpp | 341 ++++++++++++------ .../plugin/npuw/just_sync_infer_request.hpp | 37 +- .../intel_npu/src/plugin/npuw/logging.cpp | 2 +- .../npuw/partitioning/online/compiler.cpp | 5 +- .../plugin/npuw/partitioning/online/graph.hpp | 2 +- .../plugin/npuw/partitioning/online/group.cpp | 2 +- .../npuw/partitioning/online/snapshot.cpp | 23 +- .../npuw/partitioning/online/utils/utils.cpp | 4 +- .../plugin/npuw/partitioning/partitioning.cpp | 242 +++++++++++-- .../npuw/partitioning/patterns/avoid.cpp | 2 +- .../npuw/partitioning/patterns/dcoff.cpp | 27 +- .../intel_npu/src/plugin/npuw/util.cpp | 121 +++++-- 18 files changed, 686 insertions(+), 238 deletions(-) diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/al/config/config.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/al/config/config.hpp index dc74eccb51cd92..0fa1207bd9935a 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/al/config/config.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/al/config/config.hpp @@ -123,7 +123,7 @@ struct OptionParser> final { OPENVINO_ASSERT(kv_delim_pos != std::string::npos); K key = OptionParser::parse(std::string_view(item.substr(0, kv_delim_pos))); V value = OptionParser::parse(std::string_view(item.substr(kv_delim_pos + 1))); - res[key] = value; + res[key] = std::move(value); }); return res; } diff --git a/src/plugins/intel_npu/src/plugin/npuw/accuracy/comparator.cpp b/src/plugins/intel_npu/src/plugin/npuw/accuracy/comparator.cpp index 7eb4e82e202804..4440027c818969 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/accuracy/comparator.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/accuracy/comparator.cpp @@ -29,7 +29,7 @@ bool ov::npuw::metrics::NRMSE::operator()(const ov::SoPtr& actual, } else { ov::Tensor dst(ov::element::Type_t::f32, actual->get_shape()); ov::npuw::util::to_f32(ov::make_tensor(actual), dst); - actual_f32 = dst; + actual_f32 = std::move(dst); } if (ov::element::Type_t::f32 == reference->get_element_type()) { diff --git a/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp index a1fd1952b7c5b1..58036d299b3c1b 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp @@ -179,9 +179,11 @@ void ov::npuw::IBaseInferRequest::check_tensors() const { } void ov::npuw::IBaseInferRequest::infer() { + m_now_idx.reset(); prepare_for_infer(); bool failover_happened = false; for (std::size_t idx = 0u; idx < m_num_submodels; idx++) { + m_now_idx = idx; if (!valid_subrequest(idx)) { continue; } @@ -204,6 +206,7 @@ void ov::npuw::IBaseInferRequest::infer() { LOG_BLOCK(); m_npuw_model->log_device_dist(); } + m_now_idx.reset(); } void ov::npuw::IBaseInferRequest::dump_input_tensors(std::size_t idx) { @@ -279,3 +282,43 @@ std::string ov::npuw::IBaseInferRequest::iter_path_suffix(std::size_t idx) const // is exceeded) return "_iter_" + ov::npuw::util::fmt(m_run_iter, 1000); } + +bool ov::npuw::IBaseInferRequest::needs_copy(std::size_t idx) const { + // Answer if the given subgraph needs copy for I/O or tolerates + // the set/get_ tensor API + auto& comp_model_desc = m_npuw_model->m_compiled_submodels[idx]; + const auto real_idx = comp_model_desc.replaced_by.value_or(idx); + if (ov::npuw::util::starts_with(m_subrequest_devices[real_idx], "CPU")) { + return false; + } + + // Assume all others prefer copy unless remote 
tensors are supported + return true; +} + +std::size_t ov::npuw::IBaseInferRequest::next(std::size_t idx_base) const { + // Answer the next valid subrequest which is possible to prepare + // FIXME: this could be a predefined map, not a lookup + for (std::size_t idx = idx_base; idx < m_num_submodels; idx++) { + auto& comp_model_desc = m_npuw_model->m_compiled_submodels[idx]; + if (!comp_model_desc.compiled_model && !comp_model_desc.replaced_by) { + continue; + } + return idx; + } + + // went over entire list and nothing found? + // NOTE: this recursive call is a short-cut and may enter the recursion + // if all the subgraphs are OPTIMIZED OUT (shouldn't be possible but + // there's a Murphy's law on this). + return next(0); +} + +std::size_t ov::npuw::IBaseInferRequest::real(std::size_t idx) const { + auto& comp_model_desc = m_npuw_model->m_compiled_submodels[idx]; + return comp_model_desc.replaced_by.value_or(idx); +} + +ov::npuw::IBaseInferRequest::now_t ov::npuw::IBaseInferRequest::now_idx() const { + return m_now_idx; +} diff --git a/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.hpp b/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.hpp index 41bdf2368c2418..986ea78c378c32 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.hpp @@ -68,6 +68,11 @@ class IBaseInferRequest : public ov::ISyncInferRequest { std::shared_ptr m_npuw_model; std::vector m_completion_cbs; RqPtrs m_subrequests; + + // This vector is used to track devices for individual subrequests + // here locally. Note that the models can be recompiled in + // contexts of other requests (if multiple of those are created) + // so this cached information is used to detect these situations. std::vector m_subrequest_devices; // Permanent storage for input & output tensors @@ -103,9 +108,19 @@ class IBaseInferRequest : public ov::ISyncInferRequest { // if we go over-designing the things. 
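A small standalone sketch of the lookup order implemented by the new `next()` member above (hypothetical free-function form kept self-contained; the member version consults the compiled submodel descriptors rather than a flag vector):

#include <cstddef>
#include <vector>

// Illustrative only: scan forward from idx_base for the first valid
// (compiled or function-call) submodel, wrapping around to 0 otherwise.
std::size_t next_valid(const std::vector<bool>& valid, std::size_t idx_base) {
    for (std::size_t idx = idx_base; idx < valid.size(); ++idx) {
        if (valid[idx]) {
            return idx;
        }
    }
    // Same assumption as the member function: at least one submodel is valid,
    // otherwise this recursion would not terminate (the "Murphy's law" note above).
    return next_valid(valid, 0);
}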
std::string iter_path_suffix(std::size_t idx) const; mutable std::optional m_iter_suffix_required; - std::size_t m_run_iter = 0; + std::size_t m_run_iter = 0u; + + bool needs_copy(std::size_t idx) const; + std::size_t next(std::size_t idx_base) const; + std::size_t real(std::size_t idx) const; RqPtrs m_ref_subrequests; + + using now_t = std::optional; + now_t now_idx() const; + +private: + now_t m_now_idx; }; } // namespace npuw diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp index 780dcc9700ce87..596fc94bb7362d 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp @@ -278,6 +278,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr& model, // Do it just once if that's a function if (real_id == id) { remove_long_output_names(m_compiled_submodels[real_id].model); + fill_empty_tensor_names(m_compiled_submodels[real_id].model); } if (ov::npuw::util::is_set(id, dump_sub_opt)) { @@ -383,7 +384,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr& model, void ov::npuw::CompiledModel::remove_long_output_names(const std::shared_ptr& model) { NPUW_ASSERT(model.get() != nullptr); for (auto& output : model->outputs()) { - auto tensor_names = output.get_tensor().get_names(); + const auto& tensor_names = output.get_tensor().get_names(); if (tensor_names.size() > 32) { // maximum supported output.get_tensor().set_names({}); LOG_INFO("Removed output tensor names for " << model->get_friendly_name()); @@ -392,6 +393,32 @@ void ov::npuw::CompiledModel::remove_long_output_names(const std::shared_ptr& model) { + NPUW_ASSERT(model.get() != nullptr); + + size_t in_tensor_idx = 0; + size_t out_tensor_idx = 0; + + for (auto& input : model->inputs()) { + const auto& tensor_names = input.get_tensor().get_names(); + if (tensor_names.empty()) { + input.get_tensor().set_names({"npuw_in_tensor_" + std::to_string(in_tensor_idx)}); + LOG_INFO("Added input tensor name for " << model->get_friendly_name()); + LOG_BLOCK(); + } + in_tensor_idx++; + } + for (auto& output : model->outputs()) { + const auto& tensor_names = output.get_tensor().get_names(); + if (tensor_names.empty()) { + output.get_tensor().set_names({"npuw_out_tensor_" + std::to_string(out_tensor_idx)}); + LOG_INFO("Added output tensor name for " << model->get_friendly_name()); + LOG_BLOCK(); + } + out_tensor_idx++; + } +} + void ov::npuw::CompiledModel::reset_io() { // Restore inputs/outputs from compiled submodels // FIXME: this method is also called from IBaseInferReqeust::create_infer_request @@ -421,11 +448,11 @@ void ov::npuw::CompiledModel::reset_io() { LOG_VERB("Input (Parameter) " << inputs()[idx_in] << " from Subgraph[" << submodel_idx << "]/" << input_idx); idx_in++; } - for (const auto& to_submodel : m_outputs_to_submodels_outputs) { - NPUW_ASSERT(to_submodel != NO_LINK); + for (const auto& from_submodel : m_outputs_to_submodels_outputs) { + NPUW_ASSERT(from_submodel != NO_LINK); LOG_BLOCK(); // in fact, to_submodel from_submodel here, but who cares - const auto& submodel_idx = to_submodel.first; - const auto& output_idx = to_submodel.second; + const auto& submodel_idx = from_submodel.first; + const auto& output_idx = from_submodel.second; LOG_VERB("Output (Result) " << outputs()[idx_out] << " from Subgraph[" << submodel_idx << "]/" << output_idx); idx_out++; } @@ -576,7 +603,7 @@ void ov::npuw::CompiledModel::export_model(std::ostream& model_stream) const { 
std::string ov::npuw::CompiledModel::submodel_device(const std::size_t idx) const { std::size_t real_idx = m_compiled_submodels[idx].replaced_by.value_or(idx); - auto comp_subm_desc = m_compiled_submodels[real_idx]; + const auto& comp_subm_desc = m_compiled_submodels[real_idx]; if (!comp_subm_desc.compiled_model) { return ""; @@ -659,7 +686,6 @@ void ov::npuw::CompiledModel::implement_properties() { BIND(npuw::parallel_compilation, NPUW_PARALLEL_COMPILE), BIND(npuw::partitioning::dcoff_type, NPUW_DCOFF_TYPE), BIND(npuw::partitioning::dcoff_with_scale, NPUW_DCOFF_SCALE), - BIND(npuw::parallel_compilation, NPUW_PARALLEL_COMPILE), BIND(npuw::funcall_async, NPUW_FUNCALL_ASYNC), BIND(npuw::accuracy::check, NPUW_ACC_CHECK), BIND(npuw::accuracy::threshold, NPUW_ACC_THRESH), @@ -677,7 +703,7 @@ void ov::npuw::CompiledModel::implement_properties() { m_prop_to_opt.insert( {{ov::supported_properties.name(), {ov::PropertyMutability::RO, - [&](const ::intel_npu::Config&) { + [&](const ::intel_npu::Config&) -> const std::vector>& { return m_all_supported_props; }}}, {ov::device::properties.name(), @@ -685,7 +711,7 @@ void ov::npuw::CompiledModel::implement_properties() { [&](const ::intel_npu::Config&) { ov::AnyMap all_devices = {}; for (size_t i = 0; i < m_compiled_submodels.size(); ++i) { - auto comp_model_desc = m_compiled_submodels[i]; + const auto& comp_model_desc = m_compiled_submodels[i]; if (!comp_model_desc.compiled_model) // Handle if optimized out continue; ov::AnyMap device_properties = {}; @@ -702,7 +728,7 @@ void ov::npuw::CompiledModel::implement_properties() { }}}, {ov::model_name.name(), {ov::PropertyMutability::RO, - [&](const ::intel_npu::Config&) { + [&](const ::intel_npu::Config&) -> const std::string& { return m_name; }}}, {ov::optimal_number_of_infer_requests.name(), @@ -733,7 +759,7 @@ void ov::npuw::CompiledModel::implement_properties() { s.insert(submodel_device(i)); device_names.push_back(submodel_device(i)); } - return decltype(ov::execution_devices)::value_type{device_names}; + return decltype(ov::execution_devices)::value_type{std::move(device_names)}; }}}, {ov::loaded_from_cache.name(), {ov::PropertyMutability::RO, [&](const ::intel_npu::Config&) { return m_loaded_from_cache; diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp index 9509d64d0c8851..ef260c91419781 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp @@ -58,6 +58,7 @@ class CompiledModel : public ov::ICompiledModel { // This is used for removing too long output tensor names to fix some compilation issues void remove_long_output_names(const std::shared_ptr& model); + void fill_empty_tensor_names(const std::shared_ptr& model); std::shared_ptr get_npuw_plugin() const; @@ -86,7 +87,8 @@ class CompiledModel : public ov::ICompiledModel { static const constexpr auto NO_LINK = ToSubmodel{-1, -1}; // In the below vector, index == compiled model's input/output port idex. 
- std::vector m_inputs_to_submodels_inputs, m_outputs_to_submodels_outputs; + std::vector m_inputs_to_submodels_inputs; + std::vector m_outputs_to_submodels_outputs; std::map> m_param_subscribers; diff --git a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp index da9255ac235264..c47962dc475cff 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp @@ -111,30 +111,9 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptrinputs().size(); i++) { const auto& port = m_npuw_model->inputs()[i]; - const auto& to_submodel = m_npuw_model->m_inputs_to_submodels_inputs.at(i); m_input_tensors.push_back(ov::get_tensor_impl(ov::Tensor(port.get_element_type(), port.get_shape()))); m_port_to_tensor[port] = TensorStorage{m_input_tensors.back(), true}; - - if (to_submodel != ov::npuw::CompiledModel::NO_LINK) { - const auto& submodel_idx = to_submodel.first; - m_port_to_subrequest_idx[port] = submodel_idx; - m_reader_to_orig_port[to_submodel] = port; - m_port_orig_to_sub[port] = - get_real_subrequest(to_submodel.first)->get_compiled_model()->inputs()[to_submodel.second]; - } else { - // Quick hack to support models with unused Parameters... - m_port_to_subrequest_idx[port] = ov::npuw::JustInferRequest::INVALID_IDX; - } - } - // One more map to fill... - for (auto&& it : m_npuw_model->m_param_subscribers) { - const auto& prim_port = m_npuw_model->inputs()[it.first]; - for (auto&& to_submodel : it.second) { - m_reader_to_orig_port[to_submodel] = prim_port; - m_port_orig_to_sub[prim_port] = - get_real_subrequest(to_submodel.first)->get_compiled_model()->inputs()[to_submodel.second]; - } - } + } // for(inputs) // Preallocate output tensors LOG_INFO("Preallocating output tensors..."); @@ -149,18 +128,46 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptrsecond // Function calls have their tensors allocated, so just use one - : ov::get_tensor_impl(ov::Tensor(port.get_element_type(), port.get_shape())); + const auto& tensor = + funcall_result_iter != m_funcall_result.end() + ? funcall_result_iter->second // Function calls have their tensors allocated, so just use one + : ov::get_tensor_impl(ov::Tensor(port.get_element_type(), port.get_shape())); - const auto& submodel_idx = from_submodel.first; - m_port_to_subrequest_idx[port] = submodel_idx; - m_port_orig_to_sub[port] = - get_real_subrequest(from_submodel.first)->get_compiled_model()->outputs()[from_submodel.second]; m_output_tensors.push_back(tensor); m_port_to_tensor[port] = TensorStorage{tensor, true}; } connect_subrequests(); + + // Build the parameter/result mapping {{{ + m_subrequests_gio.resize(m_subrequests.size()); + + // Parameters: stage 1... + for (size_t i = 0; i < m_npuw_model->inputs().size(); i++) { + const auto& to_submodel = m_npuw_model->m_inputs_to_submodels_inputs.at(i); + if (to_submodel != CompiledModel::NO_LINK) { + std::size_t sub_idx{}, in_idx{}; + std::tie(sub_idx, in_idx) = m_npuw_model->m_inputs_to_submodels_inputs.at(i); + m_subrequests_gio.at(sub_idx).global_params[i] = in_idx; + } + } // for(inputs) + + // Parameters: stage 2... 
+ for (auto&& it : m_npuw_model->m_param_subscribers) { + const auto param_idx = it.first; + for (auto&& to_submodel : it.second) { + std::size_t sub_idx{}, in_idx{}; + std::tie(sub_idx, in_idx) = to_submodel; + m_subrequests_gio.at(sub_idx).global_params[param_idx] = in_idx; + } + } + + // Results + for (size_t i = 0; i < m_npuw_model->outputs().size(); i++) { + std::size_t sub_idx{}, out_idx{}; + std::tie(sub_idx, out_idx) = m_npuw_model->m_outputs_to_submodels_outputs.at(i); + m_subrequests_gio.at(sub_idx).global_results[i] = out_idx; + } + // }}} } void ov::npuw::JustInferRequest::connect_subrequests() { @@ -255,74 +262,15 @@ std::vector ov::npuw::JustInferRequest::get_profiling_info() return info; } -void ov::npuw::JustInferRequest::bind_params_results() { - // Parameters: Specify input tensors to the "input" infer requests - for (size_t i = 0; i < m_npuw_model->inputs().size(); i++) { - const auto& port = m_npuw_model->inputs()[i]; - - auto ptidx = m_port_to_subrequest_idx.at(port); - if (ptidx == ov::npuw::JustInferRequest::INVALID_IDX) - continue; - if (m_npuw_model->m_compiled_submodels[ptidx].replaced_by) { - // Don't do here - function call will take parameter - // itself. Note it may be implemented more efficently - // than now (and in some cases, parameter can be pre-set) - } else { - const auto& subr = m_subrequests[ptidx]; - LOG_DEBUG("Subrequest[" << ptidx << "]: set parameter[" << port << "] to global parameter [" << i - << "] (direct)"); - subr->set_tensor(m_port_orig_to_sub.at(port), m_port_to_tensor.at(port).tensor); - } - } // for(inputs) - // There may be (other) subrequests subscribed to Parameters tensors - // Update their inputs using The only Parameter tensor (exposed to OV) - // FIXME: BTW probably this is NOT necessary now - for (auto&& it : m_npuw_model->m_param_subscribers) { - const auto param_idx = it.first; - const auto& prim_port = m_npuw_model->inputs()[param_idx]; - - for (auto&& sit : it.second) { - const auto sub_idx = sit.first; - if (m_npuw_model->m_compiled_submodels[sub_idx].replaced_by) { - // Don't do here - function call will take parameter - // itself. Note it may be implemented more efficently - // than now (and in some cases, parameter can be pre-set) - } else { - const auto sub_param_idx = sit.second; - - const auto& sub_port = m_subrequests[sub_idx]->get_compiled_model()->inputs()[sub_param_idx]; - - LOG_DEBUG("Subrequest[" << sub_idx << "]: set parameter[" << sub_param_idx << "] to global parameter [" - << param_idx << "] (subscriber)"); - - m_subrequests[sub_idx]->set_tensor(sub_port, get_tensor(prim_port)); - } - } - } // for(subscribers) - - // Results: Specify output tensors to the "output" infer requests - for (size_t i = 0; i < m_npuw_model->outputs().size(); i++) { - const auto& port = m_npuw_model->outputs()[i]; - const auto sub_idx = m_port_to_subrequest_idx.at(port); - if (m_npuw_model->m_compiled_submodels[sub_idx].replaced_by) { - // here port_to_tensor may refer to a preallocated funcall result. 
- // in this case, no need to do anything here as functions are bound - // with their result tensors during the call (see notes) - } else { - const auto& subr = m_subrequests[sub_idx]; - subr->set_tensor(m_port_orig_to_sub.at(port), m_port_to_tensor.at(port).tensor); - } - } // for(outputs) -} - void ov::npuw::JustInferRequest::prepare_for_infer() { LOG_DEBUG("Preparing to infer..."); LOG_BLOCK(); - bind_params_results(); + // Submit global parameters (if needed) for the first subgraph + bind_global_parameters(next(0)); // If funcall pipelining is enabled, prefill the function "heads" - // with constant arguments + // with constant arguments. The list of heads is empty otherwise. for (auto&& id : m_funcall_heads) { LOG_DEBUG("Pre-initializing weights for subgraph[" << id << "]"); unpack_closure(id, m_subrequests[id]); @@ -345,6 +293,92 @@ void ov::npuw::JustInferRequest::start_subrequest(std::size_t idx) { m_subrequests[idx]->start_async(); } +void ov::npuw::JustInferRequest::bind_global_parameters(std::size_t idx) { + LOG_DEBUG("Binding parameters for Subgraph[" << idx << "]"); + LOG_BLOCK(); + + auto& comp_model_desc = m_npuw_model->m_compiled_submodels[idx]; + const auto real_idx = comp_model_desc.replaced_by.value_or(idx); + + LOG_DEBUG("Real idx is..." << real_idx); + + const bool do_copy = needs_copy(idx); + const auto& iodesc = m_subrequests_gio.at(idx); + + // a list of ports to copy tensors, if needed: FROM -> TO + std::vector, ov::Output>> copy_list; + + // pick which subrequest we actually work on here + auto subr = [&]() { + if (now_idx() && real_idx == real(now_idx().value()) && m_use_function_pipelining) { + LOG_DEBUG("Accessing the pipeline subrequest"); + // The real index of request we need to prepare IS + // the same request which executes now AND + // function_pipelining enabled - select the reserve request. + NPUW_ASSERT(m_funcall_pipeline[real_idx].subrequest); + return m_funcall_pipeline[real_idx].subrequest; + } + // Otherwise: Just a return a subrequest which is in place. + // If it is a function call and we have function pipelining ON, + // it is still the right subrequest we can use. + LOG_DEBUG("Accessing the primary subrequest"); + return m_subrequests[real_idx]; + }(); + + for (auto&& it : iodesc.global_params) { + std::size_t param_idx{}, sub_in_idx{}; + std::tie(param_idx, sub_in_idx) = it; + LOG_DEBUG("Processing " << param_idx << " -> " << sub_in_idx << std::endl); + const auto& g_port = m_npuw_model->inputs()[param_idx]; + const auto& g_tnsr = m_port_to_tensor.at(g_port).tensor; + const auto& s_port = subr->get_inputs()[sub_in_idx]; + LOG_DEBUG("Processing " << g_port << " -> " << s_port << "..."); + LOG_BLOCK(); + if (do_copy) { + LOG_DEBUG("Will be copied"); + copy_list.emplace_back(g_tnsr, s_port); + } else { + LOG_DEBUG("Will be set"); + subr->set_tensor(s_port, g_tnsr); + } + } + + LOG_DEBUG("Running copy..."); + ov::parallel_for(copy_list.size(), [&](std::size_t idx) { + auto& it = copy_list[idx]; + ov::SoPtr dst = subr->get_tensor(it.second); + it.first->copy_to(dst._ptr); + }); + + LOG_DEBUG("Done"); +} + +void ov::npuw::JustInferRequest::bind_global_results(std::size_t idx) { + LOG_DEBUG("Binding results for Subgraph[" << idx << "]"); + LOG_BLOCK(); + + auto& comp_model_desc = m_npuw_model->m_compiled_submodels[idx]; + const auto real_idx = comp_model_desc.replaced_by.value_or(idx); + if (real_idx != idx) { + // Don't do here - function call will take parameter + // itself. 
Note it may be implemented more efficently + // than now (and in some cases, parameter can be pre-set) + LOG_DEBUG("Skipping this too now - function will do it for itself"); + return; + } + + const auto& iodesc = m_subrequests_gio.at(idx); + for (auto&& it : iodesc.global_results) { + std::size_t result_idx{}, sub_out_idx{}; + std::tie(result_idx, sub_out_idx) = it; + const auto& g_port = m_npuw_model->outputs()[result_idx]; + const auto& s_port = m_subrequests[idx]->get_outputs()[sub_out_idx]; + m_subrequests[idx]->set_tensor(s_port, m_port_to_tensor.at(g_port).tensor); + } + + LOG_DEBUG("Done"); +} + void ov::npuw::JustInferRequest::function_prologue(std::size_t idx) { LOG_DEBUG("Preparing Subgraph[" << idx << "] funcall prologue"); LOG_BLOCK(); @@ -378,11 +412,6 @@ void ov::npuw::JustInferRequest::function_prologue(std::size_t idx) { // Take its tensor from the storage m_subrequests[real_idx]->set_tensor(iport, m_funcall_result.at({prod_idx, prod_port})); } - } else { - // Take input from the associated parameter. - LOG_DEBUG("Finding the original model port for " << idx << "/" << i << "..."); - auto& orig_port = m_reader_to_orig_port.at({idx, i}); - m_subrequests[real_idx]->set_tensor(iport, m_port_to_tensor.at(orig_port).tensor); } } // for(param_base) @@ -395,7 +424,11 @@ void ov::npuw::JustInferRequest::function_prologue(std::size_t idx) { unpack_closure(idx, m_subrequests[real_idx]); } - // 3. Tell the function which results to produce (this time) + // 3. Tell the function which results to produce (this time). + // Note it covers both internal tensors used by other subgraphs as well as + // the Result tensors for the entire network. + // ..Since the tensors allocated for outputs of the networks ARE taken from the + // "funcall_results" if those are produced by funcall results. for (std::size_t i = 0; i < func_desc.compiled_model->outputs().size(); i++) { LOG_DEBUG("Binding result[" << i << "]..."); auto& oport = func_desc.compiled_model->outputs()[i]; @@ -486,7 +519,6 @@ void ov::npuw::JustInferRequest::recreate_subrequests(std::size_t idx) { // overkill - only affected subrequest(s) could be updated instead, // but it is a more complex thing and can be implemented separately connect_subrequests(); - bind_params_results(); m_subrequest_devices[idx] = *comp_model_desc.device_it; } @@ -498,14 +530,24 @@ void ov::npuw::JustInferRequest::run_subrequest_for_success(std::size_t idx, boo // Infer is also fail-safe... bool job_done = false; bool dump_in = false; + bool next_prepared = false; while (!job_done) { bool should_recreate = false; if (m_subrequest_devices[real_idx] != *m_npuw_model->m_compiled_submodels[real_idx].device_it) { + // This may happen when there's multiple NPUW's infer + // requests created and some failure occurs in one of + // those before another reaches this point. LOG_INFO("Recreating subrequest[" << real_idx << "] because model was recompiled for " << *m_npuw_model->m_compiled_submodels[real_idx].device_it << " device."); recreate_subrequests(real_idx); } + // Feeding the global Parameters is now part of the common + // execution pipeline: See how it is done in + // `unsafe_run_this_prep_next()`. Now we only need to bind + // the subrequest' outputs to global Results, if relevant. 
+ bind_global_results(idx); + if (comp_model_desc.replaced_by) { function_prologue(idx); } @@ -513,29 +555,13 @@ void ov::npuw::JustInferRequest::run_subrequest_for_success(std::size_t idx, boo dump_in = true; dump_input_tensors(idx); } + try { LOG_DEBUG("Trying to run subrequest[" << idx << "]..."); - if (m_use_function_pipelining && comp_model_desc.replaced_by) { - // Start THIS request -- everything is ready for it at this point - m_subrequests[real_idx]->start_async(); - if (m_funcall_pipeline[idx].next) { - const auto next_id = m_funcall_pipeline[idx].next.value(); - // Prepare the NEXT request while this one is running - LOG_DEBUG("Unpacking closures for the NEXT subrequest[" << next_id << "]..."); - LOG_BLOCK(); - unpack_closure(next_id, m_funcall_pipeline[real_idx].subrequest); - } - - // Wait for completion of this one. Once completed, - // its' brother was already updated all the right - // closure tensors. Swapping the subrequests will - // happen in the very end to allow dumping the proper - // tensors, if needed (see job_done check). - m_subrequests[real_idx]->wait(); - } else { - m_subrequests[real_idx]->infer(); - } + LOG_BLOCK(); + unsafe_run_this_prep_next(idx, next_prepared); job_done = true; + LOG_DEBUG("Done: " << idx << "(exec subrequest)"); } catch (const std::exception& ex) { LOG_ERROR("Subgraph [" << idx << "] - FAILED to run infer request:" << std::endl << ex.what()); should_recreate = true; @@ -566,8 +592,85 @@ void ov::npuw::JustInferRequest::run_subrequest_for_success(std::size_t idx, boo } } +namespace { +template +void during(R&& r, F&& f) { + r->start_async(); + f(); // expect noexcept + r->wait(); +} +} // namespace + +void ov::npuw::JustInferRequest::unsafe_run_this_prep_next(std::size_t idx, bool& next_prepared) { + auto& comp_model_desc = m_npuw_model->m_compiled_submodels[idx]; + auto real_idx = comp_model_desc.replaced_by.value_or(idx); + auto& this_subr = m_subrequests[real_idx]; + const std::size_t next_idx = next(idx + 1); + + if (comp_model_desc.replaced_by) { + // This is a function call! + if (real_idx == real(next_idx)) { + // The next subgraph is a call to the same function... + // At this point, THIS infer request is already prepared. + // Run it, then prepare it again for the next entrace + if (m_use_function_pipelining) { + // function pipelining is here! and the next rq is ours. + NPUW_ASSERT(m_funcall_pipeline[idx].next.value() == next_idx); + during(this_subr, [&]() { + LOG_DEBUG("Unpacking closures for the NEXT subrequest[" << next_idx << "]..."); + LOG_BLOCK(); + // Note: do it here unconditionally - if this request fails, + // have to resubmit all the data to the recompiled pair anyway + bind_global_parameters(next_idx); + unpack_closure(next_idx, m_funcall_pipeline[real_idx].subrequest); + }); + } else { + // Function pipelining is not used. THIS infer request + // is also the NEXT one. Nothing much to do here + this_subr->infer(); + bind_global_parameters(next_idx); + } + } else { + // The next subgraph is NOT a call to the same function! + // Trigger execution of the current one + // FIXME: pipelining? 
+ if (next_idx == 0) { + // Note: even if m_function_pipelining is ON, + // SWAP won't happen here - see the below check for .next + this_subr->infer(); + } else { + during(this_subr, [&]() { + if (!next_prepared) { + bind_global_parameters(next_idx); + next_prepared = true; + } + if (m_use_function_pipelining && m_funcall_pipeline[idx].next) { + const auto my_next_idx = m_funcall_pipeline[idx].next.value(); + LOG_DEBUG("Unpacking closures for the NEXT subrequest[" << my_next_idx << "]..."); + LOG_BLOCK(); + unpack_closure(my_next_idx, m_funcall_pipeline[real_idx].subrequest); + } + }); + } + } + } else { + // This is a regular subgraph. Start it async to prepare the next + // parameters + if (next_idx == 0) { + this_subr->infer(); + } else { + during(this_subr, [&]() { + if (!next_prepared) { + bind_global_parameters(next_idx); + next_prepared = true; + } + }); + } + } // if (replaced_by) +} + void ov::npuw::JustInferRequest::subscribe_subrequest(std::size_t idx, Completed cb) { - get_real_subrequest(idx)->set_callback(cb); + get_real_subrequest(idx)->set_callback(std::move(cb)); } void ov::npuw::JustInferRequest::complete_subrequest(std::size_t idx) { diff --git a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp index dbeb2bee871f91..e63f2f18b85ece 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp @@ -26,6 +26,7 @@ class JustInferRequest final : public IBaseInferRequest { std::vector get_profiling_info() const override; private: + //////////////////////////////////// // implement IBaseInferRequest void prepare_for_infer() override; bool valid_subrequest(std::size_t idx) const override; @@ -39,18 +40,23 @@ class JustInferRequest final : public IBaseInferRequest { void update_subrequest_links(std::size_t idx) override; + //////////////////////////////////// + // now own API + // FIXME: probably this one should go to the base class too RqPtr get_real_subrequest(std::size_t idx); - void bind_params_results(); + + void bind_global_parameters(std::size_t idx); + void bind_global_results(std::size_t idx); + void function_prologue(std::size_t idx); void unpack_closure(std::size_t idx, RqPtr request); - void connect_subrequests(); + void unsafe_run_this_prep_next(std::size_t idx, bool& next_prepared_p); + void connect_subrequests(); void recreate_subrequests(std::size_t idx); - static constexpr const std::size_t INVALID_IDX = std::numeric_limits::max(); - using LinkFrom = std::pair; std::map m_funcall_result; - using ToSubmodel = std::pair; // Fixme: fourth installment? - std::map> m_reader_to_orig_port; - - // FIXME: STOP USING ov::Output<> AT ALL! It is a weak feature - // These objects get discarded on occasional model recompilation - std::map, size_t> m_port_to_subrequest_idx; - std::map, - ov::Output> - m_port_orig_to_sub; // FIXME: this one likely replaces `m_port_to_subrequest_idx' - bool m_use_function_pipelining = false; struct FuncallPipeline { // A "brother" subrequest for a "primary" subrequest. Initialized only @@ -87,6 +80,16 @@ class JustInferRequest final : public IBaseInferRequest { // subgraphs, but with only function call-related elements // initialized. 
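As a hedged illustration of the GlobalIO bookkeeping declared a few lines below (all names, types and indices here are hypothetical, standalone stand-ins for the patch's members):

#include <cstddef>
#include <map>
#include <vector>

// Stand-in for one subrequest's GlobalIO entry: which global Parameter feeds which
// subrequest input, and which subrequest output produces which global Result.
struct GlobalIOExample {
    std::map<std::size_t, std::size_t> global_params;   // global param idx  -> subrequest input idx
    std::map<std::size_t, std::size_t> global_results;  // global result idx -> subrequest output idx
};

int main() {
    // A model with three Parameters and one Result split across two subgraphs:
    std::vector<GlobalIOExample> subrequests_gio(2);
    subrequests_gio[0].global_params = {{0, 0}, {1, 1}};  // params 0,1 feed subgraph 0 inputs 0,1
    subrequests_gio[1].global_params = {{2, 0}};          // param 2 feeds subgraph 1 input 0
    subrequests_gio[1].global_results = {{0, 0}};         // subgraph 1 output 0 produces result 0
    // In the patch, bind_global_parameters(idx) walks global_params for subgraph idx and
    // either set_tensor()s or copies the global tensors depending on needs_copy(idx);
    // bind_global_results(idx) does the same for global_results.
    return 0;
}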
std::vector m_funcall_pipeline; + + // This structure tracks how every individual subrequest + // access the model's top-level (global, public, etc) parameters + // and results + struct GlobalIO { + using map_t = std::map; + map_t global_params; // param idx -> input idx + map_t global_results; // result idx -> output idx + }; + std::vector m_subrequests_gio; }; } // namespace npuw diff --git a/src/plugins/intel_npu/src/plugin/npuw/logging.cpp b/src/plugins/intel_npu/src/plugin/npuw/logging.cpp index 080e57fbdca5a4..15f0e8cb504c21 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/logging.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/logging.cpp @@ -119,7 +119,7 @@ void ov::npuw::dump_failure(const std::shared_ptr& model, const std:: std::ofstream details(extra_path, std::ios_base::app); auto t = std::time(nullptr); - auto tm = *std::localtime(&t); + const auto& tm = *std::localtime(&t); details << std::put_time(&tm, "%d-%m-%Y %H:%M:%S") << ": Failed to compile submodel for " << device << ", error:\n" << extra << "\n" << std::endl; diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp index 27c1c38f3ab02a..7233f428904c4c 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp @@ -49,7 +49,7 @@ std::vector getAvoids(const std::shared_ptr& model, ::intel_np return {}; } - std::string s = avoids_opt; + std::string s = std::move(avoids_opt); size_t pos = 0; size_t start = 0; @@ -247,6 +247,7 @@ class Compiler { Group::GPtr group = graph->meta(nh).get(); LOG_DEBUG("Group " << group->getId() << ", size " << group->size()); } + LOG_INFO("Done"); } @@ -272,7 +273,7 @@ class Compiler { repeated.insert({reptag_and_matches.first, block}); LOG_INFO("Got " << block.matches.at(0).size() << " repeated blocks of size " << block.matches.size()); } - ens.repeated = repeated; + ens.repeated = std::move(repeated); std::string dump_plan_path = m_cfg.get<::intel_npu::NPUW_ONLINE_DUMP_PLAN>(); if (!dump_plan_path.empty()) { diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/graph.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/graph.hpp index a8992dcc483ae9..58b3e96918de48 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/graph.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/graph.hpp @@ -118,7 +118,7 @@ class Edge { using Ptr = std::shared_ptr; public: - Edge(NodeHandle src, NodeHandle dst) : m_src(src), m_dst(dst) {} + Edge(NodeHandle src, NodeHandle dst) : m_src(std::move(src)), m_dst(std::move(dst)) {} NodeHandle srcNode() const { return m_src; } diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp index c10c96088a3bc4..08e599e86177ad 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp @@ -25,7 +25,7 @@ Group::Group(const std::shared_ptr& node, ade::NodeHandle nh, const std::shared_ptr& g, const std::weak_ptr& snapshot) - : m_nh(nh), + : m_nh(std::move(nh)), m_id(gid), m_graph(g), m_snapshot(snapshot) { diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp index 4231b80467dcc6..be118934b59a2a 100644 --- 
a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp @@ -252,7 +252,7 @@ void Snapshot::fuseInputs() { } if (!group_prod->hasCycle(group_prod_other) && !group_prod_other->hasCycle(group_prod)) { // no cycles -> fusion allowed - inputs_to_fuse.second = group_prod_other; + inputs_to_fuse.second = std::move(group_prod_other); break; } } @@ -335,7 +335,7 @@ void Snapshot::identifyUniques() { // thus check and use only the single initial layer auto ov_node = group->getInitialNode(); auto metadesc = ov::npuw::online::util::getMetaDesc(ov_node); - auto avoids = group->avoidedTargets(); + const auto& avoids = group->avoidedTargets(); uniques[{metadesc, avoids}].insert(group); } @@ -393,7 +393,7 @@ void Snapshot::mergeUniques() { std::shared_ptr Snapshot::tryGrowRepeatingGroups(const detail::GPtrSet& repeating_groups) { auto this_rep_tag = (*(repeating_groups.begin()))->repeated(); // should be the same for each group inside - auto this_avoided = (*(repeating_groups.begin()))->avoidedTargets(); + const auto& this_avoided = (*(repeating_groups.begin()))->avoidedTargets(); std::unordered_map, std::vector>> mics; @@ -401,10 +401,17 @@ std::shared_ptr Snapshot::tryGrowRepeatingGroups(const detail::GPtrSet // FIXME: this was introduced to make the partitioning // the same every run when created the same way. + // This std::sort allows to prioritize the groups from the tail + // of the original model. It's possible due to preservation of + // group IDs in topological order throughout the whole partitioning process. + // In the networks we're looking at, ensuring the merge order from the bottom + // of the network gives a better generalization for the identified repeated blocks, + // e.g. we can guarantee we can find one more, which otherwise would fuse into + // head or tail (depending on the topology). std::sort(repeating_groups_sorted.begin(), repeating_groups_sorted.end(), [&](const Group::GPtr& gptr_a, const Group::GPtr& gptr_b) { - return gptr_a->getId() < gptr_b->getId(); + return gptr_a->getId() > gptr_b->getId(); }); for (const auto& group : repeating_groups_sorted) { @@ -579,9 +586,9 @@ void Snapshot::completeRepeating(const std::shared_ptr& reptag, const for (const auto& gptr : gset) { for (const auto& layer : gptr->getContent()) { // FIXME: should it be a part of group's API instead? - auto metadesc = ov::npuw::online::util::getMetaDesc(layer); - auto archetype = gptr->getReptrack(layer); - matches[{metadesc, archetype}].insert(layer); + const auto& metadesc = ov::npuw::online::util::getMetaDesc(layer); + const auto& archetype = gptr->getReptrack(layer); + matches[{std::move(metadesc), std::move(archetype)}].insert(layer); } } @@ -590,7 +597,7 @@ void Snapshot::completeRepeating(const std::shared_ptr& reptag, const // equal to the number of groups. // 2. Total count of archetypes must be equal to size of every individual group for (const auto& elem : matches) { - auto node_set = elem.second; + const auto& node_set = elem.second; if (node_set.size() != gset.size()) { OPENVINO_THROW("Online partitioning couldn't match properly " "during repeated blocks pass (node archetype). 
" diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/utils/utils.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/utils/utils.cpp index 7bfe63f753a4e0..998989883653a2 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/utils/utils.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/utils/utils.cpp @@ -69,8 +69,8 @@ std::optional ov::npuw::online::util::parseAvoid(const Avoid avoid; avoid.type = type == "Op" ? AvoidType::OP : AvoidType::PATTERN; - avoid.pattern = pattern; - avoid.device = device; + avoid.pattern = std::move(pattern); + avoid.device = std::move(device); return std::optional{avoid}; } diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp index 71d4b4ee584c00..0be3ee97f22f86 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp @@ -8,6 +8,7 @@ #include "../util.hpp" #include "intel_npu/al/config/npuw.hpp" #include "online/compiler.hpp" +#include "online/utils/utils.hpp" // getMetaDesc #include "openvino/op/convert.hpp" #include "openvino/op/slice.hpp" #include "openvino/op/util/op_types.hpp" @@ -66,6 +67,19 @@ struct BankContains { } }; +struct ProducesResult { + bool operator()(const std::shared_ptr& node) { + std::set> all_readers; + for (auto&& out : node->outputs()) { + const auto& these_readers = out.get_target_inputs(); + all_readers.insert(these_readers.begin(), these_readers.end()); + } + return std::any_of(all_readers.begin(), all_readers.end(), [](const ov::Input& iport) { + return ov::op::util::is_output(iport.get_node()); + }); + } +}; + ov::npuw::Ensemble load_groups(const std::shared_ptr& model, const std::string& path_to_plan) { // Try to load the partitioning plan... NPUW_ASSERT(!path_to_plan.empty()); @@ -132,7 +146,7 @@ ov::npuw::Ensemble load_groups(const std::shared_ptr& model, const st LOG_INFO("Found " << repeated.size() << " different repeated block(s)"); - return ov::npuw::Ensemble{get_float_attr(root, "gflops"), partitions, repeated}; + return ov::npuw::Ensemble{get_float_attr(root, "gflops"), std::move(partitions), std::move(repeated)}; } class Partitioner { @@ -216,6 +230,17 @@ class Partitioner { } } + // FIXME: a fix to overcome the model with duplicate friendly names in constants + std::string get_unique_name(const std::shared_ptr node_ptr) { + if (!node_ptr) { + OPENVINO_THROW("NPUW: Fatal error"); + } + if (!ov::is_type(node_ptr)) { + OPENVINO_THROW("NPUW: trying to get a unique name of a non-Constant node"); + } + return node_ptr->get_friendly_name() + " with meta " + ov::npuw::online::util::getMetaDesc(node_ptr); + } + public: Partitioner(const std::shared_ptr& _model, ov::npuw::Ensemble& _ens, @@ -243,6 +268,7 @@ class Partitioner { void propagateConverts(const std::string& func_name); void propagateWeights(const std::string& func_name); void propagateScalars(const std::string& func_name); + void propagateConvertsOut(const std::string& func_name); void sanityCheck(const std::string& func_name); void saveTinyConstants(const std::string& func_name); void saveScaleFactors(const std::string& func_name); @@ -320,7 +346,11 @@ void Partitioner::identifySubgraphs() { // Input layers may be connected to the same producer nodes, weights, // or parameters. Cache those to avoid duplicating the parameters. 
std::unordered_map input_mapping; - std::unordered_set slice_params; + + // In several cases a model can be slightly altered after the partitioning + // plan was done. E.g., new slices or converts may be added on inputs/ + // outputs. Add a special handling for this case. + std::unordered_set extra_params; auto parameter_as_is = [&input_mapping](NodeSPtr orig_node) { auto it = input_mapping.find(orig_node); if (it != input_mapping.end()) { @@ -368,6 +398,7 @@ void Partitioner::identifySubgraphs() { for (auto&& input_desc : input_layer_ptr->inputs()) { LOG_BLOCK(); const auto input_node = input_desc.get_source_output().get_node_shared_ptr(); + LOG_DEBUG("Checking " << input_node); if (ov::op::util::is_parameter(input_node)) { // Input to this subgraph layer is already a Parameter (original graph @@ -382,17 +413,18 @@ void Partitioner::identifySubgraphs() { // FIXME: Finally introduce my own test routine for that! // Don't do anything here too. continue; - } else if (ov::is_type(input_node) && + } else if ((ov::is_type(input_node) || + ov::is_type(input_node)) && ov::op::util::is_parameter(input_node->input(0).get_source_output().get_node_shared_ptr())) { // So the situation is: // - a group has an input layer - // - which reads from a Slice + // - which reads from a Slice or Convert // - which reads from a Parameter // This happens when an offline plan is used with a kvcache // model extended with slices to maintain zero-copy (LLM case) - auto slice_param = input_node->input(0).get_source_output().get_node_shared_ptr(); - input_mapping[input_node] = slice_param; - slice_params.insert(slice_param); + auto extra_param = input_node->input(0).get_source_output().get_node_shared_ptr(); + input_mapping[input_node] = extra_param; + extra_params.insert(extra_param); } else { // Ok, this input is connected to some other node's output // Replace this connection with a link to a newly created Parameter @@ -422,20 +454,27 @@ void Partitioner::identifySubgraphs() { // Also track the connectivity LOG_VERB("Populating _parameters..."); group.sg._parameters.clear(); + + // Now (after unknown slices/converts were introduced) params may be referred to + // from multiple places in the model - so may be added multiple times to the + // input mapping. This is a w/a, better they're added only once (TODO). + // This set handles it. + std::set> unique_params; for (auto&& im : input_mapping) { LOG_BLOCK(); auto& src_node = im.first; auto& maybe_param = im.second; - if (ov::op::util::is_parameter(maybe_param)) { + if (ov::op::util::is_parameter(maybe_param) && unique_params.count(maybe_param) == 0) { // some Parameters could fold into Constants, so only add real parameters auto this_param = std::static_pointer_cast(maybe_param); group.sg._parameters.push_back(this_param); - if (src_node != this_param && slice_params.count(this_param) == 0) { + unique_params.insert(maybe_param); + if (src_node != this_param && extra_params.count(this_param) == 0) { // Parameter node and the recorded src node are different // so it is a cut-off point (see above, parameter_from()): // - record connectivity between subgraphs. 
- // Exception: param is registered via slice - const auto link_from = result_cache.at(src_node); + // Exception: param is registered via slice or convert + const auto& link_from = result_cache.at(src_node); const auto link_to = LinkPtrTo{this_group_idx, this_param}; subgraph_ptr_links[link_to] = link_from; } @@ -448,6 +487,45 @@ void Partitioner::identifySubgraphs() { // connections with Result stubs (but remember where these outputs // were going to). LOG_VERB("Populating _results..."); + { + // Before populating the output layers, do a quick Result->Output Layer + // propagation to extend out output layers with the layers not mentioned + // in the partitioning plan. This may happen if the plan was already exported, + // but some changes were done to the model (like kvcache regrouping) after + // that. + // The idea is simple: walk over the group's all_layers and check if those + // are producing results. If they are and they're not parts of the output_layers, + // add them there. + // Another case which is handled here is an extra Convert which can be + // set as part of kvcache conversion routune. + LOG_BLOCK(); + std::set output_layers_cache(group.output_layers.begin(), group.output_layers.end()); + + // Have to switch clang-format here to make cpplint happy + // clang-format off + + for (auto&& op_name : group.all_layers) { + auto layer_ptr = node_id_cache.at(op_name); + if (ProducesResult {}(layer_ptr) && !output_layers_cache.count(op_name)) { + LOG_VERB("Adding " << op_name << " as an extra output layer since it is produces a Result"); + output_layers_cache.insert(op_name); + group.output_layers.push_back(op_name); + } + for (auto&& oport : layer_ptr->outputs()) { + for (auto&& inport : oport.get_target_inputs()) { + auto reader_ptr = inport.get_node(); + if (ov::is_type(reader_ptr) && + ProducesResult {}(reader_ptr->shared_from_this()) && + !output_layers_cache.count(reader_ptr->get_friendly_name())) { + const auto& cvt_name = reader_ptr->get_friendly_name(); + output_layers_cache.insert(cvt_name); + group.output_layers.push_back(cvt_name); + } + } + } + } // for(all_layers) + // clang-format on + } std::size_t num_optimized_out_layers = 0u; for (auto&& output_layer_name : group.output_layers) { LOG_VERB("Processing group's output layer " << output_layer_name); @@ -486,7 +564,7 @@ void Partitioner::identifySubgraphs() { // at the npuw::CompiledModel level) auto reader_node_ptr = r.get_node()->shared_from_this(); if (ov::op::util::is_output(reader_node_ptr)) { - maybe_result = reader_node_ptr; + maybe_result = std::move(reader_node_ptr); } else if (group_nodes.find(reader_node_ptr) == group_nodes.end()) { has_external_readers = true; } @@ -557,11 +635,11 @@ std::vector Partitioner::initFunctionPipeline(FunctionPipelineType for (auto&& part_sg : P.subgraphs) { if (!part_sg._repeated_id.empty()) { auto pfix = "__" + std::to_string(idx[part_sg._repeated_id]++); - auto fcid = func_pipeline_type == FunctionPipelineType::FOLD - ? part_sg._repeated_id // with folding, functions of the - // same group have the same id - : part_sg._repeated_id + pfix; // with CWAI (which is not checked here) - // every function gets its own id + const auto& fcid = func_pipeline_type == FunctionPipelineType::FOLD + ? 
part_sg._repeated_id // with folding, functions of the + // same group have the same id + : part_sg._repeated_id + pfix; // with CWAI (which is not checked here) + // every function gets its own id auto& u = all_functions[fcid]; u.refs.push_back(std::ref(part_sg)); u.mdls.push_back( @@ -651,7 +729,9 @@ void Partitioner::propagate(const std::string& func_name, for (auto&& node_ptr : model->get_ordered_ops()) { if (test(node_ptr)) { LOG_DEBUG("Process node " << node_ptr); - const auto& this_layer_name = node_ptr->get_friendly_name(); + const auto& this_layer_name = ov::is_type(node_ptr) + ? get_unique_name(node_ptr) + : node_ptr->get_friendly_name(); ProtoReaders this_node_readers, this_node_proto_readers; for (auto&& this_reader_iport : node_ptr->output(0).get_target_inputs()) { @@ -688,7 +768,7 @@ void Partitioner::propagate(const std::string& func_name, const auto& this_writer_proto = bank_writer_iter->second; auto suitable_bank_iter = std::find_if(bank.begin(), bank.end(), BankContains{this_writer_proto}); if (suitable_bank_iter == bank.end()) { - OPENVINO_THROW("Fatal. Just fatal"); + OPENVINO_THROW("Fatal: no suitable bank found"); } // FIXME: add IF(DEBUG) to put the whole thing under condition LOG_DEBUG("Register that " << this_layer_name << " is in fact " << this_writer_proto); @@ -753,7 +833,7 @@ void Partitioner::propagateConverts(const std::string& func_name) { // template registers its own bank. The eligibility is defined with // the following rules: // 0. The node is missing in the matching bank - // 1. The Convert node reads a Const + // 1. The Convert node reads a Const (or Parameter, since recently) // 2. The Convert node has a sole consumer // 3. This sole consumer is present in the bank.'' // @@ -761,10 +841,15 @@ void Partitioner::propagateConverts(const std::string& func_name) { auto& bank = ens.repeated.at(func_name).matches; auto match_fcn = [&](const std::shared_ptr& node_ptr) -> bool { const auto& this_layer_name = node_ptr->get_friendly_name(); + if (!ov::is_type(node_ptr)) { + return false; + } + const auto& input_node_ptr = node_ptr->input(0).get_source_output().get_node_shared_ptr(); return ov::is_type(node_ptr) && - bank.end() == std::find_if(bank.begin(), bank.end(), BankContains{this_layer_name}) // (0) - && ov::op::util::is_constant(node_ptr->input(0).get_source_output().get_node_shared_ptr()) // (1) - && node_ptr->output(0).get_target_inputs().size() == 1 // (2) + bank.end() == std::find_if(bank.begin(), bank.end(), BankContains{this_layer_name}) // (0) + && (ov::op::util::is_constant(input_node_ptr) || // (1) + ov::op::util::is_parameter(input_node_ptr)) // (1) + && node_ptr->output(0).get_target_inputs().size() == 1 // (2) && bank.end() != std::find_if( @@ -786,7 +871,8 @@ void Partitioner::propagateWeights(const std::string& func_name) { auto& const_bank = ens.repeated.at(func_name).consts; auto& layer_bank = ens.repeated.at(func_name).matches; auto match_fcn = [&](const std::shared_ptr& node_ptr) -> bool { - const auto& this_layer_name = node_ptr->get_friendly_name(); + const auto& this_layer_name = + ov::is_type(node_ptr) ? get_unique_name(node_ptr) : node_ptr->get_friendly_name(); return ov::is_type(node_ptr) && const_bank.end() == std::find_if(const_bank.begin(), const_bank.end(), BankContains{this_layer_name}) // FIXME: workaround for scalars which might pass the weights check @@ -822,14 +908,15 @@ void Partitioner::propagateScalars(const std::string& func_name) { // The propagation procedure is generic, but the matching isn't. 
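// As a side note for readers of the propagate*() routines: a match "bank" here can be
// pictured as a list of equivalence classes of layer names, one class per prototype layer.
// The sketch below is a simplified model with assumed types (Bank and bank_contains are
// illustrative names, not the actual npuw definitions), showing the kind of lookup a
// BankContains-style predicate performs when used with std::find_if over the bank.

#include <algorithm>
#include <set>
#include <string>
#include <vector>

using Bank = std::vector<std::set<std::string>>;  // each entry groups names matched to one prototype

// True if any equivalence class in the bank already holds the given layer name.
inline bool bank_contains(const Bank& bank, const std::string& layer_name) {
    return std::any_of(bank.begin(), bank.end(), [&](const std::set<std::string>& entry) {
        return entry.count(layer_name) != 0;
    });
}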
auto& scalar_bank = ens.repeated.at(func_name).scalars; auto match_fcn = [&](const std::shared_ptr& node_ptr) -> bool { - const auto& this_layer_name = node_ptr->get_friendly_name(); + const auto& this_layer_name = + ov::is_type(node_ptr) ? get_unique_name(node_ptr) : node_ptr->get_friendly_name(); auto res = ov::is_type(node_ptr) && scalar_bank.end() == std::find_if(scalar_bank.begin(), scalar_bank.end(), BankContains{this_layer_name}); if (ov::is_type(node_ptr) && scalar_bank.end() != std::find_if(scalar_bank.begin(), scalar_bank.end(), BankContains{this_layer_name})) { // FIXME: incorrect logic! This will also increment in case of multiple scalar outputs. - // Instead it should only take shared scalars in to account! + // Instead it should only take shared scalars into account! dup_scalars[{func_name, this_layer_name}]++; } return res; @@ -839,6 +926,93 @@ void Partitioner::propagateScalars(const std::string& func_name) { LOG_VERB("Done"); } +void Partitioner::propagateConvertsOut(const std::string& func_name) { + LOG_VERB("Propagate Converts on output nodes to match banks for model " << model->get_friendly_name() << "..."); + LOG_BLOCK(); + + using ProtoWriter = std::pair; + std::map proto_reader_of; + + auto& model_group = all_functions.at(func_name).mdls; + auto& bank = ens.repeated.at(func_name).matches; + + // Nodes we're looking for: + // 1. Converts + // 2. Missing in our match banks + // 3. Its producer should be present in our match banks + // 4. Standing in front of results + auto test = [&](const std::shared_ptr& node_ptr) { + if (!ov::is_type(node_ptr)) { // 1 + return false; + } + auto this_layer_name = node_ptr->get_friendly_name(); + if (bank.end() != std::find_if(bank.begin(), bank.end(), BankContains{this_layer_name})) { // 2 + return false; + } + auto in_layer_name = node_ptr->input(0).get_source_output().get_node_shared_ptr()->get_friendly_name(); + if (bank.end() == std::find_if(bank.begin(), bank.end(), BankContains{in_layer_name})) { // 3 + return false; + } + const auto& these_readers = node_ptr->output(0).get_target_inputs(); + return these_readers.size() == 1 && + ov::op::util::is_output(these_readers.begin()->get_node()->shared_from_this()); // 4 + }; + + for (auto&& model : model_group) { + LOG_DEBUG("Process function call " << model->get_friendly_name() << "..."); + LOG_BLOCK(); + + for (auto&& node_ptr : model->get_ordered_ops()) { + if (test(node_ptr)) { + LOG_DEBUG("Process node " << node_ptr); + const auto& this_layer_name = node_ptr->get_friendly_name(); + + const auto& writer_out = node_ptr->input(0).get_source_output(); + { + LOG_BLOCK(); + LOG_DEBUG("Written by " << writer_out); + } + ProtoWriter this_writer = {writer_out.get_node_shared_ptr()->get_friendly_name(), + writer_out.get_index()}; + + LOG_DEBUG("Looking for proto accessess..."); + ProtoWriter this_proto_writer = {layer_to_prototype.at(this_writer.first), this_writer.second}; + auto bank_writer_iter = proto_reader_of.find(this_proto_writer); + if (bank_writer_iter == proto_reader_of.end()) { + // Register a new occasion + LOG_DEBUG("Register that " << this_layer_name << " is written by " << this_proto_writer.first + << " : " << this_proto_writer.second); + proto_reader_of[this_proto_writer] = this_layer_name; + layer_to_prototype[this_layer_name] = this_layer_name; + bank.push_back({this_layer_name}); + } else { + // Find a suitable bank and find node there + const auto& this_reader_proto = bank_writer_iter->second; + auto suitable_bank_iter = std::find_if(bank.begin(), bank.end(), 
BankContains{this_reader_proto}); + if (suitable_bank_iter == bank.end()) { + OPENVINO_THROW("Fatal: No suitable bank found"); + } + LOG_DEBUG("Register that " << this_layer_name << " is in fact " << this_reader_proto); + LOG_DEBUG("- As it is written by:"); + { + LOG_BLOCK(); + LOG_DEBUG(this_writer.first << " : " << this_writer.second); + } + LOG_DEBUG("- Which in turn is:"); + { + LOG_BLOCK(); + LOG_DEBUG(this_proto_writer.first << " : " << this_proto_writer.second); + } + suitable_bank_iter->insert(this_layer_name); + layer_to_prototype[this_layer_name] = this_reader_proto; + } + } + } // for(ordered_ops) + } + + LOG_VERB("Done"); +} + void Partitioner::sanityCheck(const std::string& func_name) { LOG_VERB("Sanity check function " << func_name << " in model " << model->get_friendly_name() << "..."); LOG_BLOCK(); @@ -914,9 +1088,8 @@ void Partitioner::sanityCheck(const std::string& func_name) { for (auto&& node : submodel->get_ordered_ops()) { if (ov::op::util::is_constant(node) && - consts.end() == std::find_if(consts.begin(), consts.end(), BankContains{node->get_friendly_name()}) && - scalars.end() == - std::find_if(scalars.begin(), scalars.end(), BankContains{node->get_friendly_name()})) { + consts.end() == std::find_if(consts.begin(), consts.end(), BankContains{get_unique_name(node)}) && + scalars.end() == std::find_if(scalars.begin(), scalars.end(), BankContains{get_unique_name(node)})) { LOG_ERROR("Fatal: Const " << node->get_friendly_name() << "{ " << node->output(0) << " }" << " wasn't found in any bank"); LOG_BLOCK(); @@ -1036,7 +1209,7 @@ void Partitioner::saveRepeatedConstants(const std::string& func_name) { for (auto&& m : model_group) { for (auto&& n : m->get_ordered_ops()) { if (ov::is_type(n)) { - const_cache[n->get_friendly_name()] = std::static_pointer_cast(n); + const_cache[get_unique_name(n)] = std::static_pointer_cast(n); } } } // for(models) @@ -1267,7 +1440,7 @@ void Partitioner::createFunction(FunctionPipeline& func_ggg) { auto new_param = std::make_shared(prod_output.get_element_type(), prod_output.get_partial_shape()); input_desc.replace_source_output(new_param); // (n)/1/i/a - function._model->add_parameters({new_param}); + function._model->add_parameters({std::move(new_param)}); LOG_DEBUG("Register Parameter[" << new_param_idx << "] as input to " << iport.first << " / " << iport.second); function._param_mapping[iport] = new_param_idx; // (n)/1/i/b @@ -1591,7 +1764,7 @@ ov::npuw::Partitioning ov::npuw::getPartitioning(const std::shared_ptrget_parameters(); subgraph._results = model->get_results(); subgraph._sinks = model->get_sinks(); - return Partitioning{std::vector{subgraph}}; + return Partitioning{std::vector{std::move(subgraph)}}; } // Handle funcall everywhere, if needed @@ -1605,10 +1778,10 @@ ov::npuw::Partitioning ov::npuw::getPartitioning(const std::shared_ptr return false; // root hasn't changed }; - register_matcher(std::make_shared(sqrt, "TagRMSNorm"), callback); + register_matcher(std::make_shared(sqrt, "TagRMSNorm"), std::move(callback)); } } // namespace patterns diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp index 3184915f58786c..6059206a9edd54 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp @@ -207,7 +207,7 @@ bool DCOFFPassBase::matcher_callback(ov::pass::pattern::Matcher& m) { LOG_DEBUG("Matched: " << matched_paramB << " - 
parameter to remove..."); // Record mapping from the Scale coeff paramter to the Real weight parameter - m_params_to.get().scales[matched_paramB] = matched_paramA; + m_params_to.get().scales[matched_paramB] = std::move(matched_paramA); // Disconnect Multiply and Convert from their outputs auto matched_mulply = node_to_output.at(mulply).get_node_shared_ptr(); @@ -220,8 +220,8 @@ bool DCOFFPassBase::matcher_callback(ov::pass::pattern::Matcher& m) { } }; LOG_DEBUG("Dropping the connections..."); - drop_outputs(matched_mulply); - drop_outputs(matched_convrt); + drop_outputs(std::move(matched_mulply)); + drop_outputs(std::move(matched_convrt)); LOG_DEBUG("Reconnecting the root..."); reconnect_root_to_convert(m); @@ -352,8 +352,8 @@ bool DCOFFPassBase::matcher_callback(ov::pass::pattern::Matcher& m) { // it can be probably eliminated as well) // Record mapping from the Scale coeff paramter to the Real weight parameter - m_params_to.get().zerops[matched_paramA] = matched_valueB; - m_params_to.get().scales[matched_paramC] = matched_paramA; + m_params_to.get().zerops[matched_paramA] = std::move(matched_valueB); + m_params_to.get().scales[matched_paramC] = std::move(matched_paramA); // Disconnect Multiply and Convert from their outputs auto matched_mulply = node_to_output.at(mulply).get_node_shared_ptr(); @@ -366,8 +366,8 @@ bool DCOFFPassBase::matcher_callback(ov::pass::pattern::Matcher& m) { } }; LOG_DEBUG("Dropping the connections..."); - drop_outputs(matched_mulply); - drop_outputs(matched_convrt); + drop_outputs(std::move(matched_mulply)); + drop_outputs(std::move(matched_convrt)); LOG_DEBUG("Reconnecting the root..."); reconnect_root(m); @@ -451,7 +451,6 @@ DCOFFPassReshape2::DCOFFPassReshape2(DCOffMode dcoff_mode, ov::element::Type dco auto scalar = opp::wrap_type(); auto reshpe = opp::wrap_type({mulply, scalar}); - // Note: Use [=] to make sure the above objects stay alive in the callback auto callback = [=](ov::pass::pattern::Matcher& m) { auto& node_to_output = m.get_pattern_value_map(); auto matched_nodeA = node_to_output.at(paramA).get_node_shared_ptr(); @@ -485,8 +484,8 @@ DCOFFPassReshape2::DCOFFPassReshape2(DCOffMode dcoff_mode, ov::element::Type dco // Reshape will be reconnected to Convert directly // Record mapping from the Scale coeff parameter to the Real weight parameter - pref.get().zerops[matched_paramA] = matched_valueB; - pref.get().scales[matched_paramC] = matched_paramA; + pref.get().zerops[matched_paramA] = std::move(matched_valueB); + pref.get().scales[matched_paramC] = std::move(matched_paramA); // Disconnect Multiply and Convert from their outputs auto matched_mulply = node_to_output.at(mulply).get_node_shared_ptr(); @@ -499,7 +498,7 @@ DCOFFPassReshape2::DCOFFPassReshape2(DCOffMode dcoff_mode, ov::element::Type dco } }; LOG_DEBUG("Dropping the connections..."); - drop_outputs(matched_mulply); + drop_outputs(std::move(matched_mulply)); drop_outputs(matched_convrt); LOG_DEBUG("Reconnecting the Root..."); @@ -510,7 +509,7 @@ DCOFFPassReshape2::DCOFFPassReshape2(DCOffMode dcoff_mode, ov::element::Type dco } return false; // root node hasn't changed }; - register_matcher(std::make_shared(reshpe, "TagDCOFFReshape2"), callback); + register_matcher(std::make_shared(reshpe, "TagDCOFFReshape2"), std::move(callback)); } //------------------------------------------------------------------------------ @@ -573,7 +572,7 @@ CWAI1::CWAI1(CWAI1::Results scales) { return true; }; // matcher_callback - register_matcher(std::make_shared(mulply, "TagCWAI1"), matcher_callback); + 
register_matcher(std::make_shared(mulply, "TagCWAI1"), std::move(matcher_callback)); } // FIXME: Think how it can be unified with the above. THIS is the GPTQ verision @@ -626,7 +625,7 @@ CWAI2::CWAI2(CWAI2::Results scales) { return true; }; // matcher_callback - register_matcher(std::make_shared(mulply, "TagCWAI2"), matcher_callback); + register_matcher(std::make_shared(mulply, "TagCWAI2"), std::move(matcher_callback)); } // As seen in LLaMa-v2-7b: diff --git a/src/plugins/intel_npu/src/plugin/npuw/util.cpp b/src/plugins/intel_npu/src/plugin/npuw/util.cpp index 0a501b0d834a07..417c259478018c 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/util.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/util.cpp @@ -82,6 +82,7 @@ inline int8_t upc(int8_t h) { return h | (-((h & (1 << 3)) >> 3) & (-8)); } +// NOTE: This routine implements the OLD ORDER #define avx2_i4toi8(vinput, vout0, vout1) \ { \ __m256i himask = _mm256_broadcastb_epi8(_mm_set_epi32(0, 0, 0, 0xF0)); \ @@ -133,8 +134,82 @@ inline __m128i avx2_u8tof16_lo(__m128i vu8, __m256 z, __m256 s) { return avx2_u8tof16_hi(vu8h, z, s); } -inline __m128i avx2_f32tof16(__m256 f32vec) { - return _mm256_cvtps_ph(f32vec, _MM_FROUND_TO_NEAREST_INT); +// NOTE: This routine implements the NEW ORDER +inline void avx2_u4tof16(__m256i vinput, __m128i vout[8], __m256 zvalVec, __m256 svalVec[8]) { + // vinput - 64 x u4 elements - 256 bits + // vout[] - 64 (8x8) x f16 elements + + // NOTE: This is largely a copy of unpack_u4f16() {{ + __m256i himask = _mm256_set1_epi8(static_cast(0xF0)); + __m256i lomask = _mm256_set1_epi8(static_cast(0x0F)); + + // unpacking with interleaving + __m256i vht = _mm256_and_si256(vinput, himask); + __m256i xmmUnpackedLo = _mm256_srli_epi16(vht, 4); // 32 x i8 - Extracting High Nibbles + __m256i xmmUnpackedHi = _mm256_and_si256(vinput, lomask); // 32 x i8 - Extracting Low Nibbles + + // need 4 portions of 16 x i8 elements + __m128i unpacked32LoHi = _mm256_castsi256_si128(xmmUnpackedLo); // lower 16 x i8 - Lower 16 of High Nibbles + __m128i unpacked32LoLo = _mm256_extractf128_si256(xmmUnpackedLo, 1); // higher 16 x i8 - Higher 16 of High Nibbles + + __m128i unpacked32HiHi = _mm256_castsi256_si128(xmmUnpackedHi); // lower 16 x i8 - Lower 16 of Low Nibbles + __m128i unpacked32HiLo = _mm256_extractf128_si256(xmmUnpackedHi, 1); // higher 16 x i8 - Higher 16 of Low Nibbles + + // Rearranging of scales + __m256i indices = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7); + // Extracting all 64 scales as per the indices specified above + __m256 scale_v_rearranged[] = {_mm256_permutevar8x32_ps(svalVec[0], indices), + _mm256_permutevar8x32_ps(svalVec[1], indices), + _mm256_permutevar8x32_ps(svalVec[2], indices), + _mm256_permutevar8x32_ps(svalVec[3], indices), + _mm256_permutevar8x32_ps(svalVec[4], indices), + _mm256_permutevar8x32_ps(svalVec[5], indices), + _mm256_permutevar8x32_ps(svalVec[6], indices), + _mm256_permutevar8x32_ps(svalVec[7], indices)}; + + // Scaling should happen like this: + // low_nibble[0]->scale[0], high_nibble[0]->scale[1]...low_nibble[31]->scale[60],high_nibble[31]->scale[61] + + // Extracting all the even-indexed scales for the low nibbles + __m256 scale_v_even[] = { + _mm256_permute2f128_ps(scale_v_rearranged[0], scale_v_rearranged[1], 0x20), + _mm256_permute2f128_ps(scale_v_rearranged[2], scale_v_rearranged[3], 0x20), + _mm256_permute2f128_ps(scale_v_rearranged[4], scale_v_rearranged[5], 0x20), + _mm256_permute2f128_ps(scale_v_rearranged[6], scale_v_rearranged[7], 0x20), + }; + + // Extracting all the odd-indexed 
scales for the high nibbles + __m256 scale_v_odd[] = { + _mm256_permute2f128_ps(scale_v_rearranged[0], scale_v_rearranged[1], 0x31), + _mm256_permute2f128_ps(scale_v_rearranged[2], scale_v_rearranged[3], 0x31), + _mm256_permute2f128_ps(scale_v_rearranged[4], scale_v_rearranged[5], 0x31), + _mm256_permute2f128_ps(scale_v_rearranged[6], scale_v_rearranged[7], 0x31), + }; + + // converting to 64 x f16 + // Higher 16 of High Nibbles + __m128i f16LoLo[] = {avx2_u8tof16_hi(unpacked32LoLo, zvalVec, scale_v_odd[2]), + avx2_u8tof16_lo(unpacked32LoLo, zvalVec, scale_v_odd[3])}; + // Lower 16 of High Nibbles + __m128i f16LoHi[] = {avx2_u8tof16_hi(unpacked32LoHi, zvalVec, scale_v_odd[0]), + avx2_u8tof16_lo(unpacked32LoHi, zvalVec, scale_v_odd[1])}; + // Higher 16 of Low Nibbles + __m128i f16HiLo[] = {avx2_u8tof16_hi(unpacked32HiLo, zvalVec, scale_v_even[2]), + avx2_u8tof16_lo(unpacked32HiLo, zvalVec, scale_v_even[3])}; + // Lower 16 of Low Nibbles + __m128i f16HiHi[] = {avx2_u8tof16_hi(unpacked32HiHi, zvalVec, scale_v_even[0]), + avx2_u8tof16_lo(unpacked32HiHi, zvalVec, scale_v_even[1])}; + + // interleaving back: + // Interleaving lower 8 of low nibbles with lower 8 of high nibbles and so on + vout[0] = _mm_unpacklo_epi16(f16HiHi[0], f16LoHi[0]); + vout[1] = _mm_unpackhi_epi16(f16HiHi[0], f16LoHi[0]); + vout[2] = _mm_unpacklo_epi16(f16HiHi[1], f16LoHi[1]); + vout[3] = _mm_unpackhi_epi16(f16HiHi[1], f16LoHi[1]); + vout[4] = _mm_unpacklo_epi16(f16HiLo[0], f16LoLo[0]); + vout[5] = _mm_unpackhi_epi16(f16HiLo[0], f16LoLo[0]); + vout[6] = _mm_unpacklo_epi16(f16HiLo[1], f16LoLo[1]); + vout[7] = _mm_unpackhi_epi16(f16HiLo[1], f16LoLo[1]); } inline __m256 avx2_load_scale(const int8_t* data, ov::element::Type type) { @@ -622,14 +697,14 @@ void unpack_u4f16(const ov::SoPtr& from, // Only single-size ZP is supported NPUW_ASSERT(zerop->get_size() == 1); - const auto from_shape = from->get_shape(); + const auto& from_shape = from->get_shape(); NPUW_ASSERT(from_shape.back() % 64 == 0); // 2-channel (Symmetric) and 3-channel (group-wise) // scale factors are supported. The scale/value loop // iteration is based on stotal, so should work for // both cases. 
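// For reference, the scalar counterpart of the vectorized avx2_u4tof16() path above can be
// sketched as below (an illustrative, simplified routine producing f32 for clarity rather
// than f16; the helper name and signature are assumptions, not the plugin's actual code).
// Each byte packs two u4 values: the low nibble becomes element 2*i, the high nibble becomes
// element 2*i + 1, and every element is dequantized as (value - zero_point) * scale with a
// per-element scale and a single shared zero point.

#include <cstddef>
#include <cstdint>

inline void unpack_u4_to_f32_ref(const uint8_t* src, const float* scale, float zero_point,
                                 float* dst, std::size_t num_elements) {
    for (std::size_t i = 0; i < num_elements / 2; ++i) {
        const uint8_t packed = src[i];
        const float lo = static_cast<float>(packed & 0x0F);  // low nibble -> even output index
        const float hi = static_cast<float>(packed >> 4);    // high nibble -> odd output index
        dst[2 * i]     = (lo - zero_point) * scale[2 * i];
        dst[2 * i + 1] = (hi - zero_point) * scale[2 * i + 1];
    }
}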
- const auto scale_shape = scale->get_shape(); + const auto& scale_shape = scale->get_shape(); NPUW_ASSERT(scale_shape.size() == 3 || scale_shape.size() == 2); if (scale_shape.size() == 3) { NPUW_ASSERT(scale_shape[0] == from_shape[0]); @@ -799,10 +874,10 @@ void unpack_u4f16_z(const ov::SoPtr& from, // Only single-size ZP is supported NPUW_ASSERT(zerop->get_size() == 1); - const auto from_shape = from->get_shape(); + const auto& from_shape = from->get_shape(); NPUW_ASSERT(from_shape.back() % 64 == 0); - const auto scale_shape = scale->get_shape(); + const auto& scale_shape = scale->get_shape(); NPUW_ASSERT(scale_shape.size() == 3); NPUW_ASSERT(scale_shape[0] == from_shape[0]); NPUW_ASSERT(scale_shape[2] == from_shape[2]); @@ -824,9 +899,10 @@ void unpack_u4f16_z(const ov::SoPtr& from, const uint8_t* const pSrc = static_cast(from->data()); // 2 x u4 elements const float* const pScl = static_cast(scale->data()); // 1 x f32 element - const int16_t* pDst = static_cast(to->data()); // 1 x f16 element + int16_t* pDst = static_cast(to->data()); // 1 x f16 element const float zval = avx2_load_f32(reinterpret_cast(zerop->data()), zerop_elem_type); + __m256 zvalVec = _mm256_set1_ps(zval); auto unpack_body = [&](size_t job_index, size_t stride) { size_t start_c = job_index * stride; @@ -835,23 +911,22 @@ void unpack_u4f16_z(const ov::SoPtr& from, for (size_t c = start_c; c < end_c; ++c) { for (size_t h = 0; h < H; ++h) { for (size_t w = 0; w < W; w += 64) { - float tmp[64]; - for (size_t i = 0; i < 32; ++i) { - size_t input_index = w + i * 2 + W * h + W * H * c; - uint8_t packed_val = pSrc[input_index / 2]; - float f0 = static_cast(lo4(packed_val)); - float f1 = static_cast(hi4(packed_val)); - size_t scale_index = w + i * 2 + W * c; - tmp[i * 2] = (f0 - zval) * pScl[scale_index]; - tmp[i * 2 + 1] = (f1 - zval) * pScl[scale_index + 1]; - } - __m128i vresults[8]; + const uint8_t* pSrc_iter = pSrc + (w + W * h + W * H * c) / 2; + __m256i vinput = _mm256_lddqu_si256(reinterpret_cast(pSrc_iter)); + const float* pScl_iter = pScl + w + W * c; + int16_t* pDst_iter = pDst + w + W * h + W * H * c; + + __m256 svalVec[8]; for (int i = 0; i < 8; ++i) { - vresults[i] = avx2_f32tof16(_mm256_loadu_ps(tmp + i * 8)); + svalVec[i] = _mm256_loadu_ps(pScl_iter + i * 8); } - int16_t* pDstLocal = const_cast(pDst) + w + W * h + W * H * c; + + // vectorized unpack u4 to f16 + __m128i htmp[8]; // 64 x f16 + avx2_u4tof16(vinput, htmp, zvalVec, svalVec); + for (int i = 0; i < 8; ++i) { - _mm_storeu_si128(reinterpret_cast<__m128i*>(pDstLocal + i * 8), vresults[i]); + _mm_storeu_si128(reinterpret_cast<__m128i*>(pDst_iter + i * 8), htmp[i]); } } } @@ -1049,8 +1124,8 @@ void ov::npuw::util::unpack(const ov::SoPtr& from, // Unsupported Case for scale tensor: // - [s1, 1, s2, 1, s3] - const auto from_shape = from->get_shape(); - const auto scale_shape = scale->get_shape(); + const auto& from_shape = from->get_shape(); + const auto& scale_shape = scale->get_shape(); if (scale_shape.size() == 3 && scale_shape[0] == from_shape[0] && scale_shape[1] == 1 && scale_shape[2] == from_shape[2]) { From 89d6389098b74aa85b70341b23d44f37828af431 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Mon, 8 Jul 2024 15:06:24 +0200 Subject: [PATCH 12/19] [DOCS] Update deployment with Conda Forge (#25439) Added information on how to install selected components of OpenVINO from Conda Forge. 
This PR addresses JIRA ticket: CVS-112511 --- .../install-openvino-conda.rst | 50 ++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst b/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst index d5461348e35112..28fd0e8dfd1903 100644 --- a/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst +++ b/docs/articles_en/get-started/install-openvino/install-openvino-conda.rst @@ -71,6 +71,48 @@ need to install additional components. Check the description below, as well as t :doc:`list of additional configurations <../configurations>` to see if your case needs any of them. +Installing specific components of OpenVINO from Conda Forge ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +You do not have to install the entire OpenVINO package. You can install selected +components by using: + +.. code-block:: sh + + conda install conda-forge:: + +```` may be one of the components of OpenVINO listed below: + +- ``libopenvino-auto-batch-plugin`` +- ``libopenvino-auto-plugin`` +- ``libopenvino-hetero-plugin`` +- ``libopenvino-intel-cpu-plugin`` +- ``libopenvino-intel-gpu-plugin`` +- ``libopenvino-intel-npu-plugin`` +- ``libopenvino-ir-frontend`` +- ``libopenvino-onnx-frontend`` +- ``libopenvino-paddle-frontend`` +- ``libopenvino-pytorch-frontend`` +- ``libopenvino-tensorflow-frontend`` +- ``libopenvino-tensorflow-lite-frontend`` +- ``libopenvino-dev`` +- ``libopenvino-python`` +- ``libopenvino-arm-cpu-plugin`` + + +For example, to install a single component, use: + +.. code-block:: sh + + conda install conda-forge::libopenvino-intel-cpu-plugin + +For multiple components, use: + +.. code-block:: sh + + conda install conda-forge::libopenvino-intel-cpu-plugin conda-forge::libopenvino-arm-cpu-plugin conda-forge::libopenvino-intel-npu-plugin conda-forge::libopenvino-intel-gpu-plugin + + Compiling with OpenVINO Runtime from Conda-Forge on Linux +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -110,13 +152,19 @@ OpenCL™ Driver is included with the Intel® Graphics Driver package. Uninstalling OpenVINO™ Runtime ########################################################### -Once OpenVINO Runtime is installed via Conda, you can remove it using the following command, +Once OpenVINO Runtime is installed via Conda, you can remove it, using the following command, with the proper OpenVINO version number: .. code-block:: sh conda remove openvino=2024.2.0 +If you have installed specific components of OpenVINO, you can remove them, using: + +.. code-block:: sh + + conda remove conda-forge:: + What's Next? 
############################################################ From aec10ccb1320d562662c5668950c4d1c3fb04b22 Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 8 Jul 2024 17:06:58 +0400 Subject: [PATCH 13/19] [GPU] Fix for micro SDPA to avoid page fault (#25432) ### Details: - Fix `PREFETCH_K0` path to avoid page fault --- .../intel_gpu/src/kernel_selector/cl_kernels/sdpa_micro.cl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_micro.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_micro.cl index 575d9fdff5bd32..9824c994790211 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_micro.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/sdpa_micro.cl @@ -223,7 +223,7 @@ KERNEL(micro_sdpa)(OPTIONAL_SHAPE_INFO_ARG #ifdef PREFETCH_K0 /* Prefetch first K tile. */ - cooperative_prefetch_2d_k(K, k, d, ugemm_kq_wg_tile_m, PREFETCH_D_MAX, ldk, + cooperative_prefetch_2d_k(K, d, k, ugemm_kq_wg_tile_m, PREFETCH_D_MAX, ldk, sg_ij, sg_per_wg, SUBGROUP_SIZE, LSC_LDCC_L1C_L3C); #endif From dbd7640d59a407cf3b669b7da7bd0dc52c38b045 Mon Sep 17 00:00:00 2001 From: Maciej Smyk Date: Mon, 8 Jul 2024 15:21:23 +0200 Subject: [PATCH 14/19] [DOCS] Adding Ubuntu 24.04 for master (#25437) Jira: 141643 --------- Co-authored-by: Tatiana Savina --- .../release-notes-openvino/system-requirements.rst | 4 ++++ docs/articles_en/get-started/install-openvino.rst | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst b/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst index a02d133453c8b9..4e7a284dcb4bc5 100644 --- a/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst +++ b/docs/articles_en/about-openvino/release-notes-openvino/system-requirements.rst @@ -30,6 +30,7 @@ CPU .. tab-item:: Supported Operating Systems + * Ubuntu 24.04 long-term support (LTS), 64-bit (Kernel 6.8+) * Ubuntu 22.04 long-term support (LTS), 64-bit (Kernel 5.15+) * Ubuntu 20.04 long-term support (LTS), 64-bit (Kernel 5.15+) * Ubuntu 18.04 long-term support (LTS) with limitations, 64-bit (Kernel 5.4+) @@ -59,6 +60,7 @@ GPU .. tab-item:: Supported Operating Systems + * Ubuntu 24.04 long-term support (LTS), 64-bit * Ubuntu 22.04 long-term support (LTS), 64-bit * Ubuntu 20.04 long-term support (LTS), 64-bit * Windows 10, 64-bit @@ -88,6 +90,7 @@ Intel® Neural Processing Unit .. tab-item:: Operating Systems for NPU + * Ubuntu 24.04 long-term support (LTS), 64-bit * Ubuntu 22.04 long-term support (LTS), 64-bit * Windows 11, 64-bit (22H2, 23H2) @@ -106,6 +109,7 @@ Operating systems and developer environment .. 
tab-item:: Linux OS + * Ubuntu 24.04 with Linux kernel 6.8+ * Ubuntu 22.04 with Linux kernel 5.15+ * Ubuntu 20.04 with Linux kernel 5.15+ * Red Hat Enterprise Linux 8 with Linux kernel 5.4 diff --git a/docs/articles_en/get-started/install-openvino.rst b/docs/articles_en/get-started/install-openvino.rst index f8fefc4abacece..6bc7ebeecbe7ad 100644 --- a/docs/articles_en/get-started/install-openvino.rst +++ b/docs/articles_en/get-started/install-openvino.rst @@ -47,7 +47,7 @@ All currently supported versions are: NPU V\* V\* V\ * n/a n/a n/a n/a V\* =============== ========== ====== =============== ======== ============ ========== ========== ========== - | \* **Of the Linux systems, version 22.04 includes drivers for NPU.** + | \* **Of the Linux systems, versions 22.04 and 24.04 include drivers for NPU.** | **For Windows, CPU inference on ARM64 is not supported.** .. dropdown:: Effortless GenAI integration with OpenVINO GenAI Flavor From 68b26d2882634f2d23cabf75f4f4ff5b7e92a966 Mon Sep 17 00:00:00 2001 From: Georgy Krivoruchko Date: Mon, 8 Jul 2024 17:52:38 +0400 Subject: [PATCH 15/19] [IR] Changed RT Info nodes naming (compatible with an old) (#25159) ### Details: - Fixed an xml serialization/deserialization in case name isn't xml-compatible ### Tickets: - 131514 --------- Co-authored-by: Pawel Raasz --- src/core/src/pass/serialize.cpp | 29 +++++++- .../serialization/rt_info_serialization.cpp | 44 +++++++++++++ src/frontends/ir/src/ir_deserializer.cpp | 18 +++-- .../ir/tests/rt_info_deserialization.cpp | 66 +++++++++++++++++++ .../tools/mo/utils/ir_engine/ir_engine.py | 4 +- 5 files changed, 154 insertions(+), 7 deletions(-) diff --git a/src/core/src/pass/serialize.cpp b/src/core/src/pass/serialize.cpp index 61338de1457f16..d3e1d501a7ee00 100644 --- a/src/core/src/pass/serialize.cpp +++ b/src/core/src/pass/serialize.cpp @@ -885,8 +885,35 @@ class PaddingsFixer { } }; +bool is_correct_tag_name(const std::string& name) { + if (name.length() == 0) { + return false; + } + if (!std::all_of(name.begin(), name.end(), [](const int c) { + return std::isalnum(c) || (c == '_') || (c == '-') || (c == '.'); + })) { + return false; + } + if (std::isalpha(name[0]) == false && name[0] != '_') { + return false; + } + if (name.length() >= 3 && (name[0] == 'X' || name[0] == 'x') && (name[1] == 'M' || name[1] == 'm') && + (name[2] == 'l' || name[2] == 'L')) { + return false; + } + return true; +} + void serialize_rt_info(pugi::xml_node& root, const std::string& name, const ov::Any& data) { - auto child = root.append_child(name.c_str()); + pugi::xml_node child; + if (is_correct_tag_name(name)) { + child = root.append_child(name.c_str()); + } else { + // Name may brake XML-naming specification, so better to store it as an attribute of typical + // node + child = root.append_child("info"); + child.append_attribute("name").set_value(name.c_str()); + } if (data.is>()) { std::shared_ptr meta = data.as>(); ov::AnyMap& map = *meta; diff --git a/src/core/tests/pass/serialization/rt_info_serialization.cpp b/src/core/tests/pass/serialization/rt_info_serialization.cpp index a0131853704e3a..2664ab5edca18a 100644 --- a/src/core/tests/pass/serialization/rt_info_serialization.cpp +++ b/src/core/tests/pass/serialization/rt_info_serialization.cpp @@ -177,3 +177,47 @@ TEST_F(RTInfoSerializationTest, all_attributes_v10) { check_info(add->output(0).get_rt_info()); EXPECT_EQ(f->get_parameters()[0]->get_layout(), ""); } + +TEST_F(RTInfoSerializationTest, tag_names_verification) { + std::map test_cases = { + {"0", "bad"}, + {"0a", 
"bad"}, + {"-a", "bad"}, + {"a 0", "bad"}, + {"a0", "good"}, + {"a.0", "good"}, + {".a0", "bad"}, + {"a_0", "good"}, + {"_0a", "bad"}, + {"aXmL", "good"}, + {"xMLa", "bad"}, + {"XML", "bad"}, + }; + auto init_info = [&test_cases](ov::RTMap& info) { + for (const auto& item : test_cases) { + info[item.first] = item.second; + } + }; + + std::shared_ptr model; + { + auto data = std::make_shared(ov::element::Type_t::f32, ov::Shape{1, 3, 10, 10}); + model = std::make_shared(ov::OutputVector{data}, ov::ParameterVector{data}); + init_info(model->get_rt_info()); + } + + ov::pass::Manager pass_manager; + pass_manager.register_pass(m_out_xml_path, m_out_bin_path); + pass_manager.run_passes(model); + + auto ir_model = getWithIRFrontend(m_out_xml_path, m_out_bin_path); + ASSERT_NE(nullptr, ir_model); + + auto model_rt_info = ir_model->get_rt_info(); + std::for_each(test_cases.begin(), + test_cases.end(), + [&model_rt_info](const std::pair& item) { + ASSERT_TRUE(model_rt_info.count(item.first)); + ASSERT_EQ(model_rt_info[item.first], item.second); + }); +} diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index c4b7824388af45..8c8aec333e107e 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -618,9 +618,14 @@ class MetaDataParser : public ov::Meta { ov::AnyMap parse_node(const pugi::xml_node& node) const { ov::AnyMap result; - const std::string node_name = node.name(); + // Old version may produce nodes like , but it may brake xml-naming convention + // Now it should look like . + // Also we keep an option to read an old XMLs where it doesn't have name attribute + const auto name_attr = node.attribute("name"); + const std::string node_name = name_attr.empty() ? node.name() : name_attr.value(); for (const auto& data : node.children()) { - const std::string data_name = data.name(); + const auto name_attr = data.attribute("name"); + const std::string data_name = name_attr.empty() ? data.name() : name_attr.value(); // WA for legacy POT config if (data_name == "config" && node_name == "quantization_parameters") { // Read legacy pot config @@ -658,12 +663,17 @@ void ov::XmlDeserializer::read_meta_data(const std::shared_ptr& model for (const auto& data : meta_section.children()) { if (data.empty()) continue; + // Old version may produce nodes like , but it may brake xml-naming convention + // Now it should look like . + // Also we keep an option to read an old XMLs where it doesn't have name attribute + const auto name_attr = data.attribute("name"); + const auto node_name = name_attr.empty() ? 
data.name() : name_attr.value(); if (!data.attribute("value").empty()) { - rt_info[data.name()] = pugixml::get_str_attr(data, "value"); + rt_info[node_name] = pugixml::get_str_attr(data, "value"); } else { // Use meta data for set of parameters std::shared_ptr meta = std::make_shared(data.name(), data); - rt_info[data.name()] = meta; + rt_info[node_name] = meta; } } } diff --git a/src/frontends/ir/tests/rt_info_deserialization.cpp b/src/frontends/ir/tests/rt_info_deserialization.cpp index 92ff767f96145f..4313b4d19be515 100644 --- a/src/frontends/ir/tests/rt_info_deserialization.cpp +++ b/src/frontends/ir/tests/rt_info_deserialization.cpp @@ -819,3 +819,69 @@ TEST_F(RTInfoDeserialization, indexes_input_and_output_v11) { ASSERT_EQ(f->get_results()[0]->get_friendly_name(), "output2"); ASSERT_EQ(f->get_results()[1]->get_friendly_name(), "output1"); } + +TEST_F(RTInfoDeserialization, node_naming_v11) { + std::string model = R"V0G0N( + + + + + + + 1 + 3 + 224 + 224 + + + + + + + 1 + 3 + 224 + 224 + + + + + + + + + + + + + + + + + + +)V0G0N"; + auto f = getWithIRFrontend(model); + ASSERT_NE(nullptr, f); + + auto check_version = [](const std::shared_ptr& f, int ref_version) { + auto& rt_info = f->get_rt_info(); + ASSERT_TRUE(rt_info.count("version")); + ASSERT_TRUE(rt_info.at("version").is()); + ASSERT_EQ(rt_info.at("version").as(), ref_version); + }; + check_version(f, 11); + + auto& rt_info = f->get_rt_info(); + ASSERT_TRUE(rt_info.count("framework")); + ASSERT_TRUE(rt_info.count("conversion_parameters")); + + auto& item0 = f->get_rt_info("framework", "item0"); + ASSERT_EQ(item0, "0"); + + auto& item1 = f->get_rt_info("framework", "item1"); + ASSERT_EQ(item1, "1"); + + auto& is_python_api_used = f->get_rt_info("conversion_parameters", "is_python_api_used"); + ASSERT_EQ(is_python_api_used, "True"); +} diff --git a/tools/mo/openvino/tools/mo/utils/ir_engine/ir_engine.py b/tools/mo/openvino/tools/mo/utils/ir_engine/ir_engine.py index 3b1575a800a45c..aec4049c6b32e8 100644 --- a/tools/mo/openvino/tools/mo/utils/ir_engine/ir_engine.py +++ b/tools/mo/openvino/tools/mo/utils/ir_engine/ir_engine.py @@ -35,7 +35,7 @@ def read_rt_info_attr(elem): val_dict = {} for child in elem: child_val = read_rt_info_attr(child) - val_dict[child.tag] = child_val + val_dict[child.attrib.get('name', child.tag)] = child_val return val_dict @@ -104,7 +104,7 @@ def __load_xml(self): statistics[layer.find('name').text] = {'min': layer.find('min').text, 'max': layer.find('max').text} elif child.tag == 'rt_info': for elem in child: - self.meta_data[elem.tag] = read_rt_info_attr(elem) + self.meta_data[elem.attrib.get('name', elem.tag)] = read_rt_info_attr(elem) # TODO: Remove this part when POT updates to using of rt_info elif child.tag == 'quantization_parameters': From 7ca8cb13f271679018a9a658463e36005d47cad9 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Mon, 8 Jul 2024 16:06:30 +0200 Subject: [PATCH 16/19] [PT FE] Convert all modules with parameters and buffers in patching (#25339) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- .../python/src/openvino/frontend/pytorch/patch_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/patch_model.py b/src/bindings/python/src/openvino/frontend/pytorch/patch_model.py index 56ee32fa8ca0c0..173ec572cb20ae 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/patch_model.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/patch_model.py @@ -113,5 +113,6 @@ def 
__make_16bit_traceable(model: torch.nn.Module): patch_model(model, extensions, "_openvino_module_extension_patch_orig_forward") for _, module in model.named_modules(): - if module.__class__ not in extensions and hasattr(module, "weight") and module.weight.dtype in [torch.float16, torch.bfloat16]: + if module.__class__ not in extensions and (any([p.dtype in [torch.float16, torch.bfloat16] for p in module.parameters(False)]) + or any([b.dtype in [torch.float16, torch.bfloat16] for b in module.buffers(False)])): module.float() From 49d71b1fa24a9942e62bcc458fea98dab0fde48f Mon Sep 17 00:00:00 2001 From: Karol Blaszczak Date: Mon, 8 Jul 2024 16:20:47 +0200 Subject: [PATCH 17/19] [DOCS] tweaks in footer and menu (#25443) --- .../compatibility-and-support/supported-devices.rst | 7 ++----- docs/articles_en/about-openvino/performance-benchmarks.rst | 2 +- docs/sphinx_setup/_static/html/footer.html | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst index fbb3b948165dd2..4aa43c0aca58ab 100644 --- a/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst +++ b/docs/articles_en/about-openvino/compatibility-and-support/supported-devices.rst @@ -1,8 +1,5 @@ -.. {#openvino_supported_devices} - - -Inference Device Support -======================== +Supported Inference Devices +============================ .. meta:: :description: Check the list of devices used by OpenVINO to run inference diff --git a/docs/articles_en/about-openvino/performance-benchmarks.rst b/docs/articles_en/about-openvino/performance-benchmarks.rst index 763a05cea3ee49..e884dd0b90370b 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks.rst @@ -18,7 +18,7 @@ Performance Benchmarks This page presents benchmark results for `Intel® Distribution of OpenVINO™ toolkit `__ -and :doc:`OpenVINO Model Server <../ovms_what_is_openvino_model_server>`, for a representative "./../" +and :doc:`OpenVINO Model Server <../ovms_what_is_openvino_model_server>`, for a representative selection of public neural networks and Intel® devices. The results may help you decide which hardware to use in your applications or plan AI workload for the hardware you have already implemented in your solutions. Click the buttons below to see the chosen benchmark data. diff --git a/docs/sphinx_setup/_static/html/footer.html b/docs/sphinx_setup/_static/html/footer.html index 311b98e223d52b..2bdc5966d8a8d4 100644 --- a/docs/sphinx_setup/_static/html/footer.html +++ b/docs/sphinx_setup/_static/html/footer.html @@ -112,7 +112,7 @@ ©2024 Intel Corporation
-    OpenVino
+    OpenVINO
  • OpenVINO™ Telemetry
  • Glossary
  • Case Studies
  • From d63f2cbe979f70b86be10ed6706de77353894f1b Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Mon, 8 Jul 2024 18:30:06 +0400 Subject: [PATCH 18/19] [GPU] Fix resample type infer (#25438) ### Details: - Added fused op type handling to resample calc_output_layouts() impl ### Tickets: - *CVS-146270* --- src/plugins/intel_gpu/src/graph/resample.cpp | 14 ++- .../unit/shape_infer/resample_si_test.cpp | 106 ++++++++++++++++++ 2 files changed, 117 insertions(+), 3 deletions(-) create mode 100644 src/plugins/intel_gpu/tests/unit/shape_infer/resample_si_test.cpp diff --git a/src/plugins/intel_gpu/src/graph/resample.cpp b/src/plugins/intel_gpu/src/graph/resample.cpp index b70028bc4ddc4b..7f253265334e98 100644 --- a/src/plugins/intel_gpu/src/graph/resample.cpp +++ b/src/plugins/intel_gpu/src/graph/resample.cpp @@ -72,7 +72,12 @@ static std::vector calc_output_layouts(resample_node const& /*node*/, co auto pads_end = desc->pads_end; const auto output_shapes = ov::op::v4::shape_infer(&op, input_shapes, pads_begin, pads_end, ta); - return { layout{output_shapes[0], input_layout.data_type, format::adjust_to_rank(input_layout.format, output_shapes[0].size())} }; + auto output_type = input_layout.data_type; + if (impl_param.has_fused_primitives()) { + output_type = impl_param.get_output_element_type(); + } + + return { layout{output_shapes[0], output_type, format::adjust_to_rank(input_layout.format, output_shapes[0].size())} }; } } // namespace v4 @@ -119,8 +124,11 @@ static std::vector calc_output_layouts(resample_node const& /*node*/, co auto pads_begin = desc->pads_begin; auto pads_end = desc->pads_end; const auto output_shapes = ov::op::v11::shape_infer(&op, input_shapes, pads_begin, pads_end, ta); - - return { layout{output_shapes[0], input_layout.data_type, format::adjust_to_rank(input_layout.format, output_shapes[0].size())} }; + auto output_type = input_layout.data_type; + if (impl_param.has_fused_primitives()) { + output_type = impl_param.get_output_element_type(); + } + return { layout{output_shapes[0], output_type, format::adjust_to_rank(input_layout.format, output_shapes[0].size())} }; } } // namespace v11 diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/resample_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/resample_si_test.cpp new file mode 100644 index 00000000000000..f39539cee46462 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/shape_infer/resample_si_test.cpp @@ -0,0 +1,106 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils.h" + +#include +#include +#include + +#include "resample_inst.h" + +#include "program_wrapper.h" + +#include + +using namespace cldnn; +using namespace ::tests; + +namespace shape_infer_tests { + +struct resample_test_params { + layout input; + std::vector sizes; + std::vector scales; + std::vector axes; + std::vector pads_begin; + std::vector pads_end; + resample::InterpolateOp::InterpolateMode operation_type; + resample::InterpolateOp::ShapeCalcMode shape_calc_mode; + resample::InterpolateOp::CoordinateTransformMode ctm; + resample::InterpolateOp::NearestMode nm; + layout expected_layout; +}; + +class resample_test : public testing::TestWithParam { }; + +TEST_P(resample_test, shape_infer) { + auto p = GetParam(); + + auto& engine = get_test_engine(); + + auto input_prim = std::make_shared("input", p.input); + auto resample_prim = std::make_shared("output", input_info("input"), p.sizes, p.scales, p.axes, p.pads_begin, p.pads_end, 0, 0, + p.operation_type, 
p.shape_calc_mode, p.ctm, p.nm); + + cldnn::program prog(engine); + + auto& input_node = prog.get_or_create(input_prim); + auto& resample_node = prog.get_or_create(resample_prim); + program_wrapper::add_connection(prog, input_node, resample_node); + auto res = resample_inst::calc_output_layouts(resample_node, *resample_node.get_kernel_impl_params()); + + ASSERT_EQ(res.size(), 1); + ASSERT_EQ(res[0], p.expected_layout); +} + +TEST_P(resample_test, shape_infer_with_fused_op) { + auto p = GetParam(); + + auto& engine = get_test_engine(); + + auto input_prim = std::make_shared("input", p.input); + auto resample_prim = std::make_shared("output", input_info("input"), p.sizes, p.scales, p.axes, p.pads_begin, p.pads_end, 0, 0, + p.operation_type, p.shape_calc_mode, p.ctm, p.nm); + + cldnn::program prog(engine); + + auto& input_node = prog.get_or_create(input_prim); + auto& resample_node = prog.get_or_create(resample_prim); + program_wrapper::add_connection(prog, input_node, resample_node); + + + auto expected_layout = p.expected_layout; + expected_layout.data_type = data_types::u8; + + auto dummy_prim = std::make_shared("output1", input_info("output"), activation_func::abs); + fused_primitive_desc desc(dummy_prim); + desc.output_layout = expected_layout; + + resample_node.add_fused_primitive(desc); + + auto res = resample_inst::calc_output_layouts(resample_node, *resample_node.get_kernel_impl_params()); + + ASSERT_EQ(res.size(), 1); + ASSERT_EQ(res[0], expected_layout); +} + +INSTANTIATE_TEST_SUITE_P(smoke, resample_test, + testing::ValuesIn(std::vector{ + { + layout{ov::PartialShape{1, 40, 128, 128}, data_types::f32, format::bfyx}, + std::vector{64, 64}, + std::vector{1.0f, 1.0f}, + std::vector{2, 3}, + std::vector{}, + std::vector{}, + resample::InterpolateOp::InterpolateMode::NEAREST, + resample::InterpolateOp::ShapeCalcMode::SIZES, + resample::InterpolateOp::CoordinateTransformMode::ASYMMETRIC, + resample::InterpolateOp::NearestMode::SIMPLE, + layout{ov::PartialShape{1, 40, 64, 64}, data_types::f32, format::bfyx} + }, + })); + +} // shape_infer_tests From a71137d9c6edd0bb1f30047ae3efad16ea1c6536 Mon Sep 17 00:00:00 2001 From: barnasm1 Date: Mon, 8 Jul 2024 16:40:12 +0200 Subject: [PATCH 19/19] [CORE] [GPU] nan to inf bug repro test (#25403) ### Details: - GPU bug test_float_to_f8e8m0_convert repro ### Tickets: - [*CVS-145281*](https://jira.devtools.intel.com/browse/CVS-145281) --- .../python/tests/test_graph/test_constant.py | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/src/bindings/python/tests/test_graph/test_constant.py b/src/bindings/python/tests/test_graph/test_constant.py index bc5b6f87a09659..a9de17eb19423a 100644 --- a/src/bindings/python/tests/test_graph/test_constant.py +++ b/src/bindings/python/tests/test_graph/test_constant.py @@ -481,6 +481,65 @@ def test_float_to_f8e4m3_constant(ov_type, numpy_dtype, opset): assert np.allclose(result, target, equal_nan=True) +@pytest.mark.parametrize(("opset"), OPSETS) +@pytest.mark.parametrize( + ("ov_type", "numpy_dtype"), + [ + (Type.f32, np.float32), + (Type.f16, np.float16), + ], +) +def test_float_to_f8e8m0_constant_matrix(ov_type, numpy_dtype, opset): + pytest.skip("CVS-145281 BUG: nan to inf repro.") + + shape = (2, 2) + data = np.full(shape, np.nan) + + compressed_const = opset.constant(data, dtype=ov_type, name="fx_constant") + convert_to_fp8 = opset.convert(compressed_const, Type.f8e8m0) + convert_back = opset.convert(convert_to_fp8, ov_type) + parameter = opset.parameter(ov.PartialShape([-1, -1]), ov_type) + 
add_op = opset.add(parameter, convert_back) + model = ov.Model([add_op], [parameter]) + + compiled = ov.compile_model(model, "GPU") + tensor = np.zeros(data.shape, dtype=numpy_dtype) + result = compiled(tensor)[0] + + target = np.full(shape, np.nan) + + assert np.allclose(result, target, equal_nan=True) + + +@pytest.mark.parametrize(("opset"), OPSETS) +@pytest.mark.parametrize( + ("ov_type", "numpy_dtype"), + [ + (Type.f32, np.float32), + (Type.f16, np.float16), + ], +) +def test_float_to_f8e8m0_constant_single_nan(ov_type, numpy_dtype, opset): + pytest.skip("CVS-145281 BUG: nan to inf repro.") + + data = np.array([np.nan], dtype=numpy_dtype) + + compressed_const = opset.constant(data, dtype=ov.Type.f8e8m0, name="f8e8m0_constant") + convert = opset.convert(compressed_const, data.dtype) + parameter = opset.parameter(ov.PartialShape([-1]), ov_type) + add_op = opset.add(parameter, convert) + model = ov.Model([add_op], [parameter]) + + compiled = ov.compile_model(model) + tensor = np.zeros(data.shape, dtype=numpy_dtype) + result = compiled(tensor)[0] + + target = [np.nan] + target = np.array(target, dtype=numpy_dtype) + + assert np.allclose(result, target, equal_nan=True) + + @pytest.mark.parametrize(("opset"), OPSETS) @pytest.mark.parametrize( ("ov_type", "numpy_dtype"), @@ -490,6 +549,8 @@ def test_float_to_f8e4m3_constant(ov_type, numpy_dtype, opset): ], ) def test_float_to_f8e8m0_constant(ov_type, numpy_dtype, opset): + pytest.skip("CVS-145281 BUG: nan to inf repro. [random - depends on the device]") + data = np.array([4.75, 4.5, 5.25, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, -0.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 448, 512, np.nan], dtype=numpy_dtype)