From 375db365725f1cdef0af96c0d3b1b011ddc05830 Mon Sep 17 00:00:00 2001 From: Georgy Krivoruchko Date: Fri, 20 Dec 2024 10:34:30 +0400 Subject: [PATCH] [ONNX] Removed redundant memory copying for small data types when MMAP is using (#28120) ### Details: - Removed redundant memory copying for data types smaller than 4 bytes when MMAP is used - Implementation verified by the previously added tests named onnx_external_data_* ### Tickets: - 159161 --- .../onnx/frontend/src/core/tensor.cpp | 96 +++++++++++++ .../onnx/frontend/src/core/tensor.hpp | 136 ++++-------------- .../src/utils/tensor_external_data.hpp | 8 ++ 3 files changed, 128 insertions(+), 112 deletions(-) diff --git a/src/frontends/onnx/frontend/src/core/tensor.cpp b/src/frontends/onnx/frontend/src/core/tensor.cpp index b23f6c55253ac1..1c3a943e6481d1 100644 --- a/src/frontends/onnx/frontend/src/core/tensor.cpp +++ b/src/frontends/onnx/frontend/src/core/tensor.cpp @@ -266,6 +266,102 @@ std::vector Tensor::get_data() const { ONNX_INVALID_DATA_TYPE(m_tensor_proto->data_type(), "STRING"); } +std::shared_ptr Tensor::get_ov_constant() const { + if (m_tensor_proto->has_segment()) { + FRONT_END_THROW("Loading segments isn't supported"); + } + std::shared_ptr constant{nullptr}; + ov::element::Type ov_type = get_ov_type(); + size_t element_count = get_data_size(); + if (ov::element::is_nibble_type(ov_type)) { + element_count *= 2; // Each byte contains 2 data items + } + if (has_external_data()) { + const auto ext_data = detail::TensorExternalData(*m_tensor_proto); + if (m_mmap_cache) { + constant = + std::make_shared(ov_type, + m_shape, + ext_data.load_external_mmap_data(m_model_dir, m_mmap_cache)); + } else { + constant = + std::make_shared(ov_type, m_shape, ext_data.load_external_data(m_model_dir)); + } + // ext_data.size() might be zero, so recalculate from the data actually read (using its byte size) + element_count = constant->get_byte_size() / ov_type.size(); + if (ov::element::is_nibble_type(ov_type)) { + 
element_count *= 2; // Each byte contains 2 data items, so byte size must be multiplied by 2 + } + if (element_count != ov::shape_size(m_shape) || + (ext_data.size() != 0 && constant->get_byte_size() != ext_data.size())) { + throw error::invalid_external_data( + "The size of the external data file does not match the byte size of an initializer '" + get_name() + + "' in the model"); + } + } else if (element_count == shape_size(m_shape)) { + switch (m_tensor_proto->data_type()) { + case TensorProto_DataType::TensorProto_DataType_FLOAT: + case TensorProto_DataType::TensorProto_DataType_DOUBLE: + case TensorProto_DataType::TensorProto_DataType_INT32: + case TensorProto_DataType::TensorProto_DataType_INT64: + case TensorProto_DataType::TensorProto_DataType_UINT32: + case TensorProto_DataType::TensorProto_DataType_UINT64: + constant = std::make_shared(ov_type, m_shape, get_data_ptr()); + break; + case TensorProto_DataType::TensorProto_DataType_INT4: + constant = std::make_shared(ov_type, m_shape, get_data().data()); + break; + case TensorProto_DataType::TensorProto_DataType_INT8: + constant = std::make_shared(ov_type, m_shape, get_data().data()); + break; + case TensorProto_DataType::TensorProto_DataType_INT16: + constant = std::make_shared(ov_type, m_shape, get_data().data()); + break; + case TensorProto_DataType::TensorProto_DataType_UINT4: + constant = std::make_shared(ov_type, m_shape, get_data().data()); + break; + case TensorProto_DataType::TensorProto_DataType_UINT8: + constant = std::make_shared(ov_type, m_shape, get_data().data()); + break; + case TensorProto_DataType::TensorProto_DataType_UINT16: + constant = std::make_shared(ov_type, m_shape, get_data().data()); + break; + case TensorProto_DataType::TensorProto_DataType_BOOL: + constant = std::make_shared(ov_type, m_shape, get_data().data()); + break; + case TensorProto_DataType::TensorProto_DataType_BFLOAT16: + constant = std::make_shared(ov_type, m_shape, get_data().data()); + break; + case 
TensorProto_DataType::TensorProto_DataType_FLOAT16: + constant = std::make_shared(ov_type, m_shape, get_data().data()); + break; + case TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN: + constant = std::make_shared(ov_type, m_shape, get_data().data()); + break; + case TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2: + constant = std::make_shared(ov_type, m_shape, get_data().data()); + break; + case TensorProto_DataType::TensorProto_DataType_STRING: + constant = std::make_shared(ov_type, m_shape, get_data().data()); + break; + default: + ONNX_UNSUPPORTED_DATA_TYPE( + m_tensor_proto->data_type(), + "BOOL, BFLOAT16, FLOAT8E4M3FN, FLOAT8E5M2, FLOAT, FLOAT16, DOUBLE, INT4, INT8, INT16, INT32, INT64, " + "UINT4, UINT8, UINT16, UINT32, UINT64, STRING"); + } + } else if (element_count == 0 && m_shape.size() == 0) { + constant = common::make_failsafe_constant(ov_type); + } else { + FRONT_END_THROW("Tensor shape doesn't match data size"); + } + + if (m_tensor_proto->has_name()) { + constant->set_friendly_name(get_name()); + } + return constant; +} + } // namespace onnx } // namespace frontend } // namespace ov diff --git a/src/frontends/onnx/frontend/src/core/tensor.hpp b/src/frontends/onnx/frontend/src/core/tensor.hpp index a63cdfd1906bb0..7321311e4b4775 100644 --- a/src/frontends/onnx/frontend/src/core/tensor.hpp +++ b/src/frontends/onnx/frontend/src/core/tensor.hpp @@ -186,119 +186,9 @@ class Tensor { return static_cast(m_tensor_proto->data_type()); } - std::shared_ptr get_ov_constant() const { - if (m_tensor_proto->has_segment()) { - FRONT_END_THROW("Loading segments isn't supported"); - } - switch (m_tensor_proto->data_type()) { - case TensorProto_DataType::TensorProto_DataType_BOOL: - return make_ov_constant(ov::element::boolean); - case TensorProto_DataType::TensorProto_DataType_FLOAT: - return make_ov_constant(ov::element::f32); - case TensorProto_DataType::TensorProto_DataType_FLOAT16: - return make_ov_constant(ov::element::f16); - case 
TensorProto_DataType::TensorProto_DataType_DOUBLE: - return make_ov_constant(ov::element::f64); - case TensorProto_DataType::TensorProto_DataType_INT4: - return make_ov_constant(ov::element::i4); - case TensorProto_DataType::TensorProto_DataType_INT8: - return make_ov_constant(ov::element::i8); - case TensorProto_DataType::TensorProto_DataType_INT16: - return make_ov_constant(ov::element::i16); - case TensorProto_DataType::TensorProto_DataType_INT32: - return make_ov_constant(ov::element::i32); - case TensorProto_DataType::TensorProto_DataType_INT64: - return make_ov_constant(ov::element::i64); - case TensorProto_DataType::TensorProto_DataType_UINT4: - return make_ov_constant(ov::element::u4); - case TensorProto_DataType::TensorProto_DataType_UINT8: - return make_ov_constant(ov::element::u8); - case TensorProto_DataType::TensorProto_DataType_UINT16: - return make_ov_constant(ov::element::u16); - case TensorProto_DataType::TensorProto_DataType_UINT32: - return make_ov_constant(ov::element::u32); - case TensorProto_DataType::TensorProto_DataType_UINT64: - return make_ov_constant(ov::element::u64); - case TensorProto_DataType::TensorProto_DataType_BFLOAT16: - return make_ov_constant(ov::element::bf16); - case TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN: - return make_ov_constant(ov::element::f8e4m3); - case TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2: - return make_ov_constant(ov::element::f8e5m2); - case TensorProto_DataType::TensorProto_DataType_STRING: - return make_ov_constant(ov::element::string); - default: - ONNX_UNSUPPORTED_DATA_TYPE( - m_tensor_proto->data_type(), - "BOOL, BFLOAT16, FLOAT8E4M3FN, FLOAT8E5M2, FLOAT, FLOAT16, DOUBLE, INT4, INT8, INT16, INT32, INT64, " - "UINT4, UINT8, UINT16, UINT32, UINT64, STRING"); - } - } + std::shared_ptr get_ov_constant() const; private: - template ::value || std::is_same::value || - std::is_same::value || std::is_same::value || - std::is_same::value, - bool>::type = true> - std::shared_ptr 
make_ov_constant(const ov::element::Type& type) const { - std::shared_ptr constant{nullptr}; - size_t data_size = get_data_size(); - if (has_external_data()) { - const auto ext_data = detail::TensorExternalData(*m_tensor_proto); - if (m_mmap_cache) { - constant = - std::make_shared(type, - m_shape, - ext_data.load_external_mmap_data(m_model_dir, m_mmap_cache)); - } else { - constant = - std::make_shared(type, m_shape, ext_data.load_external_data(m_model_dir)); - } - if (constant->get_byte_size() != ov::shape_size(m_shape) * type.size()) { - throw error::invalid_external_data( - "The size of the external data file does not match the byte size of an initializer '" + get_name() + - "' in the model"); - } - } else if (data_size == shape_size(m_shape)) { - constant = std::make_shared(type, m_shape, get_data_ptr()); - } else if (data_size == 0 && m_shape.size() == 0) { - constant = common::make_failsafe_constant(type); - } else { - FRONT_END_THROW("Tensor shape doesn't match data size"); - } - - if (m_tensor_proto->has_name()) { - constant->set_friendly_name(get_name()); - } - return constant; - } - - template ::value && !std::is_same::value && - !std::is_same::value && !std::is_same::value && - !std::is_same::value, - bool>::type = true> - std::shared_ptr make_ov_constant(const ov::element::Type& type) const { - std::shared_ptr constant{nullptr}; - auto data = get_data(); - auto element_count = data.size(); - if (ov::element::is_nibble_type(type)) { - element_count *= 2; // Each byte contains 2 data items - } - if (element_count == shape_size(m_shape)) { - constant = std::make_shared(type, m_shape, data.data()); - } else if (element_count == 0 && m_shape.size() == 0) { - constant = common::make_failsafe_constant(type); - } else { - FRONT_END_THROW("Tensor shape doesn't match data size"); - } - if (m_tensor_proto->has_name()) { - constant->set_friendly_name(get_name()); - } - return constant; - } - bool has_external_data() const { return 
m_tensor_proto->has_data_location() && m_tensor_proto->data_location() == TensorProto_DataLocation::TensorProto_DataLocation_EXTERNAL; @@ -317,6 +207,9 @@ class Tensor { } const void* get_data_ptr() const { + if (has_external_data()) { + FRONT_END_THROW("Unexpected usage of method for externally stored data"); + } if (m_tensor_proto->has_raw_data()) { return m_tensor_proto->raw_data().data(); } @@ -336,6 +229,10 @@ class Tensor { } size_t get_data_size() const { + if (has_external_data()) { + const auto ext_data = detail::TensorExternalData(*m_tensor_proto); + return ext_data.size() / get_onnx_data_size(m_tensor_proto->data_type()); + } if (m_tensor_proto->has_raw_data()) { return m_tensor_proto->raw_data().size() / get_onnx_data_size(m_tensor_proto->data_type()); } @@ -352,8 +249,23 @@ class Tensor { return m_tensor_proto->double_data_size(); case TensorProto_DataType::TensorProto_DataType_STRING: return m_tensor_proto->string_data_size(); + case TensorProto_DataType::TensorProto_DataType_INT4: + case TensorProto_DataType::TensorProto_DataType_INT8: + case TensorProto_DataType::TensorProto_DataType_INT16: + case TensorProto_DataType::TensorProto_DataType_UINT4: + case TensorProto_DataType::TensorProto_DataType_UINT8: + case TensorProto_DataType::TensorProto_DataType_UINT16: + case TensorProto_DataType::TensorProto_DataType_BOOL: + case TensorProto_DataType::TensorProto_DataType_BFLOAT16: + case TensorProto_DataType::TensorProto_DataType_FLOAT16: + case TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN: + case TensorProto_DataType::TensorProto_DataType_FLOAT8E5M2: + return m_tensor_proto->int32_data_size(); } - ONNX_INVALID_DATA_TYPE(m_tensor_proto->data_type(), "FLOAT, INT32, INT64, UINT64, DOUBLE, STRING"); + ONNX_INVALID_DATA_TYPE( + m_tensor_proto->data_type(), + "BOOL, BFLOAT16, FLOAT8E4M3FN, FLOAT8E5M2, FLOAT, FLOAT16, DOUBLE, INT4, INT8, INT16, INT32, INT64, " + "UINT4, UINT8, UINT16, UINT32, UINT64, STRING"); } const TensorProto* m_tensor_proto; diff 
--git a/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp b/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp index 983e53895c1148..e715a8e7e61cdc 100644 --- a/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp +++ b/src/frontends/onnx/frontend/src/utils/tensor_external_data.hpp @@ -46,6 +46,14 @@ class TensorExternalData { /// \return State of TensorExternalData as string representation std::string to_string() const; + /// \brief Object contains a data length after construction. Method allows read-only access to this + /// information. + /// + /// \return Returns a stored data size in bytes + uint64_t size() const { + return m_data_length; + } + private: std::string m_data_location{}; uint64_t m_offset = 0;