openvinotoolkit · praasz · Mar 31, 2024 · Mar 12, 2024 · Mar 12, 2024 · Mar 13, 2024
@@ -5,6 +5,7 @@
 #pragma once
 
 #include "openvino/core/type/element_type_traits.hpp"
+#include "openvino/core/type/nf4.hpp"
 
 namespace ov {
 namespace util {
@@ -96,6 +97,11 @@ constexpr size_t bit_width<Type_t::u4>() {
     return 4;
 }
 
+template <>
+constexpr size_t bit_width<Type_t::nf4>() {
+    return 4;  // nf4 elements occupy 4 bits, the same width as the u4/i4 specializations
+}
+
 template <>
 constexpr size_t bit_width<Type_t::i4>() {
     return 4;
@@ -147,6 +153,12 @@ class BitProxy<T, ET, typename std::enable_if<is_bit_type(ET) || is_nibble_type(
         return (*m_ptr >> m_bit_shift) & value_mask;
     }
 
+    /**
+     * @brief Stores a raw bit pattern at this proxy's position inside the storage byte.
+     *
+     * The target bits are cleared first and then replaced. Bits of `value` outside the element mask are
+     * discarded here, so a too-wide value cannot overwrite other elements packed in the same byte.
+     *
+     * @param value  Raw bit pattern of the element (only the bits covered by the element mask are used).
+     */
+    void set_bit_value(uint8_t value) {
+        constexpr auto value_mask = util::make_n_bit_mask(m_bits);
+        *m_ptr &= ~(value_mask << m_bit_shift);
+        *m_ptr |= (value & value_mask) << m_bit_shift;
+    }
+
 public:
     using value_type = typename std::decay<T>::type;  //!< Fundamental type of bound to BitProxy.
 
@@ -182,6 +194,19 @@ class BitProxy<T, ET, typename std::enable_if<is_bit_type(ET) || is_nibble_type(
         return static_cast<value_type>(get_bit_value());
     }
 
+    /**
+     * @brief Converts the stored NF4 code to float.
+     *
+     * @note Implementation aligned to ConvertNF4::unpack: de-quantization is applied only when converting to
+     * floating point. Conversion to the fundamental (integral) type yields the raw bit value instead.
+     *
+     * @return De-quantized value of the element as float.
+     */
+    template <Type_t ETT = ET, typename std::enable_if<ETT == nf4>::type* = nullptr>
+    operator float() const {
+        const auto nf4_code = get_bit_value();
+        return ConvertNF4::dequantize(nf4_code);
+    }
+
     /**
      * @brief Converts to fundamental type.
      *
@@ -207,8 +232,17 @@ class BitProxy<T, ET, typename std::enable_if<is_bit_type(ET) || is_nibble_type(
      */
     BitProxy<T, ET>& operator=(const value_type v) {
         constexpr auto value_mask = util::make_n_bit_mask(m_bits);
-        *m_ptr &= ~(value_mask << m_bit_shift);
-        *m_ptr |= (static_cast<uint8_t>(v) & value_mask) << m_bit_shift;
+        set_bit_value(static_cast<uint8_t>(v) & value_mask);  // truncate v with value_mask before storing
+        return *this;
+    }
+
+    /**
+     * @brief Sets current NF4 value from float using quantization.
+     *
+     * The float is first quantized by ConvertNF4::quantize and the resulting code is stored as the bit value.
+     *
+     * @param v  Value to be set.
+     * @return Reference to this proxy.
+     */
+    template <Type_t ETT = ET, typename std::enable_if<ETT == nf4>::type* = nullptr>
+    BitProxy<T, ET>& operator=(const float v) {
+        const auto nf4_code = ConvertNF4::quantize(v);
+        set_bit_value(nf4_code);
         return *this;
     }
 };
@@ -490,13 +524,57 @@ class Iterator {
  *
  * @tparam ET  Type of ov::element::Type_t.
  * @tparam T   Type of pointer data. Must be fundamental type of ET.
-
+ *
  * @param ptr  Pointer to data.
  * @return Element iterator for type ET.
  */
 template <Type_t ET, class T, typename std::enable_if<!is_byte_type(ET) && ET != string>::type* = nullptr>
 constexpr Iterator<ET, T> iterator(T* ptr) {
     return {ptr};
 }
+
+/**
+ * @brief Make iterator from pointer for standard types (byte types and string).
+ *
+ * Provides a common interface for all ov::element::Type_t values: this overload is selected when
+ * is_byte_type(ET) holds or ET is string, and it simply returns the input pointer.
+ *
+ * @tparam ET  Type of ov::element::Type_t.
+ * @tparam T   Type of pointer data. Must be fundamental type of ET.
+ *
+ * @param ptr  Pointer to data.
+ * @return Element iterator same as input pointer.
+ */
+template <Type_t ET, class T, typename std::enable_if<is_byte_type(ET) || ET == string>::type* = nullptr>
+constexpr T* iterator(T* ptr) {
+    return ptr;
+}
+
+/**
+ * @brief Make iterator from void pointer.
+ *
+ * Data will be reinterpreted using fundamental type for ov::element::Type, then forwarded to the typed
+ * iterator<ET, T>(T*) overload, whose return type this function adopts.
+ *
+ * @tparam ET  OpenVINO element type.
+ * @tparam T   Type the data is reinterpreted as. Defaults to ov::fundamental_type_for<ET>.
+ * @param ptr  Pointer to data.
+ * @return Iterator for given ET.
+ */
+template <Type_t ET, class T = ov::fundamental_type_for<ET>>
+constexpr auto iterator(void* ptr) -> decltype(iterator<ET, T>(reinterpret_cast<T*>(ptr))) {
+    return iterator<ET, T>(reinterpret_cast<T*>(ptr));
+}
+
+/**
+ * @brief Make iterator from constant void pointer.
+ *
+ * Data will be reinterpreted using the const-qualified fundamental type for ov::element::Type, then
+ * forwarded to the typed iterator<ET, T>(T*) overload, whose return type this function adopts.
+ *
+ * @tparam ET  OpenVINO element type.
+ * @tparam T   Type the data is reinterpreted as. Defaults to const ov::fundamental_type_for<ET>.
+ * @param ptr  Pointer to data.
+ * @return Iterator for given ET.
+ */
+template <Type_t ET, class T = typename std::add_const<ov::fundamental_type_for<ET>>::type>
+constexpr auto iterator(const void* ptr) -> decltype(iterator<ET, T>(reinterpret_cast<T*>(ptr))) {
+    return iterator<ET, T>(reinterpret_cast<T*>(ptr));
+}
 }  // namespace element
 }  // namespace ov
@@ -7,94 +7,23 @@
 #include <algorithm>
 #include <cstddef>
 
+#include "openvino/core/type/element_iterator.hpp"
 #include "openvino/core/type/element_type.hpp"
 #include "openvino/core/type/float16.hpp"
 #include "openvino/core/type/nf4.hpp"
 
 namespace ov {
-namespace reference {
-namespace detail {
-inline void set_u1(uint8_t* buf, size_t idx, uint8_t val) {
-    const size_t byte_idx = idx / 8;
-    const uint8_t bit_idx = 7 - (idx % 8);  // Reversed order of bits
-    if (val) {
-        buf[byte_idx] |= (1 << bit_idx);
-    } else {
-        buf[byte_idx] &= ~(1 << bit_idx);
-    }
-}
-
-inline uint8_t get_u1(const uint8_t* buf, size_t idx) {
-    const size_t byte_idx = idx / 8;
-    const uint8_t bit_idx = 7 - (idx % 8);  // Reversed order of bits
-    return (buf[byte_idx] & (1 << bit_idx)) ? 1 : 0;
-}
-
-inline void set_u4(uint8_t* buf, size_t idx, uint8_t val) {
-    const size_t byte_idx = idx / 2;
-    const uint8_t bit_shift = 4 * (idx % 2);
-    buf[byte_idx] &= ~(0xF << bit_shift);         // half byte zeroed
-    buf[byte_idx] |= ((val & 0xF) << bit_shift);  // set 1's
-}
-
-inline uint8_t get_u4(const uint8_t* buf, size_t idx) {
-    const size_t byte_idx = idx / 2;
-    const uint8_t bit_shift = 4 * (idx % 2);
-    return (buf[byte_idx] >> bit_shift) & 0xF;
-}
-
-inline void set_i4(uint8_t* buf, size_t idx, int8_t val) {
-    const size_t byte_idx = idx / 2;
-    const uint8_t bit_shift = 4 * (idx % 2);
-    buf[byte_idx] &= ~(0xF << bit_shift);         // half byte zeroed
-    buf[byte_idx] |= ((val & 0xF) << bit_shift);  // set 1's
-}
 
-inline int8_t get_i4(const uint8_t* buf, size_t idx) {
-    const size_t byte_idx = idx / 2;
-    const uint8_t bit_shift = 4 * (idx % 2);
-    uint8_t val = (buf[byte_idx] >> bit_shift) & 0xF;
-    if (val & 0x08) {  // negative number
-        val |= 0xF0;
-    }
-    return val;
-}
-template <typename TO, typename TI>
-TO get_value(const uint8_t* buf, size_t idx, element::Type from_type) {
-    if (from_type == element::u1) {
-        return detail::get_u1(buf, idx);
-    }
-
-    if (from_type == element::u4) {
-        return detail::get_u4(buf, idx);
-    }
-
-    if (from_type == element::i4) {
-        return detail::get_i4(buf, idx);
-    }
-
-    auto v = reinterpret_cast<const TI*>(buf);
-    return static_cast<TO>(v[idx]);
+/**
+ * @brief Checks at compile time whether a type is an element iterator over NF4 data.
+ *
+ * @tparam ElementIter  Iterator type to test; references and cv-qualifiers are removed with std::decay first.
+ * @return True if the decayed type is element::Iterator for nf4 over const or non-const storage.
+ */
+template <class ElementIter>
+constexpr bool is_nf4_iterator() {
+    using iter_type = typename std::decay<ElementIter>::type;
+    using nf4_storage = fundamental_type_for<element::nf4>;
+    using mutable_nf4_iter = element::Iterator<element::nf4, nf4_storage>;
+    using const_nf4_iter = element::Iterator<element::nf4, const nf4_storage>;
+    return std::is_same<iter_type, mutable_nf4_iter>::value || std::is_same<iter_type, const_nf4_iter>::value;
+}
 
-template <typename TI, typename TO>
-void lp_convert(const TI* arg, TO* out, size_t count, element::Type_t src_type, element::Type_t dst_type) {
-    const uint8_t* input = reinterpret_cast<const uint8_t*>(arg);
-    uint8_t* output = reinterpret_cast<uint8_t*>(out);
-    for (size_t i = 0; i < count; ++i) {
-        if (dst_type == element::u1) {
-            detail::set_u1(output, i, detail::get_value<uint8_t, TI>(input, i, src_type));
-        } else if (dst_type == element::u4) {
-            detail::set_u4(output, i, detail::get_value<uint8_t, TI>(input, i, src_type));
-        } else if (dst_type == element::i4) {
-            detail::set_i4(output, i, detail::get_value<int8_t, TI>(input, i, src_type));
-        } else if (src_type == element::nf4) {
-            ov::ConvertNF4::unpack(out, input, i);
-        } else {
-            out[i] = detail::get_value<TO, TI>(input, i, src_type);
-        }
-    }
-}
+namespace reference {
+namespace detail {
 
 template <typename TI, typename TO>
 typename std::enable_if<!std::is_same<TO, char>::value, TO>::type convert(const TI v) {
@@ -107,6 +36,20 @@ typename std::enable_if<std::is_same<TO, char>::value, TO>::type convert(const T
 }
 }  // namespace detail
 
+/**
+ * @brief Converts `count` elements read through `arg` and writes the results through `out`.
+ *
+ * The per-element conversion is detail::convert<From, To>. From/To are normally the value types of the
+ * iterators; when one side is an NF4 iterator and the other side's value type is not integral, float is
+ * used on the NF4 side so the value goes through NF4 (de-)quantization instead of a raw bit copy.
+ *
+ * @param arg    Iterator to the first input element.
+ * @param out    Iterator to the first output element.
+ * @param count  Number of elements to convert.
+ */
+template <typename InputIt, typename OutputIt>
+void convert(InputIt arg, OutputIt out, const size_t count) {
+    using src_value = typename std::iterator_traits<InputIt>::value_type;
+    using dst_value = typename std::iterator_traits<OutputIt>::value_type;
+
+    // NF4 input read as float when the destination is not an integral type.
+    constexpr bool read_nf4_as_float = is_nf4_iterator<InputIt>() && !std::is_integral<dst_value>::value;
+    // NF4 output written as float when the source is not an integral type.
+    constexpr bool write_nf4_as_float = is_nf4_iterator<OutputIt>() && !std::is_integral<src_value>::value;
+
+    using From = typename std::conditional<read_nf4_as_float, const float, src_value>::type;
+    using To = typename std::conditional<write_nf4_as_float, float, dst_value>::type;
+
+    std::transform(arg, arg + count, out, detail::convert<From, To>);
+}
+
 template <typename TI, typename TO>
 void convert(const TI* arg, TO* out, const size_t count) {
     std::transform(arg, arg + count, out, detail::convert<TI, TO>);
@@ -130,7 +73,7 @@ void convert<float16, int8_t>(const float16* arg, int8_t* out, size_t count);
 // Count how many f32 values is out of normal finite numbers range when converted to f16
 size_t count_out_of_f16_range(const float* arg, size_t count);
 
-// Convert values from f32 to f16 with claming to f16 min/max when value is out of normal finite numbers range
+// Convert values from f32 to f16 with clamping to f16 min/max when value is out of normal finite numbers range
 void convert_from_f32_to_f16_with_clamp(const float* arg, float16* out, size_t count);
 }  // namespace reference
 }  // namespace ov
@@ -15,50 +15,31 @@ namespace ov {
 namespace op {
 namespace convert {
 
-constexpr bool is_lp_type(const element::Type_t et) {
-    return (et == element::i4) || (et == element::u1) || (et == element::u4) || (et == element::nf4);
-}
-
 #define CONVERT_ET_LIST \
-    boolean, bf16, f16, f32, f64, i4, i8, i16, i32, i64, u1, u4, u8, u16, u32, u64, nf4, f8e4m3, f8e5m2
+    boolean, bf16, f16, f32, f64, i4, i8, i16, i32, i64, u1, u2, u3, u4, u6, u8, u16, u32, u64, nf4, f8e4m3, f8e5m2
 
 struct Evaluate : public element::NoAction<bool> {
     using element::NoAction<bool>::visit;
-    template <element::Type_t ET, class TI = fundamental_type_for<ET>>
+
+    template <element::Type_t ET_IN, class TI = fundamental_type_for<ET_IN>>
     static result_type visit(const Tensor& arg, Tensor& out, const size_t count) {
         using namespace ov::element;
         return IF_TYPE_OF(Convert_out,
                           CONVERT_ET_LIST,
-                          EvalByOutputType<is_lp_type(ET)>,
+                          EvalByOutputType,
                           out.get_element_type(),
-                          reinterpret_cast<const TI*>(arg.data()),
+                          iterator<ET_IN>(reinterpret_cast<const TI*>(arg.data())),  // input as element iterator
                           out,
-                          count,
-                          ET);
+                          count);
     }
 
 private:
-    template <bool IS_ARG_ET_LP>
     struct EvalByOutputType : public element::NoAction<bool> {
         using element::NoAction<bool>::visit;
 
-        template <element::Type_t ET,
-                  class T,
-                  class T_ET,
-                  class U = ov::fundamental_type_for<ET>,
-                  typename std::enable_if<is_lp_type(ET) || IS_ARG_ET_LP>::type* = nullptr>
-        static result_type visit(const T* arg, Tensor& out, const size_t count, T_ET&& arg_et) {
-            reference::detail::lp_convert(arg, reinterpret_cast<U*>(out.data()), count, arg_et, ET);
-            return true;
-        }
-
-        template <element::Type_t ET,
-                  class T,
-                  class T_ET,
-                  class U = ov::fundamental_type_for<ET>,
-                  typename std::enable_if<!is_lp_type(ET) && !IS_ARG_ET_LP>::type* = nullptr>
-        static result_type visit(const T* arg, Tensor& out, const size_t count, T_ET&&) {
-            reference::convert(arg, out.data<U>(), count);
+        template <element::Type_t ET_OUT, class InputIter, class TO = ov::fundamental_type_for<ET_OUT>>
+        static result_type visit(InputIter arg, Tensor& out, const size_t count) {  // NOTE(review): TO is unused
+            reference::convert(arg, element::iterator<ET_OUT>(out.data()), count);  // output via ET_OUT iterator
             return true;
         }
     };

@@ -8,8 +8,10 @@
 
 #include "openvino/core/except.hpp"
 #include "openvino/core/shape_util.hpp"
+#include "openvino/core/type/element_iterator.hpp"
 #include "openvino/runtime/allocator.hpp"
 #include "openvino/runtime/iremote_tensor.hpp"
+#include "openvino/runtime/make_tensor.hpp"
 #include "openvino/runtime/properties.hpp"
 
 namespace ov {
@@ -21,7 +23,19 @@ size_t ITensor::get_size() const {
 }
 
 size_t ITensor::get_byte_size() const {
-    return (get_size() * get_element_type().bitwidth() + 8 - 1) / 8;
+    // Total number of bits occupied by all elements; rounded up to whole storage units below.
+    const auto& et = get_element_type();
+    const auto bit_size = get_size() * et.bitwidth();
+
+    if (element::is_split_bit_type(et)) {
+        // Split bit types are counted in 24-bit storage units, i.e. 3 bytes per unit.
+        constexpr size_t storage_unit_size = 24;
+        constexpr size_t storage_unit_bytes = 3;
+        return (bit_size + storage_unit_size - 1) / storage_unit_size * storage_unit_bytes;
+    }
+
+    // All other types are rounded up to whole bytes.
+    constexpr size_t storage_unit_size = 8;
+    return (bit_size + storage_unit_size - 1) / storage_unit_size;
 }
 
 bool ITensor::is_continuous() const {