Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[core] Convert operator supports u2, u3, u6 types #23490

Merged
merged 24 commits into from
Mar 31, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
77cd9ef
Add fp32 -> nf4 convert test
praasz Mar 12, 2024
11c3e35
Use element iterator in Convert
praasz Mar 12, 2024
55372c2
Update tensor byte size calculation
praasz Mar 13, 2024
232f9f2
Add u2, u3, u6 type to Convert
praasz Mar 13, 2024
e86a545
Add helpers to create iterator from void pointer
praasz Mar 14, 2024
0ee0ef6
Fix capture list in AllocatedTensor ctor
praasz Mar 16, 2024
23a0e32
Correct NF4 <-> floating point deduction
praasz Mar 17, 2024
5ae1dd7
Restore removed include
praasz Mar 19, 2024
1c6e52d
Merge branch 'master' into feature/add-u2-u3-u6-to-convert
praasz Mar 21, 2024
d470bfc
Merge branch 'master' into feature/add-u2-u3-u6-to-convert
praasz Mar 25, 2024
e989dbd
Fix cast of input tensor data
praasz Mar 25, 2024
2c6cf1d
Merge branch 'master' into feature/add-u2-u3-u6-to-convert
praasz Mar 26, 2024
6c88412
Merge branch 'master' into feature/add-u2-u3-u6-to-convert
praasz Mar 26, 2024
3ffd8f6
Merge branch 'master' into feature/add-u2-u3-u6-to-convert
mlukasze Mar 27, 2024
3c15fba
Merge branch 'master' into feature/add-u2-u3-u6-to-convert
praasz Mar 27, 2024
b8e4e3c
Merge branch 'master' into feature/add-u2-u3-u6-to-convert
praasz Mar 27, 2024
f34fcb8
Merge branch 'master' into feature/add-u2-u3-u6-to-convert
praasz Mar 28, 2024
19a60eb
Merge branch 'master' into feature/add-u2-u3-u6-to-convert
praasz Mar 28, 2024
0221b65
Merge branch 'master' into feature/add-u2-u3-u6-to-convert
praasz Mar 28, 2024
d50d8f2
Merge branch 'master' into feature/add-u2-u3-u6-to-convert
praasz Mar 28, 2024
e9d789e
Merge branch 'master' into feature/add-u2-u3-u6-to-convert
praasz Mar 29, 2024
b601124
Merge branch 'master' into feature/add-u2-u3-u6-to-convert
mlukasze Mar 29, 2024
f26bc10
Merge branch 'master' into feature/add-u2-u3-u6-to-convert
praasz Mar 29, 2024
4264d13
Merge branch 'master' into feature/add-u2-u3-u6-to-convert
praasz Mar 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 81 additions & 3 deletions src/core/dev_api/openvino/core/type/element_iterator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#pragma once

#include "openvino/core/type/element_type_traits.hpp"
#include "openvino/core/type/nf4.hpp"

namespace ov {
namespace util {
Expand Down Expand Up @@ -96,6 +97,11 @@ constexpr size_t bit_width<Type_t::u4>() {
return 4;
}

/**
 * @brief Bit width specialization for the NF4 element type.
 * @return Number of bits occupied by a single NF4 element (4).
 */
template <>
constexpr size_t bit_width<Type_t::nf4>() {
return 4;
}

template <>
constexpr size_t bit_width<Type_t::i4>() {
return 4;
Expand Down Expand Up @@ -147,6 +153,12 @@ class BitProxy<T, ET, typename std::enable_if<is_bit_type(ET) || is_nibble_type(
return (*m_ptr >> m_bit_shift) & value_mask;
}

/**
 * @brief Stores raw bits into the element field of the byte pointed to by m_ptr.
 *
 * The field is m_bits wide and starts at bit m_bit_shift. It is cleared first and then OR-ed with the
 * shifted value, so bits outside the field keep their previous state.
 *
 * @param value Raw bit value to store. It is not masked here, the caller must pass a value which fits in m_bits.
 */
void set_bit_value(uint8_t value) {
constexpr auto value_mask = util::make_n_bit_mask(m_bits);
// Zero only the bits that belong to this element.
*m_ptr &= ~(value_mask << m_bit_shift);
// Place the new value in the cleared field.
*m_ptr |= value << m_bit_shift;
}

public:
using value_type = typename std::decay<T>::type; //!< Fundamental type of bound to BitProxy.

Expand Down Expand Up @@ -182,6 +194,19 @@ class BitProxy<T, ET, typename std::enable_if<is_bit_type(ET) || is_nibble_type(
return static_cast<value_type>(get_bit_value());
}

/**
 * @brief Converts the stored NF4 element to float.
 *
 * The operator takes part in overload resolution only when the element type of the proxy is nf4.
 *
 * @note Implementation is aligned to ConvertNF4::unpack, de-quantization is applied only when converting to
 * floating point. For integral types the raw bit value is returned.
 *
 * @return De-quantized float value of the stored NF4 element.
 */
template <Type_t ETT = ET, typename std::enable_if<ETT == nf4>::type* = nullptr>
operator float() const {
return ConvertNF4::dequantize(get_bit_value());
}

/**
* @brief Converts to fundamental type.
*
Expand All @@ -207,8 +232,17 @@ class BitProxy<T, ET, typename std::enable_if<is_bit_type(ET) || is_nibble_type(
*/
BitProxy<T, ET>& operator=(const value_type v) {
    constexpr auto value_mask = util::make_n_bit_mask(m_bits);
    // Keep only the bits which fit into the element, then let set_bit_value() clear and write the field once
    // (previously the field was also cleared and written inline before this call, doing the same work twice).
    set_bit_value(static_cast<uint8_t>(v) & value_mask);
    return *this;
}

/**
 * @brief Sets the current NF4 value from a float using quantization.
 *
 * The operator takes part in overload resolution only when the element type of the proxy is nf4.
 *
 * @param v Value to be quantized and stored.
 * @return Reference to this proxy.
 */
template <Type_t ETT = ET, typename std::enable_if<ETT == nf4>::type* = nullptr>
BitProxy<T, ET>& operator=(const float v) {
set_bit_value(ConvertNF4::quantize(v));
return *this;
}
};
Expand Down Expand Up @@ -490,13 +524,57 @@ class Iterator {
*
* @tparam ET Type of ov::element::Type_t.
* @tparam T Type of pointer data. Must be fundamental type of ET.

*
* @param ptr Pointer to data.
* @return Element iterator for type ET.
*/
template <Type_t ET, class T, typename std::enable_if<!is_byte_type(ET) && ET != string>::type* = nullptr>
constexpr Iterator<ET, T> iterator(T* ptr) {
    // Non-byte element types need the dedicated Iterator wrapper, build it explicitly from the pointer.
    return Iterator<ET, T>{ptr};
}

/**
 * @brief Makes an iterator from a pointer for standard (byte-sized and string) types.
 *
 * Provides the same interface as the overload used for the remaining element types, so generic code can call
 * iterator<ET>(ptr) for any ov::element::Type. For these types the input pointer is returned unchanged.
 *
 * @tparam ET Type of ov::element::Type_t.
 * @tparam T  Type of pointer data. Must be fundamental type of ET.
 *
 * @param ptr Pointer to data.
 * @return The input pointer, which acts as the element iterator.
 */
template <Type_t ET, class T, typename std::enable_if<is_byte_type(ET) || ET == string>::type* = nullptr>
constexpr T* iterator(T* ptr) {
return ptr;
}

/**
 * @brief Makes an iterator from a void pointer.
 *
 * The pointer is reinterpreted as a pointer to T and forwarded to the typed iterator<ET, T>() overload.
 * By default T is the fundamental type for ET.
 *
 * @tparam ET OpenVINO element type.
 * @tparam T  Type used to reinterpret the data. Defaults to ov::fundamental_type_for<ET>.
 * @param ptr Pointer to data.
 * @return Iterator for given ET, the same type as returned by iterator<ET, T>(T*).
 */
template <Type_t ET, class T = ov::fundamental_type_for<ET>>
constexpr auto iterator(void* ptr) -> decltype(iterator<ET, T>(reinterpret_cast<T*>(ptr))) {
return iterator<ET, T>(reinterpret_cast<T*>(ptr));
}

/**
 * @brief Makes an iterator from a constant void pointer.
 *
 * The pointer is reinterpreted as a pointer to T and forwarded to the typed iterator<ET, T>() overload.
 * By default T is the const-qualified fundamental type for ET, so the data is accessed as constant.
 *
 * @tparam ET OpenVINO element type.
 * @tparam T  Type used to reinterpret the data. Defaults to const ov::fundamental_type_for<ET>.
 * @param ptr Pointer to constant data.
 * @return Iterator for given ET, the same type as returned by iterator<ET, T>(T*).
 */
template <Type_t ET, class T = typename std::add_const<ov::fundamental_type_for<ET>>::type>
constexpr auto iterator(const void* ptr) -> decltype(iterator<ET, T>(reinterpret_cast<T*>(ptr))) {
return iterator<ET, T>(reinterpret_cast<T*>(ptr));
}
} // namespace element
} // namespace ov
105 changes: 24 additions & 81 deletions src/core/reference/include/openvino/reference/convert.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,94 +7,23 @@
#include <algorithm>
#include <cstddef>

#include "openvino/core/type/element_iterator.hpp"
#include "openvino/core/type/element_type.hpp"
#include "openvino/core/type/float16.hpp"
#include "openvino/core/type/nf4.hpp"

namespace ov {
namespace reference {
namespace detail {
/**
 * @brief Sets a single u1 element in a packed bit buffer.
 *
 * Elements are stored with reversed bit order inside a byte: element 0 is the most significant bit.
 *
 * @param buf Packed buffer to modify.
 * @param idx Index of the element (bit) to set.
 * @param val Any non-zero value sets the bit, zero clears it.
 */
inline void set_u1(uint8_t* buf, size_t idx, uint8_t val) {
    const uint8_t mask = static_cast<uint8_t>(1u << (7 - (idx % 8)));
    uint8_t& target = buf[idx / 8];
    target = val ? static_cast<uint8_t>(target | mask) : static_cast<uint8_t>(target & ~mask);
}

/**
 * @brief Reads a single u1 element from a packed bit buffer.
 *
 * Elements are stored with reversed bit order inside a byte: element 0 is the most significant bit.
 *
 * @param buf Packed buffer to read.
 * @param idx Index of the element (bit) to read.
 * @return 1 if the bit is set, otherwise 0.
 */
inline uint8_t get_u1(const uint8_t* buf, size_t idx) {
    const size_t shift = 7 - (idx % 8);
    return static_cast<uint8_t>((buf[idx / 8] >> shift) & 1u);
}

/**
 * @brief Sets a single u4 element in a packed nibble buffer.
 *
 * Even indices occupy the low nibble of a byte, odd indices the high nibble.
 *
 * @param buf Packed buffer to modify.
 * @param idx Index of the element (nibble) to set.
 * @param val Value to store, only the 4 least significant bits are used.
 */
inline void set_u4(uint8_t* buf, size_t idx, uint8_t val) {
    const bool is_high_nibble = (idx % 2) != 0;
    uint8_t& cell = buf[idx / 2];
    cell = is_high_nibble ? static_cast<uint8_t>((cell & 0x0F) | ((val & 0x0F) << 4))
                          : static_cast<uint8_t>((cell & 0xF0) | (val & 0x0F));
}

/**
 * @brief Reads a single u4 element from a packed nibble buffer.
 *
 * Even indices occupy the low nibble of a byte, odd indices the high nibble.
 *
 * @param buf Packed buffer to read.
 * @param idx Index of the element (nibble) to read.
 * @return Value of the element in range [0, 15].
 */
inline uint8_t get_u4(const uint8_t* buf, size_t idx) {
    const uint8_t packed = buf[idx / 2];
    return (idx % 2) ? static_cast<uint8_t>(packed >> 4) : static_cast<uint8_t>(packed & 0x0F);
}

/**
 * @brief Sets a single i4 element in a packed nibble buffer.
 *
 * Even indices occupy the low nibble of a byte, odd indices the high nibble.
 *
 * @param buf Packed buffer to modify.
 * @param idx Index of the element (nibble) to set.
 * @param val Value to store, only the 4 least significant bits are used.
 */
inline void set_i4(uint8_t* buf, size_t idx, int8_t val) {
    const unsigned shift = (idx & 1u) ? 4u : 0u;
    const unsigned nibble = static_cast<unsigned>(val) & 0xFu;
    const unsigned kept = buf[idx / 2] & ~(0xFu << shift);
    buf[idx / 2] = static_cast<uint8_t>(kept | (nibble << shift));
}

/**
 * @brief Reads a single i4 element from a packed nibble buffer.
 *
 * Even indices occupy the low nibble of a byte, odd indices the high nibble. The nibble is sign-extended.
 *
 * @param buf Packed buffer to read.
 * @param idx Index of the element (nibble) to read.
 * @return Value of the element in range [-8, 7].
 */
inline int8_t get_i4(const uint8_t* buf, size_t idx) {
    const uint8_t packed = buf[idx / 2];
    const int nibble = (idx % 2) ? (packed >> 4) : (packed & 0x0F);
    // Sign-extend from 4 bits: flipping the sign bit and subtracting its weight maps 8..15 to -8..-1.
    return static_cast<int8_t>((nibble ^ 0x08) - 0x08);
}
template <typename TO, typename TI>
TO get_value(const uint8_t* buf, size_t idx, element::Type from_type) {
if (from_type == element::u1) {
return detail::get_u1(buf, idx);
}

if (from_type == element::u4) {
return detail::get_u4(buf, idx);
}

if (from_type == element::i4) {
return detail::get_i4(buf, idx);
}

auto v = reinterpret_cast<const TI*>(buf);
return static_cast<TO>(v[idx]);
/**
 * @brief Checks if the type is an element iterator over NF4 data.
 *
 * References and cv-qualifiers of ElementIter are ignored. Both mutable and constant NF4 iterators match.
 *
 * @tparam ElementIter Iterator type to check.
 * @return True if ElementIter is an NF4 element iterator, otherwise false.
 */
template <class ElementIter>
constexpr bool is_nf4_iterator() {
    using Iter = typename std::decay<ElementIter>::type;
    using Nf4Storage = fundamental_type_for<element::nf4>;
    return std::is_same<Iter, element::Iterator<element::nf4, Nf4Storage>>::value ||
           std::is_same<Iter, element::Iterator<element::nf4, const Nf4Storage>>::value;
}

/**
 * @brief Converts elements one by one using the packed (u1, u4, i4) setters/getters or NF4 unpacking.
 *
 * Both buffers are viewed as raw bytes so elements can be addressed by element index. For every element the
 * branch is selected in this order: destination u1, destination u4, destination i4, source nf4, and finally
 * a plain assignment of the value read from the input.
 *
 * @tparam TI Type of input data pointer.
 * @tparam TO Type of output data pointer.
 * @param arg      Pointer to input data.
 * @param out      Pointer to output data.
 * @param count    Number of elements to convert.
 * @param src_type Element type of input data.
 * @param dst_type Element type of output data.
 */
template <typename TI, typename TO>
void lp_convert(const TI* arg, TO* out, size_t count, element::Type_t src_type, element::Type_t dst_type) {
const uint8_t* input = reinterpret_cast<const uint8_t*>(arg);
uint8_t* output = reinterpret_cast<uint8_t*>(out);
for (size_t i = 0; i < count; ++i) {
// The destination type is checked first, so a packed destination wins over the nf4 source branch.
if (dst_type == element::u1) {
detail::set_u1(output, i, detail::get_value<uint8_t, TI>(input, i, src_type));
} else if (dst_type == element::u4) {
detail::set_u4(output, i, detail::get_value<uint8_t, TI>(input, i, src_type));
} else if (dst_type == element::i4) {
detail::set_i4(output, i, detail::get_value<int8_t, TI>(input, i, src_type));
} else if (src_type == element::nf4) {
// NF4 source goes through ConvertNF4::unpack and writes to the typed output pointer.
ov::ConvertNF4::unpack(out, input, i);
} else {
// Destination is not u1/u4/i4: read the source value and store it as TO.
out[i] = detail::get_value<TO, TI>(input, i, src_type);
}
}
}
namespace reference {
namespace detail {

template <typename TI, typename TO>
typename std::enable_if<!std::is_same<TO, char>::value, TO>::type convert(const TI v) {
Expand All @@ -107,6 +36,20 @@ typename std::enable_if<std::is_same<TO, char>::value, TO>::type convert(const T
}
} // namespace detail

template <typename InputIt, typename OutputIt>
void convert(InputIt arg, OutputIt out, const size_t count) {
using IN_T = typename std::iterator_traits<InputIt>::value_type;
using OUT_T = typename std::iterator_traits<OutputIt>::value_type;

// Deduce types for NF4 <-> floating point conversion to use quantization.
using From = typename std::
conditional<is_nf4_iterator<InputIt>() && !std::is_integral<OUT_T>::value, const float, IN_T>::type;
using To =
typename std::conditional<is_nf4_iterator<OutputIt>() && !std::is_integral<IN_T>::value, float, OUT_T>::type;

std::transform(arg, arg + count, out, detail::convert<From, To>);
}

template <typename TI, typename TO>
void convert(const TI* arg, TO* out, const size_t count) {
std::transform(arg, arg + count, out, detail::convert<TI, TO>);
Expand All @@ -130,7 +73,7 @@ void convert<float16, int8_t>(const float16* arg, int8_t* out, size_t count);
// Count how many f32 values are out of the normal finite numbers range when converted to f16
size_t count_out_of_f16_range(const float* arg, size_t count);

// Convert values from f32 to f16 with claming to f16 min/max when value is out of normal finite numbers range
// Convert values from f32 to f16 with clamping to f16 min/max when value is out of normal finite numbers range
void convert_from_f32_to_f16_with_clamp(const float* arg, float16* out, size_t count);
} // namespace reference
} // namespace ov
37 changes: 9 additions & 28 deletions src/core/src/op/convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,50 +15,31 @@ namespace ov {
namespace op {
namespace convert {

/**
 * @brief Checks if the element type is one of i4, u1, u4 or nf4 (the "lp" types of this file).
 * @param et Element type to check.
 * @return True if et is i4, u1, u4 or nf4, otherwise false.
 */
constexpr bool is_lp_type(const element::Type_t et) {
    return et == element::i4 || et == element::u1 || et == element::u4 || et == element::nf4;
}

#define CONVERT_ET_LIST \
boolean, bf16, f16, f32, f64, i4, i8, i16, i32, i64, u1, u4, u8, u16, u32, u64, nf4, f8e4m3, f8e5m2
boolean, bf16, f16, f32, f64, i4, i8, i16, i32, i64, u1, u2, u3, u4, u6, u8, u16, u32, u64, nf4, f8e4m3, f8e5m2

struct Evaluate : public element::NoAction<bool> {
using element::NoAction<bool>::visit;
template <element::Type_t ET, class TI = fundamental_type_for<ET>>

template <element::Type_t ET_IN, class TI = fundamental_type_for<ET_IN>>
static result_type visit(const Tensor& arg, Tensor& out, const size_t count) {
using namespace ov::element;
return IF_TYPE_OF(Convert_out,
CONVERT_ET_LIST,
EvalByOutputType<is_lp_type(ET)>,
EvalByOutputType,
out.get_element_type(),
reinterpret_cast<const TI*>(arg.data()),
iterator<ET_IN>(reinterpret_cast<const TI*>(arg.data())),
out,
count,
ET);
count);
}

private:
template <bool IS_ARG_ET_LP>
struct EvalByOutputType : public element::NoAction<bool> {
using element::NoAction<bool>::visit;

template <element::Type_t ET,
class T,
class T_ET,
class U = ov::fundamental_type_for<ET>,
typename std::enable_if<is_lp_type(ET) || IS_ARG_ET_LP>::type* = nullptr>
static result_type visit(const T* arg, Tensor& out, const size_t count, T_ET&& arg_et) {
reference::detail::lp_convert(arg, reinterpret_cast<U*>(out.data()), count, arg_et, ET);
return true;
}

template <element::Type_t ET,
class T,
class T_ET,
class U = ov::fundamental_type_for<ET>,
typename std::enable_if<!is_lp_type(ET) && !IS_ARG_ET_LP>::type* = nullptr>
static result_type visit(const T* arg, Tensor& out, const size_t count, T_ET&&) {
reference::convert(arg, out.data<U>(), count);
template <element::Type_t ET_OUT, class InputIter, class TO = ov::fundamental_type_for<ET_OUT>>
static result_type visit(InputIter arg, Tensor& out, const size_t count) {
reference::convert(arg, element::iterator<ET_OUT>(out.data()), count);
praasz marked this conversation as resolved.
Show resolved Hide resolved
return true;
}
};
Expand Down
16 changes: 15 additions & 1 deletion src/core/src/runtime/itensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@

#include "openvino/core/except.hpp"
#include "openvino/core/shape_util.hpp"
#include "openvino/core/type/element_iterator.hpp"
#include "openvino/runtime/allocator.hpp"
#include "openvino/runtime/iremote_tensor.hpp"
#include "openvino/runtime/make_tensor.hpp"
#include "openvino/runtime/properties.hpp"

namespace ov {
Expand All @@ -21,7 +23,19 @@ size_t ITensor::get_size() const {
}

// Returns the number of bytes needed to store all tensor elements.
//
// The total bit count (element count * element bit width) is rounded up to a whole number of storage units:
// - split-bit element types (presumably u3 and u6, see element::is_split_bit_type) use a 24-bit (3-byte) unit,
// - every other element type uses a single byte as the unit.
//
// The former unconditional byte-rounding `return` at the top of the function is removed, it made the
// split-bit handling below unreachable.
size_t ITensor::get_byte_size() const {
    const auto& et = get_element_type();
    const auto bit_count = get_size() * et.bitwidth();

    if (element::is_split_bit_type(et)) {
        constexpr size_t storage_unit_bits = 24;
        constexpr size_t storage_unit_bytes = storage_unit_bits / 8;
        // Round up to whole 24-bit units, then express the result in bytes.
        return (bit_count + storage_unit_bits - 1) / storage_unit_bits * storage_unit_bytes;
    }

    constexpr size_t bits_per_byte = 8;
    return (bit_count + bits_per_byte - 1) / bits_per_byte;
}

bool ITensor::is_continuous() const {
Expand Down
Loading
Loading