From 77e101fc647b20ed57257e71debdb6cd8ca901fa Mon Sep 17 00:00:00 2001 From: david dali susanibar arce Date: Wed, 31 Jan 2024 13:38:54 -0500 Subject: [PATCH] GH-39680: [Java] enable half float support on Java module (#39681) ### Rationale for this change - To enable half float support on Java module. ### What changes are included in this PR? - [x] Add initial Float16 type support - [x] Unit test - [x] Integration test - [x] Documentation ### Are these changes tested? Yes. ### Are there any user-facing changes? No * Closes: #39680 Authored-by: david dali susanibar arce Signed-off-by: David Li --- docs/source/status.rst | 9 +- .../apache/arrow/dataset/TestAllTypes.java | 6 +- .../org/apache/arrow/memory/util/Float16.java | 271 +++++++++++ .../org/apache/arrow/memory/TestArrowBuf.java | 11 + .../main/codegen/data/ValueVectorTypes.tdd | 10 + .../main/codegen/templates/UnionReader.java | 6 +- .../org/apache/arrow/vector/Float2Vector.java | 434 ++++++++++++++++++ .../org/apache/arrow/vector/types/Types.java | 16 +- .../apache/arrow/vector/TestValueVector.java | 198 ++++++++ 9 files changed, 953 insertions(+), 8 deletions(-) create mode 100644 java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java diff --git a/docs/source/status.rst b/docs/source/status.rst index 03a87012342c2..11dd9c2c2965c 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -40,7 +40,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | UInt8/16/32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Float16 | ✓ (1) | | ✓ | ✓ | ✓ (2)| ✓ | ✓ | | +| Float16 | ✓ (1) | ✓ (2) | ✓ | ✓ | ✓ (3)| ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Float32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | 
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ @@ -104,7 +104,7 @@ Data Types | Data type | C++ | Java | Go | JavaScript | C# | Rust | Julia | Swift | | (special) | | | | | | | | | +===================+=======+=======+=======+============+=======+=======+=======+=======+ -| Dictionary | ✓ | ✓ (3) | ✓ | ✓ | ✓ | ✓ (3) | ✓ | | +| Dictionary | ✓ | ✓ (4) | ✓ | ✓ | ✓ | ✓ (4) | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Extension | ✓ | ✓ | ✓ | | | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ @@ -114,8 +114,9 @@ Data Types Notes: * \(1) Casting to/from Float16 in C++ is not supported. -* \(2) Float16 support in C# is only available when targeting .NET 6+. -* \(3) Nested dictionaries not supported +* \(2) Casting to/from Float16 in Java is not supported. +* \(3) Float16 support in C# is only available when targeting .NET 6+. +* \(4) Nested dictionaries not supported .. seealso:: The :ref:`format_columnar` specification. 
diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java b/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java index 13b247452348d..6d33cf057ed3a 100644 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java +++ b/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java @@ -32,6 +32,7 @@ import org.apache.arrow.dataset.file.DatasetFileWriter; import org.apache.arrow.dataset.file.FileFormat; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.Float16; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.DateMilliVector; @@ -39,6 +40,7 @@ import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.DurationVector; import org.apache.arrow.vector.FixedSizeBinaryVector; +import org.apache.arrow.vector.Float2Vector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; @@ -89,7 +91,6 @@ public class TestAllTypes extends TestDataset { private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) { // Notes: - // - Float16 is not supported by Java. // - IntervalMonthDayNano is not supported by Parquet. // - Map (GH-38250) and SparseUnion are resulting in serialization errors when writing with the Dataset API. 
// "Unhandled type for Arrow to Parquet schema conversion" errors: IntervalDay, IntervalYear, DenseUnion @@ -109,6 +110,7 @@ private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) { Field.nullablePrimitive("uint16", new ArrowType.Int(16, false)), Field.nullablePrimitive("uint32", new ArrowType.Int(32, false)), Field.nullablePrimitive("uint64", new ArrowType.Int(64, false)), + Field.nullablePrimitive("float16", new ArrowType.FloatingPoint(FloatingPointPrecision.HALF)), Field.nullablePrimitive("float32", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), Field.nullablePrimitive("float64", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), Field.nullablePrimitive("utf8", ArrowType.Utf8.INSTANCE), @@ -148,6 +150,7 @@ private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) { root.getVector("uint16").setNull(0); root.getVector("uint32").setNull(0); root.getVector("uint64").setNull(0); + root.getVector("float16").setNull(0); root.getVector("float32").setNull(0); root.getVector("float64").setNull(0); root.getVector("utf8").setNull(0); @@ -180,6 +183,7 @@ private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) { ((UInt2Vector) root.getVector("uint16")).set(1, 1); ((UInt4Vector) root.getVector("uint32")).set(1, 1); ((UInt8Vector) root.getVector("uint64")).set(1, 1); + ((Float2Vector) root.getVector("float16")).set(1, Float16.toFloat16(+32.875f)); ((Float4Vector) root.getVector("float32")).set(1, 1.0f); ((Float8Vector) root.getVector("float64")).set(1, 1.0); ((VarCharVector) root.getVector("utf8")).set(1, new Text("a")); diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java new file mode 100644 index 0000000000000..8040158fd090e --- /dev/null +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.memory.util; + + +import org.apache.arrow.util.VisibleForTesting; + +/** + * Lifted from Apache Parquet MR project: + * https://github.com/apache/parquet-mr/blob/e87b80308869b77f914fcfd04364686e11158950/parquet-column/src/main/java/org/apache/parquet/schema/Float16.java + *
    + * Changes made: + *
  • Modify the data type input from Parquet-MR Binary (toFloat(Binary b)) to Arrow Java short (toFloat(short b))
  • + *
  • Expose NaN and POSITIVE_INFINITY constants
  • + *
+ * + * + * The class is a utility class to manipulate half-precision 16-bit + * IEEE 754 + * floating point data types (also called fp16 or binary16). A half-precision float can be + * created from or converted to single-precision floats, and is stored in a short data type. + * The IEEE 754 standard specifies a float16 as having the following format: + *
    + *
  • Sign bit: 1 bit
  • + *
  • Exponent width: 5 bits
  • + *
  • Significand: 10 bits
  • + *
+ * + *

The format is laid out as follows:

+ *
+ * 1   11111   1111111111
+ * ^   --^--   -----^----
+ * sign  |          |_______ significand
+ *       |
+ *      -- exponent
+ * 
+ * Half-precision floating points can be useful to save memory and/or + * bandwidth at the expense of range and precision when compared to single-precision + * floating points (float32). + * Ref: https://android.googlesource.com/platform/libcore/+/master/luni/src/main/java/libcore/util/FP16.java + */ +public class Float16 { + // Positive infinity of type half-precision float. + public static final short POSITIVE_INFINITY = (short) 0x7c00; + // A Not-a-Number representation of a half-precision float. + public static final short NaN = (short) 0x7e00; + // The bitmask to and a number with to obtain the sign bit. + private static final int SIGN_MASK = 0x8000; + // The offset to shift by to obtain the exponent bits. + private static final int EXPONENT_SHIFT = 10; + // The bitmask to and a number shifted by EXPONENT_SHIFT right, to obtain exponent bits. + private static final int SHIFTED_EXPONENT_MASK = 0x1f; + // The bitmask to and a number with to obtain significand bits. + private static final int SIGNIFICAND_MASK = 0x3ff; + // The offset of the exponent from the actual value. + private static final int EXPONENT_BIAS = 15; + // The offset to shift by to obtain the sign bit. + private static final int SIGN_SHIFT = 15; + // The bitmask to AND with to obtain exponent and significand bits. + private static final int EXPONENT_SIGNIFICAND_MASK = 0x7fff; + + private static final int FP32_SIGN_SHIFT = 31; + private static final int FP32_EXPONENT_SHIFT = 23; + private static final int FP32_SHIFTED_EXPONENT_MASK = 0xff; + private static final int FP32_SIGNIFICAND_MASK = 0x7fffff; + private static final int FP32_EXPONENT_BIAS = 127; + private static final int FP32_QNAN_MASK = 0x400000; + private static final int FP32_DENORMAL_MAGIC = 126 << 23; + private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC); + + /** + * Returns true if the specified half-precision float value represents + * a Not-a-Number, false otherwise. 
+ * + * @param h A half-precision float value + * @return True if the value is a NaN, false otherwise + * + */ + @VisibleForTesting + public static boolean isNaN(short h) { + return (h & EXPONENT_SIGNIFICAND_MASK) > POSITIVE_INFINITY; + } + + /** + *

Compares the two specified half-precision float values. The following + * conditions apply during the comparison:

+ * + *
    + *
  • NaN is considered by this method to be equal to itself and greater + * than all other half-precision float values (including {@link #POSITIVE_INFINITY})
  • + *
  • Positive zero (+0.0) is considered by this method to be greater than negative zero (-0.0).
  • + *
+ * + * @param x The first half-precision float value to compare. + * @param y The second half-precision float value to compare + * + * @return The value {@code 0} if {@code x} is numerically equal to {@code y}, a + * value less than {@code 0} if {@code x} is numerically less than {@code y}, + * and a value greater than {@code 0} if {@code x} is numerically greater + * than {@code y} + * + */ + @VisibleForTesting + public static int compare(short x, short y) { + boolean xIsNaN = isNaN(x); + boolean yIsNaN = isNaN(y); + + if (!xIsNaN && !yIsNaN) { + int first = ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff); + int second = ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); + // Returns true if the first half-precision float value is less + // (smaller toward negative infinity) than the second half-precision float value. + if (first < second) { + return -1; + } + + // Returns true if the first half-precision float value is greater + // (larger toward positive infinity) than the second half-precision float value. + if (first > second) { + return 1; + } + } + + // Collapse NaNs, akin to halfToIntBits(), but we want to keep + // (signed) short value types to preserve the ordering of -0.0 + // and +0.0 + short xBits = xIsNaN ? NaN : x; + short yBits = yIsNaN ? NaN : y; + return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1)); + } + + /** + * Converts the specified half-precision float value into a + * single-precision float value. The following special cases are handled: + * If the input is NaN, the returned value is Float NaN. + * If the input is POSITIVE_INFINITY or NEGATIVE_INFINITY, the returned value is respectively + * Float POSITIVE_INFINITY or Float NEGATIVE_INFINITY. + * If the input is 0 (positive or negative), the returned value is +/-0.0f. + * Otherwise, the returned value is a normalized single-precision float value. 
+ * + * @param b The half-precision float value to convert to single-precision + * @return A normalized single-precision float value + */ + @VisibleForTesting + public static float toFloat(short b) { + int bits = b & 0xffff; + int s = bits & SIGN_MASK; + int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK; + int m = (bits) & SIGNIFICAND_MASK; + int outE = 0; + int outM = 0; + if (e == 0) { // Denormal or 0 + if (m != 0) { + // Convert denorm fp16 into normalized fp32 + float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m); + o -= FP32_DENORMAL_FLOAT; + return s == 0 ? o : -o; + } + } else { + outM = m << 13; + if (e == 0x1f) { // Infinite or NaN + outE = 0xff; + if (outM != 0) { // SNaNs are quieted + outM |= FP32_QNAN_MASK; + } + } else { + outE = e - EXPONENT_BIAS + FP32_EXPONENT_BIAS; + } + } + int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM; + return Float.intBitsToFloat(out); + } + + /** + * Converts the specified single-precision float value into a + * half-precision float value. The following special cases are handled: + * + * If the input is NaN, the returned value is NaN. + * If the input is Float POSITIVE_INFINITY or Float NEGATIVE_INFINITY, + * the returned value is respectively POSITIVE_INFINITY or NEGATIVE_INFINITY. + * If the input is 0 (positive or negative), the returned value is + * POSITIVE_ZERO or NEGATIVE_ZERO. + * If the input is a less than MIN_VALUE, the returned value + * is flushed to POSITIVE_ZERO or NEGATIVE_ZERO. + * If the input is a less than MIN_NORMAL, the returned value + * is a denorm half-precision float. + * Otherwise, the returned value is rounded to the nearest + * representable half-precision float value. 
+ * + * @param f The single-precision float value to convert to half-precision + * @return A half-precision float value + */ + public static short toFloat16(float f) { + int bits = Float.floatToRawIntBits(f); + int s = (bits >>> FP32_SIGN_SHIFT); + int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_SHIFTED_EXPONENT_MASK; + int m = (bits) & FP32_SIGNIFICAND_MASK; + int outE = 0; + int outM = 0; + if (e == 0xff) { // Infinite or NaN + outE = 0x1f; + outM = m != 0 ? 0x200 : 0; + } else { + e = e - FP32_EXPONENT_BIAS + EXPONENT_BIAS; + if (e >= 0x1f) { // Overflow + outE = 0x1f; + } else if (e <= 0) { // Underflow + if (e < -10) { + // The absolute fp32 value is less than MIN_VALUE, flush to +/-0 + } else { + // The fp32 value is a normalized float less than MIN_NORMAL, + // we convert to a denorm fp16 + m = m | 0x800000; + int shift = 14 - e; + outM = m >> shift; + int lowm = m & ((1 << shift) - 1); + int hway = 1 << (shift - 1); + // if above halfway or exactly halfway and outM is odd + if (lowm + (outM & 1) > hway) { + // Round to nearest even + // Can overflow into exponent bit, which surprisingly is OK. + // This increment relies on the +outM in the return statement below + outM++; + } + } + } else { + outE = e; + outM = m >> 13; + // if above halfway or exactly halfway and outM is odd + if ((m & 0x1fff) + (outM & 0x1) > 0x1000) { + // Round to nearest even + // Can overflow into exponent bit, which surprisingly is OK. + // This increment relies on the +outM in the return statement below + outM++; + } + } + } + // The outM is added here as the +1 increments for outM above can + // cause an overflow in the exponent bit which is OK. + return (short) ((s << SIGN_SHIFT) | (outE << EXPONENT_SHIFT) + outM); + } + + /** + * Returns a string representation of the specified half-precision + * float value. Calling this method is equivalent to calling + * Float.toString(toFloat(h)). 
See {@link Float#toString(float)} + * for more information on the format of the string representation. + * + * @param h A half-precision float value in binary little-endian format + * @return A string representation of the specified value + */ + @VisibleForTesting + public static String toFloatString(short h) { + return Float.toString(Float16.toFloat(h)); + } +} diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java index 9ba42abc1ce89..b4385b72a38cf 100644 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java @@ -29,6 +29,7 @@ import java.nio.ByteOrder; import java.util.Arrays; +import org.apache.arrow.memory.util.Float16; import org.junit.Test; import org.slf4j.LoggerFactory; @@ -180,4 +181,14 @@ public void testEnabledHistoricalLog() { ((Logger) LoggerFactory.getLogger("org.apache.arrow")).setLevel(null); } } + + @Test + public void testArrowBufFloat16() { + try (BufferAllocator allocator = new RootAllocator(); + ArrowBuf buf = allocator.buffer(1024) + ) { + buf.setShort(0, Float16.toFloat16(+32.875f)); + assertEquals((short) 0x501c, buf.getShort(0)); + } + } } diff --git a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd index 2a921804202f0..6c2a967712454 100644 --- a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd +++ b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd @@ -49,6 +49,16 @@ { class: "SmallInt", valueHolder: "Int2Holder"}, ] }, + { + major: "Fixed", + width: 2, + javaType: "short", + boxedType: "Short", + fields: [{name: "value", type: "short"}], + minor: [ + { class: "Float2", valueHolder: "Int2Holder"}, + ] + }, { major: "Fixed", width: 4, diff --git a/java/vector/src/main/codegen/templates/UnionReader.java 
b/java/vector/src/main/codegen/templates/UnionReader.java index 56a6cc90b321b..822d4822987fb 100644 --- a/java/vector/src/main/codegen/templates/UnionReader.java +++ b/java/vector/src/main/codegen/templates/UnionReader.java @@ -39,7 +39,9 @@ @SuppressWarnings("unused") public class UnionReader extends AbstractFieldReader { - private BaseReader[] readers = new BaseReader[45]; + private static final int NUM_SUPPORTED_TYPES = 46; + + private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES]; public UnionVector data; public UnionReader(UnionVector data) { @@ -50,7 +52,7 @@ public MinorType getMinorType() { return TYPES[data.getTypeValue(idx())]; } - private static MinorType[] TYPES = new MinorType[45]; + private static MinorType[] TYPES = new MinorType[NUM_SUPPORTED_TYPES]; static { for (MinorType minorType : MinorType.values()) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java new file mode 100644 index 0000000000000..9d3f25769abff --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java @@ -0,0 +1,434 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.vector; + +import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED; + + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.Float16; +import org.apache.arrow.vector.complex.impl.Float2ReaderImpl; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.holders.Float2Holder; +import org.apache.arrow.vector.holders.NullableFloat2Holder; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.TransferPair; + +/** + * Float2Vector implements a fixed width (2 bytes) vector of + * short values which could be null. A validity buffer (bit vector) is + * maintained to track which elements in the vector are null. + */ +public final class Float2Vector extends BaseFixedWidthVector implements FloatingPointVector { + public static final byte TYPE_WIDTH = 2; + + /** + * Instantiate a Float2Vector. This doesn't allocate any memory for + * the data in vector. + * + * @param name name of the vector + * @param allocator allocator for memory management. + */ + public Float2Vector(String name, BufferAllocator allocator) { + this(name, FieldType.nullable(MinorType.FLOAT2.getType()), allocator); + } + + /** + * Instantiate a Float2Vector. This doesn't allocate any memory for + * the data in vector. + * + * @param name name of the vector + * @param fieldType type of Field materialized by this vector + * @param allocator allocator for memory management. + */ + public Float2Vector(String name, FieldType fieldType, BufferAllocator allocator) { + this(new Field(name, fieldType, null), allocator); + } + + /** + * Instantiate a Float2Vector. This doesn't allocate any memory for + * the data in vector. 
+ * + * @param field field materialized by this vector + * @param allocator allocator for memory management. + */ + public Float2Vector(Field field, BufferAllocator allocator) { + super(field, allocator, TYPE_WIDTH); + } + + @Override + protected FieldReader getReaderImpl() { + return new Float2ReaderImpl(Float2Vector.this); + } + + /** + * Get minor type for this vector. The vector holds values belonging + * to a particular type. + * + * @return {@link MinorType} + */ + @Override + public MinorType getMinorType() { + return MinorType.FLOAT2; + } + + + /*----------------------------------------------------------------* + | | + | vector value retrieval methods | + | | + *----------------------------------------------------------------*/ + + + /** + * Get the element at the given index from the vector. + * + * @param index position of element + * @return element at given index + */ + public short get(int index) throws IllegalStateException { + if (NULL_CHECKING_ENABLED && isSet(index) == 0) { + throw new IllegalStateException("Value at index is null"); + } + return valueBuffer.getShort((long) index * TYPE_WIDTH); + } + + /** + * Get the element at the given index from the vector and + * sets the state in holder. If element at given index + * is null, holder.isSet will be zero. + * + * @param index position of element + */ + public void get(int index, NullableFloat2Holder holder) { + if (isSet(index) == 0) { + holder.isSet = 0; + return; + } + holder.isSet = 1; + holder.value = valueBuffer.getShort((long) index * TYPE_WIDTH); + } + + /** + * Same as {@link #get(int)}. + * + * @param index position of element + * @return element at given index + */ + @Override + public Short getObject(int index) { + if (isSet(index) == 0) { + return null; + } else { + return valueBuffer.getShort((long) index * TYPE_WIDTH); + } + } + + /** + * Given a data buffer, get the value stored at a particular position + * in the vector. + * + *

This method should not be used externally. + * + * @param buffer data buffer + * @param index position of the element. + * @return value stored at the index. + */ + static short get(final ArrowBuf buffer, final int index) { + return buffer.getShort((long) index * TYPE_WIDTH); + } + + @Override + public double getValueAsDouble(int index) { + return getValueAsFloat(index); + } + + public float getValueAsFloat(int index) { + return Float16.toFloat(this.get(index)); + } + + /*----------------------------------------------------------------* + | | + | vector value setter methods | + | | + *----------------------------------------------------------------*/ + + private void setValue(int index, short value) { + valueBuffer.setShort((long) index * TYPE_WIDTH, value); + } + + private void setValue(int index, float value) { + valueBuffer.setShort((long) index * TYPE_WIDTH, Float16.toFloat16(value)); + } + + /** + * Set the element at the given index to the given value. + * + * @param index position of element + * @param value value of element + */ + public void set(int index, short value) { + BitVectorHelper.setBit(validityBuffer, index); + setValue(index, value); + } + + /** + * Set the element at the given index to the given value. + * + * @param index position of element + * @param value value of element + */ + public void setWithPossibleTruncate(int index, float value) { + BitVectorHelper.setBit(validityBuffer, index); + setValue(index, value); + } + + /** + * Set the element at the given index to the value set in data holder. + * If the value in holder is not indicated as set, element in the + * at the given index will be null. 
+ * + * @param index position of element + * @param holder nullable data holder for value of element + */ + public void set(int index, NullableFloat2Holder holder) throws IllegalArgumentException { + if (holder.isSet < 0) { + throw new IllegalArgumentException(); + } else if (holder.isSet > 0) { + BitVectorHelper.setBit(validityBuffer, index); + setValue(index, holder.value); + } else { + BitVectorHelper.unsetBit(validityBuffer, index); + } + } + + /** + * Set the element at the given index to the value set in data holder. + * + * @param index position of element + * @param holder data holder for value of element + */ + public void set(int index, Float2Holder holder) { + BitVectorHelper.setBit(validityBuffer, index); + setValue(index, holder.value); + } + + /** + * Same as {@link #set(int, short)} except that it handles the + * case when index is greater than or equal to existing + * value capacity {@link #getValueCapacity()}. + * + * @param index position of element + * @param value value of element + */ + public void setSafe(int index, short value) { + handleSafe(index); + set(index, value); + } + + /** + * Same as {@link #setWithPossibleTruncate(int, float)} except that it handles the + * case when index is greater than or equal to existing + * value capacity {@link #getValueCapacity()}. + * + * @param index position of element + * @param value value of element + */ + public void setSafeWithPossibleTruncate(int index, float value) { + handleSafe(index); + setWithPossibleTruncate(index, value); + } + + /** + * Same as {@link #set(int, NullableFloat2Holder)} except that it handles the + * case when index is greater than or equal to existing + * value capacity {@link #getValueCapacity()}. 
+ * + * @param index position of element + * @param holder nullable data holder for value of element + */ + public void setSafe(int index, NullableFloat2Holder holder) throws IllegalArgumentException { + handleSafe(index); + set(index, holder); + } + + /** + * Same as {@link #set(int, Float2Holder)} except that it handles the + * case when index is greater than or equal to existing + * value capacity {@link #getValueCapacity()}. + * + * @param index position of element + * @param holder data holder for value of element + */ + public void setSafe(int index, Float2Holder holder) { + handleSafe(index); + set(index, holder); + } + + /** + * Store the given value at a particular position in the vector. isSet indicates + * whether the value is NULL or not. + * + * @param index position of the new value + * @param isSet 0 for NULL value, 1 otherwise + * @param value element value + */ + public void set(int index, int isSet, short value) { + if (isSet > 0) { + set(index, value); + } else { + BitVectorHelper.unsetBit(validityBuffer, index); + } + } + + /** + * Store the given value at a particular position in the vector. isSet indicates + * whether the value is NULL or not. + * + * @param index position of the new value + * @param isSet 0 for NULL value, 1 otherwise + * @param value element value + */ + public void setWithPossibleTruncate(int index, int isSet, float value) { + if (isSet > 0) { + setWithPossibleTruncate(index, value); + } else { + BitVectorHelper.unsetBit(validityBuffer, index); + } + } + + /** + * Same as {@link #set(int, int, short)} except that it handles the case + * when index is greater than or equal to current value capacity of the + * vector. 
+ * + * @param index position of the new value + * @param isSet 0 for NULL value, 1 otherwise + * @param value element value + */ + public void setSafe(int index, int isSet, short value) { + handleSafe(index); + set(index, isSet, value); + } + + /** + * Same as {@link #set(int, int, short)} except that it handles the case + * when index is greater than or equal to current value capacity of the + * vector. + * + * @param index position of the new value + * @param isSet 0 for NULL value, 1 otherwise + * @param value element value + */ + public void setSafeWithPossibleTruncate(int index, int isSet, float value) { + handleSafe(index); + setWithPossibleTruncate(index, isSet, value); + } + + @Override + public void setWithPossibleTruncate(int index, double value) { + throw new UnsupportedOperationException("The operation for double data types is not supported."); + } + + @Override + public void setSafeWithPossibleTruncate(int index, double value) { + throw new UnsupportedOperationException("The operation for double data types is not supported."); + } + + /*----------------------------------------------------------------* + | | + | vector transfer | + | | + *----------------------------------------------------------------*/ + + /** + * Construct a TransferPair comprising this and a target vector of + * the same type. + * + * @param ref name of the target vector + * @param allocator allocator for the target vector + * @return {@link TransferPair} + */ + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator) { + return new TransferImpl(ref, allocator); + } + + /** + * Construct a TransferPair comprising this and a target vector of + * the same type. 
+ * + * @param field Field object used by the target vector + * @param allocator allocator for the target vector + * @return {@link TransferPair} + */ + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new TransferImpl(field, allocator); + } + + /** + * Construct a TransferPair with a desired target vector of the same type. + * + * @param to target vector + * @return {@link TransferPair} + */ + @Override + public TransferPair makeTransferPair(ValueVector to) { + return new TransferImpl((Float2Vector) to); + } + + private class TransferImpl implements TransferPair { + Float2Vector to; + + public TransferImpl(String ref, BufferAllocator allocator) { + to = new Float2Vector(ref, field.getFieldType(), allocator); + } + + public TransferImpl(Field field, BufferAllocator allocator) { + to = new Float2Vector(field, allocator); + } + + public TransferImpl(Float2Vector to) { + this.to = to; + } + + @Override + public Float2Vector getTo() { + return to; + } + + @Override + public void transfer() { + transferTo(to); + } + + @Override + public void splitAndTransfer(int startIndex, int length) { + splitAndTransferTo(startIndex, length, to); + } + + @Override + public void copyValueSafe(int fromIndex, int toIndex) { + to.copyFromSafe(fromIndex, toIndex, Float2Vector.this); + } + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index f29157524f2df..0b0e0d66a98f0 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -18,6 +18,7 @@ package org.apache.arrow.vector.types; import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; +import static org.apache.arrow.vector.types.FloatingPointPrecision.HALF; import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE; import static 
org.apache.arrow.vector.types.UnionMode.Dense; import static org.apache.arrow.vector.types.UnionMode.Sparse; @@ -33,6 +34,7 @@ import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.FixedSizeBinaryVector; +import org.apache.arrow.vector.Float2Vector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; @@ -79,6 +81,7 @@ import org.apache.arrow.vector.complex.impl.DenseUnionWriter; import org.apache.arrow.vector.complex.impl.DurationWriterImpl; import org.apache.arrow.vector.complex.impl.FixedSizeBinaryWriterImpl; +import org.apache.arrow.vector.complex.impl.Float2WriterImpl; import org.apache.arrow.vector.complex.impl.Float4WriterImpl; import org.apache.arrow.vector.complex.impl.Float8WriterImpl; import org.apache.arrow.vector.complex.impl.IntWriterImpl; @@ -432,6 +435,17 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return new IntervalYearWriterImpl((IntervalYearVector) vector); } }, + FLOAT2(new FloatingPoint(HALF)) { /* 2 byte ieee 754 */ + @Override + public FieldVector getNewVector(Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new Float2Vector(field, allocator); + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new Float2WriterImpl((Float2Vector) vector); + } + }, // 4 byte ieee 754 FLOAT4(new FloatingPoint(SINGLE)) { @Override @@ -894,7 +908,7 @@ public MinorType visit(Int type) { public MinorType visit(FloatingPoint type) { switch (type.getPrecision()) { case HALF: - throw new UnsupportedOperationException("NYI: " + type); + return MinorType.FLOAT2; case SINGLE: return MinorType.FLOAT4; case DOUBLE: diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 614aff18d4554..10091aebdd50b 100644 ---
a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -332,6 +332,204 @@ public void testSizeOfValueBuffer() { } } + @Test + public void testFixedFloat2() { + try (final Float2Vector floatVector = new Float2Vector(EMPTY_SCHEMA_PATH, allocator)) { + boolean error = false; + int initialCapacity = 16; + + /* setting these initial capacities must not throw */ + floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1); + floatVector.setInitialCapacity(MAX_VALUE_COUNT); + + try { + floatVector.setInitialCapacity(MAX_VALUE_COUNT * 4); + } catch (OversizedAllocationException oe) { + error = true; + } finally { + assertTrue(error); + error = false; + } + + floatVector.setInitialCapacity(initialCapacity); + /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */ + assertEquals(0, floatVector.getValueCapacity()); + + /* allocate 32 bytes (16 * 2) */ + floatVector.allocateNew(); + /* underlying buffer should be able to store at least 16 values */ + assertTrue(floatVector.getValueCapacity() >= initialCapacity); + initialCapacity = floatVector.getValueCapacity(); + + floatVector.zeroVector(); + + /* populate the floatVector */ + floatVector.set(0, (short) 0x101c); // Float16.toFloat16(+0.00050163269043f) + floatVector.set(2, (short) 0x901c); // Float16.toFloat16(-0.00050163269043f) + floatVector.set(4, (short) 0x101d); // Float16.toFloat16(+0.000502109527588f) + floatVector.set(6, (short) 0x901d); // Float16.toFloat16(-0.000502109527588f) + floatVector.set(8, (short) 0x121c); // Float16.toFloat16(+0.00074577331543f) + floatVector.set(10, (short) 0x921c); // Float16.toFloat16(-0.00074577331543f) + floatVector.set(12, (short) 0x501c); // Float16.toFloat16(+32.875f) + floatVector.set(14, (short) 0xd01c); // Float16.toFloat16(-32.875f) + + try { + floatVector.set(initialCapacity, (short) 0x141c); + } catch (IndexOutOfBoundsException ie) { + error = true; +
} finally { + assertTrue(error); + error = false; + } + + /* check vector contents */ + assertEquals((short) 0x101c, floatVector.get(0)); + assertEquals((short) 0x901c, floatVector.get(2)); + assertEquals((short) 0x101d, floatVector.get(4)); + assertEquals((short) 0x901d, floatVector.get(6)); + assertEquals((short) 0x121c, floatVector.get(8)); + assertEquals((short) 0x921c, floatVector.get(10)); + assertEquals((short) 0x501c, floatVector.get(12)); + assertEquals((short) 0xd01c, floatVector.get(14)); + + try { + floatVector.get(initialCapacity); + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + } + + /* this should trigger a realloc() */ + floatVector.setSafe(initialCapacity, (short) 0x141c); // Float16.toFloat16(+0.00100326538086f) + + /* underlying buffer should now be able to store at least double the number of values */ + assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2); + + /* vector data should still be intact after realloc */ + assertEquals((short) 0x101c, floatVector.get(0)); + assertEquals((short) 0x901c, floatVector.get(2)); + assertEquals((short) 0x101d, floatVector.get(4)); + assertEquals((short) 0x901d, floatVector.get(6)); + assertEquals((short) 0x121c, floatVector.get(8)); + assertEquals((short) 0x921c, floatVector.get(10)); + assertEquals((short) 0x501c, floatVector.get(12)); + assertEquals((short) 0xd01c, floatVector.get(14)); + assertEquals((short) 0x141c, floatVector.get(initialCapacity)); + + /* reset the vector */ + int capacityBeforeReset = floatVector.getValueCapacity(); + floatVector.reset(); + + /* capacity shouldn't change after reset */ + assertEquals(capacityBeforeReset, floatVector.getValueCapacity()); + + /* every slot should be null after the reset */ + for (int i = 0; i < capacityBeforeReset; i++) { + assertTrue("non-zero data not expected at index: " + i, floatVector.isNull(i)); + } + } + } + + @Test + public void testFixedFloat2WithPossibleTruncate() { + try (final Float2Vector
floatVector = new Float2Vector(EMPTY_SCHEMA_PATH, allocator)) { + boolean error = false; + int initialCapacity = 16; + + /* setting these initial capacities must not throw */ + floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1); + floatVector.setInitialCapacity(MAX_VALUE_COUNT); + + try { + floatVector.setInitialCapacity(MAX_VALUE_COUNT * 4); + } catch (OversizedAllocationException oe) { + error = true; + } finally { + assertTrue(error); + error = false; + } + + floatVector.setInitialCapacity(initialCapacity); + /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */ + assertEquals(0, floatVector.getValueCapacity()); + + /* allocate 32 bytes (16 * 2) */ + floatVector.allocateNew(); + /* underlying buffer should be able to store at least 16 values */ + assertTrue(floatVector.getValueCapacity() >= initialCapacity); + initialCapacity = floatVector.getValueCapacity(); + + floatVector.zeroVector(); + + /* populate the floatVector */ + floatVector.set(0, (short) 0x101c); // Float16.toFloat16(+0.00050163269043f) + floatVector.set(2, (short) 0x901c); // Float16.toFloat16(-0.00050163269043f) + floatVector.set(4, (short) 0x101d); // Float16.toFloat16(+0.000502109527588f) + floatVector.setWithPossibleTruncate(6, 2049.0f); // in f32=2049.000000, out f16=2048 + floatVector.setWithPossibleTruncate(8, 4098.0f); // in f32=4098.000000, out f16=4096 + floatVector.setWithPossibleTruncate(10, 8196.0f); // in f32=8196.000000, out f16=8192 + floatVector.setWithPossibleTruncate(12, 16392.0f); // in f32=16392.000000, out f16=16384 + floatVector.setWithPossibleTruncate(14, 32784.0f); // in f32=32784.000000, out f16=32768 + + try { + floatVector.setWithPossibleTruncate(initialCapacity, 1.618034f); // in f32=1.618034, out f16=1.6181641 + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + error = false; + } + + /* check vector contents */ + assertEquals((short) 0x101c, floatVector.get(0)); + assertEquals((short)
0x901c, floatVector.get(2)); + assertEquals((short) 0x101d, floatVector.get(4)); + assertEquals(2048.0f, floatVector.getValueAsFloat(6), 0); + assertEquals(4096.0f, floatVector.getValueAsFloat(8), 0); + assertEquals(8192.0f, floatVector.getValueAsFloat(10), 0); + assertEquals(16384.0f, floatVector.getValueAsDouble(12), 0); + assertEquals(32768.0f, floatVector.getValueAsDouble(14), 0); + + try { + floatVector.get(initialCapacity); + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + } + + /* this should trigger a realloc() */ + floatVector.setSafeWithPossibleTruncate(initialCapacity, 1.618034f); // in f32=1.618034, out f16=1.6181641 + + /* underlying buffer should now be able to store at least double the number of values */ + assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2); + + /* vector data should still be intact after realloc */ + assertEquals((short) 0x101c, floatVector.get(0)); + assertEquals((short) 0x901c, floatVector.get(2)); + assertEquals((short) 0x101d, floatVector.get(4)); + assertEquals(2048.0f, floatVector.getValueAsFloat(6), 0); + assertEquals(4096.0f, floatVector.getValueAsFloat(8), 0); + assertEquals(8192.0f, floatVector.getValueAsFloat(10), 0); + assertEquals(16384.0f, floatVector.getValueAsDouble(12), 0); + assertEquals(32768.0f, floatVector.getValueAsDouble(14), 0); + assertEquals(1.6181641f, floatVector.getValueAsDouble(initialCapacity), 0); + + /* reset the vector */ + int capacityBeforeReset = floatVector.getValueCapacity(); + floatVector.reset(); + + /* capacity shouldn't change after reset */ + assertEquals(capacityBeforeReset, floatVector.getValueCapacity()); + + /* every slot should be null after the reset */ + for (int i = 0; i < capacityBeforeReset; i++) { + assertTrue("non-zero data not expected at index: " + i, floatVector.isNull(i)); + } + } + } + @Test /* Float4Vector */ public void testFixedType3() { try (final Float4Vector floatVector = new Float4Vector(EMPTY_SCHEMA_PATH, allocator)) {