diff --git a/docs/source/status.rst b/docs/source/status.rst index 03a87012342c2..11dd9c2c2965c 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -40,7 +40,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | UInt8/16/32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Float16 | ✓ (1) | | ✓ | ✓ | ✓ (2)| ✓ | ✓ | | +| Float16 | ✓ (1) | ✓ (2) | ✓ | ✓ | ✓ (3)| ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Float32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ @@ -104,7 +104,7 @@ Data Types | Data type | C++ | Java | Go | JavaScript | C# | Rust | Julia | Swift | | (special) | | | | | | | | | +===================+=======+=======+=======+============+=======+=======+=======+=======+ -| Dictionary | ✓ | ✓ (3) | ✓ | ✓ | ✓ | ✓ (3) | ✓ | | +| Dictionary | ✓ | ✓ (4) | ✓ | ✓ | ✓ | ✓ (4) | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Extension | ✓ | ✓ | ✓ | | | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ @@ -114,8 +114,9 @@ Data Types Notes: * \(1) Casting to/from Float16 in C++ is not supported. -* \(2) Float16 support in C# is only available when targeting .NET 6+. -* \(3) Nested dictionaries not supported +* \(2) Casting to/from Float16 in Java is not supported. +* \(3) Float16 support in C# is only available when targeting .NET 6+. +* \(4) Nested dictionaries not supported .. seealso:: The :ref:`format_columnar` specification.
diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java b/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java index 13b247452348d..6d33cf057ed3a 100644 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java +++ b/java/dataset/src/test/java/org/apache/arrow/dataset/TestAllTypes.java @@ -32,6 +32,7 @@ import org.apache.arrow.dataset.file.DatasetFileWriter; import org.apache.arrow.dataset.file.FileFormat; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.Float16; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.DateMilliVector; @@ -39,6 +40,7 @@ import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.DurationVector; import org.apache.arrow.vector.FixedSizeBinaryVector; +import org.apache.arrow.vector.Float2Vector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; @@ -89,7 +91,6 @@ public class TestAllTypes extends TestDataset { private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) { // Notes: - // - Float16 is not supported by Java. // - IntervalMonthDayNano is not supported by Parquet. // - Map (GH-38250) and SparseUnion are resulting in serialization errors when writing with the Dataset API. 
// "Unhandled type for Arrow to Parquet schema conversion" errors: IntervalDay, IntervalYear, DenseUnion @@ -109,6 +110,7 @@ private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) { Field.nullablePrimitive("uint16", new ArrowType.Int(16, false)), Field.nullablePrimitive("uint32", new ArrowType.Int(32, false)), Field.nullablePrimitive("uint64", new ArrowType.Int(64, false)), + Field.nullablePrimitive("float16", new ArrowType.FloatingPoint(FloatingPointPrecision.HALF)), Field.nullablePrimitive("float32", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), Field.nullablePrimitive("float64", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), Field.nullablePrimitive("utf8", ArrowType.Utf8.INSTANCE), @@ -148,6 +150,7 @@ private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) { root.getVector("uint16").setNull(0); root.getVector("uint32").setNull(0); root.getVector("uint64").setNull(0); + root.getVector("float16").setNull(0); root.getVector("float32").setNull(0); root.getVector("float64").setNull(0); root.getVector("utf8").setNull(0); @@ -180,6 +183,7 @@ private VectorSchemaRoot generateAllTypesVector(BufferAllocator allocator) { ((UInt2Vector) root.getVector("uint16")).set(1, 1); ((UInt4Vector) root.getVector("uint32")).set(1, 1); ((UInt8Vector) root.getVector("uint64")).set(1, 1); + ((Float2Vector) root.getVector("float16")).set(1, Float16.toFloat16(+32.875f)); ((Float4Vector) root.getVector("float32")).set(1, 1.0f); ((Float8Vector) root.getVector("float64")).set(1, 1.0); ((VarCharVector) root.getVector("utf8")).set(1, new Text("a")); diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java new file mode 100644 index 0000000000000..8040158fd090e --- /dev/null +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.memory.util; + + +import org.apache.arrow.util.VisibleForTesting; + +/** + * Lifted from Apache Parquet MR project: + * https://github.com/apache/parquet-mr/blob/e87b80308869b77f914fcfd04364686e11158950/parquet-column/src/main/java/org/apache/parquet/schema/Float16.java + *
The format is laid out as follows:
+ *+ * 1 11111 1111111111 + * ^ --^-- -----^---- + * sign | |_______ significand + * | + * -- exponent + *+ * Half-precision floating points can be useful to save memory and/or + * bandwidth at the expense of range and precision when compared to single-precision + * floating points (float32). + * Ref: https://android.googlesource.com/platform/libcore/+/master/luni/src/main/java/libcore/util/FP16.java + */ +public class Float16 { + // Positive infinity of type half-precision float. + public static final short POSITIVE_INFINITY = (short) 0x7c00; + // A Not-a-Number representation of a half-precision float. + public static final short NaN = (short) 0x7e00; + // The bitmask to and a number with to obtain the sign bit. + private static final int SIGN_MASK = 0x8000; + // The offset to shift by to obtain the exponent bits. + private static final int EXPONENT_SHIFT = 10; + // The bitmask to and a number shifted by EXPONENT_SHIFT right, to obtain exponent bits. + private static final int SHIFTED_EXPONENT_MASK = 0x1f; + // The bitmask to and a number with to obtain significand bits. + private static final int SIGNIFICAND_MASK = 0x3ff; + // The offset of the exponent from the actual value. + private static final int EXPONENT_BIAS = 15; + // The offset to shift by to obtain the sign bit. + private static final int SIGN_SHIFT = 15; + // The bitmask to AND with to obtain exponent and significand bits. 
+ private static final int EXPONENT_SIGNIFICAND_MASK = 0x7fff; + + private static final int FP32_SIGN_SHIFT = 31; + private static final int FP32_EXPONENT_SHIFT = 23; + private static final int FP32_SHIFTED_EXPONENT_MASK = 0xff; + private static final int FP32_SIGNIFICAND_MASK = 0x7fffff; + private static final int FP32_EXPONENT_BIAS = 127; + private static final int FP32_QNAN_MASK = 0x400000; + private static final int FP32_DENORMAL_MAGIC = 126 << 23; + private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC); + + /** + * Returns true if the specified half-precision float value represents + * a Not-a-Number, false otherwise. + * + * @param h A half-precision float value + * @return True if the value is a NaN, false otherwise + * + */ + @VisibleForTesting + public static boolean isNaN(short h) { + return (h & EXPONENT_SIGNIFICAND_MASK) > POSITIVE_INFINITY; + } + + /** + *
Compares the two specified half-precision float values. The following + * conditions apply during the comparison:
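+ /**
+ * Returns a string representation of the specified half-precision
+ * float value. Calling this method is equivalent to calling
+ * <code>Float.toString(toFloat(h))</code>. See {@link Float#toString(float)}
+ * for more information on the format of the string representation.
+ *
+ * @param h A half-precision float value in binary little-endian format
+ * @return A string representation of the specified value
+ */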
+  @VisibleForTesting
+  public static String toFloatString(short h) {
+    // Widen the half-precision bits to a float, then reuse the JDK's float formatting.
+    final float widened = toFloat(h);
+    return Float.toString(widened);
+  }
+}
diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java
index 9ba42abc1ce89..b4385b72a38cf 100644
--- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java
+++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java
@@ -29,6 +29,7 @@
import java.nio.ByteOrder;
import java.util.Arrays;
+import org.apache.arrow.memory.util.Float16;
import org.junit.Test;
import org.slf4j.LoggerFactory;
@@ -180,4 +181,14 @@ public void testEnabledHistoricalLog() {
((Logger) LoggerFactory.getLogger("org.apache.arrow")).setLevel(null);
}
}
+
+ @Test
+ public void testArrowBufFloat16() {
+ try (BufferAllocator allocator = new RootAllocator();
+ ArrowBuf buf = allocator.buffer(1024)
+ ) {
+ buf.setShort(0, Float16.toFloat16(+32.875f));
+ assertEquals((short) 0x501c, buf.getShort(0));
+ }
+ }
}
diff --git a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
index 2a921804202f0..6c2a967712454 100644
--- a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
+++ b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
@@ -49,6 +49,16 @@
{ class: "SmallInt", valueHolder: "Int2Holder"},
]
},
+ {
+ major: "Fixed",
+ width: 2,
+ javaType: "short",
+ boxedType: "Short",
+ fields: [{name: "value", type: "short"}],
+ minor: [
+ { class: "Float2", valueHolder: "Int2Holder"},
+ ]
+ },
{
major: "Fixed",
width: 4,
diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java
index 56a6cc90b321b..822d4822987fb 100644
--- a/java/vector/src/main/codegen/templates/UnionReader.java
+++ b/java/vector/src/main/codegen/templates/UnionReader.java
@@ -39,7 +39,9 @@
@SuppressWarnings("unused")
public class UnionReader extends AbstractFieldReader {
- private BaseReader[] readers = new BaseReader[45];
+ private static final int NUM_SUPPORTED_TYPES = 46;
+
+ private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES];
public UnionVector data;
public UnionReader(UnionVector data) {
@@ -50,7 +52,7 @@ public MinorType getMinorType() {
return TYPES[data.getTypeValue(idx())];
}
- private static MinorType[] TYPES = new MinorType[45];
+ private static MinorType[] TYPES = new MinorType[NUM_SUPPORTED_TYPES];
static {
for (MinorType minorType : MinorType.values()) {
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java
new file mode 100644
index 0000000000000..9d3f25769abff
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/Float2Vector.java
@@ -0,0 +1,434 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.Float16;
+import org.apache.arrow.vector.complex.impl.Float2ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.Float2Holder;
+import org.apache.arrow.vector.holders.NullableFloat2Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * Float2Vector implements a fixed width (2 bytes) vector of
+ * short values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class Float2Vector extends BaseFixedWidthVector implements FloatingPointVector {
+ public static final byte TYPE_WIDTH = 2;
+
+ /**
+ * Instantiate a Float2Vector whose field type is the nullable form of
+ * {@link MinorType#FLOAT2}; delegates to
+ * {@link #Float2Vector(String, FieldType, BufferAllocator)}.
+ * This doesn't allocate any memory for the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public Float2Vector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.FLOAT2.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a Float2Vector from a name and a field type. The two are
+ * wrapped in a new {@link Field} (constructed with {@code null} as its
+ * third argument) and passed to {@link #Float2Vector(Field, BufferAllocator)}.
+ * This doesn't allocate any memory for the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Float2Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a Float2Vector for the given field, passing
+ * {@link #TYPE_WIDTH} (2 bytes per element) to the fixed-width base class.
+ * This doesn't allocate any memory for the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Float2Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ }
+
+  /**
+   * Creates the field reader for this vector.
+   *
+   * @return a new {@link Float2ReaderImpl} wrapping this vector
+   */
+  @Override
+  protected FieldReader getReaderImpl() {
+    return new Float2ReaderImpl(this);
+  }
+
+ /**
+ * Get the minor type of this vector, which is always
+ * {@link MinorType#FLOAT2}.
+ *
+ * @return {@link MinorType#FLOAT2}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.FLOAT2;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+  /**
+   * Reads the raw 16-bit value stored at the given index. The value is
+   * returned as stored in the value buffer; use {@link #getValueAsFloat(int)}
+   * to obtain it converted to a float.
+   *
+   * @param index position of element
+   * @return the short stored at the given index
+   * @throws IllegalStateException if null checking is enabled and the element is null
+   */
+  public short get(int index) throws IllegalStateException {
+    // The validity bit is only consulted when null checking is switched on.
+    final boolean nullSlot = NULL_CHECKING_ENABLED && isSet(index) == 0;
+    if (nullSlot) {
+      throw new IllegalStateException("Value at index is null");
+    }
+    final long byteOffset = (long) index * TYPE_WIDTH;
+    return valueBuffer.getShort(byteOffset);
+  }
+
+  /**
+   * Copies the element at the given index into the supplied holder.
+   * When the element is null only {@code holder.isSet} is written (as 0)
+   * and {@code holder.value} is left untouched; otherwise {@code holder.isSet}
+   * becomes 1 and {@code holder.value} receives the stored short.
+   *
+   * @param index position of element
+   * @param holder nullable holder that receives the element state
+   */
+  public void get(int index, NullableFloat2Holder holder) {
+    if (isSet(index) != 0) {
+      holder.isSet = 1;
+      holder.value = valueBuffer.getShort((long) index * TYPE_WIDTH);
+    } else {
+      holder.isSet = 0;
+    }
+  }
+
+  /**
+   * Returns the element at the given index as a boxed {@link Short}.
+   * Unlike {@link #get(int)}, a null element yields {@code null} here
+   * instead of an exception.
+   *
+   * @param index position of element
+   * @return the stored short, or {@code null} if the element is not set
+   */
+  @Override
+  public Short getObject(int index) {
+    if (isSet(index) == 0) {
+      return null;
+    }
+    final long byteOffset = (long) index * TYPE_WIDTH;
+    return valueBuffer.getShort(byteOffset);
+  }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * This method should not be used externally. + * + * @param buffer data buffer + * @param index position of the element. + * @return value stored at the index. + */ + static short get(final ArrowBuf buffer, final int index) { + return buffer.getShort((long) index * TYPE_WIDTH); + } + + @Override + public double getValueAsDouble(int index) { + return getValueAsFloat(index); + } + + public float getValueAsFloat(int index) { + return Float16.toFloat(this.get(index)); + } + + /*----------------------------------------------------------------* + | | + | vector value setter methods | + | | + *----------------------------------------------------------------*/ + + private void setValue(int index, short value) { + valueBuffer.setShort((long) index * TYPE_WIDTH, value); + } + + private void setValue(int index, float value) { + valueBuffer.setShort((long) index * TYPE_WIDTH, Float16.toFloat16(value)); + } + + /** + * Set the element at the given index to the given value. + * + * @param index position of element + * @param value value of element + */ + public void set(int index, short value) { + BitVectorHelper.setBit(validityBuffer, index); + setValue(index, value); + } + + /** + * Set the element at the given index to the given value. + * + * @param index position of element + * @param value value of element + */ + public void setWithPossibleTruncate(int index, float value) { + BitVectorHelper.setBit(validityBuffer, index); + setValue(index, value); + } + + /** + * Set the element at the given index to the value set in data holder. + * If the value in holder is not indicated as set, element in the + * at the given index will be null. 
+ * + * @param index position of element + * @param holder nullable data holder for value of element + */ + public void set(int index, NullableFloat2Holder holder) throws IllegalArgumentException { + if (holder.isSet < 0) { + throw new IllegalArgumentException(); + } else if (holder.isSet > 0) { + BitVectorHelper.setBit(validityBuffer, index); + setValue(index, holder.value); + } else { + BitVectorHelper.unsetBit(validityBuffer, index); + } + } + + /** + * Set the element at the given index to the value set in data holder. + * + * @param index position of element + * @param holder data holder for value of element + */ + public void set(int index, Float2Holder holder) { + BitVectorHelper.setBit(validityBuffer, index); + setValue(index, holder.value); + } + + /** + * Same as {@link #set(int, short)} except that it handles the + * case when index is greater than or equal to existing + * value capacity {@link #getValueCapacity()}. + * + * @param index position of element + * @param value value of element + */ + public void setSafe(int index, short value) { + handleSafe(index); + set(index, value); + } + + /** + * Same as {@link #setWithPossibleTruncate(int, float)} except that it handles the + * case when index is greater than or equal to existing + * value capacity {@link #getValueCapacity()}. + * + * @param index position of element + * @param value value of element + */ + public void setSafeWithPossibleTruncate(int index, float value) { + handleSafe(index); + setWithPossibleTruncate(index, value); + } + + /** + * Same as {@link #set(int, NullableFloat2Holder)} except that it handles the + * case when index is greater than or equal to existing + * value capacity {@link #getValueCapacity()}. 
+ * + * @param index position of element + * @param holder nullable data holder for value of element + */ + public void setSafe(int index, NullableFloat2Holder holder) throws IllegalArgumentException { + handleSafe(index); + set(index, holder); + } + + /** + * Same as {@link #set(int, Float2Holder)} except that it handles the + * case when index is greater than or equal to existing + * value capacity {@link #getValueCapacity()}. + * + * @param index position of element + * @param holder data holder for value of element + */ + public void setSafe(int index, Float2Holder holder) { + handleSafe(index); + set(index, holder); + } + + /** + * Store the given value at a particular position in the vector. isSet indicates + * whether the value is NULL or not. + * + * @param index position of the new value + * @param isSet 0 for NULL value, 1 otherwise + * @param value element value + */ + public void set(int index, int isSet, short value) { + if (isSet > 0) { + set(index, value); + } else { + BitVectorHelper.unsetBit(validityBuffer, index); + } + } + + /** + * Store the given value at a particular position in the vector. isSet indicates + * whether the value is NULL or not. + * + * @param index position of the new value + * @param isSet 0 for NULL value, 1 otherwise + * @param value element value + */ + public void setWithPossibleTruncate(int index, int isSet, float value) { + if (isSet > 0) { + setWithPossibleTruncate(index, value); + } else { + BitVectorHelper.unsetBit(validityBuffer, index); + } + } + + /** + * Same as {@link #set(int, int, short)} except that it handles the case + * when index is greater than or equal to current value capacity of the + * vector. 
+ * + * @param index position of the new value + * @param isSet 0 for NULL value, 1 otherwise + * @param value element value + */ + public void setSafe(int index, int isSet, short value) { + handleSafe(index); + set(index, isSet, value); + } + + /** + * Same as {@link #setWithPossibleTruncate(int, int, float)} except that it handles the case + * when index is greater than or equal to current value capacity of the + * vector. + * + * @param index position of the new value + * @param isSet 0 for NULL value, 1 otherwise + * @param value element value + */ + public void setSafeWithPossibleTruncate(int index, int isSet, float value) { + handleSafe(index); + setWithPossibleTruncate(index, isSet, value); + } + + @Override + public void setWithPossibleTruncate(int index, double value) { + throw new UnsupportedOperationException("The operation for double data types is not supported."); + } + + @Override + public void setSafeWithPossibleTruncate(int index, double value) { + throw new UnsupportedOperationException("The operation for double data types is not supported."); + } + + /*----------------------------------------------------------------* + | | + | vector transfer | + | | + *----------------------------------------------------------------*/ + + /** + * Construct a TransferPair comprising this and a target vector of + * the same type.
+ * + * @param field Field object used by the target vector + * @param allocator allocator for the target vector + * @return {@link TransferPair} + */ + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new TransferImpl(field, allocator); + } + + /** + * Construct a TransferPair with a desired target vector of the same type. + * + * @param to target vector + * @return {@link TransferPair} + */ + @Override + public TransferPair makeTransferPair(ValueVector to) { + return new TransferImpl((Float2Vector) to); + } + + private class TransferImpl implements TransferPair { + Float2Vector to; + + public TransferImpl(String ref, BufferAllocator allocator) { + to = new Float2Vector(ref, field.getFieldType(), allocator); + } + + public TransferImpl(Field field, BufferAllocator allocator) { + to = new Float2Vector(field, allocator); + } + + public TransferImpl(Float2Vector to) { + this.to = to; + } + + @Override + public Float2Vector getTo() { + return to; + } + + @Override + public void transfer() { + transferTo(to); + } + + @Override + public void splitAndTransfer(int startIndex, int length) { + splitAndTransferTo(startIndex, length, to); + } + + @Override + public void copyValueSafe(int fromIndex, int toIndex) { + to.copyFromSafe(fromIndex, toIndex, Float2Vector.this); + } + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index f29157524f2df..0b0e0d66a98f0 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -18,6 +18,7 @@ package org.apache.arrow.vector.types; import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; +import static org.apache.arrow.vector.types.FloatingPointPrecision.HALF; import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE; import static 
org.apache.arrow.vector.types.UnionMode.Dense; import static org.apache.arrow.vector.types.UnionMode.Sparse; @@ -33,6 +34,7 @@ import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.FixedSizeBinaryVector; +import org.apache.arrow.vector.Float2Vector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; @@ -79,6 +81,7 @@ import org.apache.arrow.vector.complex.impl.DenseUnionWriter; import org.apache.arrow.vector.complex.impl.DurationWriterImpl; import org.apache.arrow.vector.complex.impl.FixedSizeBinaryWriterImpl; +import org.apache.arrow.vector.complex.impl.Float2WriterImpl; import org.apache.arrow.vector.complex.impl.Float4WriterImpl; import org.apache.arrow.vector.complex.impl.Float8WriterImpl; import org.apache.arrow.vector.complex.impl.IntWriterImpl; @@ -432,6 +435,17 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return new IntervalYearWriterImpl((IntervalYearVector) vector); } }, + FLOAT2(new FloatingPoint(HALF)) { + @Override + public FieldVector getNewVector(Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { + return new Float2Vector(field, allocator); + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new Float2WriterImpl((Float2Vector) vector); + } + }, // 4 byte ieee 754 FLOAT4(new FloatingPoint(SINGLE)) { @Override @@ -894,7 +908,7 @@ public MinorType visit(Int type) { public MinorType visit(FloatingPoint type) { switch (type.getPrecision()) { case HALF: - throw new UnsupportedOperationException("NYI: " + type); + return MinorType.FLOAT2; case SINGLE: return MinorType.FLOAT4; case DOUBLE: diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 614aff18d4554..10091aebdd50b 100644 --- 
a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -332,6 +332,204 @@ public void testSizeOfValueBuffer() { } } + @Test + public void testFixedFloat2() { + try (final Float2Vector floatVector = new Float2Vector(EMPTY_SCHEMA_PATH, allocator)) { + boolean error = false; + int initialCapacity = 16; + + /* we should not throw exception for these values of capacity */ + floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1); + floatVector.setInitialCapacity(MAX_VALUE_COUNT); + + try { + floatVector.setInitialCapacity(MAX_VALUE_COUNT * 4); + } catch (OversizedAllocationException oe) { + error = true; + } finally { + assertTrue(error); + error = false; + } + + floatVector.setInitialCapacity(initialCapacity); + /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */ + assertEquals(0, floatVector.getValueCapacity()); + + /* allocate 32 bytes (16 * 2) */ + floatVector.allocateNew(); + /* underlying buffer should be able to store 16 values */ + assertTrue(floatVector.getValueCapacity() >= initialCapacity); + initialCapacity = floatVector.getValueCapacity(); + + floatVector.zeroVector(); + + /* populate the floatVector */ + floatVector.set(0, (short) 0x101c); // Float16.toFloat16(+0.00050163269043f) + floatVector.set(2, (short) 0x901c); // Float16.toFloat16(-0.00050163269043f) + floatVector.set(4, (short) 0x101d); // Float16.toFloat16(+0.000502109527588f) + floatVector.set(6, (short) 0x901d); // Float16.toFloat16(-0.000502109527588f) + floatVector.set(8, (short) 0x121c); // Float16.toFloat16(+0.00074577331543f) + floatVector.set(10, (short) 0x921c); // Float16.toFloat16(-0.00074577331543f) + floatVector.set(12, (short) 0x501c); // Float16.toFloat16(+32.875f) + floatVector.set(14, (short) 0xd01c); // Float16.toFloat16(-32.875f) + + try { + floatVector.set(initialCapacity, (short) 0x141c); + } catch (IndexOutOfBoundsException ie) { + error = true; + 
} finally { + assertTrue(error); + error = false; + } + + /* check vector contents */ + assertEquals((short) 0x101c, floatVector.get(0)); + assertEquals((short) 0x901c, floatVector.get(2)); + assertEquals((short) 0x101d, floatVector.get(4)); + assertEquals((short) 0x901d, floatVector.get(6)); + assertEquals((short) 0x121c, floatVector.get(8)); + assertEquals((short) 0x921c, floatVector.get(10)); + assertEquals((short) 0x501c, floatVector.get(12)); + assertEquals((short) 0xd01c, floatVector.get(14)); + + try { + floatVector.get(initialCapacity); + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + } + + /* this should trigger a realloc() */ + floatVector.setSafe(initialCapacity, (short) 0x141c); // Float16.toFloat16(+0.00100326538086f) + + /* underlying buffer should now be able to store double the number of values */ + assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2); + + /* vector data should still be intact after realloc */ + assertEquals((short) 0x101c, floatVector.get(0)); + assertEquals((short) 0x901c, floatVector.get(2)); + assertEquals((short) 0x101d, floatVector.get(4)); + assertEquals((short) 0x901d, floatVector.get(6)); + assertEquals((short) 0x121c, floatVector.get(8)); + assertEquals((short) 0x921c, floatVector.get(10)); + assertEquals((short) 0x501c, floatVector.get(12)); + assertEquals((short) 0xd01c, floatVector.get(14)); + assertEquals((short) 0x141c, floatVector.get(initialCapacity)); + + /* reset the vector */ + int capacityBeforeReset = floatVector.getValueCapacity(); + floatVector.reset(); + + /* capacity shouldn't change after reset */ + assertEquals(capacityBeforeReset, floatVector.getValueCapacity()); + + /* vector data should be zeroed out */ + for (int i = 0; i < capacityBeforeReset; i++) { + assertTrue("non-zero data not expected at index: " + i, floatVector.isNull(i)); + } + } + } + + @Test + public void testFixedFloat2WithPossibleTruncate() { + try (final Float2Vector 
floatVector = new Float2Vector(EMPTY_SCHEMA_PATH, allocator)) { + boolean error = false; + int initialCapacity = 16; + + /* we should not throw exception for these values of capacity */ + floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1); + floatVector.setInitialCapacity(MAX_VALUE_COUNT); + + try { + floatVector.setInitialCapacity(MAX_VALUE_COUNT * 4); + } catch (OversizedAllocationException oe) { + error = true; + } finally { + assertTrue(error); + error = false; + } + + floatVector.setInitialCapacity(initialCapacity); + /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */ + assertEquals(0, floatVector.getValueCapacity()); + + /* allocate 32 bytes (16 * 2) */ + floatVector.allocateNew(); + /* underlying buffer should be able to store 16 values */ + assertTrue(floatVector.getValueCapacity() >= initialCapacity); + initialCapacity = floatVector.getValueCapacity(); + + floatVector.zeroVector(); + + /* populate the floatVector */ + floatVector.set(0, (short) 0x101c); // Float16.toFloat16(+0.00050163269043f) + floatVector.set(2, (short) 0x901c); // Float16.toFloat16(-0.00050163269043f) + floatVector.set(4, (short) 0x101d); // Float16.toFloat16(+0.000502109527588f) + floatVector.setWithPossibleTruncate(6, 2049.0f); // in f32=2049.000000, out f16=2048 + floatVector.setWithPossibleTruncate(8, 4098.0f); // in f32=4098.000000, out f16=4096 + floatVector.setWithPossibleTruncate(10, 8196.0f); // in f32=8196.000000, out f16=8192 + floatVector.setWithPossibleTruncate(12, 16392.0f); // in f32=16392.000000, out f16=16384 + floatVector.setWithPossibleTruncate(14, 32784.0f); // in f32=32784.000000, out f16=32768 + + try { + floatVector.setWithPossibleTruncate(initialCapacity, 1.618034f); // in f32=1.618034, out f16=1.6181641 + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + error = false; + } + + /* check vector contents */ + assertEquals((short) 0x101c, floatVector.get(0)); + assertEquals((short) 
0x901c, floatVector.get(2)); + assertEquals((short) 0x101d, floatVector.get(4)); + assertEquals(2048.0f, floatVector.getValueAsFloat(6), 0); + assertEquals(4096.0f, floatVector.getValueAsFloat(8), 0); + assertEquals(8192.0f, floatVector.getValueAsFloat(10), 0); + assertEquals(16384.0f, floatVector.getValueAsDouble(12), 0); + assertEquals(32768.0f, floatVector.getValueAsDouble(14), 0); + + try { + floatVector.get(initialCapacity); + } catch (IndexOutOfBoundsException ie) { + error = true; + } finally { + assertTrue(error); + } + + /* this should trigger a realloc() */ + floatVector.setSafeWithPossibleTruncate(initialCapacity, 1.618034f); // in f32=1.618034, out f16=1.6181641 + + /* underlying buffer should now be able to store double the number of values */ + assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2); + + /* vector data should still be intact after realloc */ + assertEquals((short) 0x101c, floatVector.get(0)); + assertEquals((short) 0x901c, floatVector.get(2)); + assertEquals((short) 0x101d, floatVector.get(4)); + assertEquals(2048.0f, floatVector.getValueAsFloat(6), 0); + assertEquals(4096.0f, floatVector.getValueAsFloat(8), 0); + assertEquals(8192.0f, floatVector.getValueAsFloat(10), 0); + assertEquals(16384.0f, floatVector.getValueAsDouble(12), 0); + assertEquals(32768.0f, floatVector.getValueAsDouble(14), 0); + assertEquals(1.6181641f, floatVector.getValueAsDouble(initialCapacity), 0); + + /* reset the vector */ + int capacityBeforeReset = floatVector.getValueCapacity(); + floatVector.reset(); + + /* capacity shouldn't change after reset */ + assertEquals(capacityBeforeReset, floatVector.getValueCapacity()); + + /* vector data should be zeroed out */ + for (int i = 0; i < capacityBeforeReset; i++) { + assertTrue("non-zero data not expected at index: " + i, floatVector.isNull(i)); + } + } + } + @Test /* Float4Vector */ public void testFixedType3() { try (final Float4Vector floatVector = new Float4Vector(EMPTY_SCHEMA_PATH, allocator)) {