diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java index dc708724043d0..2be0801ca9e08 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java @@ -98,6 +98,11 @@ public ColumnBinder visit(ArrowType.Union type) { throw new UnsupportedOperationException("No column binder implemented for type " + type); } + @Override + public ColumnBinder visit(ArrowType.RunEndEncoded type) { + throw new UnsupportedOperationException("No column binder implemented for type " + type); + } + @Override public ColumnBinder visit(ArrowType.Map type) { return new MapBinder((MapVector) vector); diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java index b2bd8e745ecca..0437618523c24 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java @@ -244,6 +244,11 @@ public Boolean visit(ArrowType.Interval type) { public Boolean visit(ArrowType.Duration type) { return new DurationAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); } + + @Override + public Boolean visit(ArrowType.RunEndEncoded type) { + throw new UnsupportedOperationException("No Avatica parameter binder implemented for type " + type); + } } } diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java index 843fe0cb89d9f..a4968a8fb176b 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java @@ -262,6 +262,11 @@ public AvaticaParameter visit(ArrowType.Interval type) { public AvaticaParameter visit(ArrowType.Duration type) { return new DurationAvaticaParameterConverter(type).createParameter(field); } + + @Override + public AvaticaParameter visit(ArrowType.RunEndEncoded type) { + throw new UnsupportedOperationException("No Avatica parameter binder implemented for type " + type); + } } } diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd index 3cf9a968791a4..c27275dfeca57 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -119,6 +119,11 @@ name: "Duration", fields: [{name: "unit", type: short, valueType: TimeUnit}], complex: false + }, + { + name: "RunEndEncoded", + fields: [], + complex: true } ] } diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java index 822d4822987fb..b4f0398980d4d 100644 --- a/java/vector/src/main/codegen/templates/UnionReader.java +++ b/java/vector/src/main/codegen/templates/UnionReader.java @@ -39,11 +39,10 @@ @SuppressWarnings("unused") public class UnionReader extends AbstractFieldReader { - private static final int NUM_SUPPORTED_TYPES = 46; + private static final int NUM_SUPPORTED_TYPES = 47; private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES]; public UnionVector data; - public UnionReader(UnionVector data) { this.data = data; } diff --git a/java/vector/src/main/codegen/templates/UnionRunEndEncodedWriter.java b/java/vector/src/main/codegen/templates/UnionRunEndEncodedWriter.java new file mode 100644 index 0000000000000..d1d95155286ce --- /dev/null +++ b/java/vector/src/main/codegen/templates/UnionRunEndEncodedWriter.java @@ -0,0 +1,317 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.vector.complex.writer.Decimal256Writer; +import org.apache.arrow.vector.complex.writer.DecimalWriter; +import org.apache.arrow.vector.holders.Decimal256Holder; +import org.apache.arrow.vector.holders.DecimalHolder; + + +import java.lang.UnsupportedOperationException; +import java.math.BigDecimal; + +<@pp.dropOutputFile /> +<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionRunEndEncodedWriter.java" /> + +<#include "/@includes/license.ftl" /> + +package org.apache.arrow.vector.complex.impl; + +import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; +<#include "/@includes/vv_imports.ftl" /> + +<#function is_timestamp_tz type> + <#return type?starts_with("TimeStamp") && type?ends_with("TZ")> + + +/* + * This class is generated using freemarker and the ${.template_name} template. + */ + +@SuppressWarnings("unused") +public class Union${listName}Writer extends AbstractFieldWriter { + + protected ${listName}Vector vector; + protected PromotableWriter writer; + private boolean inStruct = false; + private boolean listStarted = false; + private String structName; + <#if listName == "LargeList"> + private static final long OFFSET_WIDTH = 8; + <#else> + private static final int OFFSET_WIDTH = 4; + + + public Union${listName}Writer(${listName}Vector vector) { + this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); + } + + public Union${listName}Writer(${listName}Vector vector, NullableStructWriterFactory nullableStructWriterFactory) { + this.vector = vector; + this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableStructWriterFactory); + } + + public Union${listName}Writer(${listName}Vector vector, AbstractFieldWriter parent) { + this(vector); + } + + @Override + public void allocate() { + vector.allocateNew(); + } + + @Override + public void clear() { + vector.clear(); + } + + @Override + public Field getField() { + return vector.getField(); + } + + public void setValueCount(int count) { + vector.setValueCount(count); + } + + @Override + public int getValueCapacity() { + return vector.getValueCapacity(); + } + + @Override + public void close() throws Exception { + vector.close(); + writer.close(); + } + + @Override + public void setPosition(int index) { + super.setPosition(index); + } + + <#list vv.types as type><#list type.minor as minor> + <#assign lowerName = minor.class?uncap_first /> + <#if lowerName == "int" ><#assign lowerName = "integer" /> + <#assign upperName = minor.class?upper_case /> + <#assign capName = minor.class?cap_first /> + <#assign vectName = capName /> + @Override + public ${minor.class}Writer ${lowerName}() { + return this; + } + + <#if minor.typeParams?? > + @Override + public ${minor.class}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}) { + return writer.${lowerName}(name<#list minor.typeParams as typeParam>, ${typeParam.name}); + } + + + @Override + public ${minor.class}Writer ${lowerName}(String name) { + structName = name; + return writer.${lowerName}(name); + } + + + + @Override + public StructWriter struct() { + inStruct = true; + return this; + } + + @Override + public ListWriter list() { + return writer; + } + + @Override + public ListWriter list(String name) { + ListWriter listWriter = writer.list(name); + return listWriter; + } + + @Override + public StructWriter struct(String name) { + StructWriter structWriter = writer.struct(name); + return structWriter; + } + + @Override + public MapWriter map() { + return writer; + } + + @Override + public MapWriter map(String name) { + MapWriter mapWriter = writer.map(name); + return mapWriter; + } + + @Override + public MapWriter map(boolean keysSorted) { + writer.map(keysSorted); + return writer; + } + + @Override + public MapWriter map(String name, boolean keysSorted) { + MapWriter mapWriter = writer.map(name, keysSorted); + return mapWriter; + } + + <#if listName == "LargeList"> + @Override + public void startList() { + vector.startNewValue(idx()); + writer.setPosition(checkedCastToInt(vector.getOffsetBuffer().getLong((idx() + 1L) * OFFSET_WIDTH))); + listStarted = true; + } + + @Override + public void endList() { + vector.getOffsetBuffer().setLong((idx() + 1L) * OFFSET_WIDTH, writer.idx()); + setPosition(idx() + 1); + listStarted = false; + } + <#else> + @Override + public void startList() { + vector.startNewValue(idx()); + writer.setPosition(vector.getOffsetBuffer().getInt((idx() + 1L) * OFFSET_WIDTH)); + listStarted = true; + } + + @Override + public void endList() { + vector.getOffsetBuffer().setInt((idx() + 1L) * OFFSET_WIDTH, writer.idx()); + setPosition(idx() + 1); + listStarted = false; + } + + + @Override + public void start() { + writer.start(); + } + + @Override + public void end() { + writer.end(); + inStruct = false; + } + + @Override + public void writeNull() { + if (!listStarted){ + vector.setNull(idx()); + } else { + writer.writeNull(); + } + } + + <#list vv.types as type> + <#list type.minor as minor> + <#assign name = minor.class?cap_first /> + <#assign fields = minor.fields!type.fields /> + <#assign uncappedName = name?uncap_first/> + @Override + public void write${name}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, ) { + writer.write${name}(<#list fields as field>${field.name}<#if field_has_next>, ); + writer.setPosition(writer.idx()+1); + } + + <#if is_timestamp_tz(minor.class) || minor.class == "Duration" || minor.class == "FixedSizeBinary"> + @Override + public void write(${name}Holder holder) { + writer.write(holder); + writer.setPosition(writer.idx()+1); + } + + <#elseif minor.class?starts_with("Decimal")> + public void write${name}(long start, ArrowBuf buffer, ArrowType arrowType) { + writer.write${name}(start, buffer, arrowType); + writer.setPosition(writer.idx()+1); + } + + @Override + public void write(${name}Holder holder) { + writer.write(holder); + writer.setPosition(writer.idx()+1); + } + + public void write${name}(BigDecimal value) { + writer.write${name}(value); + writer.setPosition(writer.idx()+1); + } + + public void writeBigEndianBytesTo${name}(byte[] value, ArrowType arrowType){ + writer.writeBigEndianBytesTo${name}(value, arrowType); + writer.setPosition(writer.idx() + 1); + } + <#else> + @Override + public void write(${name}Holder holder) { + writer.write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, ); + writer.setPosition(writer.idx()+1); + } + + + <#if minor.class?ends_with("VarBinary")> + @Override + public void write${minor.class}(byte[] value) { + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(byte[] value, int offset, int length) { + writer.write${minor.class}(value, offset, length); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(ByteBuffer value) { + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + @Override + public void write${minor.class}(ByteBuffer value, int offset, int length) { + writer.write${minor.class}(value, offset, length); + writer.setPosition(writer.idx() + 1); + } + <#elseif minor.class?ends_with("VarChar")> + @Override + public void write${minor.class}(Text value) { + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + public void write${minor.class}(String value) { + writer.write${minor.class}(value); + writer.setPosition(writer.idx() + 1); + } + + + + +} + diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java index ae465418cf2fd..139d448b0b50f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java @@ -41,6 +41,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8; import org.apache.arrow.vector.types.pojo.ArrowType.Map; import org.apache.arrow.vector.types.pojo.ArrowType.Null; +import org.apache.arrow.vector.types.pojo.ArrowType.RunEndEncoded; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; import org.apache.arrow.vector.types.pojo.ArrowType.Time; import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; @@ -247,6 +248,11 @@ public TypeLayout visit(Duration type) { return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); } + @Override + public TypeLayout visit(RunEndEncoded type) { + return new TypeLayout(Collections.emptyList()); + } + }); return layout; } @@ -387,6 +393,11 @@ public Integer visit(Duration type) { return FIXED_WIDTH_BUFFER_COUNT; } + @Override + public Integer visit(RunEndEncoded type) { + return 0; + } + }); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java index 5323ddda838c8..661796032e069 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java @@ -37,6 +37,7 @@ import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; +import org.apache.arrow.vector.complex.RunEndEncodedVector; import org.apache.arrow.vector.complex.UnionVector; /** @@ -236,6 +237,14 @@ protected RangeEqualsVisitor createInnerVisitor( return new RangeEqualsVisitor(leftInner, rightInner, typeComparator); } + @Override + public Boolean visit(RunEndEncodedVector left, Range range) { + if (!validate(left)) { + return false; + } + return true; // TODO + } + protected boolean compareUnionVectors(Range range) { UnionVector leftVector = (UnionVector) left; UnionVector rightVector = (UnionVector) right; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java index 443ee1f96e273..cb2b8336c3f83 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java @@ -31,6 +31,7 @@ import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; +import org.apache.arrow.vector.complex.RunEndEncodedVector; import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.types.pojo.Field; @@ -125,6 +126,11 @@ public Boolean visit(ExtensionTypeVector left, Void value) { return compareField(left.getField(), right.getField()); } + @Override + public Boolean visit(RunEndEncodedVector left, Void value) { + return compareField(left.getField(), right.getField()); + } + private boolean compareField(Field leftField, Field rightField) { if (leftField == rightField) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java index aee090706b3c8..521b866544512 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java @@ -27,6 +27,7 @@ import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; +import org.apache.arrow.vector.complex.RunEndEncodedVector; import org.apache.arrow.vector.complex.UnionVector; /** @@ -57,5 +58,7 @@ public interface VectorVisitor { OUT visit(NullVector left, IN value); OUT visit(ExtensionTypeVector left, IN value); + + OUT visit(RunEndEncodedVector left, IN value); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/RunEndEncodedVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/RunEndEncodedVector.java new file mode 100644 index 0000000000000..984fbb8cf66f1 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/RunEndEncodedVector.java @@ -0,0 +1,635 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.complex; + +import static org.apache.arrow.util.Preconditions.checkArgument; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.memory.util.ByteFunctionHelpers; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.vector.BufferBacked; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.complex.writer.FieldWriter; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.TransferPair; + +/** + * A run-end encoded vector contains only two child vectors: a run_end vector of type int + * and a values vector of any type. There are no buffers associated with the parent vector. + */ +public class RunEndEncodedVector implements FieldVector { + + public static RunEndEncodedVector empty(String name, BufferAllocator allocator) { + return new RunEndEncodedVector(name, allocator, FieldType.notNullable(ArrowType.RunEndEncoded.INSTANCE), null); + } + + protected final BufferAllocator allocator; + protected final CallBack callBack; + protected Field field; + protected FieldVector runEndsVector; + protected FieldVector valuesVector; + protected int valueCount; + + /** + * Constructs a new instance. + * + * @param name The name of the instance. + * @param allocator The allocator to use for allocating/reallocating buffers. + * @param fieldType The type of the array that is run-end encoded. + * @param callBack A schema change callback. + */ + public RunEndEncodedVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { + this(new Field(name, fieldType, null), allocator, callBack); + } + + /** + * Constructs a new instance. + * + * @param field The field materialized by this vector. + * @param allocator The allocator to use for allocating/reallocating buffers. + * @param callBack A schema change callback. + */ + public RunEndEncodedVector(Field field, BufferAllocator allocator, CallBack callBack) { + this.field = field; + this.allocator = allocator; + this.callBack = callBack; + this.valueCount = 0; + } + + /** + * ValueVector interface + */ + + /** + * Allocate new buffers. ValueVector implements logic to determine how much to allocate. + * + * @throws OutOfMemoryException Thrown if no memory can be allocated. + */ + @Override + public void allocateNew() throws OutOfMemoryException { + if (!allocateNewSafe()) { + throw new OutOfMemoryException("Failure while allocating memory"); + } + } + + /** + * Allocates new buffers. ValueVector implements logic to determine how much to allocate. + * + * @return Returns true if allocation was successful. + */ + @Override + public boolean allocateNewSafe() { + initializeChildrenFromFields(field.getChildren()); + for (FieldVector v : getChildrenFromFields()) { + boolean isAllocated = v.allocateNewSafe(); + if (!isAllocated) { + v.clear(); + return false; + } + } + return true; + } + + /** + * Allocate new buffer with double capacity, and copy data into the new buffer. + * Replace vector's buffer with new buffer, and release old one + */ + @Override + public void reAlloc() { + for (FieldVector v : getChildrenFromFields()) { + v.reAlloc(); + } + } + + @Override + public BufferAllocator getAllocator() { + return allocator; + } + + /** + * Set the initial record capacity. + * + * @param numRecords the initial record capacity. + */ + @Override + public void setInitialCapacity(int numRecords) { + for (FieldVector v : getChildrenFromFields()) { + v.setInitialCapacity(numRecords); + } + } + + /** + * Returns the maximum number of values that can be stored in this vector instance. + * + * @return the maximum number of values that can be stored in this vector instance. + */ + @Override + public int getValueCapacity() { + return getChildrenFromFields() + .stream() + .mapToInt(ValueVector::getValueCapacity) + .min() + .orElseThrow(NoSuchElementException::new); + } + + /** + * Alternative to clear(). Allows use as an AutoCloseable in try-with-resources. + */ + @Override + public void close() { + for (FieldVector v : getChildrenFromFields()) { + v.close(); + } + } + + /** + * Release any owned ArrowBuf and reset the ValueVector to the initial state. If the + * vector has any child vectors, they will also be cleared. + */ + @Override + public void clear() { + for (FieldVector v : getChildrenFromFields()) { + v.clear(); + } + } + + /** + * Reset the ValueVector to the initial state without releasing any owned ArrowBuf. + * Buffer capacities will remain unchanged and any previous data will be zeroed out. + * This includes buffers for data, validity, offset, etc. If the vector has any + * child vectors, they will also be reset. + */ + @Override + public void reset() { + for (FieldVector v : getChildrenFromFields()) { + v.reset(); + } + valueCount = 0; + } + + /** + * Get information about how this field is materialized. + * + * @return the field corresponding to this vector + */ + @Override + public Field getField() { + return field; + } + + @Override + public MinorType getMinorType() { + return MinorType.RUNENDENCODED; + } + + /** + * To transfer quota responsibility. + * + * @param allocator the target allocator + * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new target vector of + * the same type. + */ + @Override + public TransferPair getTransferPair(BufferAllocator allocator) { + return null; + } + + /** + * To transfer quota responsibility. + * + * @param ref the name of the vector + * @param allocator the target allocator + * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new target vector of + * the same type. + */ + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator) { + return getTransferPair(ref, allocator, null); + } + + /** + * To transfer quota responsibility. + * + * @param field the Field object used by the target vector + * @param allocator the target allocator + * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new target vector of + * the same type. + */ + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return getTransferPair(field, allocator, null); + } + + /** + * To transfer quota responsibility. + * + * @param ref the name of the vector + * @param allocator the target allocator + * @param callBack A schema change callback. + * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new target vector of + * the same type. + */ + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { + return null; + } + + /** + * To transfer quota responsibility. + * + * @param field the Field object used by the target vector + * @param allocator the target allocator + * @param callBack A schema change callback. + * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new target vector of + * the same type. + */ + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return null; + } + + /** + * Makes a new transfer pair used to transfer underlying buffers. + * + * @param target the target for the transfer + * @return a new {@link org.apache.arrow.vector.util.TransferPair transfer pair} that is used to transfer underlying + * buffers into the target vector. + */ + @Override + public TransferPair makeTransferPair(ValueVector target) { + return null; + } + + /** + * Get a reader for this vector. + * + * @return a {@link org.apache.arrow.vector.complex.reader.FieldReader field reader} that supports reading values + * from this vector. + */ + @Override + public FieldReader getReader() { + return null; // TODO + } + + /** + * Get a writer for this vector. + * + * @return a {@link org.apache.arrow.vector.complex.writer.FieldWriter field writer} that supports writing values + * to this vector. + */ + public FieldWriter getWriter() { + return null; // TODO + } + + /** + * Get the number of bytes used by this vector. + * + * @return the number of bytes that is used by this vector instance. + */ + @Override + public int getBufferSize() { + int bufferSize = 0; + for (FieldVector v : getChildrenFromFields()) { + bufferSize += v.getBufferSize(); + } + return bufferSize; + } + + /** + * Returns the number of bytes that is used by this vector if it holds the given number + * of values. The result will be the same as if setValueCount() were called, followed + * by calling getBufferSize(), but without any of the closing side-effects that setValueCount() + * implies wrt finishing off the population of a vector. Some operations might wish to use + * this to determine how much memory has been used by a vector so far, even though it is + * not finished being populated. + * + * @param valueCount the number of values to assume this vector contains + * @return the buffer size if this vector is holding valueCount values + */ + @Override + public int getBufferSizeFor(int valueCount) { + int bufferSize = 0; + for (FieldVector v : getChildrenFromFields()) { + bufferSize += v.getBufferSizeFor(valueCount); + } + return bufferSize; + } + + /** + * Return the underlying buffers associated with this vector. Note that this doesn't impact the reference counts for + * this buffer so it only should be used for in-context access. Also note that this buffer changes regularly thus + * external classes shouldn't hold a reference to it (unless they change it). + * + * @param clear Whether to clear vector before returning; the buffers will still be refcounted; + * but the returned array will be the only reference to them + * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance. + */ + @Override + public ArrowBuf[] getBuffers(boolean clear) { + return null; + } + + /** + * Gets the underlying buffer associated with validity vector. + * + * @return buffer + */ + @Override + public ArrowBuf getValidityBuffer() { + return null; + } + + /** + * Gets the underlying buffer associated with data vector. + * + * @return buffer + */ + @Override + public ArrowBuf getDataBuffer() { + return null; + } + + /** + * Gets the underlying buffer associated with offset vector. + * + * @return buffer + */ + @Override + public ArrowBuf getOffsetBuffer() { + return null; + } + + /** + * Gets the number of values. + * + * @return number of values in the vector + */ + @Override + public int getValueCount() { + return valueCount; + } + + /** + * Set number of values in the vector. + */ + @Override + public void setValueCount(int valueCount) { + this.valueCount = valueCount; + } + + /** + * Get friendly type object from the vector. + * + * @param index index of object to get + * @return friendly type object + */ + @Override + public Object getObject(int index) { + return valuesVector.getObject(index); + } + + /** + * Returns number of null elements in the vector. + * + * @return number of null elements + */ + @Override + public int getNullCount() { + return 0; + } + + /** + * Check whether an element in the vector is null. + * + * @param index index to check for null + * @return true if element is null + */ + @Override + public boolean isNull(int index) { + return false; + } + + /** + * Returns hashCode of element in index with the default hasher. + */ + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + /** + * Returns hashCode of element in index with the given hasher. + */ + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + int hash = 0; + for (FieldVector v : getChildrenFromFields()) { + if (index < v.getValueCount()) { + hash = ByteFunctionHelpers.combineHash(hash, v.hashCode(index, hasher)); + } + } + return hash; + } + + /** + * Copy a cell value from a particular index in source vector to a particular + * position in this vector. + * + * @param fromIndex position to copy from in source vector + * @param thisIndex position to copy to in this vector + * @param from source vector + */ + @Override + public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { + return; + } + + /** + * Same as {@link #copyFrom(int, int, ValueVector)} except that + * it handles the case when the capacity of the vector needs to be expanded + * before copy. + * + * @param fromIndex position to copy from in source vector + * @param thisIndex position to copy to in this vector + * @param from source vector + */ + @Override + public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { + return; + } + + /** + * Accept a generic {@link VectorVisitor} and return the result. + * @param the output result type. + * @param the input data together with visitor. + */ + @Override + public OUT accept(VectorVisitor visitor, IN value) { + return visitor.visit(this, value); + } + + /** + * Gets the name of the vector. + * @return the name of the vector. + */ + @Override + public String getName() { + return this.field.getName(); + } + + @Override + public void validate() { + return; + } + + @Override + public void validateFull() { + return; + } + + @Override + public Iterator iterator() { + return null; + } + + /** + * FieldVector interface + */ + + /** + * Initializes the child vectors + * to be later loaded with loadBuffers. + * + * @param children the schema containing the run_ends column first + * and the values column second + */ + @Override + public void initializeChildrenFromFields(List children) { + checkArgument(children.size() == 2, + "Run-end encoded vectors must have two child Fields. Found: %s", children.isEmpty() ? "none" : children); + checkArgument( + Arrays.asList(MinorType.SMALLINT.getType(), MinorType.INT.getType(), MinorType.BIGINT.getType()) + .contains(children.get(0).getType()), + "The first field represents the run-end vector and must be of type int with size 16, 32, or 64 bits. Found: %s", + children.get(0).getType()); + runEndsVector = children.get(0).createVector(allocator); + valuesVector = children.get(1).createVector(allocator); + field = new Field(field.getName(), field.getFieldType(), children); + } + + /** + * The returned list is the same size as the list passed to initializeChildrenFromFields. + * + * @return the children according to schema (empty for primitive types) + */ + @Override + public List getChildrenFromFields() { + return Arrays.asList(runEndsVector, valuesVector); + } + + /** + * Loads data in the vectors. + * (ownBuffers must be the same size as getFieldVectors()) + * + * @param fieldNode the fieldNode + * @param ownBuffers the buffers for this Field (own buffers only, children not included) + */ + @Override + public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { + throw new UnsupportedOperationException("Run-end encoded vectors do not have any associated buffers."); + } + + /** + * Get the buffers of the fields, (same size as getFieldVectors() since it is their content). + * + * @return the buffers containing the data for this vector (ready for reading) + */ + @Override + public List getFieldBuffers() { + return null; + } + + /** + * Get the inner vectors. + * + * @deprecated This API will be removed as the current implementations no longer support inner vectors. + * + * @return the inner vectors for this field as defined by the TypeLayout + */ + @Deprecated + @Override + public List getFieldInnerVectors() { + throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers()."); + } + + /** + * Gets the starting address of the underlying buffer associated with validity vector. + * + * @return buffer address + */ + @Override + public long getValidityBufferAddress() { + throw new UnsupportedOperationException("Run-end encoded vectors do not have a validity buffer."); + } + + /** + * Gets the starting address of the underlying buffer associated with data vector. + * + * @return buffer address + */ + @Override + public long getDataBufferAddress() { + throw new UnsupportedOperationException("Run-end encoded vectors do not have a data buffer."); + } + + /** + * Gets the starting address of the underlying buffer associated with offset vector. + * + * @return buffer address + */ + @Override + public long getOffsetBufferAddress() { + throw new UnsupportedOperationException("Run-end encoded vectors do not have an offset buffer."); + } + + /** + * Set the element at the given index to null. + * + * @param index the value to change + */ + @Override + public void setNull(int index) { + throw new UnsupportedOperationException("Run-end encoded vectors do not have a validity buffer."); + } + +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index 0b0e0d66a98f0..4239d8ef5410b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -70,6 +70,7 @@ import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.RunEndEncodedVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.complex.impl.BigIntWriterImpl; @@ -133,6 +134,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.List; import org.apache.arrow.vector.types.pojo.ArrowType.Map; import org.apache.arrow.vector.types.pojo.ArrowType.Null; +import org.apache.arrow.vector.types.pojo.ArrowType.RunEndEncoded; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; import org.apache.arrow.vector.types.pojo.ArrowType.Time; import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; @@ -805,6 +807,20 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return ((ExtensionTypeVector) vector).getUnderlyingVector().getMinorType().getNewFieldWriter(vector); } }, + RUNENDENCODED(RunEndEncoded.INSTANCE) { + @Override + public FieldVector getNewVector( + Field field, + BufferAllocator allocator, + CallBack schemaChangeCallback) { + return new RunEndEncodedVector(field, allocator, schemaChangeCallback); + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return null; // TODO + } + }, ; private final ArrowType type; @@ -1024,6 +1040,11 @@ public MinorType visit(Duration type) { public MinorType visit(ExtensionType type) { return MinorType.EXTENSIONTYPE; } + + @Override + public MinorType visit(RunEndEncoded type) { + return MinorType.RUNENDENCODED; + } }); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java index 068717c7acbc7..662241448d501 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java @@ -38,6 +38,7 @@ import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; +import org.apache.arrow.vector.complex.RunEndEncodedVector; import org.apache.arrow.vector.complex.UnionVector; /** @@ -546,4 +547,9 @@ public ValueVector visit(ExtensionTypeVector deltaVector, Void value) { deltaVector.getUnderlyingVector().accept(underlyingAppender, null); return targetVector; } + + @Override + public ValueVector visit(RunEndEncodedVector deltaVector, Void value) { + return targetVector; // TODO + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java index d4abaa1945b94..eaacfdbeaba5c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java @@ -35,6 +35,7 @@ import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; +import org.apache.arrow.vector.complex.RunEndEncodedVector; import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -243,4 +244,9 @@ public Void visit(ExtensionTypeVector vector, Void value) { vector.getUnderlyingVector().accept(this, value); return null; } + + @Override + public Void visit(RunEndEncodedVector vector, Void value) { + return null; // TODO + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java index 6d33be7a0dbac..4ae8323aadc3c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java @@ -32,6 +32,7 @@ import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; +import org.apache.arrow.vector.complex.RunEndEncodedVector; import org.apache.arrow.vector.complex.UnionVector; /** @@ -182,4 +183,9 @@ public Void visit(ExtensionTypeVector vector, Void value) { vector.getUnderlyingVector().accept(this, value); return null; } + + @Override + public Void visit(RunEndEncodedVector vector, Void value) { + return null; // TODO + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java index 3d1c5a4f27f7c..60f2e4d54b1bf 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java @@ -67,6 +67,7 @@ import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; +import org.apache.arrow.vector.complex.RunEndEncodedVector; import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.FloatingPointPrecision; @@ -394,4 +395,10 @@ public Void visit(ExtensionTypeVector vector, Void value) { validateExtensionTypeVector(vector); return null; } + + @Override + public Void visit(RunEndEncodedVector vector, Void value) { + validateVectorCommon(vector, ArrowType.RunEndEncoded.class); + return null; // TODO + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java index 7e99b1f90fb61..082b171eff98f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java @@ -33,6 +33,7 @@ import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; +import org.apache.arrow.vector.complex.RunEndEncodedVector; import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.util.ValueVectorUtility; @@ -270,4 +271,9 @@ public Void visit(ExtensionTypeVector vector, Void value) { vector.getUnderlyingVector().accept(this, value); return null; } + + @Override + public Void visit(RunEndEncodedVector vector, Void value) { + return null; // TODO + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestRunEndEncodedVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestRunEndEncodedVector.java new file mode 100644 index 0000000000000..783b33cef106f --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestRunEndEncodedVector.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.complex.RunEndEncodedVector; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +//import static org.junit.Assert.*; + +public class TestRunEndEncodedVector { + + private BufferAllocator allocator; + + @Before + public void init() { + allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); + } + + @After + public void terminate() throws Exception { + allocator.close(); + } + + /** + * Create REE vector representing: [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5]. + */ + @Test + public void testBasicOperation() { + try (RunEndEncodedVector reeVector = RunEndEncodedVector.empty("ree", allocator)) { + int valueCount = 5; + reeVector.allocateNew(); + reeVector.setInitialCapacity(valueCount); + for (int i = 1; i <= valueCount ; i++) { + //reeVector.set(i, i); + } + reeVector.setValueCount(valueCount); + System.out.println("Created reeVector: " + reeVector); + } + } + + /** + * Create REE vector representing: [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5] + */ + // @Test + // public void testBasicOperation() { + // int count = 5; + // try (RunEndEncodedVector reeVector = RunEndEncodedVector.empty("ree", allocator, false)) { + // reeVector.allocateNew(); + // UnionRunEndEncodedWriter reeWriter = reeVector.getWriter(); + // for (int i = 0; i < count; i++) { + // reeWriter.setPosition(i); + // reeWriter.run_length().integer().writeInt(1 + i); + // reeWriter.value().integer().writeInt(1 + i); + // } + // reeWriter.setValueCount(count); + // UnionRunEndEncodedReader reeReader = reeVector.getReader(); + // for (int i = 0; i < count; i++) { + // reeReader.setPosition(i); + // assertEquals(i, reeReader.run_length().readInteger().intValue()); + // assertEquals(i, reeReader.value().readInteger().intValue()); + // } + // } + // } +}