From c06ac68fe6b0dc15cf0f699446893f1ecb666db4 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Thu, 12 Oct 2023 17:36:13 +0300 Subject: [PATCH 1/7] GH-38246 added new getTransferPair with Field argument for Complex type --- .../codegen/templates/DenseUnionVector.java | 46 ++++++++++-- .../main/codegen/templates/UnionVector.java | 45 +++++++++--- .../arrow/vector/BaseFixedWidthVector.java | 12 ++++ .../vector/BaseLargeVariableWidthVector.java | 21 ++++++ .../arrow/vector/BaseVariableWidthVector.java | 20 ++++++ .../arrow/vector/ExtensionTypeVector.java | 10 +++ .../arrow/vector/LargeVarBinaryVector.java | 9 +++ .../arrow/vector/LargeVarCharVector.java | 9 +++ .../org/apache/arrow/vector/NullVector.java | 12 +++- .../org/apache/arrow/vector/ValueVector.java | 4 ++ .../apache/arrow/vector/VarBinaryVector.java | 9 +++ .../apache/arrow/vector/VarCharVector.java | 9 +++ .../vector/complex/FixedSizeListVector.java | 53 +++++++++++--- .../arrow/vector/complex/LargeListVector.java | 46 ++++++++++-- .../arrow/vector/complex/ListVector.java | 39 +++++++++-- .../arrow/vector/complex/MapVector.java | 16 ++++- .../complex/NonNullableStructVector.java | 70 +++++++++++++++++-- .../arrow/vector/complex/StructVector.java | 54 +++++++++++++- 18 files changed, 436 insertions(+), 48 deletions(-) diff --git a/java/vector/src/main/codegen/templates/DenseUnionVector.java b/java/vector/src/main/codegen/templates/DenseUnionVector.java index fba9302f34221..313d449dedf9b 100644 --- a/java/vector/src/main/codegen/templates/DenseUnionVector.java +++ b/java/vector/src/main/codegen/templates/DenseUnionVector.java @@ -115,7 +115,7 @@ public class DenseUnionVector extends AbstractContainerVector implements FieldVe private long typeBufferAllocationSizeInBytes; private long offsetBufferAllocationSizeInBytes; - private final FieldType fieldType; + private final Field field; public static final byte TYPE_WIDTH = 1; public static final byte OFFSET_WIDTH = 4; @@ -131,7 +131,23 @@ public static DenseUnionVector empty(String name, BufferAllocator allocator) { public DenseUnionVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { super(name, allocator, callBack); - this.fieldType = fieldType; + this.field = new Field(name, fieldType, null); + this.internalStruct = new NonNullableStructVector( + "internal", + allocator, + INTERNAL_STRUCT_TYPE, + callBack, + AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE, + false); + this.typeBuffer = allocator.getEmpty(); + this.typeBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH; + this.offsetBuffer = allocator.getEmpty(); + this.offsetBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; + } + + public DenseUnionVector(Field field, BufferAllocator allocator, CallBack callBack) { + super(field.getName(), allocator, callBack); + this.field = field; this.internalStruct = new NonNullableStructVector( "internal", allocator, @@ -234,8 +250,8 @@ public synchronized byte registerNewTypeId(Field field) { typeFields.length + " relative types. Please use union of union instead"); } byte typeId = nextTypeId; - if (fieldType != null) { - int[] typeIds = ((ArrowType.Union) fieldType.getType()).getTypeIds(); + if (field.getFieldType() != null) { + int[] typeIds = ((ArrowType.Union) field.getFieldType().getType()).getTypeIds(); if (typeIds != null) { int thisTypeId = typeIds[nextTypeId]; if (thisTypeId > Byte.MAX_VALUE) { @@ -528,12 +544,12 @@ public Field getField() { } FieldType fieldType; - if (this.fieldType == null) { + if (field.getFieldType() == null) { fieldType = FieldType.nullable(new ArrowType.Union(Dense, typeIds)); } else { final UnionMode mode = UnionMode.Dense; - fieldType = new FieldType(this.fieldType.isNullable(), new ArrowType.Union(mode, typeIds), - this.fieldType.getDictionary(), this.fieldType.getMetadata()); + fieldType = new FieldType(field.getFieldType().isNullable(), new ArrowType.Union(mode, typeIds), + field.getFieldType().getDictionary(), field.getFieldType().getMetadata()); } return new Field(name, fieldType, childFields); @@ -554,6 +570,16 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallB return new org.apache.arrow.vector.complex.DenseUnionVector.TransferImpl(ref, allocator, callBack); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return getTransferPair(field, allocator, null); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return new org.apache.arrow.vector.complex.DenseUnionVector.TransferImpl(field, allocator, callBack); + } + @Override public TransferPair makeTransferPair(ValueVector target) { return new TransferImpl((DenseUnionVector) target); @@ -598,6 +624,12 @@ public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { createTransferPairs(); } + public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { + to = new DenseUnionVector(field.getName(), allocator, null, callBack); + internalStruct.makeTransferPair(to.internalStruct); + createTransferPairs(); + } + public TransferImpl(DenseUnionVector to) { this.to = to; internalStruct.makeTransferPair(to.internalStruct); diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index 0446faab7a95c..2628146b596c2 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -103,7 +103,7 @@ public class UnionVector extends AbstractContainerVector implements FieldVector private int typeBufferAllocationSizeInBytes; - private final FieldType fieldType; + private final Field field; private final Field[] typeIds = new Field[Byte.MAX_VALUE + 1]; public static final byte TYPE_WIDTH = 1; @@ -118,7 +118,21 @@ public static UnionVector empty(String name, BufferAllocator allocator) { public UnionVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { super(name, allocator, callBack); - this.fieldType = fieldType; + this.field = new Field(name, fieldType, null); + this.internalStruct = new NonNullableStructVector( + "internal", + allocator, + INTERNAL_STRUCT_TYPE, + callBack, + AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE, + false); + this.typeBuffer = allocator.getEmpty(); + this.typeBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH; + } + + public UnionVector(Field field, BufferAllocator allocator, CallBack callBack) { + super(field.getName(), allocator, callBack); + this.field = field; this.internalStruct = new NonNullableStructVector( "internal", allocator, @@ -144,8 +158,8 @@ public void initializeChildrenFromFields(List children) { int count = 0; for (Field child: children) { int typeId = Types.getMinorTypeForArrowType(child.getType()).ordinal(); - if (fieldType != null) { - int[] typeIds = ((ArrowType.Union)fieldType.getType()).getTypeIds(); + if (field.getFieldType() != null) { + int[] typeIds = ((ArrowType.Union)field.getFieldType().getType()).getTypeIds(); if (typeIds != null) { typeId = typeIds[count++]; } @@ -469,12 +483,12 @@ public Field getField() { } FieldType fieldType; - if (this.fieldType == null) { + if (field.getFieldType() == null) { fieldType = FieldType.nullable(new ArrowType.Union(Sparse, typeIds)); } else { - final UnionMode mode = ((ArrowType.Union)this.fieldType.getType()).getMode(); - fieldType = new FieldType(this.fieldType.isNullable(), new ArrowType.Union(mode, typeIds), - this.fieldType.getDictionary(), this.fieldType.getMetadata()); + final UnionMode mode = ((ArrowType.Union)field.getFieldType().getType()).getMode(); + fieldType = new FieldType(field.getFieldType().isNullable(), new ArrowType.Union(mode, typeIds), + field.getFieldType().getDictionary(), field.getFieldType().getMetadata()); } return new Field(name, fieldType, childFields); @@ -495,6 +509,16 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallB return new org.apache.arrow.vector.complex.UnionVector.TransferImpl(ref, allocator, callBack); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return getTransferPair(field, allocator, null); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return new org.apache.arrow.vector.complex.UnionVector.TransferImpl(field, allocator, callBack); + } + @Override public TransferPair makeTransferPair(ValueVector target) { return new TransferImpl((UnionVector) target); @@ -547,6 +571,11 @@ public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { internalStructVectorTransferPair = internalStruct.makeTransferPair(to.internalStruct); } + public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { + to = new UnionVector(field.getName(), allocator, null, callBack); + internalStructVectorTransferPair = internalStruct.makeTransferPair(to.internalStruct); + } + public TransferImpl(UnionVector to) { this.to = to; internalStructVectorTransferPair = internalStruct.makeTransferPair(to.internalStruct); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java index 04a038a0b5dfd..d09664e6d313e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java @@ -569,6 +569,18 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallB return getTransferPair(ref, allocator); } + /** + * Construct a transfer pair of this vector and another vector of same type. + * @param field The field materialized by this vector. + * @param allocator allocator for the target vector + * @param callBack not used + * @return TransferPair + */ + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return getTransferPair(field, allocator); + } + /** * Construct a transfer pair of this vector and another vector of same type. * @param allocator allocator for the target vector diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java index 4d5a8a5119c53..db922d6a70854 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java @@ -662,6 +662,18 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallB return getTransferPair(ref, allocator); } + /** + * Construct a transfer pair of this vector and another vector of same type. + * @param field The field materialized by this vector + * @param allocator allocator for the target vector + * @param callBack not used + * @return TransferPair + */ + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return getTransferPair(field, allocator); + } + /** * Construct a transfer pair of this vector and another vector of same type. * @param allocator allocator for the target vector @@ -672,6 +684,7 @@ public TransferPair getTransferPair(BufferAllocator allocator) { return getTransferPair(getName(), allocator); } + /** * Construct a transfer pair of this vector and another vector of same type. * @param ref name of the target vector @@ -680,6 +693,14 @@ public TransferPair getTransferPair(BufferAllocator allocator) { */ public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator); + /** + * Construct a transfer pair of this vector and another vector of same type. + * @param field The field materialized by this vector + * @param allocator allocator for the target vector + * @return TransferPair + */ + public abstract TransferPair getTransferPair(Field field, BufferAllocator allocator); + /** * Transfer this vector'data to another vector. The memory associated * with this vector is transferred to the allocator of target vector diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index d7f5ff05a935d..b57dd9343824a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -692,6 +692,18 @@ public void validateScalars() { // No validation by default. } + /** + * Construct a transfer pair of this vector and another vector of same type. + * @param field The field materialized by this vector. + * @param allocator allocator for the target vector + * @param callBack not used + * @return TransferPair + */ + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return getTransferPair(field, allocator); + } + /** * Construct a transfer pair of this vector and another vector of same type. * @param ref name of the target vector @@ -722,6 +734,14 @@ public TransferPair getTransferPair(BufferAllocator allocator) { */ public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator); + /** + * Construct a transfer pair of this vector and another vector of same type. + * @param field The field materialized by this vector. + * @param allocator allocator for the target vector + * @return TransferPair + */ + public abstract TransferPair getTransferPair(Field field, BufferAllocator allocator); + /** * Transfer this vector'data to another vector. The memory associated * with this vector is transferred to the allocator of target vector diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java index 9433719c5b8b2..a70efe61bcdfe 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java @@ -125,6 +125,16 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallB return underlyingVector.getTransferPair(ref, allocator, callBack); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return underlyingVector.getTransferPair(field, allocator); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return underlyingVector.getTransferPair(field, allocator, callBack); + } + @Override public TransferPair makeTransferPair(ValueVector target) { return underlyingVector.makeTransferPair(target); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java index 0063a61da570a..6806b958dad93 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java @@ -253,6 +253,11 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator) { return new TransferImpl(ref, allocator); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new TransferImpl(field, allocator); + } + /** * Construct a TransferPair with a desired target vector of the same type. * @@ -271,6 +276,10 @@ public TransferImpl(String ref, BufferAllocator allocator) { to = new LargeVarBinaryVector(ref, field.getFieldType(), allocator); } + public TransferImpl(Field field, BufferAllocator allocator) { + to = new LargeVarBinaryVector(field, allocator); + } + public TransferImpl(LargeVarBinaryVector to) { this.to = to; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java index e9472c9f2c71e..874079a0ef83f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java @@ -292,6 +292,11 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator) { return new LargeVarCharVector.TransferImpl(ref, allocator); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new LargeVarCharVector.TransferImpl(field, allocator); + } + /** * Construct a TransferPair with a desired target vector of the same type. * @@ -310,6 +315,10 @@ public TransferImpl(String ref, BufferAllocator allocator) { to = new LargeVarCharVector(ref, field.getFieldType(), allocator); } + public TransferImpl(Field field, BufferAllocator allocator) { + to = new LargeVarCharVector(field, allocator); + } + public TransferImpl(LargeVarCharVector to) { this.to = to; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java b/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java index 6e4c2764bdcc4..05a718e910df5 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java @@ -106,7 +106,7 @@ public Types.MinorType getMinorType() { @Override public TransferPair getTransferPair(BufferAllocator allocator) { - return getTransferPair(null, allocator); + return getTransferPair((String) null, allocator); } @Override @@ -162,11 +162,21 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator) { return new TransferImpl(); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new TransferImpl(); + } + @Override public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { return getTransferPair(ref, allocator); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return getTransferPair(field, allocator); + } + @Override public TransferPair makeTransferPair(ValueVector target) { return new TransferImpl((NullVector) target); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java index 462b512c65436..62c13dd818784 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java @@ -134,8 +134,12 @@ public interface ValueVector extends Closeable, Iterable { TransferPair getTransferPair(String ref, BufferAllocator allocator); + TransferPair getTransferPair(Field field, BufferAllocator allocator); + TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack); + TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack); + /** * Makes a new transfer pair used to transfer underlying buffers. * diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java index 34e072aaa8324..b43cd33d05391 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java @@ -254,6 +254,11 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator) { return new TransferImpl(ref, allocator); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new TransferImpl(field, allocator); + } + /** * Construct a TransferPair with a desired target vector of the same type. * @@ -272,6 +277,10 @@ public TransferImpl(String ref, BufferAllocator allocator) { to = new VarBinaryVector(ref, field.getFieldType(), allocator); } + public TransferImpl(Field field, BufferAllocator allocator) { + to = new VarBinaryVector(field, allocator); + } + public TransferImpl(VarBinaryVector to) { this.to = to; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java index 2c83893819a1e..9ac275f75a771 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java @@ -292,6 +292,11 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator) { return new TransferImpl(ref, allocator); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new TransferImpl(field, allocator); + } + /** * Construct a TransferPair with a desired target vector of the same type. * @@ -310,6 +315,10 @@ public TransferImpl(String ref, BufferAllocator allocator) { to = new VarCharVector(ref, field.getFieldType(), allocator); } + public TransferImpl(Field field, BufferAllocator allocator) { + to = new VarCharVector(field, allocator); + } + public TransferImpl(VarCharVector to) { this.to = to; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java index 0f78829181142..8d84deeba5e86 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java @@ -71,8 +71,7 @@ public static FixedSizeListVector empty(String name, int size, BufferAllocator a private FieldVector vector; private ArrowBuf validityBuffer; private final int listSize; - private final FieldType fieldType; - private final String name; + private final Field field; private UnionFixedSizeListReader reader; private int valueCount; @@ -92,20 +91,40 @@ public FixedSizeListVector(String name, CallBack unusedSchemaChangeCallback) { super(allocator); - this.name = name; this.validityBuffer = allocator.getEmpty(); this.vector = ZeroVector.INSTANCE; - this.fieldType = fieldType; this.listSize = ((ArrowType.FixedSizeList) fieldType.getType()).getListSize(); Preconditions.checkArgument(listSize >= 0, "list size must be non-negative"); this.valueCount = 0; this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); + this.field = new Field(name, fieldType, null); + } + + /** + * Creates a new instance. + * + * @param field The field materialized by this vector. + * @param allocator The allocator to use for creating/reallocating buffers for the vector. + * @param unusedSchemaChangeCallback Currently unused. + */ + public FixedSizeListVector(Field field, + BufferAllocator allocator, + CallBack unusedSchemaChangeCallback) { + super(allocator); + + this.field = field; + this.validityBuffer = allocator.getEmpty(); + this.vector = ZeroVector.INSTANCE; + this.listSize = ((ArrowType.FixedSizeList) field.getFieldType().getType()).getListSize(); + Preconditions.checkArgument(listSize >= 0, "list size must be non-negative"); + this.valueCount = 0; + this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); } @Override public Field getField() { - List children = Collections.singletonList(getDataVector().getField()); - return new Field(name, fieldType, children); + return field.getChildren().isEmpty() ? new Field(field.getName(), field.getFieldType(), + Collections.singletonList(getDataVector().getField())) : field; } @Override @@ -115,7 +134,7 @@ public MinorType getMinorType() { @Override public String getName() { - return name; + return field.getName(); } /** Get the fixed size for each list. */ @@ -172,8 +191,6 @@ private void setReaderAndWriterIndex() { /** * Get the inner vectors. * - * @deprecated This API will be removed as the current implementations no longer support inner vectors. - * * @return the inner vectors for this field as defined by the TypeLayout */ @Deprecated @@ -403,7 +420,7 @@ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { @Override public UnionVector promoteToUnion() { - UnionVector vector = new UnionVector(name, allocator, /* field type */ null, /* call-back */ null); + UnionVector vector = new UnionVector(getName(), allocator, /* field type */ null, /* call-back */ null); this.vector.clear(); this.vector = vector; invalidateReader(); @@ -519,11 +536,21 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator) { return getTransferPair(ref, allocator, null); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return getTransferPair(field, allocator, null); + } + @Override public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { return new TransferImpl(ref, allocator, callBack); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return new TransferImpl(field, allocator, callBack); + } + @Override public TransferPair makeTransferPair(ValueVector target) { return new TransferImpl((FixedSizeListVector) target); @@ -567,7 +594,11 @@ private class TransferImpl implements TransferPair { TransferPair dataPair; public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { - this(new FixedSizeListVector(name, allocator, fieldType, callBack)); + this(new FixedSizeListVector(name, allocator, field.getFieldType(), callBack)); + } + + public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { + this(new FixedSizeListVector(field, allocator, callBack)); } public TransferImpl(FixedSizeListVector to) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index acb058cda3cb8..5d3522e7ac178 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -98,12 +98,11 @@ public static LargeListVector empty(String name, BufferAllocator allocator) { protected final CallBack callBack; protected int valueCount; protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; - private final String name; protected String defaultDataVectorName = DATA_VECTOR_NAME; protected ArrowBuf validityBuffer; protected UnionLargeListReader reader; - private final FieldType fieldType; + private final Field field; private int validityAllocationSizeInBytes; /** @@ -121,9 +120,27 @@ public static LargeListVector empty(String name, BufferAllocator allocator) { */ public LargeListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { super(allocator); - this.name = name; + this.field = new Field(name, checkNotNull(fieldType), null); + this.validityBuffer = allocator.getEmpty(); + this.callBack = callBack; + this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); + this.lastSet = -1; + this.offsetBuffer = allocator.getEmpty(); + this.vector = vector == null ? DEFAULT_DATA_VECTOR : vector; + this.valueCount = 0; + } + + /** + * Creates a new instance. + * + * @param field The field materialized by this vector. + * @param allocator The allocator to use for creating/reallocating buffers for the vector. + * @param callBack A schema change callback. + */ + public LargeListVector(Field field, BufferAllocator allocator, CallBack callBack) { + super(allocator); + this.field = field; this.validityBuffer = allocator.getEmpty(); - this.fieldType = checkNotNull(fieldType); this.callBack = callBack; this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); this.lastSet = -1; @@ -495,11 +512,21 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator) { return getTransferPair(ref, allocator, null); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return getTransferPair(field, allocator, null); + } + @Override public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { return new TransferImpl(ref, allocator, callBack); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return new TransferImpl(field, allocator, callBack); + } + @Override public TransferPair makeTransferPair(ValueVector target) { return new TransferImpl((LargeListVector) target); @@ -590,7 +617,11 @@ private class TransferImpl implements TransferPair { TransferPair dataTransferPair; public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { - this(new LargeListVector(name, allocator, fieldType, callBack)); + this(new LargeListVector(name, allocator, field.getFieldType(), callBack)); + } + + public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { + this(new LargeListVector(field, allocator, callBack)); } public TransferImpl(LargeListVector to) { @@ -784,7 +815,8 @@ public int getBufferSizeFor(int valueCount) { @Override public Field getField() { - return new Field(getName(), fieldType, Collections.singletonList(getDataVector().getField())); + return field.getChildren().isEmpty() ? new Field(field.getName(), field.getFieldType(), + Collections.singletonList(getDataVector().getField())) : field; } @Override @@ -794,7 +826,7 @@ public MinorType getMinorType() { @Override public String getName() { - return name; + return field.getName(); } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 0d6ff11f8ccf3..8b28de73bcf59 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -76,7 +76,7 @@ public static ListVector empty(String name, BufferAllocator allocator) { protected ArrowBuf validityBuffer; protected UnionListReader reader; private CallBack callBack; - protected final FieldType fieldType; + protected final Field field; protected int validityAllocationSizeInBytes; /** @@ -95,7 +95,23 @@ public static ListVector empty(String name, BufferAllocator allocator) { public ListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { super(name, allocator, callBack); this.validityBuffer = allocator.getEmpty(); - this.fieldType = checkNotNull(fieldType); + this.field = new Field(name, checkNotNull(fieldType), null); + this.callBack = callBack; + this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); + this.lastSet = -1; + } + + /** + * Constructs a new instance. + * + * @param field The field materialized by this vector. + * @param allocator The allocator to use for allocating/reallocating buffers. + * @param callBack A schema change callback. + */ + public ListVector(Field field, BufferAllocator allocator, CallBack callBack) { + super(field.getName(), allocator, callBack); + this.validityBuffer = allocator.getEmpty(); + this.field = field; this.callBack = callBack; this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); this.lastSet = -1; @@ -392,11 +408,21 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator) { return getTransferPair(ref, allocator, null); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return getTransferPair(field, allocator, null); + } + @Override public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { return new TransferImpl(ref, allocator, callBack); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return new TransferImpl(field, allocator, callBack); + } + @Override public TransferPair makeTransferPair(ValueVector target) { return new TransferImpl((ListVector) target); @@ -462,7 +488,11 @@ private class TransferImpl implements TransferPair { TransferPair dataTransferPair; public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { - this(new ListVector(name, allocator, fieldType, callBack)); + this(new ListVector(name, allocator, field.getFieldType(), callBack)); + } + + public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { + this(new ListVector(field, allocator, callBack)); } public TransferImpl(ListVector to) { @@ -633,7 +663,8 @@ public int getBufferSizeFor(int valueCount) { @Override public Field getField() { - return new Field(getName(), fieldType, Collections.singletonList(getDataVector().getField())); + return field.getChildren().isEmpty() ? new Field(field.getName(), field.getFieldType(), + Collections.singletonList(getDataVector().getField())) : field; } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index b8f3f32a73a29..29e0ba3adc3dc 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -76,6 +76,11 @@ public MapVector(String name, BufferAllocator allocator, FieldType fieldType, Ca defaultDataVectorName = DATA_VECTOR_NAME; } + public MapVector(Field field, BufferAllocator allocator, CallBack callBack) { + super(field, allocator, callBack); + defaultDataVectorName = DATA_VECTOR_NAME; + } + /** * Initialize child vectors of the map from the given list of fields. * @@ -130,6 +135,11 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator) { return getTransferPair(ref, allocator, null); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new TransferImpl(field, allocator, null); + } + @Override public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { return new TransferImpl(ref, allocator, callBack); @@ -146,7 +156,11 @@ private class TransferImpl implements TransferPair { TransferPair dataTransferPair; public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) { - this(new MapVector(name, allocator, fieldType, callBack)); + this(new MapVector(name, allocator, field.getFieldType(), callBack)); + } + + public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) { + this(new MapVector(field, allocator, callBack)); } public TransferImpl(MapVector to) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java index 7d724656cdab7..cd4ee7718817d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java @@ -62,7 +62,7 @@ public static NonNullableStructVector emptyWithDuplicates(String name, BufferAll } private final SingleStructReaderImpl reader = new SingleStructReaderImpl(this); - protected final FieldType fieldType; + protected final Field field; public int valueCount; /** @@ -81,7 +81,26 @@ public NonNullableStructVector(String name, callBack, null, true); - this.fieldType = checkNotNull(fieldType); + this.field = new Field(name, checkNotNull(fieldType), null); + this.valueCount = 0; + } + + /** + * Constructs a new instance. + * + * @param field The field materialized by this vector. + * @param allocator The allocator to use to allocating/reallocating buffers. + * @param callBack A schema change callback. + */ + public NonNullableStructVector(Field field, + BufferAllocator allocator, + CallBack callBack) { + super(field.getName(), + allocator, + callBack, + null, + true); + this.field = field; this.valueCount = 0; } @@ -101,7 +120,25 @@ public NonNullableStructVector(String name, ConflictPolicy conflictPolicy, boolean allowConflictPolicyChanges) { super(name, allocator, callBack, conflictPolicy, allowConflictPolicyChanges); - this.fieldType = checkNotNull(fieldType); + this.field = new Field(name, checkNotNull(fieldType), null); + this.valueCount = 0; + } + + /** + * Constructs a new instance. + * + * @param field The field materialized by this vector. + * @param allocator The allocator to use to allocating/reallocating buffers. + * @param callBack A schema change callback. + * @param conflictPolicy How to handle duplicate field names in the struct. + */ + public NonNullableStructVector(Field field, + BufferAllocator allocator, + CallBack callBack, + ConflictPolicy conflictPolicy, + boolean allowConflictPolicyChanges) { + super(field.getName(), allocator, callBack, conflictPolicy, allowConflictPolicyChanges); + this.field = field; this.valueCount = 0; } @@ -208,7 +245,7 @@ public TransferPair getTransferPair(BufferAllocator allocator) { public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { return new StructTransferPair(this, new NonNullableStructVector(name, allocator, - fieldType, + field.getFieldType(), callBack, getConflictPolicy(), allowConflictPolicyChanges), false); @@ -223,7 +260,25 @@ public TransferPair makeTransferPair(ValueVector to) { public TransferPair getTransferPair(String ref, BufferAllocator allocator) { return new StructTransferPair(this, new NonNullableStructVector(ref, allocator, - fieldType, + field.getFieldType(), + callBack, + getConflictPolicy(), + allowConflictPolicyChanges), false); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new StructTransferPair(this, new NonNullableStructVector(field, + allocator, + callBack, + getConflictPolicy(), + allowConflictPolicyChanges), false); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return new StructTransferPair(this, new NonNullableStructVector(field, + allocator, callBack, getConflictPolicy(), allowConflictPolicyChanges), false); @@ -412,11 +467,14 @@ public void reset() { @Override public Field getField() { + if (!field.getChildren().isEmpty()) { + return field; + } List children = new ArrayList<>(); for (ValueVector child : getChildren()) { children.add(child.getField()); } - return new Field(name, fieldType, children); + return new Field(name, field.getFieldType(), children); } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java index 2dabc6e014969..d947249fd3cdd 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java @@ -42,6 +42,7 @@ import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; import org.apache.arrow.vector.util.OversizedAllocationException; @@ -113,6 +114,44 @@ public StructVector(String name, BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION); } + /** + * Constructs a new instance. + * + * @param field The field materialized by this vector. + * @param allocator The allocator to use to allocating/reallocating buffers. + * @param callBack A schema change callback. + */ + public StructVector(Field field, + BufferAllocator allocator, + CallBack callBack) { + super(field, + checkNotNull(allocator), + callBack); + this.validityBuffer = allocator.getEmpty(); + this.validityAllocationSizeInBytes = + BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION); + } + + /** + * Constructs a new instance. + * + * @param field The field materialized by this vector. + * @param allocator The allocator to use to allocating/reallocating buffers. + * @param callBack A schema change callback. + * @param conflictPolicy policy to determine how duplicate names are handled. + * @param allowConflictPolicyChanges wether duplicate names are allowed at all. + */ + public StructVector(Field field, + BufferAllocator allocator, + CallBack callBack, + ConflictPolicy conflictPolicy, + boolean allowConflictPolicyChanges) { + super(field, checkNotNull(allocator), callBack, conflictPolicy, allowConflictPolicyChanges); + this.validityBuffer = allocator.getEmpty(); + this.validityAllocationSizeInBytes = + BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION); + } + @Override public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { if (ownBuffers.size() != 1) { @@ -167,7 +206,7 @@ public NullableStructWriter getWriter() { public TransferPair getTransferPair(BufferAllocator allocator) { return new NullableStructTransferPair(this, new StructVector(name, allocator, - fieldType, + field.getFieldType(), null, getConflictPolicy(), allowConflictPolicyChanges), false); @@ -182,7 +221,7 @@ public TransferPair makeTransferPair(ValueVector to) { public TransferPair getTransferPair(String ref, BufferAllocator allocator) { return new NullableStructTransferPair(this, new StructVector(ref, allocator, - fieldType, + field.getFieldType(), null, getConflictPolicy(), allowConflictPolicyChanges), false); @@ -192,12 +231,21 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator) { public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { return new NullableStructTransferPair(this, new StructVector(ref, allocator, - fieldType, + field.getFieldType(), callBack, getConflictPolicy(), allowConflictPolicyChanges), false); } + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return new NullableStructTransferPair(this, new StructVector(field, + allocator, + null, + getConflictPolicy(), + allowConflictPolicyChanges), false); + } + /** * {@link TransferPair} for this (nullable) {@link StructVector}. */ From ccf50a50dad4a9e0e9f106914ec0c7739975ed51 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Thu, 12 Oct 2023 19:48:45 +0300 Subject: [PATCH 2/7] GH-38246 small fix --- .../codegen/templates/DenseUnionVector.java | 30 +++++-------------- .../main/codegen/templates/UnionVector.java | 30 +++++-------------- .../arrow/vector/complex/LargeListVector.java | 3 +- .../arrow/vector/complex/ListVector.java | 3 +- .../complex/NonNullableStructVector.java | 2 +- 5 files changed, 18 insertions(+), 50 deletions(-) diff --git a/java/vector/src/main/codegen/templates/DenseUnionVector.java b/java/vector/src/main/codegen/templates/DenseUnionVector.java index 313d449dedf9b..12fc52af3c18e 100644 --- a/java/vector/src/main/codegen/templates/DenseUnionVector.java +++ b/java/vector/src/main/codegen/templates/DenseUnionVector.java @@ -115,7 +115,7 @@ public class DenseUnionVector extends AbstractContainerVector implements FieldVe private long typeBufferAllocationSizeInBytes; private long offsetBufferAllocationSizeInBytes; - private final Field field; + private final FieldType fieldType; public static final byte TYPE_WIDTH = 1; public static final byte OFFSET_WIDTH = 4; @@ -131,23 +131,7 @@ public static DenseUnionVector empty(String name, BufferAllocator allocator) { public DenseUnionVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { super(name, allocator, callBack); - this.field = new Field(name, fieldType, null); - this.internalStruct = new NonNullableStructVector( - "internal", - allocator, - INTERNAL_STRUCT_TYPE, - callBack, - AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE, - false); - this.typeBuffer = allocator.getEmpty(); - this.typeBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH; - this.offsetBuffer = allocator.getEmpty(); - this.offsetBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; - } - - public DenseUnionVector(Field field, BufferAllocator allocator, CallBack callBack) { - super(field.getName(), allocator, callBack); - this.field = field; + this.fieldType = fieldType; this.internalStruct = new NonNullableStructVector( "internal", allocator, @@ -250,8 +234,8 @@ public synchronized byte registerNewTypeId(Field field) { typeFields.length + " relative types. Please use union of union instead"); } byte typeId = nextTypeId; - if (field.getFieldType() != null) { - int[] typeIds = ((ArrowType.Union) field.getFieldType().getType()).getTypeIds(); + if (this.fieldType != null) { + int[] typeIds = ((ArrowType.Union) this.fieldType.getType()).getTypeIds(); if (typeIds != null) { int thisTypeId = typeIds[nextTypeId]; if (thisTypeId > Byte.MAX_VALUE) { @@ -544,12 +528,12 @@ public Field getField() { } FieldType fieldType; - if (field.getFieldType() == null) { + if (this.fieldType == null) { fieldType = FieldType.nullable(new ArrowType.Union(Dense, typeIds)); } else { final UnionMode mode = UnionMode.Dense; - fieldType = new FieldType(field.getFieldType().isNullable(), new ArrowType.Union(mode, typeIds), - field.getFieldType().getDictionary(), field.getFieldType().getMetadata()); + fieldType = new FieldType(this.fieldType.isNullable(), new ArrowType.Union(mode, typeIds), + this.fieldType.getDictionary(), this.fieldType.getMetadata()); } return new Field(name, fieldType, childFields); diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index 2628146b596c2..ea79c5c2fba76 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -103,7 +103,7 @@ public class UnionVector extends AbstractContainerVector implements FieldVector private int typeBufferAllocationSizeInBytes; - private final Field field; + private final FieldType fieldType; private final Field[] typeIds = new Field[Byte.MAX_VALUE + 1]; public static final byte TYPE_WIDTH = 1; @@ -118,21 +118,7 @@ public static UnionVector empty(String name, BufferAllocator allocator) { public UnionVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { super(name, allocator, callBack); - this.field = new Field(name, fieldType, null); - this.internalStruct = new NonNullableStructVector( - "internal", - allocator, - INTERNAL_STRUCT_TYPE, - callBack, - AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE, - false); - this.typeBuffer = allocator.getEmpty(); - this.typeBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH; - } - - public UnionVector(Field field, BufferAllocator allocator, CallBack callBack) { - super(field.getName(), allocator, callBack); - this.field = field; + this.fieldType = fieldType; this.internalStruct = new NonNullableStructVector( "internal", allocator, @@ -158,8 +144,8 @@ public void initializeChildrenFromFields(List children) { int count = 0; for (Field child: children) { int typeId = Types.getMinorTypeForArrowType(child.getType()).ordinal(); - if (field.getFieldType() != null) { - int[] typeIds = ((ArrowType.Union)field.getFieldType().getType()).getTypeIds(); + if (this.fieldType != null) { + int[] typeIds = ((ArrowType.Union)this.fieldType.getType()).getTypeIds(); if (typeIds != null) { typeId = typeIds[count++]; } @@ -483,12 +469,12 @@ public Field getField() { } FieldType fieldType; - if (field.getFieldType() == null) { + if (this.fieldType == null) { fieldType = FieldType.nullable(new ArrowType.Union(Sparse, typeIds)); } else { - final UnionMode mode = ((ArrowType.Union)field.getFieldType().getType()).getMode(); - fieldType = new FieldType(field.getFieldType().isNullable(), new ArrowType.Union(mode, typeIds), - field.getFieldType().getDictionary(), field.getFieldType().getMetadata()); + final UnionMode mode = ((ArrowType.Union)this.fieldType.getType()).getMode(); + fieldType = new FieldType(this.fieldType.isNullable(), new ArrowType.Union(mode, typeIds), + this.fieldType.getDictionary(), this.fieldType.getMetadata()); } return new Field(name, fieldType, childFields); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 5d3522e7ac178..d208beb1cb02f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -21,7 +21,6 @@ import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; import static org.apache.arrow.util.Preconditions.checkArgument; -import static org.apache.arrow.util.Preconditions.checkNotNull; import java.util.ArrayList; import java.util.Arrays; @@ -120,7 +119,7 @@ public static LargeListVector empty(String name, BufferAllocator allocator) { */ public LargeListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { super(allocator); - this.field = new Field(name, checkNotNull(fieldType), null); + this.field = new Field(name, fieldType, null); this.validityBuffer = allocator.getEmpty(); this.callBack = callBack; this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 8b28de73bcf59..71fe09ce4e890 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -21,7 +21,6 @@ import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; import static org.apache.arrow.util.Preconditions.checkArgument; -import static org.apache.arrow.util.Preconditions.checkNotNull; import java.util.ArrayList; import java.util.Arrays; @@ -95,7 +94,7 @@ public static ListVector empty(String name, BufferAllocator allocator) { public ListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { super(name, allocator, callBack); this.validityBuffer = allocator.getEmpty(); - this.field = new Field(name, checkNotNull(fieldType), null); + this.field = new Field(name, fieldType, null); this.callBack = callBack; this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); this.lastSet = -1; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java index cd4ee7718817d..be349a5ffa99f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java @@ -81,7 +81,7 @@ public NonNullableStructVector(String name, callBack, null, true); - this.field = new Field(name, checkNotNull(fieldType), null); + this.field = new Field(name, fieldType, null); this.valueCount = 0; } From 454d196ac10a637d3da8aa9578b9511c895a8bb6 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Fri, 13 Oct 2023 15:01:46 +0300 Subject: [PATCH 3/7] GH-38246 more improvements and tests --- .../vector/complex/FixedSizeListVector.java | 10 +++-- .../arrow/vector/complex/LargeListVector.java | 10 +++-- .../arrow/vector/complex/ListVector.java | 10 +++-- .../arrow/vector/complex/MapVector.java | 1 + .../complex/NonNullableStructVector.java | 3 +- .../apache/arrow/vector/types/pojo/Field.java | 6 ++- .../arrow/vector/TestLargeListVector.java | 23 ++++++++++ .../vector/TestLargeVarBinaryVector.java | 18 ++++++++ .../arrow/vector/TestLargeVarCharVector.java | 17 +++++++ .../apache/arrow/vector/TestListVector.java | 21 +++++++++ .../apache/arrow/vector/TestMapVector.java | 22 ++++++++++ .../apache/arrow/vector/TestStructVector.java | 44 +++++++++++++++++++ 12 files changed, 174 insertions(+), 11 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java index 8d84deeba5e86..da5eca6e9eb0a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java @@ -71,7 +71,7 @@ public static FixedSizeListVector empty(String name, int size, BufferAllocator a private FieldVector vector; private ArrowBuf validityBuffer; private final int listSize; - private final Field field; + private Field field; private UnionFixedSizeListReader reader; private int valueCount; @@ -123,8 +123,11 @@ public FixedSizeListVector(Field field, @Override public Field getField() { - return field.getChildren().isEmpty() ? new Field(field.getName(), field.getFieldType(), - Collections.singletonList(getDataVector().getField())) : field; + if(!field.getChildren().isEmpty()){ + return field; + } + field.setChildren(Collections.singletonList(getDataVector().getField())); + return field; } @Override @@ -152,6 +155,7 @@ public void initializeChildrenFromFields(List children) { checkArgument(addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); + this.field.setChildren(children); } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index d208beb1cb02f..cbdb24eb24c06 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -101,7 +101,7 @@ public static LargeListVector empty(String name, BufferAllocator allocator) { protected String defaultDataVectorName = DATA_VECTOR_NAME; protected ArrowBuf validityBuffer; protected UnionLargeListReader reader; - private final Field field; + private Field field; private int validityAllocationSizeInBytes; /** @@ -158,6 +158,7 @@ public void initializeChildrenFromFields(List children) { checkArgument(addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); + this.field.setChildren(children); } @Override @@ -814,8 +815,11 @@ public int getBufferSizeFor(int valueCount) { @Override public Field getField() { - return field.getChildren().isEmpty() ? new Field(field.getName(), field.getFieldType(), - Collections.singletonList(getDataVector().getField())) : field; + if(!field.getChildren().isEmpty()){ + return field; + } + field.setChildren(Collections.singletonList(getDataVector().getField())); + return field; } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 71fe09ce4e890..baa7a4e611422 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -75,7 +75,7 @@ public static ListVector empty(String name, BufferAllocator allocator) { protected ArrowBuf validityBuffer; protected UnionListReader reader; private CallBack callBack; - protected final Field field; + protected Field field; protected int validityAllocationSizeInBytes; /** @@ -126,6 +126,7 @@ public void initializeChildrenFromFields(List children) { checkArgument(addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); + this.field.setChildren(children); } @Override @@ -662,8 +663,11 @@ public int getBufferSizeFor(int valueCount) { @Override public Field getField() { - return field.getChildren().isEmpty() ? new Field(field.getName(), field.getFieldType(), - Collections.singletonList(getDataVector().getField())) : field; + if(!field.getChildren().isEmpty()){ + return field; + } + field.setChildren(Collections.singletonList(getDataVector().getField())); + return field; } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index 29e0ba3adc3dc..f8fe644556e62 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -104,6 +104,7 @@ public void initializeChildrenFromFields(List children) { checkArgument(addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); addOrGetVector.getVector().initializeChildrenFromFields(structField.getChildren()); + this.field.setChildren(children); } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java index be349a5ffa99f..aa844cfc7b62c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java @@ -474,7 +474,8 @@ public Field getField() { for (ValueVector child : getChildren()) { children.add(child.getField()); } - return new Field(name, field.getFieldType(), children); + field.setChildren(children); + return field; } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java index 54c609d4a104f..072753d929552 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java @@ -70,7 +70,7 @@ public static Field notNullable(String name, ArrowType type) { private final String name; private final FieldType fieldType; - private final List children; + private List children; private Field( String name, @@ -256,6 +256,10 @@ public List getChildren() { return children; } + public void setChildren(List children) { + this.children = Collections2.toImmutableList(children); + } + @JsonIgnore public Map getMetadata() { return fieldType.getMetadata(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index adf86183c0ada..993ce0b089769 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import java.util.ArrayList; @@ -992,6 +993,28 @@ public void testTotalCapacity() { } } + @Test + public void testGetTransferPairWithField() throws Exception { + try (final LargeListVector fromVector = LargeListVector.empty("list", allocator)) { + + UnionLargeListWriter writer = fromVector.getWriter(); + writer.allocate(); + + //set some values + writer.startList(); + writer.integer().writeInt(1); + writer.integer().writeInt(2); + writer.endList(); + fromVector.setValueCount(2); + + final TransferPair transferPair = fromVector.getTransferPair(fromVector.getField(), + allocator); + final LargeListVector toVector = (LargeListVector) transferPair.getTo(); + // Field inside a new vector created by reusing a field should be the same in memory as the original field. + assertSame(toVector.getField(), fromVector.getField()); + } + } + private void writeIntValues(UnionLargeListWriter writer, int[] values) { writer.startList(); for (int v: values) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java index 644827ce995e8..ce7bb15bb1482 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java @@ -18,12 +18,14 @@ package org.apache.arrow.vector; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder; +import org.apache.arrow.vector.util.TransferPair; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -101,4 +103,20 @@ public void testSetNullableLargeVarBinaryHolderSafe() { buf.close(); } } + + @Test + public void testGetTransferPairWithField() { + try (BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 1000000, 1000000); + LargeVarBinaryVector v1 = new LargeVarBinaryVector("v1", childAllocator1)) { + v1.allocateNew(); + v1.setSafe(4094, "hello world".getBytes(), 0, 11); + v1.setValueCount(4001); + + TransferPair tp = v1.getTransferPair(v1.getField(), allocator); + tp.transfer(); + LargeVarBinaryVector v2 = (LargeVarBinaryVector) tp.getTo(); + assertSame(v1.getField(), v2.getField()); + v2.clear(); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java index 1b81c6b209fbb..5f7863c6f6177 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java @@ -21,6 +21,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import java.nio.charset.StandardCharsets; @@ -794,6 +795,22 @@ public void testNullableType() { } } + @Test + public void testGetTransferPairWithField() { + try (BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 1000000, 1000000); + LargeVarCharVector v1 = new LargeVarCharVector("v1", childAllocator1)) { + v1.allocateNew(); + v1.setSafe(4094, "hello world".getBytes(), 0, 11); + v1.setValueCount(4001); + + TransferPair tp = v1.getTransferPair(v1.getField(), allocator); + tp.transfer(); + LargeVarCharVector v2 = (LargeVarCharVector) tp.getTo(); + assertSame(v1.getField(), v2.getField()); + v2.clear(); + } + } + private void populateLargeVarcharVector(final LargeVarCharVector vector, int valueCount, String[] values) { for (int i = 0; i < valueCount; i += 3) { final String s = String.format("%010d", i); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 2a1228c2a38c2..278f497b47991 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -1135,6 +1136,26 @@ public void testTotalCapacity() { } } + @Test + public void testGetTransferPairWithField() { + try (ListVector fromVector = ListVector.empty("input", allocator)) { + UnionListWriter writer = fromVector.getWriter(); + writer.allocate(); + writer.setPosition(0); // optional + writer.startList(); + writer.bigInt().writeBigInt(1); + writer.bigInt().writeBigInt(2); + writer.bigInt().writeBigInt(3); + writer.endList(); + writer.setValueCount(1); + final TransferPair transferPair = fromVector.getTransferPair(fromVector.getField(), + allocator); + final ListVector toVector = (ListVector) transferPair.getTo(); + // Field inside a new vector created by reusing a field should be the same in memory as the original field. + assertSame(toVector.getField(), fromVector.getField()); + } + } + private void writeIntValues(UnionListWriter writer, int[] values) { writer.startList(); for (int v: values) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index d60d5611a5f7b..5c8fd55ec98dc 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -1133,4 +1133,26 @@ public void testGetTransferPair() { vector.clear(); } } + + @Test + public void testGetTransferPairWithField() { + try (MapVector mapVector = MapVector.empty("mapVector", allocator, false)) { + + FieldType type = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); + AddOrGetResult addResult = mapVector.addOrGetVector(type); + FieldType keyType = new FieldType(false, MinorType.BIGINT.getType(), null, null); + FieldType valueType = FieldType.nullable(MinorType.FLOAT8.getType()); + addResult.getVector().addOrGet(MapVector.KEY_NAME, keyType, BigIntVector.class); + addResult.getVector().addOrGet(MapVector.VALUE_NAME, valueType, Float8Vector.class); + mapVector.allocateNew(); + mapVector.setValueCount(0); + + assertEquals(-1, mapVector.getLastSet()); + TransferPair tp = mapVector.getTransferPair(mapVector.getField(), allocator); + tp.transfer(); + MapVector toVector = (MapVector) tp.getTo(); + assertSame(toVector.getField(), mapVector.getField()); + toVector.clear(); + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java index 552d5752f236f..ee34f203b6320 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java @@ -29,11 +29,16 @@ import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; +import org.apache.arrow.vector.complex.impl.NullableStructWriter; +import org.apache.arrow.vector.complex.writer.Float8Writer; +import org.apache.arrow.vector.complex.writer.IntWriter; import org.apache.arrow.vector.holders.ComplexHolder; +import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.TransferPair; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -290,4 +295,43 @@ public void testTypedGetters() { assertEquals(IntVector.class, s1.getVectorById(0, IntVector.class).getClass()); } } + + @Test + public void testGetTransferPair() { + try (final StructVector fromVector = simpleStructVector("s1", allocator)) { + TransferPair tp = fromVector.getTransferPair(fromVector.getField(), allocator); + final StructVector toVector = (StructVector) tp.getTo(); + // Field inside a new vector created by reusing a field should be the same in memory as the original field. + assertSame(toVector.getField(), fromVector.getField()); + toVector.clear(); + } + } + + private StructVector simpleStructVector(String name, BufferAllocator allocator) { + final String INT_COL = "struct_int_child"; + final String FLT_COL = "struct_flt_child"; + StructVector structVector = StructVector.empty(name, allocator); + final int size = 6; // number of structs + + NullableStructWriter structWriter = structVector.getWriter(); + structVector.addOrGet( + INT_COL, FieldType.nullable(Types.MinorType.INT.getType()), IntVector.class); + structVector.addOrGet( + FLT_COL, FieldType.nullable(Types.MinorType.INT.getType()), IntVector.class); + structVector.allocateNew(); + IntWriter intWriter = structWriter.integer(INT_COL); + Float8Writer float8Writer = structWriter.float8(FLT_COL); + + for (int i = 0; i < size; i++) { + structWriter.setPosition(i); + structWriter.start(); + intWriter.writeInt(i); + float8Writer.writeFloat8(i * .1); + structWriter.end(); + } + + structWriter.setValueCount(size); + + return structVector; + } } From 996a86c0134427292c6d6ef4dd69f1d96e7ef1a5 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Fri, 13 Oct 2023 16:01:33 +0300 Subject: [PATCH 4/7] GH-38246 fixed style issue --- .../arrow/vector/complex/FixedSizeListVector.java | 2 +- .../arrow/vector/complex/LargeListVector.java | 2 +- .../org/apache/arrow/vector/complex/ListVector.java | 2 +- .../vector/complex/NonNullableStructVector.java | 3 --- .../org/apache/arrow/vector/types/pojo/Field.java | 2 +- .../apache/arrow/vector/TestVectorSchemaRoot.java | 13 +++++++------ 6 files changed, 11 insertions(+), 13 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java index da5eca6e9eb0a..996a5f648d5f8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java @@ -123,7 +123,7 @@ public FixedSizeListVector(Field field, @Override public Field getField() { - if(!field.getChildren().isEmpty()){ + if (field.getChildren().contains(getDataVector().getField())) { return field; } field.setChildren(Collections.singletonList(getDataVector().getField())); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index cbdb24eb24c06..c5c243dd178b8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -815,7 +815,7 @@ public int getBufferSizeFor(int valueCount) { @Override public Field getField() { - if(!field.getChildren().isEmpty()){ + if (field.getChildren().contains(getDataVector().getField())) { return field; } field.setChildren(Collections.singletonList(getDataVector().getField())); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index baa7a4e611422..dc4faece16f14 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -663,7 +663,7 @@ public int getBufferSizeFor(int valueCount) { @Override public Field getField() { - if(!field.getChildren().isEmpty()){ + if (field.getChildren().contains(getDataVector().getField())) { return field; } field.setChildren(Collections.singletonList(getDataVector().getField())); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java index aa844cfc7b62c..3a137b867288c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java @@ -467,9 +467,6 @@ public void reset() { @Override public Field getField() { - if (!field.getChildren().isEmpty()) { - return field; - } List children = new ArrayList<>(); for (ValueVector child : getChildren()) { children.add(child.getField()); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java index 072753d929552..bd5ae6f9d780f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java @@ -257,7 +257,7 @@ public List getChildren() { } public void setChildren(List children) { - this.children = Collections2.toImmutableList(children); + this.children = Collections2.toImmutableList(children); } @JsonIgnore diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java index 7a445750a63de..2208def3eaba9 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java @@ -29,8 +29,9 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.impl.UnionListWriter; +import org.apache.arrow.vector.complex.UnionVector; +import org.apache.arrow.vector.complex.writer.FieldWriter; +import org.apache.arrow.vector.types.UnionMode; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; @@ -92,23 +93,23 @@ private void checkCount(BitVector vec1, IntVector vec2, VectorSchemaRoot vsr, in private VectorSchemaRoot createBatch() { FieldType varCharType = new FieldType(true, new ArrowType.Utf8(), /*dictionary=*/null); - FieldType listType = new FieldType(true, new ArrowType.List(), /*dictionary=*/null); + FieldType unionType = new FieldType(true, new ArrowType.Union(UnionMode.Sparse, null), /*dictionary=*/null); // create the schema List schemaFields = new ArrayList<>(); Field childField = new Field("varCharCol", varCharType, null); List childFields = new ArrayList<>(); childFields.add(childField); - schemaFields.add(new Field("listCol", listType, childFields)); + schemaFields.add(new Field("listCol", unionType, childFields)); Schema schema = new Schema(schemaFields); VectorSchemaRoot schemaRoot = VectorSchemaRoot.create(schema, allocator); // get and allocate the vector - ListVector vector = (ListVector) schemaRoot.getVector("listCol"); + UnionVector vector = (UnionVector) schemaRoot.getVector("listCol"); vector.allocateNew(); // write data to the vector - UnionListWriter writer = vector.getWriter(); + FieldWriter writer = vector.getWriter(); writer.setPosition(0); From 3f232b3352ab7fb4f01d449ba6824f294c1dbf59 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Sat, 14 Oct 2023 10:53:14 +0300 Subject: [PATCH 5/7] GH-38246 fixed comments --- .../org/apache/arrow/vector/ValueVector.java | 34 +++++++++++++++++++ .../vector/complex/FixedSizeListVector.java | 14 ++------ .../arrow/vector/complex/LargeListVector.java | 14 ++------ .../arrow/vector/complex/ListVector.java | 11 ++---- .../arrow/vector/complex/MapVector.java | 2 +- .../complex/NonNullableStructVector.java | 15 ++++---- .../apache/arrow/vector/types/pojo/Field.java | 6 +--- .../arrow/vector/TestVectorSchemaRoot.java | 16 ++++----- 8 files changed, 59 insertions(+), 53 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java index 62c13dd818784..e5f743c9c7f25 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java @@ -132,12 +132,46 @@ public interface ValueVector extends Closeable, Iterable { */ TransferPair getTransferPair(BufferAllocator allocator); + /** + * To transfer quota responsibility. + * + * @param ref the name of the vector + * @param allocator the target allocator + * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new target vector of + * the same type. + */ TransferPair getTransferPair(String ref, BufferAllocator allocator); + /** + * To transfer quota responsibility. + * + * @param field the Field object used by the target vector + * @param allocator the target allocator + * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new target vector of + * the same type. + */ TransferPair getTransferPair(Field field, BufferAllocator allocator); + /** + * To transfer quota responsibility. + * + * @param ref the name of the vector + * @param allocator the target allocator + * @param callBack A schema change callback. + * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new target vector of + * the same type. + */ TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack); + /** + * To transfer quota responsibility. + * + * @param field the Field object used by the target vector + * @param allocator the target allocator + * @param callBack A schema change callback. + * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new target vector of + * the same type. + */ TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack); /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java index 996a5f648d5f8..367335436aecd 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java @@ -89,15 +89,7 @@ public FixedSizeListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack unusedSchemaChangeCallback) { - super(allocator); - - this.validityBuffer = allocator.getEmpty(); - this.vector = ZeroVector.INSTANCE; - this.listSize = ((ArrowType.FixedSizeList) fieldType.getType()).getListSize(); - Preconditions.checkArgument(listSize >= 0, "list size must be non-negative"); - this.valueCount = 0; - this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - this.field = new Field(name, fieldType, null); + this(new Field(name, fieldType, null), allocator, unusedSchemaChangeCallback); } /** @@ -126,7 +118,7 @@ public Field getField() { if (field.getChildren().contains(getDataVector().getField())) { return field; } - field.setChildren(Collections.singletonList(getDataVector().getField())); + field = new Field(field.getName(), field.getFieldType(), Collections.singletonList(getDataVector().getField())); return field; } @@ -155,7 +147,7 @@ public void initializeChildrenFromFields(List children) { checkArgument(addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); - this.field.setChildren(children); + this.field = new Field(this.field.getName(), this.field.getFieldType(), children); } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index c5c243dd178b8..312bed6ab3349 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -118,15 +118,7 @@ public static LargeListVector empty(String name, BufferAllocator allocator) { * @param callBack A schema change callback. */ public LargeListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { - super(allocator); - this.field = new Field(name, fieldType, null); - this.validityBuffer = allocator.getEmpty(); - this.callBack = callBack; - this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - this.lastSet = -1; - this.offsetBuffer = allocator.getEmpty(); - this.vector = vector == null ? DEFAULT_DATA_VECTOR : vector; - this.valueCount = 0; + this(new Field(name, fieldType, null), allocator, callBack); } /** @@ -158,7 +150,7 @@ public void initializeChildrenFromFields(List children) { checkArgument(addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); - this.field.setChildren(children); + this.field = new Field(this.field.getName(), this.field.getFieldType(), children); } @Override @@ -818,7 +810,7 @@ public Field getField() { if (field.getChildren().contains(getDataVector().getField())) { return field; } - field.setChildren(Collections.singletonList(getDataVector().getField())); + field = new Field(field.getName(), field.getFieldType(), Collections.singletonList(getDataVector().getField())); return field; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index dc4faece16f14..e5a83921b3135 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -92,12 +92,7 @@ public static ListVector empty(String name, BufferAllocator allocator) { * @param callBack A schema change callback. */ public ListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { - super(name, allocator, callBack); - this.validityBuffer = allocator.getEmpty(); - this.field = new Field(name, fieldType, null); - this.callBack = callBack; - this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); - this.lastSet = -1; + this(new Field(name, fieldType, null), allocator, callBack); } /** @@ -126,7 +121,7 @@ public void initializeChildrenFromFields(List children) { checkArgument(addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); - this.field.setChildren(children); + this.field = new Field(this.field.getName(), this.field.getFieldType(), children); } @Override @@ -666,7 +661,7 @@ public Field getField() { if (field.getChildren().contains(getDataVector().getField())) { return field; } - field.setChildren(Collections.singletonList(getDataVector().getField())); + field = new Field(field.getName(), field.getFieldType(), Collections.singletonList(getDataVector().getField())); return field; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index f8fe644556e62..c1913574bab19 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -104,7 +104,7 @@ public void initializeChildrenFromFields(List children) { checkArgument(addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); addOrGetVector.getVector().initializeChildrenFromFields(structField.getChildren()); - this.field.setChildren(children); + this.field = new Field(this.field.getName(), this.field.getFieldType(), children); } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java index 3a137b867288c..5addcf4d15530 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java @@ -62,7 +62,7 @@ public static NonNullableStructVector emptyWithDuplicates(String name, BufferAll } private final SingleStructReaderImpl reader = new SingleStructReaderImpl(this); - protected final Field field; + protected Field field; public int valueCount; /** @@ -76,13 +76,7 @@ public NonNullableStructVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { - super(name, - allocator, - callBack, - null, - true); - this.field = new Field(name, fieldType, null); - this.valueCount = 0; + this(new Field(name, fieldType, null), allocator, callBack); } /** @@ -471,7 +465,10 @@ public Field getField() { for (ValueVector child : getChildren()) { children.add(child.getField()); } - field.setChildren(children); + if (children.isEmpty() || field.getChildren().equals(children)) { + return field; + } + field = new Field(field.getName(), field.getFieldType(), children); return field; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java index bd5ae6f9d780f..54c609d4a104f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java @@ -70,7 +70,7 @@ public static Field notNullable(String name, ArrowType type) { private final String name; private final FieldType fieldType; - private List children; + private final List children; private Field( String name, @@ -256,10 +256,6 @@ public List getChildren() { return children; } - public void setChildren(List children) { - this.children = Collections2.toImmutableList(children); - } - @JsonIgnore public Map getMetadata() { return fieldType.getMetadata(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java index 2208def3eaba9..67b8408b27f29 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java @@ -29,9 +29,8 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.writer.FieldWriter; -import org.apache.arrow.vector.types.UnionMode; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; @@ -41,6 +40,7 @@ import org.junit.Test; public class TestVectorSchemaRoot { + private BufferAllocator allocator; @Before @@ -57,7 +57,7 @@ public void terminate() { public void testResetRowCount() { final int size = 20; try (final BitVector vec1 = new BitVector("bit", allocator); - final IntVector vec2 = new IntVector("int", allocator)) { + final IntVector vec2 = new IntVector("int", allocator)) { VectorSchemaRoot vsr = VectorSchemaRoot.of(vec1, vec2); vsr.allocateNew(); @@ -93,23 +93,23 @@ private void checkCount(BitVector vec1, IntVector vec2, VectorSchemaRoot vsr, in private VectorSchemaRoot createBatch() { FieldType varCharType = new FieldType(true, new ArrowType.Utf8(), /*dictionary=*/null); - FieldType unionType = new FieldType(true, new ArrowType.Union(UnionMode.Sparse, null), /*dictionary=*/null); + FieldType listType = new FieldType(true, new ArrowType.List(), /*dictionary=*/null); // create the schema List schemaFields = new ArrayList<>(); Field childField = new Field("varCharCol", varCharType, null); List childFields = new ArrayList<>(); childFields.add(childField); - schemaFields.add(new Field("listCol", unionType, childFields)); + schemaFields.add(new Field("listCol", listType, childFields)); Schema schema = new Schema(schemaFields); VectorSchemaRoot schemaRoot = VectorSchemaRoot.create(schema, allocator); // get and allocate the vector - UnionVector vector = (UnionVector) schemaRoot.getVector("listCol"); + ListVector vector = (ListVector) schemaRoot.getVector("listCol"); vector.allocateNew(); // write data to the vector - FieldWriter writer = vector.getWriter(); + UnionListWriter writer = vector.getWriter(); writer.setPosition(0); From 62c989995cbd11f18f402d1af3aff5a856bd6edd Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Sat, 14 Oct 2023 12:30:12 +0300 Subject: [PATCH 6/7] GH-38246 revert unneeded tests changes --- .../test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java index 67b8408b27f29..ce3fb2cdf0ea1 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java @@ -57,7 +57,7 @@ public void terminate() { public void testResetRowCount() { final int size = 20; try (final BitVector vec1 = new BitVector("bit", allocator); - final IntVector vec2 = new IntVector("int", allocator)) { + final IntVector vec2 = new IntVector("int", allocator)) { VectorSchemaRoot vsr = VectorSchemaRoot.of(vec1, vec2); vsr.allocateNew(); From 2838de28ca818a5cf653a698a22d4b01ae9068b6 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Mon, 16 Oct 2023 11:23:40 +0300 Subject: [PATCH 7/7] GH-38246 refactoring --- .../vector/complex/NonNullableStructVector.java | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java index 5addcf4d15530..e642c547a1612 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java @@ -17,8 +17,6 @@ package org.apache.arrow.vector.complex; -import static org.apache.arrow.util.Preconditions.checkNotNull; - import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; @@ -89,13 +87,7 @@ public NonNullableStructVector(String name, public NonNullableStructVector(Field field, BufferAllocator allocator, CallBack callBack) { - super(field.getName(), - allocator, - callBack, - null, - true); - this.field = field; - this.valueCount = 0; + this(field, allocator, callBack, null, true); } /** @@ -113,9 +105,7 @@ public NonNullableStructVector(String name, CallBack callBack, ConflictPolicy conflictPolicy, boolean allowConflictPolicyChanges) { - super(name, allocator, callBack, conflictPolicy, allowConflictPolicyChanges); - this.field = new Field(name, checkNotNull(fieldType), null); - this.valueCount = 0; + this(new Field(name, fieldType, null), allocator, callBack, conflictPolicy, allowConflictPolicyChanges); } /**