From 4904a8d1f9189b728a38b97da1ac85539313c4e4 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Wed, 15 May 2024 22:41:03 +0530 Subject: [PATCH 01/16] feat: adding getTypeBufferCount and getTypeLayout --- .../org/apache/arrow/vector/TypeLayout.java | 392 +++++++++++++++++- .../org/apache/arrow/vector/VectorLoader.java | 2 +- .../apache/arrow/vector/VectorUnloader.java | 2 +- .../arrow/vector/ipc/JsonFileReader.java | 2 +- .../arrow/vector/ipc/JsonFileWriter.java | 2 +- .../validate/ValidateVectorBufferVisitor.java | 2 +- .../apache/arrow/vector/TestTypeLayout.java | 122 +++++- 7 files changed, 490 insertions(+), 34 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java index ea92efdc55f61..3c94a5bf58412 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java @@ -28,6 +28,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor; import org.apache.arrow.vector.types.pojo.ArrowType.Binary; +import org.apache.arrow.vector.types.pojo.ArrowType.BinaryView; import org.apache.arrow.vector.types.pojo.ArrowType.Bool; import org.apache.arrow.vector.types.pojo.ArrowType.Date; import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; @@ -58,6 +59,7 @@ public class TypeLayout { /** * Constructs a new {@link TypeLayout} for the given arrowType. */ + @Deprecated public static TypeLayout getTypeLayout(final ArrowType arrowType) { TypeLayout layout = arrowType.accept(new ArrowTypeVisitor() { @@ -186,7 +188,6 @@ public TypeLayout visit(Binary type) { @Override public TypeLayout visit(ArrowType.BinaryView type) { - // TODO: https://github.com/apache/arrow/issues/40934 throw new UnsupportedOperationException("BinaryView not supported"); } @@ -197,7 +198,6 @@ public TypeLayout visit(Utf8 type) { @Override public TypeLayout visit(Utf8View type) { - // TODO: https://github.com/apache/arrow/issues/40934 throw new UnsupportedOperationException("Utf8View not supported"); } @@ -274,9 +274,235 @@ public TypeLayout visit(Duration type) { return layout; } + /** + * Constructs a new {@link TypeLayout}. + * + * @param arrowType the type to create the layout for + * @param vector the vector to create the layout for + * @return the layout for the given type and vector + * @throws UnsupportedOperationException if the ArrowType is not supported + */ + public static TypeLayout getTypeLayout(final ArrowType arrowType, FieldVector vector) { + TypeLayout layout = arrowType.accept(new ArrowTypeVisitor() { + + @Override + public TypeLayout visit(Int type) { + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth())); + } + + @Override + public TypeLayout visit(Union type) { + List vectors; + switch (type.getMode()) { + case Dense: + vectors = asList( + BufferLayout.typeBuffer(), + BufferLayout.offsetBuffer() // offset to find the vector + ); + break; + case Sparse: + vectors = asList( + BufferLayout.typeBuffer() // type of the value at the index or 0 if null + ); + break; + default: + throw new UnsupportedOperationException("Unsupported Union Mode: " + type.getMode()); + } + return new TypeLayout(vectors); + } + + @Override + public TypeLayout visit(Struct type) { + List vectors = asList( + BufferLayout.validityVector() + ); + return new TypeLayout(vectors); + } + + @Override + public TypeLayout visit(Timestamp type) { + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); + } + + @Override + public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { + List vectors = asList( + BufferLayout.validityVector(), + BufferLayout.offsetBuffer() + ); + return new TypeLayout(vectors); + } + + @Override + public TypeLayout visit(ArrowType.LargeList type) { + List vectors = asList( + BufferLayout.validityVector(), + BufferLayout.largeOffsetBuffer() + ); + return new TypeLayout(vectors); + } + + @Override + public TypeLayout visit(FixedSizeList type) { + List vectors = asList( + BufferLayout.validityVector() + ); + return new TypeLayout(vectors); + } + + @Override + public TypeLayout visit(Map type) { + List vectors = asList( + BufferLayout.validityVector(), + BufferLayout.offsetBuffer() + ); + return new TypeLayout(vectors); + } + + @Override + public TypeLayout visit(FloatingPoint type) { + int bitWidth; + switch (type.getPrecision()) { + case HALF: + bitWidth = 16; + break; + case SINGLE: + bitWidth = 32; + break; + case DOUBLE: + bitWidth = 64; + break; + default: + throw new UnsupportedOperationException("Unsupported Precision: " + type.getPrecision()); + } + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(bitWidth)); + } + + @Override + public TypeLayout visit(Decimal type) { + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth())); + } + + @Override + public TypeLayout visit(FixedSizeBinary type) { + return newFixedWidthTypeLayout(new BufferLayout(BufferType.DATA, type.getByteWidth() * 8)); + } + + @Override + public TypeLayout visit(Bool type) { + return newFixedWidthTypeLayout(BufferLayout.booleanVector()); + } + + @Override + public TypeLayout visit(Binary type) { + return newVariableWidthTypeLayout(); + } + + @Override + public TypeLayout visit(BinaryView type) { + return newVariableWidthViewTypeLayout((ViewVarBinaryVector) vector); + } + + @Override + public TypeLayout visit(Utf8 type) { + return newVariableWidthTypeLayout(); + } + + @Override + public TypeLayout visit(Utf8View type) { + return newVariableWidthViewTypeLayout((ViewVarCharVector) vector); + } + + @Override + public TypeLayout visit(LargeUtf8 type) { + return newLargeVariableWidthTypeLayout(); + } + + @Override + public TypeLayout visit(LargeBinary type) { + return newLargeVariableWidthTypeLayout(); + } + + private TypeLayout newVariableWidthTypeLayout() { + return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.offsetBuffer(), + BufferLayout.byteVector()); + } + + private TypeLayout newVariableWidthViewTypeLayout(BaseVariableWidthViewVector vector) { + final int numDataBuffers = vector.getDataBuffers().size(); + List bufferLayouts = new ArrayList<>(numDataBuffers + 2); + bufferLayouts.add(BufferLayout.validityVector()); + bufferLayouts.add(BufferLayout.byteVector()); + + for (int i = 0; i < numDataBuffers; i++) { + bufferLayouts.add(BufferLayout.byteVector()); + } + + return new TypeLayout(bufferLayouts); + } + + private TypeLayout newLargeVariableWidthTypeLayout() { + return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.largeOffsetBuffer(), + BufferLayout.byteVector()); + } + + private TypeLayout newPrimitiveTypeLayout(BufferLayout... vectors) { + return new TypeLayout(asList(vectors)); + } + + public TypeLayout newFixedWidthTypeLayout(BufferLayout dataVector) { + return newPrimitiveTypeLayout(BufferLayout.validityVector(), dataVector); + } + + @Override + public TypeLayout visit(Null type) { + return new TypeLayout(Collections.emptyList()); + } + + @Override + public TypeLayout visit(Date type) { + switch (type.getUnit()) { + case DAY: + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(32)); + case MILLISECOND: + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); + default: + throw new UnsupportedOperationException("Unknown unit " + type.getUnit()); + } + } + + @Override + public TypeLayout visit(Time type) { + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth())); + } + + @Override + public TypeLayout visit(Interval type) { + switch (type.getUnit()) { + case DAY_TIME: + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); + case YEAR_MONTH: + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(32)); + case MONTH_DAY_NANO: + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(128)); + default: + throw new UnsupportedOperationException("Unknown unit " + type.getUnit()); + } + } + + @Override + public TypeLayout visit(Duration type) { + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); + } + + }); + return layout; + } + /** * Gets the number of {@link BufferLayout}s for the given arrowType. */ + @Deprecated public static int getTypeBufferCount(final ArrowType arrowType) { return arrowType.accept(new ArrowTypeVisitor() { @@ -377,9 +603,8 @@ public Integer visit(Binary type) { } @Override - public Integer visit(ArrowType.BinaryView type) { - // TODO: https://github.com/apache/arrow/issues/40935 - return VARIABLE_WIDTH_BUFFER_COUNT; + public Integer visit(BinaryView type) { + throw new UnsupportedOperationException("BinaryView not supported"); } @Override @@ -389,10 +614,165 @@ public Integer visit(Utf8 type) { @Override public Integer visit(Utf8View type) { - // TODO: https://github.com/apache/arrow/issues/40935 + throw new UnsupportedOperationException("Utf8View not supported"); + } + + @Override + public Integer visit(LargeUtf8 type) { + return VARIABLE_WIDTH_BUFFER_COUNT; + } + + @Override + public Integer visit(LargeBinary type) { + return VARIABLE_WIDTH_BUFFER_COUNT; + } + + @Override + public Integer visit(Null type) { + return 0; + } + + @Override + public Integer visit(Date type) { + return FIXED_WIDTH_BUFFER_COUNT; + } + + @Override + public Integer visit(Time type) { + return FIXED_WIDTH_BUFFER_COUNT; + } + + @Override + public Integer visit(Interval type) { + return FIXED_WIDTH_BUFFER_COUNT; + } + + @Override + public Integer visit(Duration type) { + return FIXED_WIDTH_BUFFER_COUNT; + } + + }); + } + + + /** + * Gets the number of {@link BufferLayout}s. + * + * @param arrowType the ArrowType for which the buffer count is to be determined + * @param vector the FieldVector associated with the ArrowType + * @return the number of BufferLayouts for the given ArrowType and FieldVector + * @throws UnsupportedOperationException if the ArrowType is not supported + */ + public static int getTypeBufferCount(final ArrowType arrowType, FieldVector vector) { + + return arrowType.accept(new ArrowTypeVisitor() { + + /** + * All fixed width vectors have a common number of buffers 2: one validity buffer, plus a data buffer. + */ + static final int FIXED_WIDTH_BUFFER_COUNT = 2; + + /** + * All variable width vectors have a common number of buffers 3: a validity buffer, + * an offset buffer, and a data buffer. + */ + static final int VARIABLE_WIDTH_BUFFER_COUNT = 3; + + @Override + public Integer visit(Int type) { + return FIXED_WIDTH_BUFFER_COUNT; + } + + @Override + public Integer visit(Union type) { + switch (type.getMode()) { + case Dense: + // TODO: validate this + return 2; + case Sparse: + // type buffer + return 1; + default: + throw new UnsupportedOperationException("Unsupported Union Mode: " + type.getMode()); + } + } + + @Override + public Integer visit(Struct type) { + // validity buffer + return 1; + } + + @Override + public Integer visit(Timestamp type) { + return FIXED_WIDTH_BUFFER_COUNT; + } + + @Override + public Integer visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { + // validity buffer + offset buffer + return 2; + } + + @Override + public Integer visit(ArrowType.LargeList type) { + // validity buffer + offset buffer + return 2; + } + + @Override + public Integer visit(FixedSizeList type) { + // validity buffer + return 1; + } + + @Override + public Integer visit(Map type) { + // validity buffer + offset buffer + return 2; + } + + @Override + public Integer visit(FloatingPoint type) { + return FIXED_WIDTH_BUFFER_COUNT; + } + + @Override + public Integer visit(Decimal type) { + return FIXED_WIDTH_BUFFER_COUNT; + } + + @Override + public Integer visit(FixedSizeBinary type) { + return FIXED_WIDTH_BUFFER_COUNT; + } + + @Override + public Integer visit(Bool type) { + return FIXED_WIDTH_BUFFER_COUNT; + } + + @Override + public Integer visit(Binary type) { + return VARIABLE_WIDTH_BUFFER_COUNT; + } + + @Override + public Integer visit(BinaryView type) { + return 2 + ((ViewVarBinaryVector) vector).getDataBuffers().size(); + } + + @Override + public Integer visit(Utf8 type) { return VARIABLE_WIDTH_BUFFER_COUNT; } + @Override + public Integer visit(Utf8View type) { + return 2 + ((ViewVarCharVector) vector).getDataBuffers().size(); + } + @Override public Integer visit(LargeUtf8 type) { return VARIABLE_WIDTH_BUFFER_COUNT; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index 510cef24c7e16..1d03361817779 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -98,7 +98,7 @@ private void loadBuffers( CompressionCodec codec) { checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector); ArrowFieldNode fieldNode = nodes.next(); - int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType()); + int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType(), vector); List ownBuffers = new ArrayList<>(bufferLayoutCount); for (int j = 0; j < bufferLayoutCount; j++) { ArrowBuf nextBuf = buffers.next(); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java index 1d44e37ac71af..12f90a034db5e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java @@ -92,7 +92,7 @@ public ArrowRecordBatch getRecordBatch() { private void appendNodes(FieldVector vector, List nodes, List buffers) { nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1)); List fieldBuffers = vector.getFieldBuffers(); - int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType()); + int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType(), vector); if (fieldBuffers.size() != expectedBufferCount) { throw new IllegalArgumentException(String.format( "wrong number of buffers for field %s in vector %s. found: %s", diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java index e927acd4816ad..a043555c687b3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java @@ -715,7 +715,7 @@ private ArrowBuf readIntoBuffer(BufferAllocator allocator, BufferType bufferType private void readFromJsonIntoVector(Field field, FieldVector vector) throws JsonParseException, IOException { ArrowType type = field.getType(); - TypeLayout typeLayout = TypeLayout.getTypeLayout(type); + TypeLayout typeLayout = TypeLayout.getTypeLayout(type, vector); List vectorTypes = typeLayout.getBufferTypes(); ArrowBuf[] vectorBuffers = new ArrowBuf[vectorTypes.size()]; /* diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java index f5e267e81256c..525338977c6c1 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java @@ -208,7 +208,7 @@ private void writeBatch(VectorSchemaRoot recordBatch) throws IOException { } private void writeFromVectorIntoJson(Field field, FieldVector vector) throws IOException { - List vectorTypes = TypeLayout.getTypeLayout(field.getType()).getBufferTypes(); + List vectorTypes = TypeLayout.getTypeLayout(field.getType(), vector).getBufferTypes(); List vectorBuffers = vector.getFieldBuffers(); if (vectorTypes.size() != vectorBuffers.size()) { throw new IllegalArgumentException("vector types and inner vector buffers are not the same size: " + diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java index 0a67db0455b41..e628f780c53fb 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java @@ -51,7 +51,7 @@ private void validateVectorCommon(ValueVector vector) { if (vector instanceof FieldVector) { FieldVector fieldVector = (FieldVector) vector; - int typeBufferCount = TypeLayout.getTypeBufferCount(arrowType); + int typeBufferCount = TypeLayout.getTypeBufferCount(arrowType, fieldVector); validateOrThrow(fieldVector.getFieldBuffers().size() == typeBufferCount, "Expected %s buffers in vector of type %s, got %s.", typeBufferCount, vector.getField().getType().toString(), fieldVector.getFieldBuffers().size()); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java index 97930f433d301..e5a985487c248 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java @@ -17,82 +17,158 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import java.util.Random; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.IntervalUnit; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.UnionMode; import org.apache.arrow.vector.types.pojo.ArrowType; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestTypeLayout { + private BufferAllocator allocator; + + @BeforeEach + public void prepare() { + allocator = new RootAllocator(Integer.MAX_VALUE); + } + + @AfterEach + public void shutdown() { + allocator.close(); + } + + @Test public void testTypeBufferCount() { ArrowType type = new ArrowType.Int(8, true); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.Union(UnionMode.Sparse, new int[2]); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.Union(UnionMode.Dense, new int[1]); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.Struct(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.List(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.FixedSizeList(5); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.Map(false); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.Decimal(10, 10, 128); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.Decimal(10, 10, 256); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.FixedSizeBinary(5); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.Bool(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.Binary(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.Utf8(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.Null(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.Date(DateUnit.DAY); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.Time(TimeUnit.MILLISECOND, 32); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.Interval(IntervalUnit.DAY_TIME); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); type = new ArrowType.Duration(TimeUnit.MILLISECOND); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type, null), + TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + } + + private String generateRandomString(int length) { + Random random = new Random(); + StringBuilder sb = new StringBuilder(length); + for (int i = 0; i < length; i++) { + sb.append(random.nextInt(10)); // 0-9 + } + return sb.toString(); + } + + @Test + public void testTypeBufferCountInVectorsWithVariadicBuffers() { + // empty vector + try (ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + ArrowType type = viewVarCharVector.getMinorType().getType(); + assertEquals(TypeLayout.getTypeBufferCount(type, viewVarCharVector), + TypeLayout.getTypeLayout(type, viewVarCharVector).getBufferLayouts().size()); + } + // vector with long strings + try (ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + viewVarCharVector.allocateNew(32, 6); + + viewVarCharVector.setSafe(0, generateRandomString(8).getBytes()); + viewVarCharVector.setSafe(1, generateRandomString(12).getBytes()); + viewVarCharVector.setSafe(2, generateRandomString(14).getBytes()); + viewVarCharVector.setSafe(3, generateRandomString(18).getBytes()); + viewVarCharVector.setSafe(4, generateRandomString(22).getBytes()); + viewVarCharVector.setSafe(5, generateRandomString(24).getBytes()); + + viewVarCharVector.setValueCount(6); + + ArrowType type = viewVarCharVector.getMinorType().getType(); + assertEquals(TypeLayout.getTypeBufferCount(type, viewVarCharVector), + TypeLayout.getTypeLayout(type, viewVarCharVector).getBufferLayouts().size()); + } } } From 9b29042032fd9f8a4c9fe67eeaba74a8320ae10a Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Thu, 16 May 2024 06:22:38 +0530 Subject: [PATCH 02/16] fix: minor docs change --- .../org/apache/arrow/vector/BaseVariableWidthViewVector.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java index ec700a0dc2592..38231cc4591f3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java @@ -161,7 +161,7 @@ public ArrowBuf getDataBuffer() { /** * Get the buffers that store the data for views in the vector. * - * @return buffer + * @return list of ArrowBuf */ public List getDataBuffers() { return dataBuffers; From 5d12bf85a07f992d99e5b2101b60f0641de528d7 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Thu, 16 May 2024 08:07:12 +0530 Subject: [PATCH 03/16] fix: add docs --- .../src/main/java/org/apache/arrow/vector/TypeLayout.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java index 3c94a5bf58412..600d19b715a59 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java @@ -58,6 +58,7 @@ public class TypeLayout { /** * Constructs a new {@link TypeLayout} for the given arrowType. + * This method is deprecated and will be removed in a future release. */ @Deprecated public static TypeLayout getTypeLayout(final ArrowType arrowType) { @@ -501,6 +502,7 @@ public TypeLayout visit(Duration type) { /** * Gets the number of {@link BufferLayout}s for the given arrowType. + * This method is deprecated and will be removed in a future release. */ @Deprecated public static int getTypeBufferCount(final ArrowType arrowType) { From 1bf769ef5d34d608289ac45d470695145c2a6a3d Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Sat, 18 May 2024 05:01:07 +0530 Subject: [PATCH 04/16] fix: adding ArrowRecordBatch --- .../vector/BaseVariableWidthViewVector.java | 17 +- .../org/apache/arrow/vector/VectorLoader.java | 19 +- .../apache/arrow/vector/VectorUnloader.java | 20 +- .../arrow/vector/ViewVarCharVector.java | 2 +- .../vector/ipc/message/ArrowRecordBatch.java | 57 ++++- .../apache/arrow/vector/TestTypeLayout.java | 233 ++++++++++++++++++ 6 files changed, 328 insertions(+), 20 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java index 38231cc4591f3..95078dd62364b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java @@ -368,8 +368,21 @@ public List getChildrenFromFields() { */ @Override public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - // TODO: https://github.com/apache/arrow/issues/40931 - throw new UnsupportedOperationException("loadFieldBuffers is not supported for BaseVariableWidthViewVector"); + ArrowBuf bitBuf = ownBuffers.get(0); + ArrowBuf viewBuf = ownBuffers.get(1); + List dataBufs = ownBuffers.subList(2, ownBuffers.size()); + + this.clear(); + + this.viewBuffer = viewBuf.getReferenceManager().retain(viewBuf, allocator); + this.validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuf, allocator); + + for (ArrowBuf dataBuf : dataBufs) { + this.dataBuffers.add(dataBuf.getReferenceManager().retain(dataBuf, allocator)); + } + + lastSet = fieldNode.getLength() - 1; + valueCount = fieldNode.getLength(); } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index 1d03361817779..14b672fea630d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -80,8 +80,13 @@ public void load(ArrowRecordBatch recordBatch) { CompressionUtil.CodecType.fromCompressionType(recordBatch.getBodyCompression().getCodec()); decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION; CompressionCodec codec = decompressionNeeded ? factory.createCodec(codecType) : NoCompressionCodec.INSTANCE; + Iterator variadicBufferCounts = null; + if (recordBatch.getVariadicBufferCounts() != null && !recordBatch.getVariadicBufferCounts().isEmpty()) { + variadicBufferCounts = recordBatch.getVariadicBufferCounts().iterator(); + } + for (FieldVector fieldVector : root.getFieldVectors()) { - loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec); + loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec, variadicBufferCounts); } root.setRowCount(recordBatch.getLength()); if (nodes.hasNext() || buffers.hasNext()) { @@ -95,10 +100,16 @@ private void loadBuffers( Field field, Iterator buffers, Iterator nodes, - CompressionCodec codec) { + CompressionCodec codec, + Iterator variadicBufferCounts) { checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector); ArrowFieldNode fieldNode = nodes.next(); - int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType(), vector); + // variadicBufferLayoutCount will be 0 for vectors of type except BaseVariableWidthViewVector + long variadicBufferLayoutCount = 0; + if (variadicBufferCounts != null) { + variadicBufferLayoutCount = variadicBufferCounts.next(); + } + int bufferLayoutCount = (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType(), vector)); List ownBuffers = new ArrayList<>(bufferLayoutCount); for (int j = 0; j < bufferLayoutCount; j++) { ArrowBuf nextBuf = buffers.next(); @@ -130,7 +141,7 @@ private void loadBuffers( for (int i = 0; i < childrenFromFields.size(); i++) { Field child = children.get(i); FieldVector fieldVector = childrenFromFields.get(i); - loadBuffers(fieldVector, child, buffers, nodes, codec); + loadBuffers(fieldVector, child, buffers, nodes, codec, variadicBufferCounts); } } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java index 12f90a034db5e..e42aa18a0ec0b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java @@ -80,19 +80,29 @@ public VectorUnloader( public ArrowRecordBatch getRecordBatch() { List nodes = new ArrayList<>(); List buffers = new ArrayList<>(); + List variadicBufferCounts = new ArrayList<>(); for (FieldVector vector : root.getFieldVectors()) { - appendNodes(vector, nodes, buffers); + appendNodes(vector, nodes, buffers, variadicBufferCounts); } // Do NOT retain buffers in ArrowRecordBatch constructor since we have already retained them. return new ArrowRecordBatch( - root.getRowCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), alignBuffers, - /*retainBuffers*/ false); + root.getRowCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), + variadicBufferCounts, alignBuffers, /*retainBuffers*/ false); } - private void appendNodes(FieldVector vector, List nodes, List buffers) { + private long getVariadicBufferCount(FieldVector vector) { + if (vector instanceof BaseVariableWidthViewVector) { + return ((BaseVariableWidthViewVector) vector).getDataBuffers().size(); + } + return 0L; + } + + private void appendNodes(FieldVector vector, List nodes, List buffers, + List variadicBufferCounts) { nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1)); List fieldBuffers = vector.getFieldBuffers(); int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType(), vector); + variadicBufferCounts.add(getVariadicBufferCount(vector)); if (fieldBuffers.size() != expectedBufferCount) { throw new IllegalArgumentException(String.format( "wrong number of buffers for field %s in vector %s. found: %s", @@ -107,7 +117,7 @@ private void appendNodes(FieldVector vector, List nodes, List buffersLayout; + private final List variadicBufferCounts; + private boolean closed = false; public ArrowRecordBatch( int length, List nodes, List buffers) { - this(length, nodes, buffers, NoCompressionCodec.DEFAULT_BODY_COMPRESSION, true); + this(length, nodes, buffers, NoCompressionCodec.DEFAULT_BODY_COMPRESSION, null, true); } public ArrowRecordBatch( int length, List nodes, List buffers, ArrowBodyCompression bodyCompression) { - this(length, nodes, buffers, bodyCompression, true); + this(length, nodes, buffers, bodyCompression, null, true); } /** @@ -76,12 +78,13 @@ public ArrowRecordBatch( * @param nodes field level info * @param buffers will be retained until this recordBatch is closed * @param bodyCompression compression info. + * @param variadicBufferCounts the number of buffers in each variadic section. * @param alignBuffers Whether to align buffers to an 8 byte boundary. */ public ArrowRecordBatch( int length, List nodes, List buffers, - ArrowBodyCompression bodyCompression, boolean alignBuffers) { - this(length, nodes, buffers, bodyCompression, alignBuffers, /*retainBuffers*/ true); + ArrowBodyCompression bodyCompression, List variadicBufferCounts, boolean alignBuffers) { + this(length, nodes, buffers, bodyCompression, variadicBufferCounts, alignBuffers, /*retainBuffers*/ true); } /** @@ -91,19 +94,22 @@ public ArrowRecordBatch( * @param nodes field level info * @param buffers will be retained until this recordBatch is closed * @param bodyCompression compression info. + * @param variadicBufferCounts the number of buffers in each variadic section. * @param alignBuffers Whether to align buffers to an 8 byte boundary. * @param retainBuffers Whether to retain() each source buffer in the constructor. If false, the caller is * responsible for retaining the buffers beforehand. */ public ArrowRecordBatch( int length, List nodes, List buffers, - ArrowBodyCompression bodyCompression, boolean alignBuffers, boolean retainBuffers) { + ArrowBodyCompression bodyCompression, List variadicBufferCounts, boolean alignBuffers, + boolean retainBuffers) { super(); this.length = length; this.nodes = nodes; this.buffers = buffers; Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null"); this.bodyCompression = bodyCompression; + this.variadicBufferCounts = variadicBufferCounts; List arrowBuffers = new ArrayList<>(buffers.size()); long offset = 0; for (ArrowBuf arrowBuf : buffers) { @@ -129,12 +135,14 @@ public ArrowRecordBatch( // to distinguish this from the public constructor. private ArrowRecordBatch( boolean dummy, int length, List nodes, - List buffers, ArrowBodyCompression bodyCompression) { + List buffers, ArrowBodyCompression bodyCompression, + List variadicBufferCounts) { this.length = length; this.nodes = nodes; this.buffers = buffers; Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null"); this.bodyCompression = bodyCompression; + this.variadicBufferCounts = variadicBufferCounts; this.closed = false; List arrowBuffers = new ArrayList<>(); long offset = 0; @@ -179,6 +187,14 @@ public List getBuffers() { return buffers; } + /** + * Get the record batch variadic buffer counts. + * @return the variadic buffer counts + */ + public List getVariadicBufferCounts() { + return variadicBufferCounts; + } + /** * Create a new ArrowRecordBatch which has the same information as this batch but whose buffers * are owned by that Allocator. @@ -195,7 +211,7 @@ public ArrowRecordBatch cloneWithTransfer(final BufferAllocator allocator) { .writerIndex(buf.writerIndex())) .collect(Collectors.toList()); close(); - return new ArrowRecordBatch(false, length, nodes, newBufs, bodyCompression); + return new ArrowRecordBatch(false, length, nodes, newBufs, bodyCompression, variadicBufferCounts); } /** @@ -217,6 +233,24 @@ public int writeTo(FlatBufferBuilder builder) { if (bodyCompression.getCodec() != NoCompressionCodec.COMPRESSION_TYPE) { compressOffset = bodyCompression.writeTo(builder); } + + // Start the variadicBufferCounts vector. + int variadicBufferCountsOffset = 0; + if (variadicBufferCounts != null && !variadicBufferCounts.isEmpty()) { + variadicBufferCountsOffset = variadicBufferCounts.size(); + int elementSizeInBytes = 8; // Size of long in bytes + builder.startVector(elementSizeInBytes, variadicBufferCountsOffset, elementSizeInBytes); + + // Add each long to the builder. Note that elements should be added in reverse order. + for (int i = variadicBufferCounts.size() - 1; i >= 0; i--) { + long value = variadicBufferCounts.get(i); + builder.addLong(value); + } + + // End the vector. This returns an offset that you can use to refer to the vector. + variadicBufferCountsOffset = builder.endVector(); + } + RecordBatch.startRecordBatch(builder); RecordBatch.addLength(builder, length); RecordBatch.addNodes(builder, nodesOffset); @@ -224,6 +258,12 @@ public int writeTo(FlatBufferBuilder builder) { if (bodyCompression.getCodec() != NoCompressionCodec.COMPRESSION_TYPE) { RecordBatch.addCompression(builder, compressOffset); } + + // Add the variadicBufferCounts to the RecordBatch + if (variadicBufferCounts != null && !variadicBufferCounts.isEmpty()) { + RecordBatch.addVariadicBufferCounts(builder, variadicBufferCountsOffset); + } + return RecordBatch.endRecordBatch(builder); } @@ -248,7 +288,8 @@ public void close() { @Override public String toString() { return "ArrowRecordBatch [length=" + length + ", nodes=" + nodes + ", #buffers=" + buffers.size() + - ", buffersLayout=" + buffersLayout + ", closed=" + closed + "]"; + ", #variadicBufferCounts=" + variadicBufferCounts.size() + ", buffersLayout=" + buffersLayout + + ", closed=" + closed + "]"; } /** diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java index e5a985487c248..d93e8d85f76d0 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java @@ -17,24 +17,50 @@ package org.apache.arrow.vector; +import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; import java.util.Random; +import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.complex.NonNullableStructVector; +import org.apache.arrow.vector.complex.impl.ComplexWriterImpl; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; +import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; +import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; +import org.apache.arrow.vector.complex.writer.IntWriter; +import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.IntervalUnit; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.UnionMode; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; public class TestTypeLayout { + private static final Charset utf8Charset = StandardCharsets.UTF_8; + private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset); + private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset); + private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset); + private static final byte[] STR4 = "DDDDDDDD4".getBytes(utf8Charset); + private static final byte[] STR5 = "EEE5".getBytes(utf8Charset); + private static final byte[] STR6 = "0123456789123456".getBytes(utf8Charset); + private BufferAllocator allocator; @BeforeEach @@ -171,4 +197,211 @@ public void testTypeBufferCountInVectorsWithVariadicBuffers() { TypeLayout.getTypeLayout(type, viewVarCharVector).getBufferLayouts().size()); } } + + @Test + public void testVectorLoadUnload() { + + try (final ViewVarCharVector vector1 = new ViewVarCharVector("myvector", allocator)) { + + setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6); + + assertEquals(5, vector1.getLastSet()); + vector1.setValueCount(15); + assertEquals(14, vector1.getLastSet()); + + /* Check the vector output */ + assertArrayEquals(STR1, vector1.get(0)); + assertArrayEquals(STR2, vector1.get(1)); + assertArrayEquals(STR3, vector1.get(2)); + assertArrayEquals(STR4, vector1.get(3)); + assertArrayEquals(STR5, vector1.get(4)); + assertArrayEquals(STR6, vector1.get(5)); + + Field field = vector1.getField(); + String fieldName = field.getName(); + + List fields = new ArrayList<>(); + List fieldVectors = new ArrayList<>(); + + fields.add(field); + fieldVectors.add(vector1); + + Schema schema = new Schema(fields); + + VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount()); + VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); + + try ( + ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); + BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); + VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); + ) { + + VectorLoader vectorLoader = new VectorLoader(schemaRoot2); + vectorLoader.load(recordBatch); + + ViewVarCharVector vector2 = (ViewVarCharVector) schemaRoot2.getVector(fieldName); + /* + * lastSet would have internally been set by VectorLoader.load() when it invokes + * loadFieldBuffers. + */ + assertEquals(14, vector2.getLastSet()); + vector2.setValueCount(25); + assertEquals(24, vector2.getLastSet()); + + /* Check the vector output */ + assertArrayEquals(STR1, vector2.get(0)); + assertArrayEquals(STR2, vector2.get(1)); + assertArrayEquals(STR3, vector2.get(2)); + assertArrayEquals(STR4, vector2.get(3)); + assertArrayEquals(STR5, vector2.get(4)); + assertArrayEquals(STR6, vector2.get(5)); + } + } + } + + @Test + public void testVectorLoadUnload2() { + + try (final VarCharVector vector1 = new VarCharVector("myvector", allocator)) { + + setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6); + + assertEquals(5, vector1.getLastSet()); + vector1.setValueCount(15); + assertEquals(14, vector1.getLastSet()); + + /* Check the vector output */ + assertArrayEquals(STR1, vector1.get(0)); + assertArrayEquals(STR2, vector1.get(1)); + assertArrayEquals(STR3, vector1.get(2)); + assertArrayEquals(STR4, vector1.get(3)); + assertArrayEquals(STR5, vector1.get(4)); + assertArrayEquals(STR6, vector1.get(5)); + + Field field = vector1.getField(); + String fieldName = field.getName(); + + List fields = new ArrayList<>(); + List fieldVectors = new ArrayList<>(); + + fields.add(field); + fieldVectors.add(vector1); + + Schema schema = new Schema(fields); + + VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount()); + VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); + + try ( + ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); + BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); + VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); + ) { + + VectorLoader vectorLoader = new VectorLoader(schemaRoot2); + vectorLoader.load(recordBatch); + + VarCharVector vector2 = (VarCharVector) schemaRoot2.getVector(fieldName); + /* + * lastSet would have internally been set by VectorLoader.load() when it invokes + * loadFieldBuffers. + */ + assertEquals(14, vector2.getLastSet()); + vector2.setValueCount(25); + assertEquals(24, vector2.getLastSet()); + + /* Check the vector output */ + assertArrayEquals(STR1, vector2.get(0)); + assertArrayEquals(STR2, vector2.get(1)); + assertArrayEquals(STR3, vector2.get(2)); + assertArrayEquals(STR4, vector2.get(3)); + assertArrayEquals(STR5, vector2.get(4)); + assertArrayEquals(STR6, vector2.get(5)); + } + } + } + + @Test + public void testUnloadLoadAddPadding() throws IOException { + int count = 10000; + Schema schema; + try ( + BufferAllocator originalVectorsAllocator = + allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); + NonNullableStructVector parent = NonNullableStructVector.empty("parent", originalVectorsAllocator)) { + + // write some data + ComplexWriter writer = new ComplexWriterImpl("root", parent); + StructWriter rootWriter = writer.rootAsStruct(); + ListWriter list = rootWriter.list("list"); + IntWriter intWriter = list.integer(); + for (int i = 0; i < count; i++) { + list.setPosition(i); + list.startList(); + for (int j = 0; j < i % 4 + 1; j++) { + intWriter.writeInt(i); + } + list.endList(); + } + writer.setValueCount(count); + + // unload it + FieldVector root = parent.getChild("root"); + schema = new Schema(root.getField().getChildren()); + VectorUnloader vectorUnloader = newVectorUnloader(root); + try ( + ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); + BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE); + VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator); + ) { + List oldBuffers = recordBatch.getBuffers(); + List newBuffers = new ArrayList<>(); + for (ArrowBuf oldBuffer : oldBuffers) { + long l = oldBuffer.readableBytes(); + if (l % 64 != 0) { + // pad + l = l + 64 - l % 64; + } + ArrowBuf newBuffer = allocator.buffer(l); + for (long i = oldBuffer.readerIndex(); i < oldBuffer.writerIndex(); i++) { + newBuffer.setByte(i - oldBuffer.readerIndex(), oldBuffer.getByte(i)); + } + newBuffer.readerIndex(0); + newBuffer.writerIndex(l); + newBuffers.add(newBuffer); + } + + try (ArrowRecordBatch newBatch = + new ArrowRecordBatch(recordBatch.getLength(), recordBatch.getNodes(), newBuffers);) { + // load it + VectorLoader vectorLoader = new VectorLoader(newRoot); + + vectorLoader.load(newBatch); + + FieldReader reader = newRoot.getVector("list").getReader(); + for (int i = 0; i < count; i++) { + reader.setPosition(i); + List expected = new ArrayList<>(); + for (int j = 0; j < i % 4 + 1; j++) { + expected.add(i); + } + assertEquals(expected, reader.readObject()); + } + } + + for (ArrowBuf newBuf : newBuffers) { + newBuf.getReferenceManager().release(); + } + } + } + } + + public static VectorUnloader newVectorUnloader(FieldVector root) { + Schema schema = new Schema(root.getField().getChildren()); + int valueCount = root.getValueCount(); + List fields = root.getChildrenFromFields(); + VectorSchemaRoot vsr = new VectorSchemaRoot(schema.getFields(), fields, valueCount); + return new VectorUnloader(vsr); + } } From 938697885567ced6888b0ae5e32dda3e01d7cb7c Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Sat, 18 May 2024 05:21:54 +0530 Subject: [PATCH 05/16] fix: adding listview components --- .../org/apache/arrow/vector/TypeLayout.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java index 600d19b715a59..6e63bacdd1f7f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java @@ -40,6 +40,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Interval; import org.apache.arrow.vector.types.pojo.ArrowType.LargeBinary; import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8; +import org.apache.arrow.vector.types.pojo.ArrowType.ListView; import org.apache.arrow.vector.types.pojo.ArrowType.Map; import org.apache.arrow.vector.types.pojo.ArrowType.Null; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; @@ -496,6 +497,16 @@ public TypeLayout visit(Duration type) { return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); } + @Override + public TypeLayout visit(ListView type) { + List vectors = asList( + BufferLayout.validityVector(), + BufferLayout.offsetBuffer(), + BufferLayout.sizeBuffer() + ); + return new TypeLayout(vectors); + } + }); return layout; } @@ -810,6 +821,12 @@ public Integer visit(Duration type) { return FIXED_WIDTH_BUFFER_COUNT; } + @Override + public Integer visit(ListView type) { + // validity buffer + offset buffer + size buffer + return 3; + } + }); } From ecf9121a53fdf5564f7658ded751188e0f587e5c Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Mon, 20 May 2024 14:30:32 +0530 Subject: [PATCH 06/16] fix: updating c data interface to use the updated constructor --- .../apache/arrow/c/StructVectorUnloader.java | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java b/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java index aa6d9b4d0f6a7..d08a63b92ddd4 100644 --- a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java +++ b/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java @@ -21,6 +21,7 @@ import java.util.List; import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.TypeLayout; import org.apache.arrow.vector.complex.StructVector; @@ -87,17 +88,27 @@ public StructVectorUnloader(StructVector root, boolean includeNullCount, Compres public ArrowRecordBatch getRecordBatch() { List nodes = new ArrayList<>(); List buffers = new ArrayList<>(); + List variadicBufferCounts = new ArrayList<>(); for (FieldVector vector : root.getChildrenFromFields()) { - appendNodes(vector, nodes, buffers); + appendNodes(vector, nodes, buffers, variadicBufferCounts); } return new ArrowRecordBatch(root.getValueCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), - alignBuffers); + variadicBufferCounts, alignBuffers); } - private void appendNodes(FieldVector vector, List nodes, List buffers) { + private long getVariadicBufferCount(FieldVector vector) { + if (vector instanceof BaseVariableWidthViewVector) { + return ((BaseVariableWidthViewVector) vector).getDataBuffers().size(); + } + return 0L; + } + + private void appendNodes(FieldVector vector, List nodes, List buffers, + List variadicBufferCounts) { nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1)); List fieldBuffers = vector.getFieldBuffers(); - int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType()); + int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType(), vector); + variadicBufferCounts.add(getVariadicBufferCount(vector)); if (fieldBuffers.size() != expectedBufferCount) { throw new IllegalArgumentException(String.format("wrong number of buffers for field %s in vector %s. found: %s", vector.getField(), vector.getClass().getSimpleName(), fieldBuffers)); @@ -106,7 +117,7 @@ private void appendNodes(FieldVector vector, List nodes, List Date: Mon, 20 May 2024 14:57:49 +0530 Subject: [PATCH 07/16] fix: adding StructVectorLoader with variadicBufferCounts --- .../org/apache/arrow/c/StructVectorLoader.java | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java b/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java index d9afd0189d807..eb8ccd4900afc 100644 --- a/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java +++ b/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java @@ -90,8 +90,12 @@ public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch .fromCompressionType(recordBatch.getBodyCompression().getCodec()); decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION; CompressionCodec codec = decompressionNeeded ? factory.createCodec(codecType) : NoCompressionCodec.INSTANCE; + Iterator variadicBufferCounts = null; + if (recordBatch.getVariadicBufferCounts() != null && !recordBatch.getVariadicBufferCounts().isEmpty()) { + variadicBufferCounts = recordBatch.getVariadicBufferCounts().iterator(); + } for (FieldVector fieldVector : result.getChildrenFromFields()) { - loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec); + loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec, variadicBufferCounts); } result.loadFieldBuffers(new ArrowFieldNode(recordBatch.getLength(), 0), Collections.singletonList(null)); if (nodes.hasNext() || buffers.hasNext()) { @@ -102,10 +106,15 @@ public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch } private void loadBuffers(FieldVector vector, Field field, Iterator buffers, Iterator nodes, - CompressionCodec codec) { + CompressionCodec codec, Iterator variadicBufferCounts) { checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector); ArrowFieldNode fieldNode = nodes.next(); - int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType()); + // variadicBufferLayoutCount will be 0 for vectors of type except BaseVariableWidthViewVector + long variadicBufferLayoutCount = 0; + if (variadicBufferCounts != null) { + variadicBufferLayoutCount = variadicBufferCounts.next(); + } + int bufferLayoutCount = (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType(), vector)); List ownBuffers = new ArrayList<>(bufferLayoutCount); for (int j = 0; j < bufferLayoutCount; j++) { ArrowBuf nextBuf = buffers.next(); @@ -138,7 +147,7 @@ private void loadBuffers(FieldVector vector, Field field, Iterator buf for (int i = 0; i < childrenFromFields.size(); i++) { Field child = children.get(i); FieldVector fieldVector = childrenFromFields.get(i); - loadBuffers(fieldVector, child, buffers, nodes, codec); + loadBuffers(fieldVector, child, buffers, nodes, codec, variadicBufferCounts); } } } From ac26865d909c7232041b7a1962c02bf4bb2f833b Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Mon, 20 May 2024 18:15:38 +0530 Subject: [PATCH 08/16] fix: adding test cases and todos for future enhancements --- .../arrow/vector/ipc/JsonFileReader.java | 4 +- .../validate/ValidateVectorBufferVisitor.java | 3 +- .../apache/arrow/vector/TestTypeLayout.java | 223 ------------------ .../arrow/vector/TestVarCharViewVector.java | 66 ++++++ 4 files changed, 71 insertions(+), 225 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java index a043555c687b3..7afda393aeaf7 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java @@ -715,7 +715,9 @@ private ArrowBuf readIntoBuffer(BufferAllocator allocator, BufferType bufferType private void readFromJsonIntoVector(Field field, FieldVector vector) throws JsonParseException, IOException { ArrowType type = field.getType(); - TypeLayout typeLayout = TypeLayout.getTypeLayout(type, vector); + // TODO: update the `getTypeLayout` method to take a FieldVector as an argument. + // In relation, the metadata must be used to determine the buffer count for views. + TypeLayout typeLayout = TypeLayout.getTypeLayout(type); List vectorTypes = typeLayout.getBufferTypes(); ArrowBuf[] vectorBuffers = new ArrowBuf[vectorTypes.size()]; /* diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java index e628f780c53fb..e92b0c69c5af2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java @@ -51,7 +51,8 @@ private void validateVectorCommon(ValueVector vector) { if (vector instanceof FieldVector) { FieldVector fieldVector = (FieldVector) vector; - int typeBufferCount = TypeLayout.getTypeBufferCount(arrowType, fieldVector); + // TODO: update the `getTypeBufferCount` method to use the `vector` (FieldVector). + int typeBufferCount = TypeLayout.getTypeBufferCount(arrowType); validateOrThrow(fieldVector.getFieldBuffers().size() == typeBufferCount, "Expected %s buffers in vector of type %s, got %s.", typeBufferCount, vector.getField().getType().toString(), fieldVector.getFieldBuffers().size()); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java index d93e8d85f76d0..672fc69e8d83f 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java @@ -17,36 +17,20 @@ package org.apache.arrow.vector; -import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; -import java.io.IOException; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; import java.util.Random; -import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.complex.NonNullableStructVector; -import org.apache.arrow.vector.complex.impl.ComplexWriterImpl; -import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; -import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; -import org.apache.arrow.vector.complex.writer.IntWriter; -import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.IntervalUnit; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.UnionMode; import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.Field; -import org.apache.arrow.vector.types.pojo.Schema; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -197,211 +181,4 @@ public void testTypeBufferCountInVectorsWithVariadicBuffers() { TypeLayout.getTypeLayout(type, viewVarCharVector).getBufferLayouts().size()); } } - - @Test - public void testVectorLoadUnload() { - - try (final ViewVarCharVector vector1 = new ViewVarCharVector("myvector", allocator)) { - - setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6); - - assertEquals(5, vector1.getLastSet()); - vector1.setValueCount(15); - assertEquals(14, vector1.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector1.get(0)); - assertArrayEquals(STR2, vector1.get(1)); - assertArrayEquals(STR3, vector1.get(2)); - assertArrayEquals(STR4, vector1.get(3)); - assertArrayEquals(STR5, vector1.get(4)); - assertArrayEquals(STR6, vector1.get(5)); - - Field field = vector1.getField(); - String fieldName = field.getName(); - - List fields = new ArrayList<>(); - List fieldVectors = new ArrayList<>(); - - fields.add(field); - fieldVectors.add(vector1); - - Schema schema = new Schema(fields); - - VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount()); - VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); - - try ( - ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); - BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); - VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); - ) { - - VectorLoader vectorLoader = new VectorLoader(schemaRoot2); - vectorLoader.load(recordBatch); - - ViewVarCharVector vector2 = (ViewVarCharVector) schemaRoot2.getVector(fieldName); - /* - * lastSet would have internally been set by VectorLoader.load() when it invokes - * loadFieldBuffers. - */ - assertEquals(14, vector2.getLastSet()); - vector2.setValueCount(25); - assertEquals(24, vector2.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector2.get(0)); - assertArrayEquals(STR2, vector2.get(1)); - assertArrayEquals(STR3, vector2.get(2)); - assertArrayEquals(STR4, vector2.get(3)); - assertArrayEquals(STR5, vector2.get(4)); - assertArrayEquals(STR6, vector2.get(5)); - } - } - } - - @Test - public void testVectorLoadUnload2() { - - try (final VarCharVector vector1 = new VarCharVector("myvector", allocator)) { - - setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6); - - assertEquals(5, vector1.getLastSet()); - vector1.setValueCount(15); - assertEquals(14, vector1.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector1.get(0)); - assertArrayEquals(STR2, vector1.get(1)); - assertArrayEquals(STR3, vector1.get(2)); - assertArrayEquals(STR4, vector1.get(3)); - assertArrayEquals(STR5, vector1.get(4)); - assertArrayEquals(STR6, vector1.get(5)); - - Field field = vector1.getField(); - String fieldName = field.getName(); - - List fields = new ArrayList<>(); - List fieldVectors = new ArrayList<>(); - - fields.add(field); - fieldVectors.add(vector1); - - Schema schema = new Schema(fields); - - VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount()); - VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); - - try ( - ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); - BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); - VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); - ) { - - VectorLoader vectorLoader = new VectorLoader(schemaRoot2); - vectorLoader.load(recordBatch); - - VarCharVector vector2 = (VarCharVector) schemaRoot2.getVector(fieldName); - /* - * lastSet would have internally been set by VectorLoader.load() when it invokes - * loadFieldBuffers. - */ - assertEquals(14, vector2.getLastSet()); - vector2.setValueCount(25); - assertEquals(24, vector2.getLastSet()); - - /* Check the vector output */ - assertArrayEquals(STR1, vector2.get(0)); - assertArrayEquals(STR2, vector2.get(1)); - assertArrayEquals(STR3, vector2.get(2)); - assertArrayEquals(STR4, vector2.get(3)); - assertArrayEquals(STR5, vector2.get(4)); - assertArrayEquals(STR6, vector2.get(5)); - } - } - } - - @Test - public void testUnloadLoadAddPadding() throws IOException { - int count = 10000; - Schema schema; - try ( - BufferAllocator originalVectorsAllocator = - allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE); - NonNullableStructVector parent = NonNullableStructVector.empty("parent", originalVectorsAllocator)) { - - // write some data - ComplexWriter writer = new ComplexWriterImpl("root", parent); - StructWriter rootWriter = writer.rootAsStruct(); - ListWriter list = rootWriter.list("list"); - IntWriter intWriter = list.integer(); - for (int i = 0; i < count; i++) { - list.setPosition(i); - list.startList(); - for (int j = 0; j < i % 4 + 1; j++) { - intWriter.writeInt(i); - } - list.endList(); - } - writer.setValueCount(count); - - // unload it - FieldVector root = parent.getChild("root"); - schema = new Schema(root.getField().getChildren()); - VectorUnloader vectorUnloader = newVectorUnloader(root); - try ( - ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); - BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE); - VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator); - ) { - List oldBuffers = recordBatch.getBuffers(); - List newBuffers = new ArrayList<>(); - for (ArrowBuf oldBuffer : oldBuffers) { - long l = oldBuffer.readableBytes(); - if (l % 64 != 0) { - // pad - l = l + 64 - l % 64; - } - ArrowBuf newBuffer = allocator.buffer(l); - for (long i = oldBuffer.readerIndex(); i < oldBuffer.writerIndex(); i++) { - newBuffer.setByte(i - oldBuffer.readerIndex(), oldBuffer.getByte(i)); - } - newBuffer.readerIndex(0); - newBuffer.writerIndex(l); - newBuffers.add(newBuffer); - } - - try (ArrowRecordBatch newBatch = - new ArrowRecordBatch(recordBatch.getLength(), recordBatch.getNodes(), newBuffers);) { - // load it - VectorLoader vectorLoader = new VectorLoader(newRoot); - - vectorLoader.load(newBatch); - - FieldReader reader = newRoot.getVector("list").getReader(); - for (int i = 0; i < count; i++) { - reader.setPosition(i); - List expected = new ArrayList<>(); - for (int j = 0; j < i % 4 + 1; j++) { - expected.add(i); - } - assertEquals(expected, reader.readObject()); - } - } - - for (ArrowBuf newBuf : newBuffers) { - newBuf.getReferenceManager().release(); - } - } - } - } - - public static VectorUnloader newVectorUnloader(FieldVector root) { - Schema schema = new Schema(root.getField().getChildren()); - int valueCount = root.getValueCount(); - List fields = root.getChildrenFromFields(); - VectorSchemaRoot vsr = new VectorSchemaRoot(schema.getFields(), fields, valueCount); - return new VectorUnloader(vsr); - } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java index efb5afac91b13..2d37b0b4eb9ad 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java @@ -31,6 +31,7 @@ import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Objects; @@ -41,8 +42,11 @@ import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ReusableByteArray; import org.apache.arrow.vector.util.Text; import org.junit.jupiter.api.AfterEach; @@ -1451,6 +1455,68 @@ public void testSafeOverwriteLongFromALongerLongString() { } } + @Test + public void testVectorLoadUnload() { + + try (final ViewVarCharVector vector1 = new ViewVarCharVector("myvector", allocator)) { + + setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6); + + assertEquals(5, vector1.getLastSet()); + vector1.setValueCount(15); + assertEquals(14, vector1.getLastSet()); + + /* Check the vector output */ + assertArrayEquals(STR1, vector1.get(0)); + assertArrayEquals(STR2, vector1.get(1)); + assertArrayEquals(STR3, vector1.get(2)); + assertArrayEquals(STR4, vector1.get(3)); + assertArrayEquals(STR5, vector1.get(4)); + assertArrayEquals(STR6, vector1.get(5)); + + Field field = vector1.getField(); + String fieldName = field.getName(); + + List fields = new ArrayList<>(); + List fieldVectors = new ArrayList<>(); + + fields.add(field); + fieldVectors.add(vector1); + + Schema schema = new Schema(fields); + + VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount()); + VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); + + try ( + ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); + BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); + VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); + ) { + + VectorLoader vectorLoader = new VectorLoader(schemaRoot2); + vectorLoader.load(recordBatch); + + ViewVarCharVector vector2 = (ViewVarCharVector) schemaRoot2.getVector(fieldName); + /* + * lastSet would have internally been set by VectorLoader.load() when it invokes + * loadFieldBuffers. + */ + assertEquals(14, vector2.getLastSet()); + vector2.setValueCount(25); + assertEquals(24, vector2.getLastSet()); + + /* Check the vector output */ + assertArrayEquals(STR1, vector2.get(0)); + assertArrayEquals(STR2, vector2.get(1)); + assertArrayEquals(STR3, vector2.get(2)); + assertArrayEquals(STR4, vector2.get(3)); + assertArrayEquals(STR5, vector2.get(4)); + assertArrayEquals(STR6, vector2.get(5)); + } + } + } + private String generateRandomString(int length) { Random random = new Random(); StringBuilder sb = new StringBuilder(length); From 0c8d632acec02bd810646f51dfd6a53501477074 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Mon, 20 May 2024 18:51:20 +0530 Subject: [PATCH 09/16] fix: adding todos with issue links --- .../main/java/org/apache/arrow/vector/ipc/JsonFileReader.java | 3 +-- .../main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java | 3 ++- .../arrow/vector/validate/ValidateVectorBufferVisitor.java | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java index 7afda393aeaf7..5c9efc445e0c4 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java @@ -715,8 +715,7 @@ private ArrowBuf readIntoBuffer(BufferAllocator allocator, BufferType bufferType private void readFromJsonIntoVector(Field field, FieldVector vector) throws JsonParseException, IOException { ArrowType type = field.getType(); - // TODO: update the `getTypeLayout` method to take a FieldVector as an argument. - // In relation, the metadata must be used to determine the buffer count for views. + // TODO: https://github.com/apache/arrow/issues/41733 TypeLayout typeLayout = TypeLayout.getTypeLayout(type); List vectorTypes = typeLayout.getBufferTypes(); ArrowBuf[] vectorBuffers = new ArrowBuf[vectorTypes.size()]; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java index 525338977c6c1..670881b238ecb 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java @@ -208,7 +208,8 @@ private void writeBatch(VectorSchemaRoot recordBatch) throws IOException { } private void writeFromVectorIntoJson(Field field, FieldVector vector) throws IOException { - List vectorTypes = TypeLayout.getTypeLayout(field.getType(), vector).getBufferTypes(); + // TODO: https://github.com/apache/arrow/issues/41733 + List vectorTypes = TypeLayout.getTypeLayout(field.getType()).getBufferTypes(); List vectorBuffers = vector.getFieldBuffers(); if (vectorTypes.size() != vectorBuffers.size()) { throw new IllegalArgumentException("vector types and inner vector buffers are not the same size: " + diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java index e92b0c69c5af2..af5a67049f722 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java @@ -51,7 +51,7 @@ private void validateVectorCommon(ValueVector vector) { if (vector instanceof FieldVector) { FieldVector fieldVector = (FieldVector) vector; - // TODO: update the `getTypeBufferCount` method to use the `vector` (FieldVector). + // TODO: https://github.com/apache/arrow/issues/41734 int typeBufferCount = TypeLayout.getTypeBufferCount(arrowType); validateOrThrow(fieldVector.getFieldBuffers().size() == typeBufferCount, "Expected %s buffers in vector of type %s, got %s.", From f3bfe7dc022c0fe95cb5d9b3bc1ac33a9133d421 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Mon, 20 May 2024 19:04:29 +0530 Subject: [PATCH 10/16] fix: clean up TestTypeLayout --- .../test/java/org/apache/arrow/vector/TestTypeLayout.java | 8 -------- 1 file changed, 8 deletions(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java index 672fc69e8d83f..f216957371415 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java @@ -37,14 +37,6 @@ public class TestTypeLayout { - private static final Charset utf8Charset = StandardCharsets.UTF_8; - private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset); - private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset); - private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset); - private static final byte[] STR4 = "DDDDDDDD4".getBytes(utf8Charset); - private static final byte[] STR5 = "EEE5".getBytes(utf8Charset); - private static final byte[] STR6 = "0123456789123456".getBytes(utf8Charset); - private BufferAllocator allocator; @BeforeEach From 7baa957e555bb86d498b3854248f307281d84153 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Mon, 20 May 2024 19:09:42 +0530 Subject: [PATCH 11/16] fix: typo --- .../src/test/java/org/apache/arrow/vector/TestTypeLayout.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java index f216957371415..e5a985487c248 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java @@ -19,8 +19,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; import java.util.Random; import org.apache.arrow.memory.BufferAllocator; From 6c4b753a4c9956a2d605daebfa826fb762f3bb1b Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 22 May 2024 11:09:49 +0530 Subject: [PATCH 12/16] fix: remove new API methods --- .../apache/arrow/c/StructVectorLoader.java | 2 +- .../apache/arrow/c/StructVectorUnloader.java | 5 +- .../org/apache/arrow/vector/TypeLayout.java | 413 +----------------- .../org/apache/arrow/vector/VectorLoader.java | 2 +- .../apache/arrow/vector/VectorUnloader.java | 5 +- .../apache/arrow/vector/TestTypeLayout.java | 92 ++-- 6 files changed, 63 insertions(+), 456 deletions(-) diff --git a/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java b/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java index eb8ccd4900afc..27acf84d30157 100644 --- a/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java +++ b/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java @@ -114,7 +114,7 @@ private void loadBuffers(FieldVector vector, Field field, Iterator buf if (variadicBufferCounts != null) { variadicBufferLayoutCount = variadicBufferCounts.next(); } - int bufferLayoutCount = (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType(), vector)); + int bufferLayoutCount = (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType())); List ownBuffers = new ArrayList<>(bufferLayoutCount); for (int j = 0; j < bufferLayoutCount; j++) { ArrowBuf nextBuf = buffers.next(); diff --git a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java b/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java index d08a63b92ddd4..8d015157ebf38 100644 --- a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java +++ b/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java @@ -107,8 +107,9 @@ private void appendNodes(FieldVector vector, List nodes, List variadicBufferCounts) { nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1)); List fieldBuffers = vector.getFieldBuffers(); - int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType(), vector); - variadicBufferCounts.add(getVariadicBufferCount(vector)); + long variadicBufferCount = getVariadicBufferCount(vector); + int expectedBufferCount = (int) (TypeLayout.getTypeBufferCount(vector.getField().getType()) + variadicBufferCount); + variadicBufferCounts.add(variadicBufferCount); if (fieldBuffers.size() != expectedBufferCount) { throw new IllegalArgumentException(String.format("wrong number of buffers for field %s in vector %s. found: %s", vector.getField(), vector.getClass().getSimpleName(), fieldBuffers)); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java index 6e63bacdd1f7f..0d01d77632bde 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java @@ -40,7 +40,6 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Interval; import org.apache.arrow.vector.types.pojo.ArrowType.LargeBinary; import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8; -import org.apache.arrow.vector.types.pojo.ArrowType.ListView; import org.apache.arrow.vector.types.pojo.ArrowType.Map; import org.apache.arrow.vector.types.pojo.ArrowType.Null; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; @@ -59,9 +58,7 @@ public class TypeLayout { /** * Constructs a new {@link TypeLayout} for the given arrowType. - * This method is deprecated and will be removed in a future release. */ - @Deprecated public static TypeLayout getTypeLayout(final ArrowType arrowType) { TypeLayout layout = arrowType.accept(new ArrowTypeVisitor() { @@ -190,7 +187,7 @@ public TypeLayout visit(Binary type) { @Override public TypeLayout visit(ArrowType.BinaryView type) { - throw new UnsupportedOperationException("BinaryView not supported"); + return newVariableWidthViewTypeLayout(); } @Override @@ -200,7 +197,7 @@ public TypeLayout visit(Utf8 type) { @Override public TypeLayout visit(Utf8View type) { - throw new UnsupportedOperationException("Utf8View not supported"); + return newVariableWidthViewTypeLayout(); } @Override @@ -218,232 +215,12 @@ private TypeLayout newVariableWidthTypeLayout() { BufferLayout.byteVector()); } - private TypeLayout newLargeVariableWidthTypeLayout() { - return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.largeOffsetBuffer(), - BufferLayout.byteVector()); - } - - private TypeLayout newPrimitiveTypeLayout(BufferLayout... vectors) { - return new TypeLayout(asList(vectors)); - } - - public TypeLayout newFixedWidthTypeLayout(BufferLayout dataVector) { - return newPrimitiveTypeLayout(BufferLayout.validityVector(), dataVector); - } - - @Override - public TypeLayout visit(Null type) { - return new TypeLayout(Collections.emptyList()); - } - - @Override - public TypeLayout visit(Date type) { - switch (type.getUnit()) { - case DAY: - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(32)); - case MILLISECOND: - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); - default: - throw new UnsupportedOperationException("Unknown unit " + type.getUnit()); - } - } - - @Override - public TypeLayout visit(Time type) { - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth())); - } - - @Override - public TypeLayout visit(Interval type) { - switch (type.getUnit()) { - case DAY_TIME: - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); - case YEAR_MONTH: - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(32)); - case MONTH_DAY_NANO: - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(128)); - default: - throw new UnsupportedOperationException("Unknown unit " + type.getUnit()); - } - } - - @Override - public TypeLayout visit(Duration type) { - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); - } - - }); - return layout; - } - - /** - * Constructs a new {@link TypeLayout}. - * - * @param arrowType the type to create the layout for - * @param vector the vector to create the layout for - * @return the layout for the given type and vector - * @throws UnsupportedOperationException if the ArrowType is not supported - */ - public static TypeLayout getTypeLayout(final ArrowType arrowType, FieldVector vector) { - TypeLayout layout = arrowType.accept(new ArrowTypeVisitor() { - - @Override - public TypeLayout visit(Int type) { - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth())); - } - - @Override - public TypeLayout visit(Union type) { - List vectors; - switch (type.getMode()) { - case Dense: - vectors = asList( - BufferLayout.typeBuffer(), - BufferLayout.offsetBuffer() // offset to find the vector - ); - break; - case Sparse: - vectors = asList( - BufferLayout.typeBuffer() // type of the value at the index or 0 if null - ); - break; - default: - throw new UnsupportedOperationException("Unsupported Union Mode: " + type.getMode()); - } - return new TypeLayout(vectors); - } - - @Override - public TypeLayout visit(Struct type) { - List vectors = asList( - BufferLayout.validityVector() - ); - return new TypeLayout(vectors); - } - - @Override - public TypeLayout visit(Timestamp type) { - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); - } - - @Override - public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { - List vectors = asList( - BufferLayout.validityVector(), - BufferLayout.offsetBuffer() - ); - return new TypeLayout(vectors); - } - - @Override - public TypeLayout visit(ArrowType.LargeList type) { - List vectors = asList( - BufferLayout.validityVector(), - BufferLayout.largeOffsetBuffer() - ); - return new TypeLayout(vectors); - } - - @Override - public TypeLayout visit(FixedSizeList type) { - List vectors = asList( - BufferLayout.validityVector() - ); - return new TypeLayout(vectors); - } - - @Override - public TypeLayout visit(Map type) { - List vectors = asList( - BufferLayout.validityVector(), - BufferLayout.offsetBuffer() - ); - return new TypeLayout(vectors); - } - - @Override - public TypeLayout visit(FloatingPoint type) { - int bitWidth; - switch (type.getPrecision()) { - case HALF: - bitWidth = 16; - break; - case SINGLE: - bitWidth = 32; - break; - case DOUBLE: - bitWidth = 64; - break; - default: - throw new UnsupportedOperationException("Unsupported Precision: " + type.getPrecision()); - } - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(bitWidth)); - } - - @Override - public TypeLayout visit(Decimal type) { - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth())); - } - - @Override - public TypeLayout visit(FixedSizeBinary type) { - return newFixedWidthTypeLayout(new BufferLayout(BufferType.DATA, type.getByteWidth() * 8)); - } - - @Override - public TypeLayout visit(Bool type) { - return newFixedWidthTypeLayout(BufferLayout.booleanVector()); - } - - @Override - public TypeLayout visit(Binary type) { - return newVariableWidthTypeLayout(); - } - - @Override - public TypeLayout visit(BinaryView type) { - return newVariableWidthViewTypeLayout((ViewVarBinaryVector) vector); - } - - @Override - public TypeLayout visit(Utf8 type) { - return newVariableWidthTypeLayout(); - } - - @Override - public TypeLayout visit(Utf8View type) { - return newVariableWidthViewTypeLayout((ViewVarCharVector) vector); - } - - @Override - public TypeLayout visit(LargeUtf8 type) { - return newLargeVariableWidthTypeLayout(); - } - - @Override - public TypeLayout visit(LargeBinary type) { - return newLargeVariableWidthTypeLayout(); - } - - private TypeLayout newVariableWidthTypeLayout() { - return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.offsetBuffer(), - BufferLayout.byteVector()); - } - - private TypeLayout newVariableWidthViewTypeLayout(BaseVariableWidthViewVector vector) { - final int numDataBuffers = vector.getDataBuffers().size(); - List bufferLayouts = new ArrayList<>(numDataBuffers + 2); - bufferLayouts.add(BufferLayout.validityVector()); - bufferLayouts.add(BufferLayout.byteVector()); - - for (int i = 0; i < numDataBuffers; i++) { - bufferLayouts.add(BufferLayout.byteVector()); - } - - return new TypeLayout(bufferLayouts); + private TypeLayout newVariableWidthViewTypeLayout() { + return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.byteVector()); } private TypeLayout newLargeVariableWidthTypeLayout() { + // NOTE: only considers the non variadic buffers return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.largeOffsetBuffer(), BufferLayout.byteVector()); } @@ -494,17 +271,7 @@ public TypeLayout visit(Interval type) { @Override public TypeLayout visit(Duration type) { - return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); - } - - @Override - public TypeLayout visit(ListView type) { - List vectors = asList( - BufferLayout.validityVector(), - BufferLayout.offsetBuffer(), - BufferLayout.sizeBuffer() - ); - return new TypeLayout(vectors); + return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64)); } }); @@ -513,9 +280,7 @@ public TypeLayout visit(ListView type) { /** * Gets the number of {@link BufferLayout}s for the given arrowType. - * This method is deprecated and will be removed in a future release. */ - @Deprecated public static int getTypeBufferCount(final ArrowType arrowType) { return arrowType.accept(new ArrowTypeVisitor() { @@ -617,165 +382,10 @@ public Integer visit(Binary type) { @Override public Integer visit(BinaryView type) { - throw new UnsupportedOperationException("BinaryView not supported"); - } - - @Override - public Integer visit(Utf8 type) { - return VARIABLE_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Utf8View type) { - throw new UnsupportedOperationException("Utf8View not supported"); - } - - @Override - public Integer visit(LargeUtf8 type) { - return VARIABLE_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(LargeBinary type) { - return VARIABLE_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Null type) { - return 0; - } - - @Override - public Integer visit(Date type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Time type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Interval type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Duration type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - }); - } - - - /** - * Gets the number of {@link BufferLayout}s. - * - * @param arrowType the ArrowType for which the buffer count is to be determined - * @param vector the FieldVector associated with the ArrowType - * @return the number of BufferLayouts for the given ArrowType and FieldVector - * @throws UnsupportedOperationException if the ArrowType is not supported - */ - public static int getTypeBufferCount(final ArrowType arrowType, FieldVector vector) { - - return arrowType.accept(new ArrowTypeVisitor() { - - /** - * All fixed width vectors have a common number of buffers 2: one validity buffer, plus a data buffer. - */ - static final int FIXED_WIDTH_BUFFER_COUNT = 2; - - /** - * All variable width vectors have a common number of buffers 3: a validity buffer, - * an offset buffer, and a data buffer. - */ - static final int VARIABLE_WIDTH_BUFFER_COUNT = 3; - - @Override - public Integer visit(Int type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Union type) { - switch (type.getMode()) { - case Dense: - // TODO: validate this - return 2; - case Sparse: - // type buffer - return 1; - default: - throw new UnsupportedOperationException("Unsupported Union Mode: " + type.getMode()); - } - } - - @Override - public Integer visit(Struct type) { - // validity buffer - return 1; - } - - @Override - public Integer visit(Timestamp type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { - // validity buffer + offset buffer + // NOTE: only consider the validity and view buffers return 2; } - @Override - public Integer visit(ArrowType.LargeList type) { - // validity buffer + offset buffer - return 2; - } - - @Override - public Integer visit(FixedSizeList type) { - // validity buffer - return 1; - } - - @Override - public Integer visit(Map type) { - // validity buffer + offset buffer - return 2; - } - - @Override - public Integer visit(FloatingPoint type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Decimal type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(FixedSizeBinary type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Bool type) { - return FIXED_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(Binary type) { - return VARIABLE_WIDTH_BUFFER_COUNT; - } - - @Override - public Integer visit(BinaryView type) { - return 2 + ((ViewVarBinaryVector) vector).getDataBuffers().size(); - } - @Override public Integer visit(Utf8 type) { return VARIABLE_WIDTH_BUFFER_COUNT; @@ -783,7 +393,8 @@ public Integer visit(Utf8 type) { @Override public Integer visit(Utf8View type) { - return 2 + ((ViewVarCharVector) vector).getDataBuffers().size(); + // NOTE: only consider the validity and view buffers + return 2; } @Override @@ -821,12 +432,6 @@ public Integer visit(Duration type) { return FIXED_WIDTH_BUFFER_COUNT; } - @Override - public Integer visit(ListView type) { - // validity buffer + offset buffer + size buffer - return 3; - } - }); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index 14b672fea630d..9590e70f46770 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -109,7 +109,7 @@ private void loadBuffers( if (variadicBufferCounts != null) { variadicBufferLayoutCount = variadicBufferCounts.next(); } - int bufferLayoutCount = (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType(), vector)); + int bufferLayoutCount = (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType())); List ownBuffers = new ArrayList<>(bufferLayoutCount); for (int j = 0; j < bufferLayoutCount; j++) { ArrowBuf nextBuf = buffers.next(); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java index e42aa18a0ec0b..8528099b6d619 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java @@ -101,8 +101,9 @@ private void appendNodes(FieldVector vector, List nodes, List variadicBufferCounts) { nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1)); List fieldBuffers = vector.getFieldBuffers(); - int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType(), vector); - variadicBufferCounts.add(getVariadicBufferCount(vector)); + long variadicBufferCount = getVariadicBufferCount(vector); + int expectedBufferCount = (int) (TypeLayout.getTypeBufferCount(vector.getField().getType()) + variadicBufferCount); + variadicBufferCounts.add(variadicBufferCount); if (fieldBuffers.size() != expectedBufferCount) { throw new IllegalArgumentException(String.format( "wrong number of buffers for field %s in vector %s. found: %s", diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java index e5a985487c248..5a58133f2e2bd 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java @@ -51,89 +51,89 @@ public void shutdown() { @Test public void testTypeBufferCount() { ArrowType type = new ArrowType.Int(8, true); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Union(UnionMode.Sparse, new int[2]); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Union(UnionMode.Dense, new int[1]); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Struct(); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.List(); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.FixedSizeList(5); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Map(false); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Decimal(10, 10, 128); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Decimal(10, 10, 256); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.FixedSizeBinary(5); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Bool(); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Binary(); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Utf8(); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Null(); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Date(DateUnit.DAY); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Time(TimeUnit.MILLISECOND, 32); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Interval(IntervalUnit.DAY_TIME); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Duration(TimeUnit.MILLISECOND); - assertEquals(TypeLayout.getTypeBufferCount(type, null), - TypeLayout.getTypeLayout(type, null).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); } private String generateRandomString(int length) { @@ -150,8 +150,8 @@ public void testTypeBufferCountInVectorsWithVariadicBuffers() { // empty vector try (ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { ArrowType type = viewVarCharVector.getMinorType().getType(); - assertEquals(TypeLayout.getTypeBufferCount(type, viewVarCharVector), - TypeLayout.getTypeLayout(type, viewVarCharVector).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); } // vector with long strings try (ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { @@ -167,8 +167,8 @@ public void testTypeBufferCountInVectorsWithVariadicBuffers() { viewVarCharVector.setValueCount(6); ArrowType type = viewVarCharVector.getMinorType().getType(); - assertEquals(TypeLayout.getTypeBufferCount(type, viewVarCharVector), - TypeLayout.getTypeLayout(type, viewVarCharVector).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); } } } From ea965df508025b6906555d35d939c7071c931d64 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 22 May 2024 11:28:33 +0530 Subject: [PATCH 13/16] fix: adding new constructors --- .../vector/ipc/message/ArrowRecordBatch.java | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java index 6a529db598fff..0ea71d02ad814 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java @@ -71,6 +71,21 @@ public ArrowRecordBatch( this(length, nodes, buffers, bodyCompression, null, true); } + /** + * Construct a record batch from nodes. + * + * @param length how many rows in this batch + * @param nodes field level info + * @param buffers will be retained until this recordBatch is closed + * @param bodyCompression compression info. + * @param alignBuffers Whether to align buffers to an 8 byte boundary. + */ + public ArrowRecordBatch( + int length, List nodes, List buffers, + ArrowBodyCompression bodyCompression, boolean alignBuffers) { + this(length, nodes, buffers, bodyCompression, alignBuffers, /*retainBuffers*/ true); + } + /** * Construct a record batch from nodes. * @@ -87,6 +102,47 @@ public ArrowRecordBatch( this(length, nodes, buffers, bodyCompression, variadicBufferCounts, alignBuffers, /*retainBuffers*/ true); } + /** + * Construct a record batch from nodes. + * + * @param length how many rows in this batch + * @param nodes field level info + * @param buffers will be retained until this recordBatch is closed + * @param bodyCompression compression info. + * @param alignBuffers Whether to align buffers to an 8 byte boundary. + * @param retainBuffers Whether to retain() each source buffer in the constructor. If false, the caller is + * responsible for retaining the buffers beforehand. + */ + public ArrowRecordBatch( + int length, List nodes, List buffers, + ArrowBodyCompression bodyCompression, boolean alignBuffers, + boolean retainBuffers) { + super(); + this.length = length; + this.nodes = nodes; + this.buffers = buffers; + Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null"); + this.bodyCompression = bodyCompression; + List arrowBuffers = new ArrayList<>(buffers.size()); + long offset = 0; + for (ArrowBuf arrowBuf : buffers) { + if (retainBuffers) { + arrowBuf.getReferenceManager().retain(); + } + long size = arrowBuf.readableBytes(); + arrowBuffers.add(new ArrowBuffer(offset, size)); + if (LOGGER.isTraceEnabled()) { + LOGGER.trace("Buffer in RecordBatch at {}, length: {}", offset, size); + } + offset += size; + if (alignBuffers) { // align on 8 byte boundaries + offset = DataSizeRoundingUtil.roundUpTo8Multiple(offset); + } + } + this.buffersLayout = Collections.unmodifiableList(arrowBuffers); + this.variadicBufferCounts = null; + } + /** * Construct a record batch from nodes. * From cd5bd391ba207deb90579af6a45d551512115b21 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 22 May 2024 11:55:58 +0530 Subject: [PATCH 14/16] fix: refactor constructor order --- .../vector/ipc/message/ArrowRecordBatch.java | 38 ++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java index 0ea71d02ad814..f70427751460a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java @@ -86,22 +86,6 @@ public ArrowRecordBatch( this(length, nodes, buffers, bodyCompression, alignBuffers, /*retainBuffers*/ true); } - /** - * Construct a record batch from nodes. - * - * @param length how many rows in this batch - * @param nodes field level info - * @param buffers will be retained until this recordBatch is closed - * @param bodyCompression compression info. - * @param variadicBufferCounts the number of buffers in each variadic section. - * @param alignBuffers Whether to align buffers to an 8 byte boundary. - */ - public ArrowRecordBatch( - int length, List nodes, List buffers, - ArrowBodyCompression bodyCompression, List variadicBufferCounts, boolean alignBuffers) { - this(length, nodes, buffers, bodyCompression, variadicBufferCounts, alignBuffers, /*retainBuffers*/ true); - } - /** * Construct a record batch from nodes. * @@ -143,6 +127,22 @@ public ArrowRecordBatch( this.variadicBufferCounts = null; } + /** + * Construct a record batch from nodes. + * + * @param length how many rows in this batch + * @param nodes field level info + * @param buffers will be retained until this recordBatch is closed + * @param bodyCompression compression info. + * @param variadicBufferCounts the number of buffers in each variadic section. + * @param alignBuffers Whether to align buffers to an 8 byte boundary. + */ + public ArrowRecordBatch( + int length, List nodes, List buffers, + ArrowBodyCompression bodyCompression, List variadicBufferCounts, boolean alignBuffers) { + this(length, nodes, buffers, bodyCompression, variadicBufferCounts, alignBuffers, /*retainBuffers*/ true); + } + /** * Construct a record batch from nodes. * @@ -343,8 +343,12 @@ public void close() { @Override public String toString() { + int variadicBufCount = 0; + if (variadicBufferCounts != null && !variadicBufferCounts.isEmpty()) { + variadicBufCount = variadicBufferCounts.size(); + } return "ArrowRecordBatch [length=" + length + ", nodes=" + nodes + ", #buffers=" + buffers.size() + - ", #variadicBufferCounts=" + variadicBufferCounts.size() + ", buffersLayout=" + buffersLayout + + ", #variadicBufferCounts=" + variadicBufCount + ", buffersLayout=" + buffersLayout + ", closed=" + closed + "]"; } From 452048f3e35b1a9584ab67659bc5da9ee5e479ac Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Wed, 22 May 2024 11:58:16 +0530 Subject: [PATCH 15/16] fix: minor changes --- .../org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java index f70427751460a..c9e7ffc1a4f16 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java @@ -99,8 +99,7 @@ public ArrowRecordBatch( */ public ArrowRecordBatch( int length, List nodes, List buffers, - ArrowBodyCompression bodyCompression, boolean alignBuffers, - boolean retainBuffers) { + ArrowBodyCompression bodyCompression, boolean alignBuffers, boolean retainBuffers) { super(); this.length = length; this.nodes = nodes; From 36ccbba4d9752da73121a0c49ebecf8f0af0af41 Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 23 May 2024 15:18:08 +0530 Subject: [PATCH 16/16] fix: addressing reviews --- .../vector/ipc/message/ArrowRecordBatch.java | 27 ++----------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java index c9e7ffc1a4f16..b910cfc6ecc25 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java @@ -83,7 +83,7 @@ public ArrowRecordBatch( public ArrowRecordBatch( int length, List nodes, List buffers, ArrowBodyCompression bodyCompression, boolean alignBuffers) { - this(length, nodes, buffers, bodyCompression, alignBuffers, /*retainBuffers*/ true); + this(length, nodes, buffers, bodyCompression, null, alignBuffers, /*retainBuffers*/ true); } /** @@ -100,30 +100,7 @@ public ArrowRecordBatch( public ArrowRecordBatch( int length, List nodes, List buffers, ArrowBodyCompression bodyCompression, boolean alignBuffers, boolean retainBuffers) { - super(); - this.length = length; - this.nodes = nodes; - this.buffers = buffers; - Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null"); - this.bodyCompression = bodyCompression; - List arrowBuffers = new ArrayList<>(buffers.size()); - long offset = 0; - for (ArrowBuf arrowBuf : buffers) { - if (retainBuffers) { - arrowBuf.getReferenceManager().retain(); - } - long size = arrowBuf.readableBytes(); - arrowBuffers.add(new ArrowBuffer(offset, size)); - if (LOGGER.isTraceEnabled()) { - LOGGER.trace("Buffer in RecordBatch at {}, length: {}", offset, size); - } - offset += size; - if (alignBuffers) { // align on 8 byte boundaries - offset = DataSizeRoundingUtil.roundUpTo8Multiple(offset); - } - } - this.buffersLayout = Collections.unmodifiableList(arrowBuffers); - this.variadicBufferCounts = null; + this(length, nodes, buffers, bodyCompression, null, alignBuffers, retainBuffers); } /**