From 3134bae1ad82b9785410ffef90cc3f61d67efb3e Mon Sep 17 00:00:00 2001 From: Yi Wen Wong Date: Mon, 18 Nov 2024 05:36:14 -0800 Subject: [PATCH 1/6] remove ExtField method --- .../apache/fury/format/type/DataTypes.java | 33 +++---------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/java/fury-format/src/main/java/org/apache/fury/format/type/DataTypes.java b/java/fury-format/src/main/java/org/apache/fury/format/type/DataTypes.java index 383be40ce6..3bb83c4085 100644 --- a/java/fury-format/src/main/java/org/apache/fury/format/type/DataTypes.java +++ b/java/fury-format/src/main/java/org/apache/fury/format/type/DataTypes.java @@ -314,7 +314,7 @@ public static Field field(String name, ArrowType type, Field... children) { } public static Field field(String name, FieldType fieldType, List<Field> children) { - return new ExtField(name, fieldType, children); + return new Field(name, fieldType, children); } public static Field notNullField(String name, ArrowType type, Field... children) { @@ -396,19 +396,11 @@ public static Field mapField(String name, Field keyField, Field itemField) { } public static Field keyFieldForMap(Field mapField) { - Field field = mapField.getChildren().get(0).getChildren().get(0); - if (field.getClass() != ExtField.class) { - return new ExtField(field.getName(), field.getFieldType(), field.getChildren()); - } - return field; + return mapField.getChildren().get(0).getChildren().get(0); } public static Field itemFieldForMap(Field mapField) { - Field field = mapField.getChildren().get(0).getChildren().get(1); - if (field.getClass() != ExtField.class) { - return new ExtField(field.getName(), field.getFieldType(), field.getChildren()); - } - return field; + return mapField.getChildren().get(0).getChildren().get(1); } public static Field keyArrayFieldForMap(Field mapField) { @@ -425,24 +417,7 @@ public static Schema schemaFromStructField(Field structField) { } public static Schema createSchema(Field field) { - if (field.getClass() != 
ExtField.class) { - throw new IllegalArgumentException( - String.format("Field %s got wrong type %s", field, field.getClass())); - } - ExtField extField = (ExtField) field; - Object extData = extField.extData; - if (extData == null) { - extField.extData = extData = new Schema(field.getChildren(), field.getMetadata()); - } - return (Schema) extData; - } - - static class ExtField extends Field { - Object extData; - - public ExtField(String name, FieldType fieldType, List<Field> children) { - super(name, fieldType, children); - } + return new Schema(field.getChildren(), field.getMetadata()); } public static Field structField(boolean nullable, Field... fields) { From df88409ebe83ee8039ee42c9b6941ab255bd206d Mon Sep 17 00:00:00 2001 From: Yi Wen Wong Date: Mon, 18 Nov 2024 05:49:53 -0800 Subject: [PATCH 2/6] add --add-opens argLine configuration to fury-format pom.xml --- java/fury-format/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/java/fury-format/pom.xml b/java/fury-format/pom.xml index 6e55c8fbef..2d9184fa2b 100644 --- a/java/fury-format/pom.xml +++ b/java/fury-format/pom.xml @@ -116,6 +116,12 @@ + <plugin> + <artifactId>maven-surefire-plugin</artifactId> + <configuration> + <argLine>--add-opens=java.base/java.nio=ALL-UNNAMED</argLine> + </configuration> + </plugin> From f85b3f840293f380af604a74160110585c2ec78c Mon Sep 17 00:00:00 2001 From: Yi Wen Wong Date: Mon, 18 Nov 2024 06:22:55 -0800 Subject: [PATCH 3/6] move extData to unsafeTrait. 
--- .../apache/fury/format/row/binary/BinaryArray.java | 3 ++- .../apache/fury/format/row/binary/BinaryRow.java | 3 ++- .../apache/fury/format/row/binary/UnsafeTrait.java | 13 +++++++++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/java/fury-format/src/main/java/org/apache/fury/format/row/binary/BinaryArray.java b/java/fury-format/src/main/java/org/apache/fury/format/row/binary/BinaryArray.java index 3b1c5cb9c5..fdae1a73b1 100644 --- a/java/fury-format/src/main/java/org/apache/fury/format/row/binary/BinaryArray.java +++ b/java/fury-format/src/main/java/org/apache/fury/format/row/binary/BinaryArray.java @@ -63,6 +63,7 @@ public BinaryArray(Field field) { } else { this.elementSize = width; } + initializeExtData(1); // Only require at most one slot to cache the schema for array type. } public void pointTo(MemoryBuffer buffer, int offset, int sizeInBytes) { @@ -135,7 +136,7 @@ public BigDecimal getDecimal(int ordinal) { @Override public BinaryRow getStruct(int ordinal) { - return getStruct(ordinal, field.getChildren().get(0)); + return getStruct(ordinal, field.getChildren().get(0), 0); } @Override diff --git a/java/fury-format/src/main/java/org/apache/fury/format/row/binary/BinaryRow.java b/java/fury-format/src/main/java/org/apache/fury/format/row/binary/BinaryRow.java index 59d95d42ec..8303faea09 100644 --- a/java/fury-format/src/main/java/org/apache/fury/format/row/binary/BinaryRow.java +++ b/java/fury-format/src/main/java/org/apache/fury/format/row/binary/BinaryRow.java @@ -73,6 +73,7 @@ public BinaryRow(Schema schema) { this.numFields = schema.getFields().size(); Preconditions.checkArgument(numFields > 0); this.bitmapWidthInBytes = BitUtils.calculateBitmapWidthInBytes(numFields); + initializeExtData(numFields); } public void pointTo(MemoryBuffer buffer, int offset, int sizeInBytes) { @@ -155,7 +156,7 @@ public BigDecimal getDecimal(int ordinal) { @Override public BinaryRow getStruct(int ordinal) { - return getStruct(ordinal, 
schema.getFields().get(ordinal)); + return getStruct(ordinal, schema.getFields().get(ordinal), ordinal); } @Override diff --git a/java/fury-format/src/main/java/org/apache/fury/format/row/binary/UnsafeTrait.java b/java/fury-format/src/main/java/org/apache/fury/format/row/binary/UnsafeTrait.java index 0b7d1d5f5c..ce5ceba50f 100644 --- a/java/fury-format/src/main/java/org/apache/fury/format/row/binary/UnsafeTrait.java +++ b/java/fury-format/src/main/java/org/apache/fury/format/row/binary/UnsafeTrait.java @@ -26,6 +26,7 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.DecimalUtility; import org.apache.fury.format.row.Getters; import org.apache.fury.format.row.Setters; @@ -35,6 +36,7 @@ /** Internal to binary row format to reuse code, don't use it in anywhere else. */ abstract class UnsafeTrait implements Getters, Setters { + private Object[] extData; abstract MemoryBuffer getBuffer(); @@ -55,6 +57,10 @@ public MemoryBuffer getBuffer(int ordinal) { abstract int getOffset(int ordinal); + void initializeExtData(int numSlots) { + extData = new Object[numSlots]; + } + // ########################################################### // ####################### getters ####################### // ########################################################### @@ -143,14 +149,17 @@ BigDecimal getDecimal(int ordinal, ArrowType.Decimal decimalType) { return decimal; } - BinaryRow getStruct(int ordinal, Field field) { + BinaryRow getStruct(int ordinal, Field field, int extDataSlot) { if (isNullAt(ordinal)) { return null; } final long offsetAndSize = getInt64(ordinal); final int relativeOffset = (int) (offsetAndSize >> 32); final int size = (int) offsetAndSize; - BinaryRow row = new BinaryRow(DataTypes.createSchema(field)); + if (extData[extDataSlot] == null) { + extData[extDataSlot] = 
DataTypes.createSchema(field); + } + BinaryRow row = new BinaryRow((Schema) extData[extDataSlot]); row.pointTo(getBuffer(), getBaseOffset() + relativeOffset, size); return row; } From 12bb888a0173b65e472c6a62d23cb84c21f9e20e Mon Sep 17 00:00:00 2001 From: Yi Wen Wong Date: Mon, 18 Nov 2024 06:48:19 -0800 Subject: [PATCH 4/6] add javaDoc comments for getStruct --- .../org/apache/fury/format/row/binary/UnsafeTrait.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/java/fury-format/src/main/java/org/apache/fury/format/row/binary/UnsafeTrait.java b/java/fury-format/src/main/java/org/apache/fury/format/row/binary/UnsafeTrait.java index ce5ceba50f..6c59327f5a 100644 --- a/java/fury-format/src/main/java/org/apache/fury/format/row/binary/UnsafeTrait.java +++ b/java/fury-format/src/main/java/org/apache/fury/format/row/binary/UnsafeTrait.java @@ -149,7 +149,15 @@ BigDecimal getDecimal(int ordinal, ArrowType.Decimal decimalType) { return decimal; } - BinaryRow getStruct(int ordinal, Field field, int extDataSlot) { + /** + * Gets the field at a specific ordinal as a struct. + * + * @param ordinal the ordinal position of this field. + * @param field the Arrow field corresponding to this struct. + * @param extDataSlot the ext data slot used to cache the schema for the struct. + * @return the binary row representation of the struct. + */ + BinaryRow getStruct(int ordinal, Field field, int extDataSlot) { if (isNullAt(ordinal)) { return null; } From 2d34649937d2b1fe469b03e0fdd527ccc089314d Mon Sep 17 00:00:00 2001 From: Yi Wen Wong Date: Tue, 19 Nov 2024 04:53:40 -0800 Subject: [PATCH 5/6] Revert "add --add-opens argLine configuration to fury-format pom.xml" This reverts commit df88409ebe83ee8039ee42c9b6941ab255bd206d. 
--- java/fury-format/pom.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/java/fury-format/pom.xml b/java/fury-format/pom.xml index 2d9184fa2b..6e55c8fbef 100644 --- a/java/fury-format/pom.xml +++ b/java/fury-format/pom.xml @@ -116,12 +116,6 @@ - <plugin> - <artifactId>maven-surefire-plugin</artifactId> - <configuration> - <argLine>--add-opens=java.base/java.nio=ALL-UNNAMED</argLine> - </configuration> - </plugin> From 3e6f72185816a0cb0259c9059415d8c423281fea Mon Sep 17 00:00:00 2001 From: Yi Wen Wong Date: Tue, 19 Nov 2024 05:05:06 -0800 Subject: [PATCH 6/6] spotless apply. --- .../fury/format/row/binary/UnsafeTrait.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/java/fury-format/src/main/java/org/apache/fury/format/row/binary/UnsafeTrait.java b/java/fury-format/src/main/java/org/apache/fury/format/row/binary/UnsafeTrait.java index 6c59327f5a..a13eca4e3e 100644 --- a/java/fury-format/src/main/java/org/apache/fury/format/row/binary/UnsafeTrait.java +++ b/java/fury-format/src/main/java/org/apache/fury/format/row/binary/UnsafeTrait.java @@ -149,15 +149,15 @@ BigDecimal getDecimal(int ordinal, ArrowType.Decimal decimalType) { return decimal; } - /** - * Gets the field at a specific ordinal as a struct. - * - * @param ordinal the ordinal position of this field. - * @param field the Arrow field corresponding to this struct. - * @param extDataSlot the ext data slot used to cache the schema for the struct. - * @return the binary row representation of the struct. - */ - BinaryRow getStruct(int ordinal, Field field, int extDataSlot) { + /** + * Gets the field at a specific ordinal as a struct. + * + * @param ordinal the ordinal position of this field. + * @param field the Arrow field corresponding to this struct. + * @param extDataSlot the ext data slot used to cache the schema for the struct. + * @return the binary row representation of the struct. + */ + BinaryRow getStruct(int ordinal, Field field, int extDataSlot) { if (isNullAt(ordinal)) { return null; }