From 1b0015985bc1c6d527326f5b4b5ed4940f6185d5 Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Mon, 6 Jun 2022 23:32:26 +0800 Subject: [PATCH] StructVector's child vectors get unexpectedly reordered after adding duplicated fields (#112) --- .../vector/complex/AbstractStructVector.java | 4 +- .../arrow/vector/util/MapWithOrdinalImpl.java | 4 +- .../apache/arrow/vector/TestStructVector.java | 55 ++++++++++++++++--- 3 files changed, 53 insertions(+), 10 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java index be6d992338996..5d9812dc4b6c8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java @@ -26,6 +26,7 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; +import org.apache.arrow.util.VisibleForTesting; import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; @@ -229,7 +230,8 @@ public T getChild(String name, Class clazz) { return typeify(f, clazz); } - protected ValueVector add(String childName, FieldType fieldType) { + @VisibleForTesting + public ValueVector add(String childName, FieldType fieldType) { FieldVector vector = fieldType.createNewSingleVector(childName, allocator, callBack); putChild(childName, vector); if (callBack != null) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java index 41ce1fc0d10aa..3612d677ed5a9 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java @@ -20,7 +20,7 @@ import java.util.AbstractMap; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -54,7 +54,7 @@ public class MapWithOrdinalImpl implements MapWithOrdinal { private static final Logger logger = LoggerFactory.getLogger(MapWithOrdinalImpl.class); - private final Map> primary = new HashMap<>(); + private final Map> primary = new LinkedHashMap<>(); private final IntObjectHashMap secondary = new IntObjectHashMap<>(); private final Map delegate = new Map() { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java index 734ff46311598..bab9bf2ad6432 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java @@ -171,13 +171,54 @@ public void testAddOrGetComplexChildVectors() { vector.addOrGetStruct("struct"); vector.addOrGetMap("map", true); - List childrens = vector.getChildrenFromFields(); - assertEquals(5, childrens.size()); - assertEquals(MinorType.LIST, childrens.get(0).getMinorType()); - assertEquals(MinorType.FIXED_SIZE_LIST, childrens.get(1).getMinorType()); - assertEquals(MinorType.UNION, childrens.get(2).getMinorType()); - assertEquals(MinorType.STRUCT, childrens.get(3).getMinorType()); - assertEquals(MinorType.MAP, childrens.get(4).getMinorType()); + List children = vector.getChildrenFromFields(); + assertEquals(5, children.size()); + assertEquals(MinorType.LIST, children.get(0).getMinorType()); + assertEquals(MinorType.FIXED_SIZE_LIST, children.get(1).getMinorType()); + assertEquals(MinorType.UNION, children.get(2).getMinorType()); + assertEquals(MinorType.STRUCT, children.get(3).getMinorType()); + assertEquals(MinorType.MAP, children.get(4).getMinorType()); + } + } + + @Test + public void testAddChildVectorsWithDuplicatedFieldNames() { + try (StructVector vector = StructVector.emptyWithDuplicates("struct", allocator)) { + // Add a bit more fields to test against stability of the internal field + // ordering mechanism of StructVector + vector.add("varchar1", FieldType.nullable(MinorType.VARCHAR.getType())); + vector.add("int1", FieldType.nullable(MinorType.INT.getType())); + vector.add("varchar2", FieldType.nullable(MinorType.VARCHAR.getType())); + vector.add("int2", FieldType.nullable(MinorType.INT.getType())); + vector.add("varchar3", FieldType.nullable(MinorType.VARCHAR.getType())); + vector.add("int3", FieldType.nullable(MinorType.INT.getType())); + + // To ensure duplicated field names don't mess up the original field order + // in the struct vector + vector.add("varchar1", FieldType.nullable(MinorType.VARCHAR.getType())); + vector.add("varchar2", FieldType.nullable(MinorType.VARCHAR.getType())); + vector.add("varchar3", FieldType.nullable(MinorType.VARCHAR.getType())); + + List children = vector.getChildrenFromFields(); + assertEquals(9, children.size()); + assertEquals("varchar1", children.get(0).getName()); + assertEquals("int1", children.get(1).getName()); + assertEquals("varchar2", children.get(2).getName()); + assertEquals("int2", children.get(3).getName()); + assertEquals("varchar3", children.get(4).getName()); + assertEquals("int3", children.get(5).getName()); + assertEquals("varchar1", children.get(6).getName()); + assertEquals("varchar2", children.get(7).getName()); + assertEquals("varchar3", children.get(8).getName()); + assertEquals(MinorType.VARCHAR, children.get(0).getMinorType()); + assertEquals(MinorType.INT, children.get(1).getMinorType()); + assertEquals(MinorType.VARCHAR, children.get(2).getMinorType()); + assertEquals(MinorType.INT, children.get(3).getMinorType()); + assertEquals(MinorType.VARCHAR, children.get(4).getMinorType()); + assertEquals(MinorType.INT, children.get(5).getMinorType()); + assertEquals(MinorType.VARCHAR, children.get(6).getMinorType()); + assertEquals(MinorType.VARCHAR, children.get(7).getMinorType()); + assertEquals(MinorType.VARCHAR, children.get(8).getMinorType()); } } }