Skip to content

Commit

Permalink
StructVector's child vectors get unexpectedly reordered after adding …
Browse files Browse the repository at this point in the history
…duplicated fields (apache#112)
  • Loading branch information
zhztheplayer authored Jun 6, 2022
1 parent 6d25fe3 commit 1b00159
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.util.VisibleForTesting;
import org.apache.arrow.vector.BitVectorHelper;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.ValueVector;
Expand Down Expand Up @@ -229,7 +230,8 @@ public <T extends FieldVector> T getChild(String name, Class<T> clazz) {
return typeify(f, clazz);
}

protected ValueVector add(String childName, FieldType fieldType) {
@VisibleForTesting
public ValueVector add(String childName, FieldType fieldType) {
FieldVector vector = fieldType.createNewSingleVector(childName, allocator, callBack);
putChild(childName, vector);
if (callBack != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand Down Expand Up @@ -54,7 +54,7 @@
public class MapWithOrdinalImpl<K, V> implements MapWithOrdinal<K, V> {
private static final Logger logger = LoggerFactory.getLogger(MapWithOrdinalImpl.class);

private final Map<K, Map.Entry<Integer, V>> primary = new HashMap<>();
private final Map<K, Map.Entry<Integer, V>> primary = new LinkedHashMap<>();
private final IntObjectHashMap<V> secondary = new IntObjectHashMap<>();

private final Map<K, V> delegate = new Map<K, V>() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,13 +171,54 @@ public void testAddOrGetComplexChildVectors() {
vector.addOrGetStruct("struct");
vector.addOrGetMap("map", true);

List<FieldVector> childrens = vector.getChildrenFromFields();
assertEquals(5, childrens.size());
assertEquals(MinorType.LIST, childrens.get(0).getMinorType());
assertEquals(MinorType.FIXED_SIZE_LIST, childrens.get(1).getMinorType());
assertEquals(MinorType.UNION, childrens.get(2).getMinorType());
assertEquals(MinorType.STRUCT, childrens.get(3).getMinorType());
assertEquals(MinorType.MAP, childrens.get(4).getMinorType());
List<FieldVector> children = vector.getChildrenFromFields();
assertEquals(5, children.size());
assertEquals(MinorType.LIST, children.get(0).getMinorType());
assertEquals(MinorType.FIXED_SIZE_LIST, children.get(1).getMinorType());
assertEquals(MinorType.UNION, children.get(2).getMinorType());
assertEquals(MinorType.STRUCT, children.get(3).getMinorType());
assertEquals(MinorType.MAP, children.get(4).getMinorType());
}
}

/**
 * Adds six uniquely named children to a struct vector, then three more children that reuse
 * names already present, and checks that {@code getChildrenFromFields()} reports all nine
 * children in exactly the order in which they were added.
 */
@Test
public void testAddChildVectorsWithDuplicatedFieldNames() {
  try (StructVector vector = StructVector.emptyWithDuplicates("struct", allocator)) {
    // Children are added in exactly this order, which is also the order expected back.
    // Entries 0-5 use unique names with alternating types; entries 6-8 repeat the varchar
    // names so that a duplicate insertion would expose any reordering of earlier children.
    final String[] names = {
        "varchar1", "int1", "varchar2", "int2", "varchar3", "int3",
        "varchar1", "varchar2", "varchar3"
    };
    final MinorType[] types = {
        MinorType.VARCHAR, MinorType.INT, MinorType.VARCHAR,
        MinorType.INT, MinorType.VARCHAR, MinorType.INT,
        MinorType.VARCHAR, MinorType.VARCHAR, MinorType.VARCHAR
    };

    for (int i = 0; i < names.length; i++) {
      vector.add(names[i], FieldType.nullable(types[i].getType()));
    }

    List<FieldVector> children = vector.getChildrenFromFields();
    assertEquals(9, children.size());

    // Check every name first, then every type, in child order.
    for (int i = 0; i < names.length; i++) {
      assertEquals(names[i], children.get(i).getName());
    }
    for (int i = 0; i < types.length; i++) {
      assertEquals(types[i], children.get(i).getMinorType());
    }
  }
}
}

0 comments on commit 1b00159

Please sign in to comment.