From 66ae3f291d7ce5a5c4eb3734f6f951f865809aef Mon Sep 17 00:00:00 2001 From: Norman Jordan Date: Thu, 30 May 2024 13:43:13 -0700 Subject: [PATCH] GH-37728: [Java] Add methods to get an Iterable for a ValueVector * Added a method to get an Iterator for a ValueVector * Added a method to get an Iterable for a ValueVector * The Iterator/Iterable are typed to Object --- java/dataset/pom.xml | 6 +++ .../substrait/TestAceroSubstraitConsumer.java | 39 ++++++++++++------- .../arrow/vector/util/ValueVectorUtility.java | 29 ++++++++++++++ 3 files changed, 60 insertions(+), 14 deletions(-) diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml index 3dea16204a4db..2ace8c3507c7c 100644 --- a/java/dataset/pom.xml +++ b/java/dataset/pom.xml @@ -150,6 +150,12 @@ 2.15.1 test + + org.hamcrest + hamcrest + 2.2 + test + diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/substrait/TestAceroSubstraitConsumer.java b/java/dataset/src/test/java/org/apache/arrow/dataset/substrait/TestAceroSubstraitConsumer.java index 0fba72892cdc6..292294a7001db 100644 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/substrait/TestAceroSubstraitConsumer.java +++ b/java/dataset/src/test/java/org/apache/arrow/dataset/substrait/TestAceroSubstraitConsumer.java @@ -17,6 +17,7 @@ package org.apache.arrow.dataset.substrait; +import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; @@ -45,6 +46,9 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.arrow.vector.util.Text; +import org.apache.arrow.vector.util.ValueVectorUtility; +import org.hamcrest.collection.IsIterableContainingInOrder; import org.junit.ClassRule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -236,9 +240,11 @@ public void testRunExtendedExpressionsFilter() 
throws Exception { int rowcount = 0; while (reader.loadNextBatch()) { rowcount += reader.getVectorSchemaRoot().getRowCount(); - assertTrue(reader.getVectorSchemaRoot().getVector("id").toString().equals("[19, 1, 11]")); - assertTrue(reader.getVectorSchemaRoot().getVector("name").toString() - .equals("[value_19, value_1, value_11]")); + assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("id")), + IsIterableContainingInOrder.contains(new Integer[] {19, 1, 11})); + assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("name")), + IsIterableContainingInOrder.contains( + new Text("value_19"), new Text("value_1"), new Text("value_11"))); } assertEquals(3, rowcount); } @@ -331,11 +337,13 @@ public void testRunExtendedExpressionsProjection() throws Exception { assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields()); int rowcount = 0; while (reader.loadNextBatch()) { - assertTrue(reader.getVectorSchemaRoot().getVector("add_two_to_column_a").toString() - .equals("[21, 3, 13, 23, 47]")); - assertTrue(reader.getVectorSchemaRoot().getVector("concat_column_a_and_b").toString() - .equals("[value_19 - value_19, value_1 - value_1, value_11 - value_11, " + - "value_21 - value_21, value_45 - value_45]")); + assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("add_two_to_column_a")), + IsIterableContainingInOrder.contains(21, 3, 13, 23, 47)); + assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("concat_column_a_and_b")), + IsIterableContainingInOrder.contains( + new Text("value_19 - value_19"), new Text("value_1 - value_1"), + new Text("value_11 - value_11"), new Text("value_21 - value_21"), + new Text("value_45 - value_45"))); rowcount += reader.getVectorSchemaRoot().getRowCount(); } assertEquals(5, rowcount); @@ -370,8 +378,9 @@ public void testRunExtendedExpressionsProjectionWithFilterInsteadOfProjectionExc assertEquals(schema.getFields(), 
reader.getVectorSchemaRoot().getSchema().getFields()); int rowcount = 0; while (reader.loadNextBatch()) { - assertTrue(reader.getVectorSchemaRoot().getVector("filter_id_lower_than_20").toString() - .equals("[true, true, true, false, false]")); + assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector( + "filter_id_lower_than_20")), + IsIterableContainingInOrder.contains(true, true, true, false, false)); rowcount += reader.getVectorSchemaRoot().getRowCount(); } assertEquals(5, rowcount); @@ -441,10 +450,12 @@ public void testRunExtendedExpressionsProjectAndFilter() throws Exception { assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields()); int rowcount = 0; while (reader.loadNextBatch()) { - assertTrue(reader.getVectorSchemaRoot().getVector("add_two_to_column_a").toString() - .equals("[21, 3, 13]")); - assertTrue(reader.getVectorSchemaRoot().getVector("concat_column_a_and_b").toString() - .equals("[value_19 - value_19, value_1 - value_1, value_11 - value_11]")); + assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("add_two_to_column_a")), + IsIterableContainingInOrder.contains(21, 3, 13)); + assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("concat_column_a_and_b")), + IsIterableContainingInOrder.contains( + new Text("value_19 - value_19"), new Text("value_1 - value_1"), + new Text("value_11 - value_11"))); rowcount += reader.getVectorSchemaRoot().getRowCount(); } assertEquals(3, rowcount); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java index ceb7081e1ea3d..4d2d7a2a4b3a4 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java @@ -19,6 +19,7 @@ import static 
org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow;
 
+import java.util.Iterator;
 import java.util.function.BiFunction;
 
 import org.apache.arrow.util.Preconditions;
@@ -108,6 +109,42 @@ public static String getToString(
     return sb.toString();
   }
 
+  /**
+   * Creates an Iterator over the values in a ValueVector, in index order.
+   * Values are read with {@code vector.getObject(i)} for i from 0 up to (excluding)
+   * {@code vector.getValueCount()}; the count is re-read on every {@code hasNext()} call.
+   * @param vector the vector to iterate over
+   * @return an Iterator over the values
+   */
+  public static Iterator<Object> iterator(ValueVector vector) {
+    return new Iterator<Object>() {
+      private int index = 0;
+
+      @Override public boolean hasNext() {
+        return index < vector.getValueCount();
+      }
+
+      @Override public Object next() {
+        // Iterator contract: signal exhaustion instead of reading past the last value.
+        if (!hasNext()) {
+          throw new java.util.NoSuchElementException(
+              "No value at index " + index + " (value count " + vector.getValueCount() + ")");
+        }
+        return vector.getObject(index++);
+      }
+    };
+  }
+
+  /**
+   * Creates an Iterable for the values in a ValueVector.
+   * Every call to {@code iterator()} on the result starts a new pass from index 0.
+   * @param vector the vector to create an Iterable for
+   * @return an Iterable whose iterators come from {@link #iterator(ValueVector)}
+   */
+  public static Iterable<Object> iterable(ValueVector vector) {
+    return () -> iterator(vector);
+  }
+
   /**
    * Utility to validate vector in O(1) time.
    */