Skip to content

Commit

Permalink
GH-37728: [Java] Add methods to get an Iterable for a ValueVector
Browse files Browse the repository at this point in the history
* Added a method to get an Iterator for a ValueVector
* Added a method to get an Iterable for a ValueVector
* The Iterator/Iterable are typed to Object
  • Loading branch information
normanj-bitquill committed May 30, 2024
1 parent 6800be9 commit 66ae3f2
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 14 deletions.
6 changes: 6 additions & 0 deletions java/dataset/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,12 @@
<version>2.15.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest</artifactId>
<version>2.2</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<resources>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.arrow.dataset.substrait;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThrows;
import static org.junit.Assert.assertTrue;
Expand Down Expand Up @@ -45,6 +46,9 @@
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.util.Text;
import org.apache.arrow.vector.util.ValueVectorUtility;
import org.hamcrest.collection.IsIterableContainingInOrder;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
Expand Down Expand Up @@ -236,9 +240,11 @@ public void testRunExtendedExpressionsFilter() throws Exception {
int rowcount = 0;
while (reader.loadNextBatch()) {
rowcount += reader.getVectorSchemaRoot().getRowCount();
assertTrue(reader.getVectorSchemaRoot().getVector("id").toString().equals("[19, 1, 11]"));
assertTrue(reader.getVectorSchemaRoot().getVector("name").toString()
.equals("[value_19, value_1, value_11]"));
assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("id")),
IsIterableContainingInOrder.contains(new Integer[] {19, 1, 11}));
assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("name")),
IsIterableContainingInOrder.contains(
new Text("value_19"), new Text("value_1"), new Text("value_11")));
}
assertEquals(3, rowcount);
}
Expand Down Expand Up @@ -331,11 +337,13 @@ public void testRunExtendedExpressionsProjection() throws Exception {
assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields());
int rowcount = 0;
while (reader.loadNextBatch()) {
assertTrue(reader.getVectorSchemaRoot().getVector("add_two_to_column_a").toString()
.equals("[21, 3, 13, 23, 47]"));
assertTrue(reader.getVectorSchemaRoot().getVector("concat_column_a_and_b").toString()
.equals("[value_19 - value_19, value_1 - value_1, value_11 - value_11, " +
"value_21 - value_21, value_45 - value_45]"));
assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("add_two_to_column_a")),
IsIterableContainingInOrder.contains(21, 3, 13, 23, 47));
assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("concat_column_a_and_b")),
IsIterableContainingInOrder.contains(
new Text("value_19 - value_19"), new Text("value_1 - value_1"),
new Text("value_11 - value_11"), new Text("value_21 - value_21"),
new Text("value_45 - value_45")));
rowcount += reader.getVectorSchemaRoot().getRowCount();
}
assertEquals(5, rowcount);
Expand Down Expand Up @@ -370,8 +378,9 @@ public void testRunExtendedExpressionsProjectionWithFilterInsteadOfProjectionExc
assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields());
int rowcount = 0;
while (reader.loadNextBatch()) {
assertTrue(reader.getVectorSchemaRoot().getVector("filter_id_lower_than_20").toString()
.equals("[true, true, true, false, false]"));
assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector(
"filter_id_lower_than_20")),
IsIterableContainingInOrder.contains(true, true, true, false, false));
rowcount += reader.getVectorSchemaRoot().getRowCount();
}
assertEquals(5, rowcount);
Expand Down Expand Up @@ -441,10 +450,12 @@ public void testRunExtendedExpressionsProjectAndFilter() throws Exception {
assertEquals(schema.getFields(), reader.getVectorSchemaRoot().getSchema().getFields());
int rowcount = 0;
while (reader.loadNextBatch()) {
assertTrue(reader.getVectorSchemaRoot().getVector("add_two_to_column_a").toString()
.equals("[21, 3, 13]"));
assertTrue(reader.getVectorSchemaRoot().getVector("concat_column_a_and_b").toString()
.equals("[value_19 - value_19, value_1 - value_1, value_11 - value_11]"));
assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("add_two_to_column_a")),
IsIterableContainingInOrder.contains(21, 3, 13));
assertThat(ValueVectorUtility.iterable(reader.getVectorSchemaRoot().getVector("concat_column_a_and_b")),
IsIterableContainingInOrder.contains(
new Text("value_19 - value_19"), new Text("value_1 - value_1"),
new Text("value_11 - value_11")));
rowcount += reader.getVectorSchemaRoot().getRowCount();
}
assertEquals(3, rowcount);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow;

import java.util.Iterator;
import java.util.function.BiFunction;

import org.apache.arrow.util.Preconditions;
Expand Down Expand Up @@ -108,6 +109,34 @@ public static <V extends ValueVector> String getToString(
return sb.toString();
}

/**
 * Creates an Iterator to iterate over the values in the ValueVector.
 *
 * <p>Values are produced by calling {@code vector.getObject(i)} for indices starting at 0.
 * The vector's value count is re-read on every {@code hasNext()} call rather than captured
 * when the iterator is created.
 *
 * @param vector the vector for which to iterate.
 * @return an Iterator over the values
 */
public static Iterator<Object> iterator(ValueVector vector) {
  return new Iterator<Object>() {
    private int index = 0;

    @Override public boolean hasNext() {
      return index < vector.getValueCount();
    }

    /**
     * Returns the value at the current position and advances by one.
     *
     * @throws java.util.NoSuchElementException if the iteration has no more values
     */
    @Override public Object next() {
      // The Iterator contract requires NoSuchElementException on exhaustion; without this
      // guard the call would read past the value count and keep advancing the index.
      if (!hasNext()) {
        throw new java.util.NoSuchElementException("No more values in vector at index " + index);
      }
      return vector.getObject(index++);
    }
  };
}

/**
 * Wraps a ValueVector in an Iterable so its values can be used in a for-each loop.
 *
 * <p>Each call to {@code iterator()} on the returned object delegates to
 * {@link #iterator(ValueVector)} and therefore yields a new Iterator.
 *
 * @param vector the vector to create an iterable for
 * @return an Iterable for the vector
 */
public static Iterable<Object> iterable(ValueVector vector) {
  // Iterable is a functional interface: the lambda body is its iterator() method.
  final Iterable<Object> values = () -> iterator(vector);
  return values;
}

/**
* Utility to validate vector in O(1) time.
*/
Expand Down

0 comments on commit 66ae3f2

Please sign in to comment.