From 46d2c1171ee50a2265f12c46ad93c986c3a17512 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Fri, 12 Jul 2019 18:17:18 +0800 Subject: [PATCH] [ARROW-5920][Java] Support sort & compare for all variable width vectors --- .../sort/DefaultVectorComparators.java | 27 ++++++++++--------- .../algorithm/search/TestVectorSearcher.java | 7 +++-- ...stVariableWidthOutOfPlaceVectorSorter.java | 3 ++- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java index 2dfa0aaa7cc9f..535940e2bcc56 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java @@ -17,14 +17,15 @@ package org.apache.arrow.algorithm.sort; +import static org.apache.arrow.vector.BaseVariableWidthVector.OFFSET_WIDTH; + +import org.apache.arrow.vector.BaseVariableWidthVector; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.SmallIntVector; import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.holders.NullableVarCharHolder; /** * Default comparator implementations for different types of vectors. @@ -169,26 +170,26 @@ public int compareNotNull(int index1, int index2) { } /** - * Default comparator for varchars. + * Default comparator for {@link org.apache.arrow.vector.BaseVariableWidthVector}. * The comparison is in lexicographic order, with null comes first. */ - public static class VarCharComparator extends VectorValueComparator { - - private NullableVarCharHolder holder1 = new NullableVarCharHolder(); - private NullableVarCharHolder holder2 = new NullableVarCharHolder(); + public static class VariableWidthComparator extends VectorValueComparator { @Override public int compareNotNull(int index1, int index2) { - vector1.get(index1, holder1); - vector2.get(index2, holder2); + int start1 = vector1.getOffsetBuffer().getInt(index1 * OFFSET_WIDTH); + int start2 = vector2.getOffsetBuffer().getInt(index2 * OFFSET_WIDTH); + + int end1 = vector1.getOffsetBuffer().getInt((index1 + 1) * OFFSET_WIDTH); + int end2 = vector2.getOffsetBuffer().getInt((index2 + 1) * OFFSET_WIDTH); - int length1 = holder1.end - holder1.start; - int length2 = holder2.end - holder2.start; + int length1 = end1 - start1; + int length2 = end2 - start2; int minLength = length1 < length2 ? length1 : length2; for (int i = 0; i < minLength; i++) { - byte b1 = holder1.buffer.getByte(holder1.start + i); - byte b2 = holder2.buffer.getByte(holder2.start + i); + byte b1 = vector1.getDataBuffer().getByte(start1 + i); + byte b2 = vector2.getDataBuffer().getByte(start2 + i); if (b1 != b2) { return b1 - b2; diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java index f5c2912476594..41269cd571359 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java @@ -23,6 +23,7 @@ import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BaseVariableWidthVector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.VarCharVector; import org.junit.After; @@ -137,7 +138,8 @@ public void testBinarySearchVarChar() { negVector.set(0, "abcd".getBytes()); // do search - VectorValueComparator comparator = new DefaultVectorComparators.VarCharComparator(); + VectorValueComparator comparator = + new DefaultVectorComparators.VariableWidthComparator(); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -175,7 +177,8 @@ public void testLinearSearchVarChar() { negVector.set(0, "abcd".getBytes()); // do search - VectorValueComparator comparator = new DefaultVectorComparators.VarCharComparator(); + VectorValueComparator comparator = + new DefaultVectorComparators.VariableWidthComparator(); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java index 68be2549de43d..7ce4e3d75079e 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java @@ -65,7 +65,8 @@ public void testSortString() { // sort the vector VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter(); - DefaultVectorComparators.VarCharComparator comparator = new DefaultVectorComparators.VarCharComparator(); + DefaultVectorComparators.VariableWidthComparator comparator = + new DefaultVectorComparators.VariableWidthComparator(); VarCharVector sortedVec = (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);