Skip to content

Commit

Permalink
[ARROW-5920][Java] Support sort & compare for all variable width vectors
Browse files Browse the repository at this point in the history
  • Loading branch information
liyafan82 committed Jul 12, 2019
1 parent 03360e1 commit 46d2c11
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@

package org.apache.arrow.algorithm.sort;

import static org.apache.arrow.vector.BaseVariableWidthVector.OFFSET_WIDTH;

import org.apache.arrow.vector.BaseVariableWidthVector;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.SmallIntVector;
import org.apache.arrow.vector.TinyIntVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.holders.NullableVarCharHolder;

/**
* Default comparator implementations for different types of vectors.
Expand Down Expand Up @@ -169,26 +170,26 @@ public int compareNotNull(int index1, int index2) {
}

/**
* Default comparator for varchars.
* Default comparator for {@link org.apache.arrow.vector.BaseVariableWidthVector}.
* The comparison is in lexicographic order, with nulls coming first.
*/
public static class VarCharComparator extends VectorValueComparator<VarCharVector> {

private NullableVarCharHolder holder1 = new NullableVarCharHolder();
private NullableVarCharHolder holder2 = new NullableVarCharHolder();
public static class VariableWidthComparator extends VectorValueComparator<BaseVariableWidthVector> {

@Override
public int compareNotNull(int index1, int index2) {
vector1.get(index1, holder1);
vector2.get(index2, holder2);
int start1 = vector1.getOffsetBuffer().getInt(index1 * OFFSET_WIDTH);
int start2 = vector2.getOffsetBuffer().getInt(index2 * OFFSET_WIDTH);

int end1 = vector1.getOffsetBuffer().getInt((index1 + 1) * OFFSET_WIDTH);
int end2 = vector2.getOffsetBuffer().getInt((index2 + 1) * OFFSET_WIDTH);

int length1 = holder1.end - holder1.start;
int length2 = holder2.end - holder2.start;
int length1 = end1 - start1;
int length2 = end2 - start2;

int minLength = length1 < length2 ? length1 : length2;
for (int i = 0; i < minLength; i++) {
byte b1 = holder1.buffer.getByte(holder1.start + i);
byte b2 = holder2.buffer.getByte(holder2.start + i);
byte b1 = vector1.getDataBuffer().getByte(start1 + i);
byte b2 = vector2.getDataBuffer().getByte(start2 + i);

if (b1 != b2) {
return b1 - b2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BaseVariableWidthVector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VarCharVector;
import org.junit.After;
Expand Down Expand Up @@ -137,7 +138,8 @@ public void testBinarySearchVarChar() {
negVector.set(0, "abcd".getBytes());

// do search
VectorValueComparator<VarCharVector> comparator = new DefaultVectorComparators.VarCharComparator();
VectorValueComparator<BaseVariableWidthVector> comparator =
new DefaultVectorComparators.VariableWidthComparator();
for (int i = 0; i < VECTOR_LENGTH; i++) {
int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i);
assertEquals(i, result);
Expand Down Expand Up @@ -175,7 +177,8 @@ public void testLinearSearchVarChar() {
negVector.set(0, "abcd".getBytes());

// do search
VectorValueComparator<VarCharVector> comparator = new DefaultVectorComparators.VarCharComparator();
VectorValueComparator<BaseVariableWidthVector> comparator =
new DefaultVectorComparators.VariableWidthComparator();
for (int i = 0; i < VECTOR_LENGTH; i++) {
int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i);
assertEquals(i, result);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ public void testSortString() {

// sort the vector
VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter();
DefaultVectorComparators.VarCharComparator comparator = new DefaultVectorComparators.VarCharComparator();
DefaultVectorComparators.VariableWidthComparator comparator =
new DefaultVectorComparators.VariableWidthComparator();

VarCharVector sortedVec =
(VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
Expand Down

0 comments on commit 46d2c11

Please sign in to comment.