From dd45aeabd0b30a25dadf953f8a2512f4809ef9a8 Mon Sep 17 00:00:00 2001
From: liyafan82
Date: Fri, 4 Oct 2019 16:24:43 +0530
Subject: [PATCH] ARROW-6580: [Java] Support comparison for unsigned integers

In this issue, we support the comparison of unsigned integer vectors, including UInt1Vector, UInt2Vector, UInt4Vector, and UInt8Vector.
With support for comparison for these vectors, the sort for them is also supported automatically.

Closes #5405 from liyafan82/fly_0917_uint and squashes the following commits:

c94b9b729 Add test cases for boundary values
a321784e6 Support comparison for unsigned integers

Authored-by: liyafan82
Signed-off-by: Praveen
---
 .../sort/DefaultVectorComparators.java        |  86 +++++++++++
 .../sort/TestDefaultVectorComparator.java     | 144 ++++++++++++++++++
 2 files changed, 230 insertions(+)

diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
index 8c4b862c34c73..e3246c2610cd9 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
@@ -20,6 +20,7 @@
 import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH;
 
 import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
 import org.apache.arrow.vector.BaseFixedWidthVector;
 import org.apache.arrow.vector.BaseVariableWidthVector;
 import org.apache.arrow.vector.BigIntVector;
@@ -28,6 +29,10 @@
 import org.apache.arrow.vector.IntVector;
 import org.apache.arrow.vector.SmallIntVector;
 import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
 import org.apache.arrow.vector.ValueVector;
 import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
 
@@ -56,6 +61,14 @@ public static <T extends ValueVector> VectorValueComparator<T> createDefaultComp
         return (VectorValueComparator<T>) new Float4Comparator();
       } else if (vector instanceof Float8Vector) {
         return (VectorValueComparator<T>) new Float8Comparator();
+      } else if (vector instanceof UInt1Vector) {
+        return (VectorValueComparator<T>) new UInt1Comparator();
+      } else if (vector instanceof UInt2Vector) {
+        return (VectorValueComparator<T>) new UInt2Comparator();
+      } else if (vector instanceof UInt4Vector) {
+        return (VectorValueComparator<T>) new UInt4Comparator();
+      } else if (vector instanceof UInt8Vector) {
+        return (VectorValueComparator<T>) new UInt8Comparator();
       }
     } else if (vector instanceof BaseVariableWidthVector) {
       return (VectorValueComparator<T>) new VariableWidthComparator();
@@ -141,6 +154,79 @@ public int compareNotNull(int index1, int index2) {
     }
   }
 
+  /**
+   * Default comparator for {@link UInt1Vector}, whose bytes are compared as unsigned values (0 to 255).
+   * Only non-null values are compared here; the inherited comparison puts nulls first.
+   */
+  public static class UInt1Comparator extends VectorValueComparator<UInt1Vector> {
+
+    public UInt1Comparator() {
+      super(1);
+    }
+
+    @Override
+    public int compareNotNull(int index1, int index2) {
+      byte value1 = vector1.get(index1);
+      byte value2 = vector2.get(index2);
+
+      return (value1 & 0xff) - (value2 & 0xff); // operands are 0..255 after masking; no overflow
+    }
+  }
+
+  /**
+   * Default comparator for {@link UInt2Vector}, whose elements are read as {@code char} (unsigned 16-bit) values.
+   * Only non-null values are compared here; the inherited comparison puts nulls first.
+   */
+  public static class UInt2Comparator extends VectorValueComparator<UInt2Vector> {
+
+    public UInt2Comparator() {
+      super(2);
+    }
+
+    @Override
+    public int compareNotNull(int index1, int index2) {
+      char value1 = vector1.get(index1);
+      char value2 = vector2.get(index2);
+      return value1 - value2; // chars promote to non-negative ints; no overflow
+    }
+  }
+
+  /**
+   * Default comparator for {@link UInt4Vector}, whose {@code int} elements are compared as unsigned 32-bit values.
+   * Only non-null values are compared here; the inherited comparison puts nulls first.
+   */
+  public static class UInt4Comparator extends VectorValueComparator<UInt4Vector> {
+
+    public UInt4Comparator() {
+      super(4);
+    }
+
+    @Override
+    public int compareNotNull(int index1, int index2) {
+      int value1 = vector1.get(index1);
+      int value2 = vector2.get(index2);
+      return ByteFunctionHelpers.unsignedIntCompare(value1, value2);
+    }
+  }
+
+  /**
+   * Default comparator for {@link UInt8Vector}, whose {@code long} elements are compared as unsigned 64-bit values.
+   * Only non-null values are compared here; the inherited comparison puts nulls first.
+   */
+  public static class UInt8Comparator extends VectorValueComparator<UInt8Vector> {
+
+    public UInt8Comparator() {
+      super(8);
+    }
+
+    @Override
+    public int compareNotNull(int index1, int index2) {
+      long value1 = vector1.get(index1);
+      long value2 = vector2.get(index2);
+      return ByteFunctionHelpers.unsignedLongCompare(value1, value2);
+    }
+  }
+
   /**
    * Default comparator for float type.
    * The comparison is based on values, with null comes first.
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java
index 81f5d73d164ce..8c07d82ac37c7 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java
@@ -23,6 +23,10 @@
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
 import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
 import org.apache.arrow.vector.complex.ListVector;
 import org.apache.arrow.vector.types.Types;
 import org.apache.arrow.vector.types.pojo.FieldType;
@@ -107,4 +111,144 @@ public void testCompareLists() {
       assertTrue(comparator.compare(0, 0) == 0);
     }
   }
+
+  @Test
+  public void testCompareUInt1() {
+    try (UInt1Vector vec = new UInt1Vector("", allocator)) {
+      vec.allocateNew(10);
+      vec.setValueCount(10);
+
+      vec.setNull(0);
+      vec.set(1, -2);
+      vec.set(2, -1);
+      vec.set(3, 0);
+      vec.set(4, 1);
+      vec.set(5, 2);
+      vec.set(6, -2);
+      vec.setNull(7);
+      vec.set(8, Byte.MAX_VALUE);
+      vec.set(9, Byte.MIN_VALUE);
+
+      VectorValueComparator<UInt1Vector> comparator =
+              DefaultVectorComparators.createDefaultComparator(vec);
+      comparator.attachVector(vec);
+
+      assertTrue(comparator.compare(0, 1) < 0);
+      assertTrue(comparator.compare(1, 2) < 0);
+      assertTrue(comparator.compare(1, 3) > 0);
+      assertTrue(comparator.compare(2, 5) > 0);
+      assertTrue(comparator.compare(4, 5) < 0);
+      assertTrue(comparator.compare(1, 6) == 0);
+      assertTrue(comparator.compare(0, 7) == 0);
+      assertTrue(comparator.compare(8, 9) < 0);
+      assertTrue(comparator.compare(4, 8) < 0);
+      assertTrue(comparator.compare(5, 9) < 0);
+      assertTrue(comparator.compare(2, 9) > 0);
+    }
+  }
+
+  @Test
+  public void testCompareUInt2() {
+    try (UInt2Vector vec = new UInt2Vector("", allocator)) {
+      vec.allocateNew(10);
+      vec.setValueCount(10);
+
+      vec.setNull(0);
+      vec.set(1, -2);
+      vec.set(2, -1);
+      vec.set(3, 0);
+      vec.set(4, 1);
+      vec.set(5, 2);
+      vec.set(6, -2);
+      vec.setNull(7);
+      vec.set(8, Short.MAX_VALUE);
+      vec.set(9, Short.MIN_VALUE);
+
+      VectorValueComparator<UInt2Vector> comparator =
+              DefaultVectorComparators.createDefaultComparator(vec);
+      comparator.attachVector(vec);
+
+      assertTrue(comparator.compare(0, 1) < 0);
+      assertTrue(comparator.compare(1, 2) < 0);
+      assertTrue(comparator.compare(1, 3) > 0);
+      assertTrue(comparator.compare(2, 5) > 0);
+      assertTrue(comparator.compare(4, 5) < 0);
+      assertTrue(comparator.compare(1, 6) == 0);
+      assertTrue(comparator.compare(0, 7) == 0);
+      assertTrue(comparator.compare(8, 9) < 0);
+      assertTrue(comparator.compare(4, 8) < 0);
+      assertTrue(comparator.compare(5, 9) < 0);
+      assertTrue(comparator.compare(2, 9) > 0);
+    }
+  }
+
+  @Test
+  public void testCompareUInt4() {
+    try (UInt4Vector vec = new UInt4Vector("", allocator)) {
+      vec.allocateNew(10);
+      vec.setValueCount(10);
+
+      vec.setNull(0);
+      vec.set(1, -2);
+      vec.set(2, -1);
+      vec.set(3, 0);
+      vec.set(4, 1);
+      vec.set(5, 2);
+      vec.set(6, -2);
+      vec.setNull(7);
+      vec.set(8, Integer.MAX_VALUE);
+      vec.set(9, Integer.MIN_VALUE);
+
+      VectorValueComparator<UInt4Vector> comparator =
+              DefaultVectorComparators.createDefaultComparator(vec);
+      comparator.attachVector(vec);
+
+      assertTrue(comparator.compare(0, 1) < 0);
+      assertTrue(comparator.compare(1, 2) < 0);
+      assertTrue(comparator.compare(1, 3) > 0);
+      assertTrue(comparator.compare(2, 5) > 0);
+      assertTrue(comparator.compare(4, 5) < 0);
+      assertTrue(comparator.compare(1, 6) == 0);
+      assertTrue(comparator.compare(0, 7) == 0);
+      assertTrue(comparator.compare(8, 9) < 0);
+      assertTrue(comparator.compare(4, 8) < 0);
+      assertTrue(comparator.compare(5, 9) < 0);
+      assertTrue(comparator.compare(2, 9) > 0);
+    }
+  }
+
+  @Test
+  public void testCompareUInt8() {
+    try (UInt8Vector vec = new UInt8Vector("", allocator)) {
+      vec.allocateNew(10);
+      vec.setValueCount(10);
+
+      vec.setNull(0);
+      vec.set(1, -2);
+      vec.set(2, -1);
+      vec.set(3, 0);
+      vec.set(4, 1);
+      vec.set(5, 2);
+      vec.set(6, -2);
+      vec.setNull(7);
+      vec.set(8, Long.MAX_VALUE);
+      vec.set(9, Long.MIN_VALUE);
+
+      VectorValueComparator<UInt8Vector> comparator =
+              DefaultVectorComparators.createDefaultComparator(vec);
+      comparator.attachVector(vec);
+
+      assertTrue(comparator.compare(0, 1) < 0);
+      assertTrue(comparator.compare(1, 2) < 0);
+      assertTrue(comparator.compare(1, 3) > 0);
+      assertTrue(comparator.compare(2, 5) > 0);
+      assertTrue(comparator.compare(4, 5) < 0);
+      assertTrue(comparator.compare(1, 6) == 0);
+      assertTrue(comparator.compare(0, 7) == 0);
+      assertTrue(comparator.compare(8, 9) < 0);
+      assertTrue(comparator.compare(4, 8) < 0);
+      assertTrue(comparator.compare(5, 9) < 0);
+      assertTrue(comparator.compare(2, 9) > 0);
+    }
+  }
 }