Skip to content

Commit

Permalink
[SPARK-25114][CORE] Fix RecordBinaryComparator when subtraction betwe…
Browse files Browse the repository at this point in the history
…en two words is divisible by Integer.MAX_VALUE.

## What changes were proposed in this pull request?

apache#22079 (comment) It is possible for two objects to be unequal and yet we consider them as equal with this code, if the long values are separated by Int.MaxValue.
This PR fixes the issue.

## How was this patch tested?
Add new test cases in `RecordBinaryComparatorSuite`.

Closes apache#22101 from jiangxb1987/fix-rbc.

Authored-by: Xingbo Jiang <[email protected]>
Signed-off-by: Xiao Li <[email protected]>
  • Loading branch information
jiangxb1987 authored and gatorsmile committed Aug 21, 2018
1 parent f984ec7 commit 4fb96e5
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,10 @@

public final class RecordBinaryComparator extends RecordComparator {

// TODO(jiangxb) Add test suite for this.
@Override
public int compare(
Object leftObj, long leftOff, int leftLen, Object rightObj, long rightOff, int rightLen) {
int i = 0;
int res = 0;

// If the arrays have different length, the longer one is larger.
if (leftLen != rightLen) {
Expand All @@ -40,27 +38,33 @@ public int compare(
// check if stars align and we can get both offsets to be aligned
if ((leftOff % 8) == (rightOff % 8)) {
while ((leftOff + i) % 8 != 0 && i < leftLen) {
res = (Platform.getByte(leftObj, leftOff + i) & 0xff) -
(Platform.getByte(rightObj, rightOff + i) & 0xff);
if (res != 0) return res;
final int v1 = Platform.getByte(leftObj, leftOff + i) & 0xff;
final int v2 = Platform.getByte(rightObj, rightOff + i) & 0xff;
if (v1 != v2) {
return v1 > v2 ? 1 : -1;
}
i += 1;
}
}
// for architectures that support unaligned accesses, chew it up 8 bytes at a time
if (Platform.unaligned() || (((leftOff + i) % 8 == 0) && ((rightOff + i) % 8 == 0))) {
while (i <= leftLen - 8) {
res = (int) ((Platform.getLong(leftObj, leftOff + i) -
Platform.getLong(rightObj, rightOff + i)) % Integer.MAX_VALUE);
if (res != 0) return res;
final long v1 = Platform.getLong(leftObj, leftOff + i);
final long v2 = Platform.getLong(rightObj, rightOff + i);
if (v1 != v2) {
return v1 > v2 ? 1 : -1;
}
i += 8;
}
}
// this will finish off the unaligned comparisons, or do the entire aligned comparison
// whichever is needed.
while (i < leftLen) {
res = (Platform.getByte(leftObj, leftOff + i) & 0xff) -
(Platform.getByte(rightObj, rightOff + i) & 0xff);
if (res != 0) return res;
final int v1 = Platform.getByte(leftObj, leftOff + i) & 0xff;
final int v2 = Platform.getByte(rightObj, rightOff + i) & 0xff;
if (v1 != v2) {
return v1 > v2 ? 1 : -1;
}
i += 1;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -253,4 +253,70 @@ public void testBinaryComparatorForNullColumns() throws Exception {
assert(compare(0, 0) == 0);
assert(compare(0, 1) > 0);
}

@Test
public void testBinaryComparatorWhenSubtractionIsDivisibleByMaxIntValue() throws Exception {
int numFields = 1;

UnsafeRow row1 = new UnsafeRow(numFields);
byte[] data1 = new byte[100];
row1.pointTo(data1, computeSizeInBytes(numFields * 8));
row1.setLong(0, 11);

UnsafeRow row2 = new UnsafeRow(numFields);
byte[] data2 = new byte[100];
row2.pointTo(data2, computeSizeInBytes(numFields * 8));
row2.setLong(0, 11L + Integer.MAX_VALUE);

insertRow(row1);
insertRow(row2);

assert(compare(0, 1) < 0);
}

@Test
public void testBinaryComparatorWhenSubtractionCanOverflowLongValue() throws Exception {
int numFields = 1;

UnsafeRow row1 = new UnsafeRow(numFields);
byte[] data1 = new byte[100];
row1.pointTo(data1, computeSizeInBytes(numFields * 8));
row1.setLong(0, Long.MIN_VALUE);

UnsafeRow row2 = new UnsafeRow(numFields);
byte[] data2 = new byte[100];
row2.pointTo(data2, computeSizeInBytes(numFields * 8));
row2.setLong(0, 1);

insertRow(row1);
insertRow(row2);

assert(compare(0, 1) < 0);
}

@Test
public void testBinaryComparatorWhenOnlyTheLastColumnDiffers() throws Exception {
int numFields = 4;

UnsafeRow row1 = new UnsafeRow(numFields);
byte[] data1 = new byte[100];
row1.pointTo(data1, computeSizeInBytes(numFields * 8));
row1.setInt(0, 11);
row1.setDouble(1, 3.14);
row1.setInt(2, -1);
row1.setLong(3, 0);

UnsafeRow row2 = new UnsafeRow(numFields);
byte[] data2 = new byte[100];
row2.pointTo(data2, computeSizeInBytes(numFields * 8));
row2.setInt(0, 11);
row2.setDouble(1, 3.14);
row2.setInt(2, -1);
row2.setLong(3, 1);

insertRow(row1);
insertRow(row2);

assert(compare(0, 1) < 0);
}
}

0 comments on commit 4fb96e5

Please sign in to comment.