Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HBASE-26566 Optimize encodeNumeric in OrderedBytes #3940

Merged
merged 6 commits into from
Dec 27, 2021
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,10 @@ public class OrderedBytes {
private static final BigDecimal EN2 = BigDecimal.valueOf(1e-2);
private static final BigDecimal EN10 = BigDecimal.valueOf(1e-10);

// TODO: 18 bytes (i.e. 36 decimal digits) is an arbitrary encoding limit. Reevaluate once we
// have better handling of numeric scale.
private static final int MAX_NUM_ENCODE_BYTES = 18;

/**
* Max precision guaranteed to fit into a {@code long}.
*/
Expand Down Expand Up @@ -632,7 +636,7 @@ private static int encodeNumericSmall(PositionedByteRange dst, BigDecimal val) {
byte[] a = dst.getBytes();
boolean isNeg = val.signum() == -1;
final int offset = dst.getOffset(), start = dst.getPosition();
int e = 0, d, startM;
int e = 0, startM;

if (isNeg) { /* Small negative number: 0x14, -E, ~M */
dst.put(NEG_SMALL);
Expand All @@ -641,21 +645,17 @@ private static int encodeNumericSmall(PositionedByteRange dst, BigDecimal val) {
}

// normalize abs(val) to determine E
while (abs.compareTo(EN10) < 0) { abs = abs.movePointRight(8); e += 4; }
while (abs.compareTo(EN2) < 0) { abs = abs.movePointRight(2); e++; }
int zerosBeforeFirstNonZero = abs.scale() - abs.precision();
int lengthToMoveRight = zerosBeforeFirstNonZero % 2 ==
0 ? zerosBeforeFirstNonZero : zerosBeforeFirstNonZero - 1;
e = lengthToMoveRight / 2;
abs = abs.movePointRight(lengthToMoveRight);

putVaruint64(dst, e, !isNeg); // encode appropriate E value.

// encode M by peeling off centimal digits, encoding x as 2x+1
startM = dst.getPosition();
// TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
// numeric scale.
for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) {
abs = abs.movePointRight(2);
d = abs.intValue();
dst.put((byte) ((2 * d + 1) & 0xff));
abs = abs.subtract(BigDecimal.valueOf(d));
}
encodeToCentimal(dst, abs);
// terminal digit should be 2x
a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe);
if (isNeg) {
Expand Down Expand Up @@ -707,7 +707,7 @@ private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) {
byte[] a = dst.getBytes();
boolean isNeg = val.signum() == -1;
final int start = dst.getPosition(), offset = dst.getOffset();
int e = 0, d, startM;
int e = 0, startM;

if (isNeg) { /* Large negative number: 0x08, ~E, ~M */
dst.put(NEG_LARGE);
Expand All @@ -716,9 +716,14 @@ private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) {
}

// normalize abs(val) to determine E
while (abs.compareTo(E32) >= 0 && e <= 350) { abs = abs.movePointLeft(32); e +=16; }
while (abs.compareTo(E8) >= 0 && e <= 350) { abs = abs.movePointLeft(8); e+= 4; }
while (abs.compareTo(BigDecimal.ONE) >= 0 && e <= 350) { abs = abs.movePointLeft(2); e++; }
int integerDigits = abs.precision() - abs.scale();
int lengthToMoveLeft = integerDigits % 2 == 0 ? integerDigits : integerDigits + 1;
e = lengthToMoveLeft / 2;
if (e > 350) {
e = 351;
lengthToMoveLeft = 702;
}
abs = abs.movePointLeft(lengthToMoveLeft);

Copy link
Contributor Author

@YutSean YutSean Dec 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This old 350 limit is problematic: for values > 100^350, abs will be > 1, which causes the following steps to produce an incorrect encoding for M. And for values < 100^350, the limit has no effect. In my opinion, this limit should be removed.

// encode appropriate header byte and/or E value.
if (e > 10) { /* large number, write out {~,}E */
Expand All @@ -733,14 +738,7 @@ private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) {

// encode M by peeling off centimal digits, encoding x as 2x+1
startM = dst.getPosition();
// TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
// numeric scale.
for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) {
abs = abs.movePointRight(2);
d = abs.intValue();
dst.put((byte) (2 * d + 1));
abs = abs.subtract(BigDecimal.valueOf(d));
}
encodeToCentimal(dst, abs);
// terminal digit should be 2x
a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe);
if (isNeg) {
Expand All @@ -750,6 +748,21 @@ private static int encodeNumericLarge(PositionedByteRange dst, BigDecimal val) {
return dst.getPosition() - start;
}

/**
 * Writes the decimal digits of {@code val} to {@code dst} as centimal (base-100) digits,
 * two decimal digits per output byte, each centimal digit {@code x} stored as {@code 2x+1}.
 * <p>
 * The digits are taken from the plain-string form of {@code val} (trailing zeros stripped),
 * starting right after the decimal point; if that string contains no decimal point, the whole
 * string is used. At most {@code MAX_NUM_ENCODE_BYTES * 2} characters are consumed. When an
 * odd number of characters is consumed, the final centimal digit is formed as if a trailing
 * {@code 0} had been appended.
 * @param dst The destination to which encoded digits are written.
 * @param val The value whose digits are encoded.
 */
private static void encodeToCentimal(PositionedByteRange dst, BigDecimal val) {
  final String plain = val.stripTrailingZeros().toPlainString();
  // indexOf yields -1 when there is no '.', so the substring then starts at index 0.
  final String digits = plain.substring(plain.indexOf('.') + 1);
  final int limit = Math.min(MAX_NUM_ENCODE_BYTES * 2, digits.length());

  int pos = 0;
  while (pos < limit) {
    final int tens = digits.charAt(pos) - '0';
    // A missing low-order character (odd digit count) contributes 0.
    final int ones = (pos + 1 < limit) ? digits.charAt(pos + 1) - '0' : 0;
    final int centimal = tens * 10 + ones;
    dst.put((byte) (2 * centimal + 1));
    pos += 2;
  }
}

/**
* Encode a numerical value using the variable-length encoding.
* @param dst The destination to which encoded digits are written.
Expand Down