From 0677a3c359b8dfc0e8f9b90f9f33129ec735a1c4 Mon Sep 17 00:00:00 2001 From: Becker Ewing Date: Wed, 16 Aug 2023 13:47:39 -0400 Subject: [PATCH] HBASE-28025 Enhance ByteBufferUtils.findCommonPrefix to compare 8 bytes each time --- /* Review note: after this patch every findCommonPrefix overload in ByteBufferUtils and Bytes is answered by a CommonPrefixer that a CommonPrefixerHolder picks in a static initializer; the Unsafe variants compare 8 bytes per step and finish the remaining (minLength % 8) bytes one at a time. */ .../hadoop/hbase/util/ByteBufferUtils.java | 185 +++++++++++++++--- .../org/apache/hadoop/hbase/util/Bytes.java | 107 +++++++++- .../hbase/util/TestByteBufferUtils.java | 31 +++ .../apache/hadoop/hbase/util/TestBytes.java | 43 ++++ 4 files changed, 329 insertions(+), 37 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java index a5a5c5105db0..054de74d7d1e 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java @@ -80,6 +80,14 @@ static abstract class Converter { abstract int putLong(ByteBuffer buffer, int index, long val); } + /** Computes the length of the common prefix between a ByteBuffer range and either a byte[] range or another ByteBuffer range. */ static abstract class CommonPrefixer { + abstract int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, byte[] right, + int rightOffset, int rightLength); + + abstract int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, ByteBuffer right, + int rightOffset, int rightLength); + } + static class ComparerHolder { static final String UNSAFE_COMPARER_NAME = ComparerHolder.class.getName() + "$UnsafeComparer"; @@ -322,6 +330,111 @@ int putLong(ByteBuffer buffer, int index, long val) { } } + static class CommonPrefixerHolder { + static final String UNSAFE_COMMON_PREFIXER_NAME = + CommonPrefixerHolder.class.getName() + "$UnsafeCommonPrefixer"; + + static final CommonPrefixer BEST_COMMON_PREFIXER = getBestCommonPrefixer(); + + /** Loads the class named by UNSAFE_COMMON_PREFIXER_NAME reflectively and instantiates it through its no-arg constructor; any Throwable falls through to the pure-Java instance. NOTE(review): with the raw Class declared below, newInstance() is typed Object and would not satisfy the CommonPrefixer return type; a type argument on Class was presumably lost when this patch text was extracted; confirm against the upstream commit. */ static CommonPrefixer getBestCommonPrefixer() { + try { + Class theClass = + Class.forName(UNSAFE_COMMON_PREFIXER_NAME).asSubclass(CommonPrefixer.class); + + return theClass.getConstructor().newInstance(); + } catch (Throwable t) 
{ // ensure we really catch *everything* + return PureJavaCommonPrefixer.INSTANCE; + } + } + + static final class PureJavaCommonPrefixer extends CommonPrefixer { + static final PureJavaCommonPrefixer INSTANCE = new PureJavaCommonPrefixer(); + + private PureJavaCommonPrefixer() { + } + + @Override + public int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, byte[] right, + int rightOffset, int rightLength) { + int length = Math.min(leftLength, rightLength); + int result = 0; + + while ( + result < length + && ByteBufferUtils.toByte(left, leftOffset + result) == right[rightOffset + result] + ) { + result++; + } + + return result; + } + + @Override + int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, ByteBuffer right, + int rightOffset, int rightLength) { + int length = Math.min(leftLength, rightLength); + int result = 0; + + while ( + result < length && ByteBufferUtils.toByte(left, leftOffset + result) + == ByteBufferUtils.toByte(right, rightOffset + result) + ) { + result++; + } + + return result; + } + } + + /** Turns each operand into an (object, offset) pair and delegates to findCommonPrefixUnsafe: a direct buffer contributes a null object and directBufferAddress plus the offset, any other buffer its backing array and arrayOffset plus BYTE_ARRAY_BASE_OFFSET, a byte[] itself plus BYTE_ARRAY_BASE_OFFSET. The static initializer throws Error when UNSAFE_UNALIGNED is false. */ static final class UnsafeCommonPrefixer extends CommonPrefixer { + + static { + if (!UNSAFE_UNALIGNED) { + throw new Error(); + } + } + + public UnsafeCommonPrefixer() { + } + + @Override + public int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, byte[] right, + int rightOffset, int rightLength) { + long offset1Adj; + Object refObj1 = null; + if (left.isDirect()) { + offset1Adj = leftOffset + UnsafeAccess.directBufferAddress(left); + } else { + offset1Adj = leftOffset + left.arrayOffset() + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET; + refObj1 = left.array(); + } + return findCommonPrefixUnsafe(refObj1, offset1Adj, leftLength, right, + rightOffset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET, rightLength); + } + + @Override + public int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, ByteBuffer right, + int rightOffset, int rightLength) { + long offset1Adj, offset2Adj; + Object refObj1 = null, refObj2 
= null; + if (left.isDirect()) { + offset1Adj = leftOffset + UnsafeAccess.directBufferAddress(left); + } else { + offset1Adj = leftOffset + left.arrayOffset() + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET; + refObj1 = left.array(); + } + if (right.isDirect()) { + offset2Adj = rightOffset + UnsafeAccess.directBufferAddress(right); + } else { + offset2Adj = rightOffset + right.arrayOffset() + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET; + refObj2 = right.array(); + } + return findCommonPrefixUnsafe(refObj1, offset1Adj, leftLength, refObj2, offset2Adj, + rightLength); + } + } + } + /** * Similar to {@link WritableUtils#writeVLong(java.io.DataOutput, long)}, but writes to a * {@link ByteBuffer}. @@ -744,14 +857,7 @@ public static void copyFromBufferToBuffer(ByteBuffer in, ByteBuffer out, int sou */ public static int findCommonPrefix(byte[] left, int leftOffset, int leftLength, byte[] right, int rightOffset, int rightLength) { - int length = Math.min(leftLength, rightLength); - int result = 0; - - while (result < length && left[leftOffset + result] == right[rightOffset + result]) { - result++; - } - - return result; + /* Bytes.findCommonPrefix declares its parameters as (left, right, leftLength, rightLength, leftOffset, rightOffset), hence the reordering. */ return Bytes.findCommonPrefix(left, right, leftLength, rightLength, leftOffset, rightOffset); } /** @@ -765,17 +871,8 @@ public static int findCommonPrefix(byte[] left, int leftOffset, int leftLength, */ public static int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, ByteBuffer right, int rightOffset, int rightLength) { - int length = Math.min(leftLength, rightLength); - int result = 0; - - while ( - result < length && ByteBufferUtils.toByte(left, leftOffset + result) - == ByteBufferUtils.toByte(right, rightOffset + result) - ) { - result++; - } - - return result; + return CommonPrefixerHolder.BEST_COMMON_PREFIXER.findCommonPrefix(left, leftOffset, leftLength, + right, rightOffset, rightLength); } /** @@ -789,17 +886,8 @@ public static int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLeng */ public static int findCommonPrefix(ByteBuffer 
left, int leftOffset, int leftLength, byte[] right, int rightOffset, int rightLength) { - int length = Math.min(leftLength, rightLength); - int result = 0; - - while ( - result < length - && ByteBufferUtils.toByte(left, leftOffset + result) == right[rightOffset + result] - ) { - result++; - } - - return result; + return CommonPrefixerHolder.BEST_COMMON_PREFIXER.findCommonPrefix(left, leftOffset, leftLength, + right, rightOffset, rightLength); } /** @@ -972,6 +1060,43 @@ static int compareToUnsafe(Object obj1, long o1, int l1, Object obj2, long o2, i return l1 - l2; } + static int findCommonPrefixUnsafe(Object left, long leftOffset, int leftLength, Object right, + long rightOffset, int rightLength) { + final int stride = 8; + final int minLength = Math.min(leftLength, rightLength); + int strideLimit = minLength & ~(stride - 1); + int result = 0; + int i; + + for (i = 0; i < strideLimit; i += stride) { + long lw = HBasePlatformDependent.getLong(left, leftOffset + (long) i); + long rw = HBasePlatformDependent.getLong(right, rightOffset + (long) i); + + if (lw != rw) { + if (!UnsafeAccess.LITTLE_ENDIAN) { + return result + (Long.numberOfLeadingZeros(lw ^ rw) / Bytes.SIZEOF_LONG); + } else { + return result + (Long.numberOfTrailingZeros(lw ^ rw) / Bytes.SIZEOF_LONG); + } + } else { + result += Bytes.SIZEOF_LONG; + } + } + + // The epilogue to cover the last (minLength % stride) elements. + for (; i < minLength; i++) { + byte il = HBasePlatformDependent.getByte(left, leftOffset + i); + byte ir = HBasePlatformDependent.getByte(right, rightOffset + i); + if (il != ir) { + return result; + } else { + result++; + } + } + + return result; + } + /** * Reads a short value at the given buffer's offset. 
* @param buffer input byte buffer to read diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java index 0203cc390fe8..96b3dbd4a8a5 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java @@ -1179,6 +1179,11 @@ static abstract class Converter { } + static abstract class CommonPrefixer { + abstract int findCommonPrefix(byte[] left, int leftOffset, int leftLength, byte[] right, + int rightOffset, int rightLength); + } + static Comparer lexicographicalComparerJavaImpl() { return LexicographicalComparerHolder.PureJavaComparer.INSTANCE; } @@ -1453,6 +1458,99 @@ public int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, i } } + static class CommonPrefixerHolder { + static final String UNSAFE_COMMON_PREFIXER_NAME = + CommonPrefixerHolder.class.getName() + "$UnsafeCommonPrefixer"; + + static final CommonPrefixer BEST_COMMON_PREFIXER = getBestCommonPrefixer(); + + static CommonPrefixer getBestCommonPrefixer() { + try { + Class theClass = + Class.forName(UNSAFE_COMMON_PREFIXER_NAME).asSubclass(CommonPrefixer.class); + + return theClass.getConstructor().newInstance(); + } catch (Throwable t) { // ensure we really catch *everything* + return CommonPrefixerHolder.PureJavaCommonPrefixer.INSTANCE; + } + } + + static final class PureJavaCommonPrefixer extends CommonPrefixer { + static final PureJavaCommonPrefixer INSTANCE = new PureJavaCommonPrefixer(); + + private PureJavaCommonPrefixer() { + } + + @Override + public int findCommonPrefix(byte[] left, int leftOffset, int leftLength, byte[] right, + int rightOffset, int rightLength) { + int length = Math.min(leftLength, rightLength); + int result = 0; + + while (result < length && left[leftOffset + result] == right[rightOffset + result]) { + result++; + } + return result; + } + } + + static final class UnsafeCommonPrefixer 
extends CommonPrefixer { + + static { + if (!UNSAFE_UNALIGNED) { + throw new Error(); + } + + // sanity check - this should never fail + if (HBasePlatformDependent.arrayIndexScale(byte[].class) != 1) { + throw new AssertionError(); + } + } + + public UnsafeCommonPrefixer() { + } + + @Override + public int findCommonPrefix(byte[] left, int leftOffset, int leftLength, byte[] right, + int rightOffset, int rightLength) { + final int stride = 8; + final int minLength = Math.min(leftLength, rightLength); + int strideLimit = minLength & ~(stride - 1); + final long leftOffsetAdj = leftOffset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET; + final long rightOffsetAdj = rightOffset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET; + int result = 0; + int i; + + for (i = 0; i < strideLimit; i += stride) { + long lw = HBasePlatformDependent.getLong(left, leftOffsetAdj + i); + long rw = HBasePlatformDependent.getLong(right, rightOffsetAdj + i); + if (lw != rw) { + if (!UnsafeAccess.LITTLE_ENDIAN) { + return result + (Long.numberOfLeadingZeros(lw ^ rw) / Bytes.SIZEOF_LONG); + } else { + return result + (Long.numberOfTrailingZeros(lw ^ rw) / Bytes.SIZEOF_LONG); + } + } else { + result += Bytes.SIZEOF_LONG; + } + } + + // The epilogue to cover the last (minLength % stride) elements. + for (; i < minLength; i++) { + int il = (left[leftOffset + i]); + int ir = (right[rightOffset + i]); + if (il != ir) { + return result; + } else { + result++; + } + } + + return result; + } + } + } + /** * Lexicographically determine the equality of two arrays. 
* @param left left operand @@ -2429,12 +2527,7 @@ public static int searchDelimiterIndexInReverse(final byte[] b, final int offset public static int findCommonPrefix(byte[] left, byte[] right, int leftLength, int rightLength, int leftOffset, int rightOffset) { - int length = Math.min(leftLength, rightLength); - int result = 0; - - while (result < length && left[leftOffset + result] == right[rightOffset + result]) { - result++; - } - return result; + return CommonPrefixerHolder.BEST_COMMON_PREFIXER.findCommonPrefix(left, leftOffset, leftLength, + right, rightOffset, rightLength); } } diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java index c824e01e4256..eabfed2042ca 100644 --- a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java @@ -606,6 +606,37 @@ public void testEquals() { assertTrue(ByteBufferUtils.equals(bb, 0, a.length, a, 0, a.length)); } + @Test + public void testFindCommonPrefix() { + ByteBuffer bb1 = ByteBuffer.allocate(135); + ByteBuffer bb2 = ByteBuffer.allocate(135); + ByteBuffer bb3 = ByteBuffer.allocateDirect(135); + byte[] b = new byte[71]; + + fillBB(bb1, (byte) 5); + fillBB(bb2, (byte) 5); + fillBB(bb3, (byte) 5); + fillArray(b, (byte) 5); + + assertEquals(135, + ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb2, 0, bb2.remaining())); + assertEquals(71, ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), b, 0, b.length)); + assertEquals(135, + ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb3, 0, bb3.remaining())); + assertEquals(71, ByteBufferUtils.findCommonPrefix(bb3, 0, bb3.remaining(), b, 0, b.length)); + + b[13] = 9; + assertEquals(13, ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), b, 0, b.length)); + + bb2.put(134, (byte) 6); + assertEquals(134, + 
ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb2, 0, bb2.remaining())); + + bb2.put(6, (byte) 4); + assertEquals(6, + ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb2, 0, bb2.remaining())); + } + private static void fillBB(ByteBuffer bb, byte b) { for (int i = bb.position(); i < bb.limit(); i++) { bb.put(i, b); diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java index 14be2f4cc37a..b74348959982 100644 --- a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java @@ -585,4 +585,47 @@ public void testToFromHex() { assertArrayEquals(testData, result); } } + + @Test + public void testFindCommonPrefix() throws Exception { + testFindCommonPrefix(false); + } + + @Test + public void testFindCommonPrefixUnsafe() throws Exception { + testFindCommonPrefix(true); + } + + private static void testFindCommonPrefix(boolean unsafe) throws Exception { + setUnsafe(unsafe); + try { + // tests for common prefixes less than 8 bytes in length (i.e. 
using non-vectorized path) + byte[] hello = Bytes.toBytes("hello"); + byte[] helloWorld = Bytes.toBytes("helloworld"); + + assertEquals(5, + Bytes.findCommonPrefix(hello, helloWorld, hello.length, helloWorld.length, 0, 0)); + assertEquals(5, Bytes.findCommonPrefix(hello, hello, hello.length, hello.length, 0, 0)); + assertEquals(3, + Bytes.findCommonPrefix(hello, hello, hello.length - 2, hello.length - 2, 2, 2)); + assertEquals(0, Bytes.findCommonPrefix(hello, hello, 0, 0, 0, 0)); + + // tests for common prefixes greater than 8 bytes in length which may use the vectorized path + byte[] hellohello = Bytes.toBytes("hellohello"); + byte[] hellohellohi = Bytes.toBytes("hellohellohi"); + + assertEquals(10, Bytes.findCommonPrefix(hellohello, hellohellohi, hellohello.length, + hellohellohi.length, 0, 0)); + assertEquals(10, Bytes.findCommonPrefix(hellohellohi, hellohello, hellohellohi.length, + hellohello.length, 0, 0)); + assertEquals(10, + Bytes.findCommonPrefix(hellohello, hellohello, hellohello.length, hellohello.length, 0, 0)); + + hellohello[2] = 0; + assertEquals(2, Bytes.findCommonPrefix(hellohello, hellohellohi, hellohello.length, + hellohellohi.length, 0, 0)); + } finally { + setUnsafe(HBasePlatformDependent.unaligned()); + } + } }