From 03c7f183283e29c9482347246de96f42ae54bc16 Mon Sep 17 00:00:00 2001 From: Bryan Beaudreault Date: Wed, 21 Dec 2022 12:34:25 -0500 Subject: [PATCH] HBASE-27233 Read blocks into off-heap if caching is disabled for read (#4931) Signed-off-by: Xiaolin Ha --- .../hbase/io/hfile/HFileReaderImpl.java | 40 +++-- .../hadoop/hbase/io/hfile/TestHFile.java | 145 ++++++++++++++++++ 2 files changed, 174 insertions(+), 11 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java index e181a61de347..af3a9b960aa3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java @@ -1218,21 +1218,34 @@ public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws } /** - * If expected block is data block, we'll allocate the ByteBuff of block from - * {@link org.apache.hadoop.hbase.io.ByteBuffAllocator} and it's usually an off-heap one, - * otherwise it will allocate from heap. + * Whether we use heap or not depends on our intent to cache the block. We want to avoid + * allocating to off-heap if we intend to cache into the on-heap L1 cache. Otherwise, it's more + * efficient to allocate to off-heap since we can control GC ourselves for those. So our decision + * here breaks down as follows:
+ * If block cache is disabled, don't use heap. If we're not using the CombinedBlockCache, use heap + * unless caching is disabled for the request. Otherwise, only use heap if caching is enabled and + * the expected block type is not DATA (which goes to off-heap L2 in combined cache). * @see org.apache.hadoop.hbase.io.hfile.HFileBlock.FSReader#readBlockData(long, long, boolean, * boolean, boolean) */ - private boolean shouldUseHeap(BlockType expectedBlockType) { + private boolean shouldUseHeap(BlockType expectedBlockType, boolean cacheBlock) { if (!cacheConf.getBlockCache().isPresent()) { return false; - } else if (!cacheConf.isCombinedBlockCache()) { - // Block to cache in LruBlockCache must be an heap one. So just allocate block memory from - // heap for saving an extra off-heap to heap copying. - return true; } - return expectedBlockType != null && !expectedBlockType.isData(); + + // we only cache a block if cacheBlock is true and caching-on-read is enabled in CacheConfig + // we can really only check for that if have an expectedBlockType + if (expectedBlockType != null) { + cacheBlock &= cacheConf.shouldCacheBlockOnRead(expectedBlockType.getCategory()); + } + + if (!cacheConf.isCombinedBlockCache()) { + // Block to cache in LruBlockCache must be an heap one, if caching enabled. So just allocate + // block memory from heap for saving an extra off-heap to heap copying in that case. + return cacheBlock; + } + + return cacheBlock && expectedBlockType != null && !expectedBlockType.isData(); } @Override @@ -1321,8 +1334,13 @@ public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final bo span.addEvent("block cache miss", attributes); // Load block from filesystem. HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, pread, - !isCompaction, shouldUseHeap(expectedBlockType)); - validateBlockType(hfileBlock, expectedBlockType); + !isCompaction, shouldUseHeap(expectedBlockType, cacheable)); + try { + validateBlockType(hfileBlock, expectedBlockType); + } catch (IOException e) { + hfileBlock.release(); + throw e; + } BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory(); final boolean cacheCompressed = cacheConf.shouldCacheCompressed(category); final boolean cacheOnRead = cacheConf.shouldCacheBlockOnRead(category); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java index 3b45f89b8fc9..c1114770786e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java @@ -259,6 +259,151 @@ public void testReaderWithCombinedBlockCache() throws Exception { alloc.clean(); } + /** + * Tests that we properly allocate from the off-heap or on-heap when LRUCache is configured. In + * this case, the determining factor is whether we end up caching the block or not. So the below + * test cases try different permutations of enabling/disabling via CacheConfig and via user + * request (cacheblocks), along with different expected block types. + */ + @Test + public void testReaderBlockAllocationWithLRUCache() throws IOException { + // false because caching is fully enabled + testReaderBlockAllocationWithLRUCache(true, true, null, false); + // false because we only look at cache config when expectedBlockType is non-null + testReaderBlockAllocationWithLRUCache(false, true, null, false); + // false because cacheBlock is true and even with cache config is disabled, we still cache + // important blocks like indexes + testReaderBlockAllocationWithLRUCache(false, true, BlockType.INTERMEDIATE_INDEX, false); + // true because since it's a DATA block, we honor the cache config + testReaderBlockAllocationWithLRUCache(false, true, BlockType.DATA, true); + // true for the following 2 because cacheBlock takes precedence over cache config + testReaderBlockAllocationWithLRUCache(true, false, null, true); + testReaderBlockAllocationWithLRUCache(true, false, BlockType.INTERMEDIATE_INDEX, false); + // false for the following 3 because both cache config and cacheBlock are false. + // per above, INDEX would supersede cache config, but not cacheBlock + testReaderBlockAllocationWithLRUCache(false, false, null, true); + testReaderBlockAllocationWithLRUCache(false, false, BlockType.INTERMEDIATE_INDEX, true); + testReaderBlockAllocationWithLRUCache(false, false, BlockType.DATA, true); + } + + private void testReaderBlockAllocationWithLRUCache(boolean cacheConfigCacheBlockOnRead, + boolean cacheBlock, BlockType blockType, boolean expectSharedMem) throws IOException { + int bufCount = 1024, blockSize = 64 * 1024; + ByteBuffAllocator alloc = initAllocator(true, blockSize, bufCount, 0); + fillByteBuffAllocator(alloc, bufCount); + Path storeFilePath = writeStoreFile(); + Configuration myConf = new Configuration(conf); + + myConf.setBoolean(CacheConfig.CACHE_DATA_ON_READ_KEY, cacheConfigCacheBlockOnRead); + // Open the file reader with LRUBlockCache + BlockCache lru = new LruBlockCache(1024 * 1024 * 32, blockSize, true, myConf); + CacheConfig cacheConfig = new CacheConfig(myConf, null, lru, alloc); + HFile.Reader reader = HFile.createReader(fs, storeFilePath, cacheConfig, true, myConf); + long offset = 0; + while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) { + long read = readAtOffsetWithAllocationAsserts(alloc, reader, offset, cacheBlock, blockType, + expectSharedMem); + if (read < 0) { + break; + } + + offset += read; + } + + reader.close(); + Assert.assertEquals(bufCount, alloc.getFreeBufferCount()); + alloc.clean(); + lru.shutdown(); + } + + /** + * Tests that we properly allocate from the off-heap or on-heap when CombinedCache is configured. + * In this case, we should always use off-heap unless the block is an INDEX (which always goes to + * L1 cache which is on-heap) + */ + @Test + public void testReaderBlockAllocationWithCombinedCache() throws IOException { + // true because caching is fully enabled and block type null + testReaderBlockAllocationWithCombinedCache(true, true, null, true); + // false because caching is fully enabled, index block type always goes to on-heap L1 + testReaderBlockAllocationWithCombinedCache(true, true, BlockType.INTERMEDIATE_INDEX, false); + // true because cacheBlocks takes precedence over cache config which block type is null + testReaderBlockAllocationWithCombinedCache(false, true, null, true); + // false because caching is enabled and block type is index, which always goes to L1 + testReaderBlockAllocationWithCombinedCache(false, true, BlockType.INTERMEDIATE_INDEX, false); + // true because since it's a DATA block, we honor the cache config + testReaderBlockAllocationWithCombinedCache(false, true, BlockType.DATA, true); + // true for the following 2 because cacheBlock takes precedence over cache config + // with caching disabled, we always go to off-heap + testReaderBlockAllocationWithCombinedCache(true, false, null, true); + testReaderBlockAllocationWithCombinedCache(true, false, BlockType.INTERMEDIATE_INDEX, false); + // true for the following 3, because with caching disabled we always go to off-heap + testReaderBlockAllocationWithCombinedCache(false, false, null, true); + testReaderBlockAllocationWithCombinedCache(false, false, BlockType.INTERMEDIATE_INDEX, true); + testReaderBlockAllocationWithCombinedCache(false, false, BlockType.DATA, true); + } + + private void testReaderBlockAllocationWithCombinedCache(boolean cacheConfigCacheBlockOnRead, + boolean cacheBlock, BlockType blockType, boolean expectSharedMem) throws IOException { + int bufCount = 1024, blockSize = 64 * 1024; + ByteBuffAllocator alloc = initAllocator(true, blockSize, bufCount, 0); + fillByteBuffAllocator(alloc, bufCount); + Path storeFilePath = writeStoreFile(); + // Open the file reader with CombinedBlockCache + BlockCache combined = initCombinedBlockCache("LRU"); + Configuration myConf = new Configuration(conf); + + myConf.setBoolean(CacheConfig.CACHE_DATA_ON_READ_KEY, cacheConfigCacheBlockOnRead); + myConf.setBoolean(EVICT_BLOCKS_ON_CLOSE_KEY, true); + + CacheConfig cacheConfig = new CacheConfig(myConf, null, combined, alloc); + HFile.Reader reader = HFile.createReader(fs, storeFilePath, cacheConfig, true, myConf); + long offset = 0; + while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) { + long read = readAtOffsetWithAllocationAsserts(alloc, reader, offset, cacheBlock, blockType, + expectSharedMem); + if (read < 0) { + break; + } + + offset += read; + } + + reader.close(); + combined.shutdown(); + Assert.assertEquals(bufCount, alloc.getFreeBufferCount()); + alloc.clean(); + } + + private long readAtOffsetWithAllocationAsserts(ByteBuffAllocator alloc, HFile.Reader reader, + long offset, boolean cacheBlock, BlockType blockType, boolean expectSharedMem) + throws IOException { + HFileBlock block; + try { + block = reader.readBlock(offset, -1, cacheBlock, true, false, true, blockType, null); + } catch (IOException e) { + if (e.getMessage().contains("Expected block type")) { + return -1; + } + throw e; + } + + Assert.assertEquals(expectSharedMem, block.isSharedMem()); + + if (expectSharedMem) { + Assert.assertTrue(alloc.getFreeBufferCount() < alloc.getTotalBufferCount()); + } else { + // Should never allocate off-heap block from allocator because ensure that it's LRU. + Assert.assertEquals(alloc.getTotalBufferCount(), alloc.getFreeBufferCount()); + } + + try { + return block.getOnDiskSizeWithHeader(); + } finally { + block.release(); // return back the ByteBuffer back to allocator. + } + } + private void readStoreFile(Path storeFilePath, Configuration conf, ByteBuffAllocator alloc) throws Exception { // Open the file reader with block cache disabled.