From b5a9f7ced16a5d327ff37b9bcb875961a0b92e4a Mon Sep 17 00:00:00 2001 From: Wellington Ramos Chevreuil Date: Tue, 2 Apr 2024 11:58:53 +0100 Subject: [PATCH] CDPD-68550: Backport HBASE-28458 BucketCache.notifyFileCachingCompleted may incorrectly consider a file fully cached (#5777) Signed-off-by: Duo Zhang (cherry picked from commit c4ac2df041aa4795f91024b1e5dc8d4f5b6c048e) Change-Id: I7b8f26cff9c4dd12672dbc407fd94716d2eb7e34 --- .../hbase/io/hfile/bucket/BucketCache.java | 26 ++++++++++++------- .../TestBlockEvictionOnRegionMovement.java | 9 ++++--- .../bucket/TestBucketCachePersister.java | 25 +++++++++--------- 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java index 6cfe7ceb6f89..5e544a0007e4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java @@ -2052,26 +2052,30 @@ public void notifyFileCachingCompleted(Path fileName, int totalBlockCount, int d // so we need to count all blocks for this file in the backing map under // a read lock for the block offset final List locks = new ArrayList<>(); - LOG.debug("Notifying caching completed for file {}, with total blocks {}", fileName, - dataBlockCount); + LOG.debug("Notifying caching completed for file {}, with total blocks {}, and data blocks {}", + fileName, totalBlockCount, dataBlockCount); try { final MutableInt count = new MutableInt(); LOG.debug("iterating over {} entries in the backing map", backingMap.size()); backingMap.entrySet().stream().forEach(entry -> { - if (entry.getKey().getHfileName().equals(fileName.getName())) { + if ( + entry.getKey().getHfileName().equals(fileName.getName()) + && entry.getKey().getBlockType().equals(BlockType.DATA) + ) { LOG.debug("found block for file {} in the backing map. Acquiring read lock for offset {}", fileName.getName(), entry.getKey().getOffset()); ReentrantReadWriteLock lock = offsetLock.getLock(entry.getKey().getOffset()); lock.readLock().lock(); locks.add(lock); + // rechecks the given key is still there (no eviction happened before the lock acquired) if (backingMap.containsKey(entry.getKey())) { count.increment(); } } }); - // We may either place only data blocks on the BucketCache or all type of blocks - if (dataBlockCount == count.getValue() || totalBlockCount == count.getValue()) { - LOG.debug("File {} has now been fully cached.", fileName.getName()); + // BucketCache would only have data blocks + if (dataBlockCount == count.getValue()) { + LOG.debug("File {} has now been fully cached.", fileName); fileCacheCompleted(fileName, size); } else { LOG.debug( @@ -2079,15 +2083,17 @@ public void notifyFileCachingCompleted(Path fileName, int totalBlockCount, int d + "Total blocks for file: {}. Checking for blocks pending cache in cache writer queue.", fileName.getName(), count.getValue(), dataBlockCount); if (ramCache.hasBlocksForFile(fileName.getName())) { + for (ReentrantReadWriteLock lock : locks) { + lock.readLock().unlock(); + } LOG.debug("There are still blocks pending caching for file {}. Will sleep 100ms " + "and try the verification again.", fileName.getName()); Thread.sleep(100); notifyFileCachingCompleted(fileName, totalBlockCount, dataBlockCount, size); } else { - LOG.info( - "We found only {} blocks cached from a total of {} for file {}, " - + "but no blocks pending caching. Maybe cache is full?", - count, dataBlockCount, fileName.getName()); + LOG.info("We found only {} blocks cached from a total of {} for file {}, " + + "but no blocks pending caching. Maybe cache is full or evictions " + + "happened concurrently to cache prefetch.", count, totalBlockCount, fileName); } } } catch (InterruptedException e) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockEvictionOnRegionMovement.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockEvictionOnRegionMovement.java index 2523b16482f7..ca538db1a198 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockEvictionOnRegionMovement.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockEvictionOnRegionMovement.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.testclassification.IOTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster; import org.junit.After; import org.junit.Before; @@ -89,7 +90,7 @@ public void setup() throws Exception { @Test public void testBlockEvictionOnRegionMove() throws Exception { // Write to table and flush - TableName tableRegionMove = writeDataToTable(); + TableName tableRegionMove = writeDataToTable("testBlockEvictionOnRegionMove"); HRegionServer regionServingRS = cluster.getRegionServer(1).getRegions(tableRegionMove).size() == 1 @@ -115,7 +116,7 @@ public void testBlockEvictionOnRegionMove() throws Exception { @Test public void testBlockEvictionOnGracefulStop() throws Exception { // Write to table and flush - TableName tableRegionClose = writeDataToTable(); + TableName tableRegionClose = writeDataToTable("testBlockEvictionOnGracefulStop"); HRegionServer regionServingRS = cluster.getRegionServer(1).getRegions(tableRegionClose).size() == 1 @@ -138,8 +139,8 @@ public void testBlockEvictionOnGracefulStop() throws Exception { assertNotEquals(0, regionServingRS.getBlockCache().get().getBlockCaches()[1].getBlockCount()); } - public TableName writeDataToTable() throws IOException, InterruptedException { - TableName tableName = TableName.valueOf("table1"); + public TableName writeDataToTable(String testName) throws IOException, InterruptedException { + TableName tableName = TableName.valueOf(testName + EnvironmentEdgeManager.currentTime()); byte[] row0 = Bytes.toBytes("row1"); byte[] row1 = Bytes.toBytes("row2"); byte[] family = Bytes.toBytes("family"); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCachePersister.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCachePersister.java index 8db88c56be32..41d05aa8b096 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCachePersister.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCachePersister.java @@ -163,20 +163,21 @@ public void testPrefetchBlockEvictionWhilePrefetchRunning() throws Exception { // Load Blocks in cache Path storeFile = writeStoreFile("Region4", "TestPrefetch4", conf, cacheConf, fs); HFile.createReader(fs, storeFile, cacheConf, true, conf); - while (bucketCache.backingMap.size() == 0) { + boolean evicted = false; + while (!PrefetchExecutor.isCompleted(storeFile)) { + if (bucketCache.backingMap.size() > 0 && !evicted) { + Iterator> it = + bucketCache.backingMap.entrySet().iterator(); + // Evict a data block from cache + Map.Entry entry = it.next(); + while (it.hasNext() && !evicted) { + if (entry.getKey().getBlockType().equals(BlockType.DATA)) { + evicted = bucketCache.evictBlock(it.next().getKey()); + } + } + } Thread.sleep(10); } - Iterator> it = - bucketCache.backingMap.entrySet().iterator(); - // Evict Blocks from cache - bucketCache.evictBlock(it.next().getKey()); - bucketCache.evictBlock(it.next().getKey()); - int retries = 0; - while (!PrefetchExecutor.isCompleted(storeFile) && retries < 5) { - Thread.sleep(500); - retries++; - } - assertTrue(retries < 5); assertFalse(bucketCache.fullyCachedFiles.containsKey(storeFile.getName())); cleanupBucketCache(bucketCache); }