diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java index bcb85d24ecdc..e1375c36babe 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncNonMetaRegionLocator.java @@ -299,37 +299,80 @@ private RegionLocations addToCache(TableCache tableCache, RegionLocations locs) LOG.trace("Try adding {} to cache", locs); byte[] startKey = locs.getRegionLocation().getRegion().getStartKey(); for (;;) { - RegionLocations oldLocs = tableCache.cache.putIfAbsent(startKey, locs); - if (oldLocs == null) { - return locs; - } - // check whether the regions are the same, this usually happens when table is split/merged, or - // deleted and recreated again. - RegionInfo region = locs.getRegionLocation().getRegion(); - RegionInfo oldRegion = oldLocs.getRegionLocation().getRegion(); - if (region.getEncodedName().equals(oldRegion.getEncodedName())) { - RegionLocations mergedLocs = oldLocs.mergeLocations(locs); - if (isEqual(mergedLocs, oldLocs)) { - // the merged one is the same with the old one, give up - LOG.trace("Will not add {} to cache because the old value {} " - + " is newer than us or has the same server name." - + " Maybe it is updated before we replace it", locs, oldLocs); - return oldLocs; - } - if (tableCache.cache.replace(startKey, oldLocs, mergedLocs)) { - return mergedLocs; - } - } else { - // the region is different, here we trust the one we fetched. 
This maybe wrong but finally - // the upper layer can detect this and trigger removal of the wrong locations - if (LOG.isDebugEnabled()) { - LOG.debug("The newnly fetch region {} is different from the old one {} for row '{}'," - + " try replaing the old one...", region, oldRegion, Bytes.toStringBinary(startKey)); - } - if (tableCache.cache.replace(startKey, oldLocs, locs)) { + // synchronize here because we may need to make multiple modifications in + // cleanOverlappingRegions, and we want them to be atomic + synchronized (tableCache) { + RegionLocations oldLocs = tableCache.cache.putIfAbsent(startKey, locs); + if (oldLocs == null) { + cleanOverlappingRegions(locs, tableCache); return locs; } + // check whether the regions are the same, this usually happens when table is split/merged, + // or + // deleted and recreated again. + RegionInfo region = locs.getRegionLocation().getRegion(); + RegionInfo oldRegion = oldLocs.getRegionLocation().getRegion(); + if (region.getEncodedName().equals(oldRegion.getEncodedName())) { + RegionLocations mergedLocs = oldLocs.mergeLocations(locs); + if (isEqual(mergedLocs, oldLocs)) { + // the merged one is the same with the old one, give up + LOG.trace("Will not add {} to cache because the old value {} " + + " is newer than us or has the same server name." + + " Maybe it is updated before we replace it", locs, oldLocs); + return oldLocs; + } + if (tableCache.cache.replace(startKey, oldLocs, mergedLocs)) { + cleanOverlappingRegions(locs, tableCache); + return mergedLocs; + } + } else { + // the region is different, here we trust the one we fetched. 
This may be wrong, but eventually
+          // the upper layer can detect this and trigger removal of the wrong locations
+          if (LOG.isDebugEnabled()) {
+            LOG.debug(
+              "The newly fetched region {} is different from the old one {} for row '{}',"
+                + " try replacing the old one...",
+              region, oldRegion, Bytes.toStringBinary(startKey));
+          }
+          if (tableCache.cache.replace(startKey, oldLocs, locs)) {
+            cleanOverlappingRegions(locs, tableCache);
+            return locs;
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * When caching a location, the region may have been the result of a merge. Check to see if the
+   * region's boundaries overlap any other cached locations. Those would have been merge parents
+   * which no longer exist. We need to proactively clear them out to avoid a case where a merged
+   * region which receives no requests never gets cleared. This causes requests to other merged
+   * regions after it to see the wrong cached location. See HBASE-27650
+   * @param locations the new location that was just cached
+   * @param tableCache the tableCache containing that and other locations for this table.
+   */
+  private void cleanOverlappingRegions(RegionLocations locations, TableCache tableCache) {
+    RegionInfo region = locations.getRegionLocation().getRegion();
+
+    boolean isLast = Bytes.equals(region.getEndKey(), HConstants.EMPTY_END_ROW);
+
+    while (true) {
+      Map.Entry<byte[], RegionLocations> overlap =
+        isLast ? 
tableCache.cache.lastEntry() : tableCache.cache.floorEntry(region.getEndKey()); + if ( + overlap == null || overlap.getValue() == locations + || Bytes.equals(overlap.getKey(), region.getStartKey()) + ) { + break; } + + if (LOG.isTraceEnabled()) { + LOG.trace("Removing cached location {} because it overlaps with new location {}", + overlap.getValue(), locations); + } + + tableCache.cache.remove(overlap.getKey()); } } @@ -442,6 +485,10 @@ private RegionLocations locateRowInCache(TableCache tableCache, TableName tableN recordCacheHit(); return locs; } else { + if (LOG.isTraceEnabled()) { + LOG.trace("Requested row {} comes after region end key of {} for cached location {}", + Bytes.toStringBinary(row), Bytes.toStringBinary(endKey), locs); + } recordCacheMiss(); return null; } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java index 75dc7aea27ce..288b802b6a1c 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java @@ -553,6 +553,10 @@ public void incrMetaCacheMiss() { metaCacheMisses.inc(); } + public long getMetaCacheMisses() { + return metaCacheMisses.getCount(); + } + /** Increment the number of meta cache drops requested for entire RegionServer. 
*/ public void incrMetaCacheNumClearServer() { metaCacheNumClearServer.inc(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java index be07c8aaef01..402995fa28ab 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java @@ -26,6 +26,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.List; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.CallQueueTooBigException; import org.apache.hadoop.hbase.HBaseClassTestRule; @@ -46,10 +47,12 @@ import org.apache.hadoop.hbase.util.Bytes; import org.junit.After; import org.junit.AfterClass; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.ClassRule; import org.junit.Test; import org.junit.experimental.categories.Category; +import org.junit.function.ThrowingRunnable; import org.apache.hbase.thirdparty.com.google.common.io.Closeables; import org.apache.hbase.thirdparty.com.google.protobuf.RpcController; @@ -112,6 +115,77 @@ private void setupConnection(int retry) throws IOException { metrics = asyncConn.getConnectionMetrics().get(); } + @Test + public void testMergeEmptyWithMetaCache() throws Throwable { + TableName tableName = TableName.valueOf("MergeEmpty"); + byte[] family = Bytes.toBytes("CF"); + TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build(); + TEST_UTIL.getAdmin().createTable(td, new byte[][] { Bytes.toBytes(2), Bytes.toBytes(5) }); + TEST_UTIL.waitTableAvailable(tableName); + TEST_UTIL.waitUntilNoRegionsInTransition(); + RegionInfo regionA = null; + RegionInfo regionB = null; + RegionInfo regionC = null; + for (RegionInfo region : TEST_UTIL.getAdmin().getRegions(tableName)) { + if 
(region.getStartKey().length == 0) { + regionA = region; + } else if (Bytes.equals(region.getStartKey(), Bytes.toBytes(2))) { + regionB = region; + } else if (Bytes.equals(region.getStartKey(), Bytes.toBytes(5))) { + regionC = region; + } + } + + assertNotNull(regionA); + assertNotNull(regionB); + assertNotNull(regionC); + + TEST_UTIL.getConfiguration().setBoolean(MetricsConnection.CLIENT_SIDE_METRICS_ENABLED_KEY, + true); + try (AsyncConnection asyncConn = + ConnectionFactory.createAsyncConnection(TEST_UTIL.getConfiguration()).get()) { + AsyncConnectionImpl asyncConnImpl = (AsyncConnectionImpl) asyncConn; + + MetricsConnection asyncMetrics = asyncConnImpl.getConnectionMetrics().get(); + + // warm meta cache + asyncConn.getRegionLocator(tableName).getAllRegionLocations().get(); + + Assert.assertEquals(3, TEST_UTIL.getAdmin().getRegions(tableName).size()); + + // Merge the 3 regions into one + TEST_UTIL.getAdmin().mergeRegionsAsync( + new byte[][] { regionA.getRegionName(), regionB.getRegionName(), regionC.getRegionName() }, + false).get(30, TimeUnit.SECONDS); + + Assert.assertEquals(1, TEST_UTIL.getAdmin().getRegions(tableName).size()); + + AsyncTable asyncTable = asyncConn.getTable(tableName); + + // This request should cause us to cache the newly merged region. + // As part of caching that region, it should clear out any cached merge parent regions which + // are overlapped by the new region. That way, subsequent calls below won't fall into the + // bug in HBASE-27650. Otherwise, a request for row 6 would always get stuck on cached + // regionB and we'd continue to see cache misses below. 
+ assertTrue( + executeAndGetNewMisses(() -> asyncTable.get(new Get(Bytes.toBytes(6))).get(), asyncMetrics) + > 0); + + // We verify no new cache misses here due to above, which proves we've fixed up the cache + assertEquals(0, executeAndGetNewMisses(() -> asyncTable.get(new Get(Bytes.toBytes(6))).get(), + asyncMetrics)); + } + } + + private long executeAndGetNewMisses(ThrowingRunnable runnable, MetricsConnection metrics) + throws Throwable { + long lastVal = metrics.getMetaCacheMisses(); + runnable.run(); + long curVal = metrics.getMetaCacheMisses(); + return curVal - lastVal; + } + @Test public void testPreserveMetaCacheOnException() throws Exception { ((FakeRSRpcServices) badRS.getRSRpcServices())