diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index 03ff914d7bf0..e3b0318f1d95 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -1473,7 +1473,7 @@ public enum OperationStatusCode {
// Regions Recovery based on high storeFileRefCount threshold value
public static final String STORE_FILE_REF_COUNT_THRESHOLD =
- "hbase.regions.recovery.store.file.count";
+ "hbase.regions.recovery.store.file.ref.count";
// default -1 indicates there is no threshold on high storeRefCount
public static final int DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD = -1;
diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml
index 77c06d77aa6a..f738f0500b6d 100644
--- a/hbase-common/src/main/resources/hbase-default.xml
+++ b/hbase-common/src/main/resources/hbase-default.xml
@@ -1912,15 +1912,22 @@ possible configurations would overwhelm and obscure the important.
- hbase.regions.recovery.store.file.count
+ hbase.regions.recovery.store.file.ref.count
-1
- Store files Ref Count threshold value considered
- for reopening regions. Any region with store files
- ref count > this value would be eligible for
- reopening by master. Default value -1 indicates
- this feature is turned off. Only positive integer
- value should be provided to enable the feature.
+ A very large ref count on a store file indicates
+ a reference leak on that file. Such files
+ cannot be removed even after they are invalidated
+ via compaction. The only way to recover in such a
+ scenario is to reopen the region, which can
+ release all resources, like the refcount, leases, etc.
+ This config represents the store file ref count threshold
+ value considered for reopening regions.
+ Any region with a store file ref count > this value
+ is eligible for reopening by the master.
+ The default value -1 indicates this feature is turned off.
+ Only a positive integer value should be provided to enable
+ this feature.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 36fd36bdb453..fa95e1bd8320 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -1471,7 +1471,7 @@ private void startServiceThreads() throws IOException {
getChoreService().scheduleChore(hfileCleaner);
// Regions Reopen based on very high storeFileRefCount is considered enabled
- // only if hbase.regions.recovery.store.file.count has value > 0
+ // only if hbase.regions.recovery.store.file.ref.count has value > 0
final int maxStoreFileRefCount = conf.getInt(
HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java
index dabc371b7859..7502eeb94621 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java
@@ -141,9 +141,10 @@ private Map> getTableToRegionsByRefCount(
// Here, we take max ref count of all store files and not the cumulative
// count of all store files
final int maxStoreFileRefCount = regionMetrics.getMaxStoreFileRefCount();
- // ignore store file ref count threshold <= 0 (default is -1 i.e. disabled)
- if (storeFileRefCountThreshold > 0 && maxStoreFileRefCount > storeFileRefCountThreshold) {
- prepareTableToReopenRegionsMap(tableToReopenRegionsMap, regionMetrics,
+
+ if (maxStoreFileRefCount > storeFileRefCountThreshold) {
+ final byte[] regionName = regionMetrics.getRegionName();
+ prepareTableToReopenRegionsMap(tableToReopenRegionsMap, regionName,
maxStoreFileRefCount);
}
}
@@ -154,9 +155,8 @@ private Map> getTableToRegionsByRefCount(
private void prepareTableToReopenRegionsMap(
final Map> tableToReopenRegionsMap,
- final RegionMetrics regionMetrics, final int regionStoreRefCount) {
+ final byte[] regionName, final int regionStoreRefCount) {
- final byte[] regionName = regionMetrics.getRegionName();
final RegionInfo regionInfo = hMaster.getAssignmentManager().getRegionInfo(regionName);
final TableName tableName = regionInfo.getTable();
if (TableName.isMetaTableName(tableName)) {
@@ -166,9 +166,7 @@ private void prepareTableToReopenRegionsMap(
}
LOG.warn("Region {} for Table {} has high storeFileRefCount {}, considering it for reopen..",
regionInfo.getRegionNameAsString(), tableName, regionStoreRefCount);
- if (!tableToReopenRegionsMap.containsKey(tableName)) {
- tableToReopenRegionsMap.put(tableName, new ArrayList<>());
- }
+ tableToReopenRegionsMap.putIfAbsent(tableName, new ArrayList<>());
tableToReopenRegionsMap.get(tableName).add(regionName);
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
index e6487b96f4ca..7bf834c62c8c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java
@@ -57,25 +57,25 @@ public class ReopenTableRegionsProcedure
// Specify specific regions of a table to reopen.
// if specified null, all regions of the table will be reopened.
- private final List regionNamesList;
+ private final List regionNames;
private List regions = Collections.emptyList();
private RetryCounter retryCounter;
public ReopenTableRegionsProcedure() {
- regionNamesList = null;
+ regionNames = null;
}
public ReopenTableRegionsProcedure(TableName tableName) {
this.tableName = tableName;
- this.regionNamesList = null;
+ this.regionNames = null;
}
public ReopenTableRegionsProcedure(final TableName tableName,
- final List regionNamesList) {
+ final List regionNames) {
this.tableName = tableName;
- this.regionNamesList = regionNamesList;
+ this.regionNames = regionNames;
}
@Override
@@ -109,9 +109,9 @@ protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState
LOG.info("Table {} is disabled, give up reopening its regions", tableName);
return Flow.NO_MORE_STATE;
}
- List tableRegionsForReopen = env.getAssignmentManager()
+ List tableRegions = env.getAssignmentManager()
.getRegionStates().getRegionsOfTableForReopen(tableName);
- regions = prepareRegionsForReopen(tableRegionsForReopen);
+ regions = getRegionLocationsForReopen(tableRegions);
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
return Flow.HAS_MORE_STATE;
case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
@@ -167,13 +167,13 @@ protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState
}
}
- private List prepareRegionsForReopen(
+ private List getRegionLocationsForReopen(
List tableRegionsForReopen) {
List regionsToReopen = new ArrayList<>();
- if (CollectionUtils.isNotEmpty(regionNamesList) &&
+ if (CollectionUtils.isNotEmpty(regionNames) &&
CollectionUtils.isNotEmpty(tableRegionsForReopen)) {
- for (byte[] regionName : regionNamesList) {
+ for (byte[] regionName : regionNames) {
for (HRegionLocation hRegionLocation : tableRegionsForReopen) {
if (Bytes.equals(regionName, hRegionLocation.getRegion().getRegionName())) {
regionsToReopen.add(hRegionLocation);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java
index 49346f19bb90..69deac95c494 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java
@@ -129,7 +129,7 @@ public void testRegionReopensWithStoreRefConfig() throws Exception {
}
Stoppable stoppable = new StoppableImplementation();
Configuration configuration = getCustomConf();
- configuration.setInt("hbase.regions.recovery.store.file.count", 300);
+ configuration.setInt("hbase.regions.recovery.store.file.ref.count", 300);
regionsRecoveryChore = new RegionsRecoveryChore(stoppable, configuration, hMaster);
regionsRecoveryChore.chore();
@@ -144,6 +144,43 @@ public void testRegionReopensWithStoreRefConfig() throws Exception {
.getRegionInfo(Mockito.any());
}
+  /** With the threshold raised to 400, only one region of one table should be reopened. */
+  @Test
+  public void testRegionReopensWithLessThreshold() throws Exception {
+    regionNo = 0;
+    ClusterMetrics clusterMetrics = TestRegionsRecoveryChore.getClusterMetrics(4);
+    final Map<ServerName, ServerMetrics> serverMetricsMap = clusterMetrics.getLiveServerMetrics();
+    LOG.debug("All Region Names with refCount....");
+    for (ServerMetrics serverMetrics : serverMetricsMap.values()) {
+      Map<byte[], RegionMetrics> regionMetricsMap = serverMetrics.getRegionMetrics();
+      for (RegionMetrics regionMetrics : regionMetricsMap.values()) {
+        LOG.debug("name: {} refCount: {}",
+          new String(regionMetrics.getRegionName(), java.nio.charset.StandardCharsets.UTF_8),
+          regionMetrics.getStoreRefCount());
+      }
+    }
+    Mockito.when(hMaster.getClusterMetrics()).thenReturn(clusterMetrics);
+    Mockito.when(hMaster.getAssignmentManager()).thenReturn(assignmentManager);
+    for (byte[] regionName : REGION_NAME_LIST) {
+      Mockito.when(assignmentManager.getRegionInfo(regionName))
+        .thenReturn(TestRegionsRecoveryChore.getRegionInfo(regionName));
+    }
+    Stoppable stoppable = new StoppableImplementation();
+    Configuration configuration = getCustomConf();
+    configuration.setInt("hbase.regions.recovery.store.file.ref.count", 400);
+    regionsRecoveryChore = new RegionsRecoveryChore(stoppable, configuration, hMaster);
+    regionsRecoveryChore.chore();
+
+    // Verify that we need to reopen regions of only 1 table
+    Mockito.verify(hMaster, Mockito.times(1)).reopenRegions(Mockito.any(), Mockito.anyList(),
+      Mockito.anyLong(), Mockito.anyLong());
+    Mockito.verify(hMaster, Mockito.times(1)).getClusterMetrics();
+
+    // Verify that we need to reopen only 1 region with refCount > 400
+    Mockito.verify(hMaster, Mockito.times(1)).getAssignmentManager();
+    Mockito.verify(assignmentManager, Mockito.times(1)).getRegionInfo(Mockito.any());
+  }
+
@Test
public void testRegionReopensWithoutStoreRefConfig() throws Exception {
regionNo = 0;
@@ -166,7 +203,7 @@ public void testRegionReopensWithoutStoreRefConfig() throws Exception {
}
Stoppable stoppable = new StoppableImplementation();
Configuration configuration = getCustomConf();
- configuration.unset("hbase.regions.recovery.store.file.count");
+ configuration.unset("hbase.regions.recovery.store.file.ref.count");
regionsRecoveryChore = new RegionsRecoveryChore(stoppable, configuration, hMaster);
regionsRecoveryChore.chore();
diff --git a/src/main/asciidoc/_chapters/hbase-default.adoc b/src/main/asciidoc/_chapters/hbase-default.adoc
index 0a37268fc1f5..59a3a073c362 100644
--- a/src/main/asciidoc/_chapters/hbase-default.adoc
+++ b/src/main/asciidoc/_chapters/hbase-default.adoc
@@ -2178,17 +2178,24 @@ The percent of region server RPC threads failed to abort RS.
`1200000`
-[[hbase.regions.recovery.store.file.count]]
-*`hbase.regions.recovery.store.file.count`*::
-+
-.Description
-
- Store files Ref Count threshold value considered
- for reopening regions. Any region with store files
- ref count > this value would be eligible for
- reopening by master. Default value -1 indicates
- this feature is turned off. Only positive integer
- value should be provided to enable the feature.
+[[hbase.regions.recovery.store.file.ref.count]]
+*`hbase.regions.recovery.store.file.ref.count`*::
++
+.Description
+
+ A very large ref count on a store file indicates
+ a reference leak on that file. Such files
+ cannot be removed even after they are invalidated
+ via compaction. The only way to recover in such a
+ scenario is to reopen the region, which can
+ release all resources, like the refcount, leases, etc.
+ This config represents the store file ref count threshold
+ value considered for reopening regions.
+ Any region with a store file ref count > this value
+ is eligible for reopening by the master.
+ The default value -1 indicates this feature is turned off.
+ Only a positive integer value should be provided to enable
+ this feature.
+
.Default