Skip to content

Commit

Permalink
incorporating review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
virajjasani committed Sep 30, 2019
1 parent 86a1644 commit 9fe22e2
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1473,7 +1473,7 @@ public enum OperationStatusCode {

// Regions Recovery based on high storeFileRefCount threshold value
public static final String STORE_FILE_REF_COUNT_THRESHOLD =
"hbase.regions.recovery.store.file.count";
"hbase.regions.recovery.store.file.ref.count";

// default -1 indicates there is no threshold on high storeRefCount
public static final int DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD = -1;
Expand Down
21 changes: 14 additions & 7 deletions hbase-common/src/main/resources/hbase-default.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1912,15 +1912,22 @@ possible configurations would overwhelm and obscure the important.
</description>
</property>
<property>
<name>hbase.regions.recovery.store.file.count</name>
<name>hbase.regions.recovery.store.file.ref.count</name>
<value>-1</value>
<description>
Store files Ref Count threshold value considered
for reopening regions. Any region with store files
ref count > this value would be eligible for
reopening by master. Default value -1 indicates
this feature is turned off. Only positive integer
value should be provided to enable the feature.
Very large ref count on a file indicates
that it is a ref leak on that object. Such files
can not be removed even after it is invalidated
via compaction. Only way to recover in such
scenario is to reopen the region which can
release all resources, like the refcount, leases, etc.
This config represents Store files Ref Count threshold
value considered for reopening regions.
Any region with store files ref count > this value
would be eligible for reopening by master.
Default value -1 indicates this feature is turned off.
Only positive integer value should be provided to enable
this feature.
</description>
</property>
</configuration>
Original file line number Diff line number Diff line change
Expand Up @@ -1471,7 +1471,7 @@ private void startServiceThreads() throws IOException {
getChoreService().scheduleChore(hfileCleaner);

// Regions Reopen based on very high storeFileRefCount is considered enabled
// only if hbase.regions.recovery.store.file.count has value > 0
// only if hbase.regions.recovery.store.file.ref.count has value > 0
final int maxStoreFileRefCount = conf.getInt(
HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,10 @@ private Map<TableName, List<byte[]>> getTableToRegionsByRefCount(
// Here, we take max ref count of all store files and not the cumulative
// count of all store files
final int maxStoreFileRefCount = regionMetrics.getMaxStoreFileRefCount();
// ignore store file ref count threshold <= 0 (default is -1 i.e. disabled)
if (storeFileRefCountThreshold > 0 && maxStoreFileRefCount > storeFileRefCountThreshold) {
prepareTableToReopenRegionsMap(tableToReopenRegionsMap, regionMetrics,

if (maxStoreFileRefCount > storeFileRefCountThreshold) {
final byte[] regionName = regionMetrics.getRegionName();
prepareTableToReopenRegionsMap(tableToReopenRegionsMap, regionName,
maxStoreFileRefCount);
}
}
Expand All @@ -154,9 +155,8 @@ private Map<TableName, List<byte[]>> getTableToRegionsByRefCount(

private void prepareTableToReopenRegionsMap(
final Map<TableName, List<byte[]>> tableToReopenRegionsMap,
final RegionMetrics regionMetrics, final int regionStoreRefCount) {
final byte[] regionName, final int regionStoreRefCount) {

final byte[] regionName = regionMetrics.getRegionName();
final RegionInfo regionInfo = hMaster.getAssignmentManager().getRegionInfo(regionName);
final TableName tableName = regionInfo.getTable();
if (TableName.isMetaTableName(tableName)) {
Expand All @@ -166,9 +166,7 @@ private void prepareTableToReopenRegionsMap(
}
LOG.warn("Region {} for Table {} has high storeFileRefCount {}, considering it for reopen..",
regionInfo.getRegionNameAsString(), tableName, regionStoreRefCount);
if (!tableToReopenRegionsMap.containsKey(tableName)) {
tableToReopenRegionsMap.put(tableName, new ArrayList<>());
}
tableToReopenRegionsMap.putIfAbsent(tableName, new ArrayList<>());
tableToReopenRegionsMap.get(tableName).add(regionName);

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,25 +57,25 @@ public class ReopenTableRegionsProcedure

// Specify specific regions of a table to reopen.
// if specified null, all regions of the table will be reopened.
private final List<byte[]> regionNamesList;
private final List<byte[]> regionNames;

private List<HRegionLocation> regions = Collections.emptyList();

private RetryCounter retryCounter;

public ReopenTableRegionsProcedure() {
regionNamesList = null;
regionNames = null;
}

public ReopenTableRegionsProcedure(TableName tableName) {
this.tableName = tableName;
this.regionNamesList = null;
this.regionNames = null;
}

public ReopenTableRegionsProcedure(final TableName tableName,
final List<byte[]> regionNamesList) {
final List<byte[]> regionNames) {
this.tableName = tableName;
this.regionNamesList = regionNamesList;
this.regionNames = regionNames;
}

@Override
Expand Down Expand Up @@ -109,9 +109,9 @@ protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState
LOG.info("Table {} is disabled, give up reopening its regions", tableName);
return Flow.NO_MORE_STATE;
}
List<HRegionLocation> tableRegionsForReopen = env.getAssignmentManager()
List<HRegionLocation> tableRegions = env.getAssignmentManager()
.getRegionStates().getRegionsOfTableForReopen(tableName);
regions = prepareRegionsForReopen(tableRegionsForReopen);
regions = getRegionLocationsForReopen(tableRegions);
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
return Flow.HAS_MORE_STATE;
case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
Expand Down Expand Up @@ -167,13 +167,13 @@ protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState
}
}

private List<HRegionLocation> prepareRegionsForReopen(
private List<HRegionLocation> getRegionLocationsForReopen(
List<HRegionLocation> tableRegionsForReopen) {

List<HRegionLocation> regionsToReopen = new ArrayList<>();
if (CollectionUtils.isNotEmpty(regionNamesList) &&
if (CollectionUtils.isNotEmpty(regionNames) &&
CollectionUtils.isNotEmpty(tableRegionsForReopen)) {
for (byte[] regionName : regionNamesList) {
for (byte[] regionName : regionNames) {
for (HRegionLocation hRegionLocation : tableRegionsForReopen) {
if (Bytes.equals(regionName, hRegionLocation.getRegion().getRegionName())) {
regionsToReopen.add(hRegionLocation);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ public void testRegionReopensWithStoreRefConfig() throws Exception {
}
Stoppable stoppable = new StoppableImplementation();
Configuration configuration = getCustomConf();
configuration.setInt("hbase.regions.recovery.store.file.count", 300);
configuration.setInt("hbase.regions.recovery.store.file.ref.count", 300);
regionsRecoveryChore = new RegionsRecoveryChore(stoppable, configuration, hMaster);
regionsRecoveryChore.chore();

Expand All @@ -144,6 +144,43 @@ public void testRegionReopensWithStoreRefConfig() throws Exception {
.getRegionInfo(Mockito.any());
}

@Test
public void testRegionReopensWithLessThreshold() throws Exception {
regionNo = 0;
ClusterMetrics clusterMetrics = TestRegionsRecoveryChore.getClusterMetrics(4);
final Map<ServerName, ServerMetrics> serverMetricsMap =
clusterMetrics.getLiveServerMetrics();
LOG.debug("All Region Names with refCount....");
for (ServerMetrics serverMetrics : serverMetricsMap.values()) {
Map<byte[], RegionMetrics> regionMetricsMap = serverMetrics.getRegionMetrics();
for (RegionMetrics regionMetrics : regionMetricsMap.values()) {
LOG.debug("name: " + new String(regionMetrics.getRegionName()) + " refCount: " +
regionMetrics.getStoreRefCount());
}
}
Mockito.when(hMaster.getClusterMetrics()).thenReturn(clusterMetrics);
Mockito.when(hMaster.getAssignmentManager()).thenReturn(assignmentManager);
for (byte[] regionName : REGION_NAME_LIST) {
Mockito.when(assignmentManager.getRegionInfo(regionName))
.thenReturn(TestRegionsRecoveryChore.getRegionInfo(regionName));
}
Stoppable stoppable = new StoppableImplementation();
Configuration configuration = getCustomConf();
configuration.setInt("hbase.regions.recovery.store.file.ref.count", 400);
regionsRecoveryChore = new RegionsRecoveryChore(stoppable, configuration, hMaster);
regionsRecoveryChore.chore();

// Verify that we need to reopen regions of only 1 table
Mockito.verify(hMaster, Mockito.times(1)).reopenRegions(Mockito.any(), Mockito.anyList(),
Mockito.anyLong(), Mockito.anyLong());
Mockito.verify(hMaster, Mockito.times(1)).getClusterMetrics();

// Verify that we need to reopen only 1 region with refCount > 400
Mockito.verify(hMaster, Mockito.times(1)).getAssignmentManager();
Mockito.verify(assignmentManager, Mockito.times(1))
.getRegionInfo(Mockito.any());
}

@Test
public void testRegionReopensWithoutStoreRefConfig() throws Exception {
regionNo = 0;
Expand All @@ -166,7 +203,7 @@ public void testRegionReopensWithoutStoreRefConfig() throws Exception {
}
Stoppable stoppable = new StoppableImplementation();
Configuration configuration = getCustomConf();
configuration.unset("hbase.regions.recovery.store.file.count");
configuration.unset("hbase.regions.recovery.store.file.ref.count");
regionsRecoveryChore = new RegionsRecoveryChore(stoppable, configuration, hMaster);
regionsRecoveryChore.chore();

Expand Down
29 changes: 18 additions & 11 deletions src/main/asciidoc/_chapters/hbase-default.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -2178,17 +2178,24 @@ The percent of region server RPC threads failed to abort RS.
`1200000`


[[hbase.regions.recovery.store.file.count]]
*`hbase.regions.recovery.store.file.count`*::
+
.Description

Store files Ref Count threshold value considered
for reopening regions. Any region with store files
ref count > this value would be eligible for
reopening by master. Default value -1 indicates
this feature is turned off. Only positive integer
value should be provided to enable the feature.
[[hbase.regions.recovery.store.file.ref.count]]
*`hbase.regions.recovery.store.file.ref.count`*::
+
.Description

Very large ref count on a file indicates
that it is a ref leak on that object. Such files
can not be removed even after it is invalidated
via compaction. Only way to recover in such
scenario is to reopen the region which can
release all resources, like the refcount, leases, etc.
This config represents Store files Ref Count threshold
value considered for reopening regions.
Any region with store files ref count > this value
would be eligible for reopening by master.
Default value -1 indicates this feature is turned off.
Only positive integer value should be provided to enable
this feature.

+
.Default
Expand Down

0 comments on commit 9fe22e2

Please sign in to comment.