Skip to content

Commit

Permalink
HBASE-22460 : Reopen regions with very high Store Ref Counts
Browse files Browse the repository at this point in the history
  • Loading branch information
virajjasani committed Sep 25, 2019
1 parent 52f5a85 commit 86a1644
Show file tree
Hide file tree
Showing 20 changed files with 999 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -154,4 +154,10 @@ default String getNameAsString() {
* @return the reference count for the stores of this region
*/
int getStoreRefCount();

/**
* @return the max reference count for any store file among all stores files
* of this region
*/
int getMaxStoreFileRefCount();
}
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ public static RegionMetrics toRegionMetrics(ClusterStatusProtos.RegionLoad regio
.setStoreCount(regionLoadPB.getStores())
.setStoreFileCount(regionLoadPB.getStorefiles())
.setStoreRefCount(regionLoadPB.getStoreRefCount())
.setMaxStoreFileRefCount(regionLoadPB.getMaxStoreFileRefCount())
.setStoreFileSize(new Size(regionLoadPB.getStorefileSizeMB(), Size.Unit.MEGABYTE))
.setStoreSequenceIds(regionLoadPB.getStoreCompleteSequenceIdList().stream()
.collect(Collectors.toMap(
Expand Down Expand Up @@ -113,6 +114,7 @@ public static ClusterStatusProtos.RegionLoad toRegionLoad(RegionMetrics regionMe
.setStores(regionMetrics.getStoreCount())
.setStorefiles(regionMetrics.getStoreFileCount())
.setStoreRefCount(regionMetrics.getStoreRefCount())
.setMaxStoreFileRefCount(regionMetrics.getMaxStoreFileRefCount())
.setStorefileSizeMB((int) regionMetrics.getStoreFileSize().get(Size.Unit.MEGABYTE))
.addAllStoreCompleteSequenceId(toStoreSequenceId(regionMetrics.getStoreSequenceId()))
.setStoreUncompressedSizeMB(
Expand All @@ -128,6 +130,7 @@ public static RegionMetricsBuilder newBuilder(byte[] name) {
private int storeCount;
private int storeFileCount;
private int storeRefCount;
private int maxStoreFileRefCount;
private long compactingCellCount;
private long compactedCellCount;
private Size storeFileSize = Size.ZERO;
Expand Down Expand Up @@ -161,6 +164,10 @@ public RegionMetricsBuilder setStoreRefCount(int value) {
this.storeRefCount = value;
return this;
}
public RegionMetricsBuilder setMaxStoreFileRefCount(int value) {
this.maxStoreFileRefCount = value;
return this;
}
public RegionMetricsBuilder setCompactingCellCount(long value) {
this.compactingCellCount = value;
return this;
Expand Down Expand Up @@ -235,6 +242,7 @@ public RegionMetrics build() {
storeCount,
storeFileCount,
storeRefCount,
maxStoreFileRefCount,
compactingCellCount,
compactedCellCount,
storeFileSize,
Expand All @@ -259,6 +267,7 @@ private static class RegionMetricsImpl implements RegionMetrics {
private final int storeCount;
private final int storeFileCount;
private final int storeRefCount;
private final int maxStoreFileRefCount;
private final long compactingCellCount;
private final long compactedCellCount;
private final Size storeFileSize;
Expand All @@ -280,6 +289,7 @@ private static class RegionMetricsImpl implements RegionMetrics {
int storeCount,
int storeFileCount,
int storeRefCount,
int maxStoreFileRefCount,
final long compactingCellCount,
long compactedCellCount,
Size storeFileSize,
Expand All @@ -301,6 +311,7 @@ private static class RegionMetricsImpl implements RegionMetrics {
this.storeCount = storeCount;
this.storeFileCount = storeFileCount;
this.storeRefCount = storeRefCount;
this.maxStoreFileRefCount = maxStoreFileRefCount;
this.compactingCellCount = compactingCellCount;
this.compactedCellCount = compactedCellCount;
this.storeFileSize = Preconditions.checkNotNull(storeFileSize);
Expand Down Expand Up @@ -340,6 +351,11 @@ public int getStoreRefCount() {
return storeRefCount;
}

@Override
public int getMaxStoreFileRefCount() {
return maxStoreFileRefCount;
}

@Override
public Size getStoreFileSize() {
return storeFileSize;
Expand Down Expand Up @@ -433,6 +449,8 @@ public String toString() {
this.getStoreFileCount());
Strings.appendKeyValue(sb, "storeRefCount",
this.getStoreRefCount());
Strings.appendKeyValue(sb, "maxStoreFileRefCount",
this.getMaxStoreFileRefCount());
Strings.appendKeyValue(sb, "uncompressedStoreFileSize",
this.getUncompressedStoreFileSize());
Strings.appendKeyValue(sb, "lastMajorCompactionTimestamp",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,8 @@ public long getLastReportTimestamp() {
public String toString() {
int storeCount = 0;
int storeFileCount = 0;
int storeRefCount = 0;
int maxStoreFileRefCount = 0;
long uncompressedStoreFileSizeMB = 0;
long storeFileSizeMB = 0;
long memStoreSizeMB = 0;
Expand All @@ -358,6 +360,9 @@ public String toString() {
for (RegionMetrics r : getRegionMetrics().values()) {
storeCount += r.getStoreCount();
storeFileCount += r.getStoreFileCount();
storeRefCount += r.getStoreRefCount();
int currentMaxStoreFileRefCount = r.getMaxStoreFileRefCount();
maxStoreFileRefCount = Math.max(maxStoreFileRefCount, currentMaxStoreFileRefCount);
uncompressedStoreFileSizeMB += r.getUncompressedStoreFileSize().get(Size.Unit.MEGABYTE);
storeFileSizeMB += r.getStoreFileSize().get(Size.Unit.MEGABYTE);
memStoreSizeMB += r.getMemStoreSize().get(Size.Unit.MEGABYTE);
Expand All @@ -379,6 +384,8 @@ public String toString() {
Strings.appendKeyValue(sb, "maxHeapMB", getMaxHeapSize());
Strings.appendKeyValue(sb, "numberOfStores", storeCount);
Strings.appendKeyValue(sb, "numberOfStorefiles", storeFileCount);
Strings.appendKeyValue(sb, "storeRefCount", storeRefCount);
Strings.appendKeyValue(sb, "maxStoreFileRefCount", maxStoreFileRefCount);
Strings.appendKeyValue(sb, "storefileUncompressedSizeMB", uncompressedStoreFileSizeMB);
Strings.appendKeyValue(sb, "storefileSizeMB", storeFileSizeMB);
if (uncompressedStoreFileSizeMB != 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1471,6 +1471,13 @@ public enum OperationStatusCode {
// User defined Default TTL config key
public static final String DEFAULT_SNAPSHOT_TTL_CONFIG_KEY = "hbase.master.snapshot.ttl";

// Regions Recovery based on high storeFileRefCount threshold value
public static final String STORE_FILE_REF_COUNT_THRESHOLD =
"hbase.regions.recovery.store.file.count";

// default -1 indicates there is no threshold on high storeRefCount
public static final int DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD = -1;

/**
* Configurations for master executor services.
*/
Expand Down
22 changes: 22 additions & 0 deletions hbase-common/src/main/resources/hbase-default.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1901,4 +1901,26 @@ possible configurations would overwhelm and obscure the important.
automatically deleted until it is manually deleted
</description>
</property>
<property>
<name>hbase.master.regions.recovery.check.interval</name>
<value>1200000</value>
<description>
Regions Recovery Chore interval in milliseconds.
This chore keeps running at this interval to
find all regions with configurable max store file ref count
and reopens them.
</description>
</property>
<property>
<name>hbase.regions.recovery.store.file.count</name>
<value>-1</value>
<description>
Store files Ref Count threshold value considered
for reopening regions. Any region with store files
ref count > this value would be eligible for
reopening by master. Default value -1 indicates
this feature is turned off. Only positive integer
value should be provided to enable the feature.
</description>
</property>
</configuration>
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ public interface MetricsRegionServerSource extends BaseSource, JvmPauseMonitorSo
String STOREFILE_COUNT_DESC = "Number of Store Files";
String STORE_REF_COUNT = "storeRefCount";
String STORE_REF_COUNT_DESC = "Store reference count";
String MAX_STORE_FILE_REF_COUNT = "maxStoreFileRefCount";
String MEMSTORE_SIZE = "memStoreSize";
String MEMSTORE_SIZE_DESC = "Size of the memstore";
String STOREFILE_SIZE = "storeFileSize";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,4 +164,10 @@ public interface MetricsRegionWrapper {
* @return the number of references active on the store
*/
long getStoreRefCount();

/**
* @return the max number of references active on any store file among
* all store files that belong to this region
*/
long getMaxStoreFileRefCount();
}
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,10 @@ void snapshot(MetricsRecordBuilder mrb, boolean ignored) {
regionNamePrefix + MetricsRegionServerSource.STORE_REF_COUNT,
MetricsRegionServerSource.STORE_REF_COUNT),
this.regionWrapper.getStoreRefCount());
mrb.addGauge(Interns.info(
regionNamePrefix + MetricsRegionServerSource.MAX_STORE_FILE_REF_COUNT,
MetricsRegionServerSource.MAX_STORE_FILE_REF_COUNT),
this.regionWrapper.getMaxStoreFileRefCount());
mrb.addGauge(Interns.info(
regionNamePrefix + MetricsRegionServerSource.MEMSTORE_SIZE,
MetricsRegionServerSource.MEMSTORE_SIZE_DESC),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ public long getStoreRefCount() {
return 0;
}

@Override
public long getMaxStoreFileRefCount() {
return 0;
}

@Override
public long getMemStoreSize() {
return 0;
Expand Down
6 changes: 6 additions & 0 deletions hbase-protocol-shaded/src/main/protobuf/ClusterStatus.proto
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,12 @@ message RegionLoad {

/** the number of references active on the store */
optional int32 store_ref_count = 21 [default = 0];

/**
* The max number of references active on single store file among all store files
* that belong to given region
*/
optional int32 max_store_file_ref_count = 22 [default = 0];
}

/* Server-level protobufs */
Expand Down
6 changes: 6 additions & 0 deletions hbase-protocol/src/main/protobuf/ClusterStatus.proto
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,12 @@ message RegionLoad {

/** the number of references active on the store */
optional int32 store_ref_count = 21 [default = 0];

/**
* The max number of references active on single store file among all store files
* that belong to given region
*/
optional int32 max_store_file_ref_count = 22 [default = 0];
}

/* Server-level protobufs */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@
import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;
import org.apache.hadoop.hbase.master.procedure.RecoverMetaProcedure;
import org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.master.procedure.TruncateTableProcedure;
import org.apache.hadoop.hbase.master.replication.AbstractPeerProcedure;
Expand Down Expand Up @@ -420,6 +421,7 @@ public void run() {
// monitor for distributed procedures
private MasterProcedureManagerHost mpmHost;

private RegionsRecoveryChore regionsRecoveryChore = null;
// it is assigned after 'initialized' guard set to true, so should be volatile
private volatile MasterQuotaManager quotaManager;
private SpaceQuotaSnapshotNotifier spaceQuotaSnapshotNotifier;
Expand Down Expand Up @@ -1468,6 +1470,20 @@ private void startServiceThreads() throws IOException {
getMasterFileSystem().getFileSystem(), archiveDir, cleanerPool, params);
getChoreService().scheduleChore(hfileCleaner);

// Regions Reopen based on very high storeFileRefCount is considered enabled
// only if hbase.regions.recovery.store.file.count has value > 0
final int maxStoreFileRefCount = conf.getInt(
HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
if (maxStoreFileRefCount > 0) {
this.regionsRecoveryChore = new RegionsRecoveryChore(this, conf, this);
getChoreService().scheduleChore(this.regionsRecoveryChore);
} else {
LOG.info("Reopening regions with very high storeFileRefCount is disabled. " +
"Provide threshold value > 0 for {} to enable it.",
HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
}

replicationBarrierCleaner = new ReplicationBarrierCleaner(conf, this, getConnection(),
replicationPeerManager);
getChoreService().scheduleChore(replicationBarrierCleaner);
Expand Down Expand Up @@ -1635,6 +1651,7 @@ private void stopChores() {
choreService.cancelChore(this.replicationBarrierCleaner);
choreService.cancelChore(this.snapshotCleanerChore);
choreService.cancelChore(this.hbckChore);
choreService.cancelChore(this.regionsRecoveryChore);
}
}

Expand Down Expand Up @@ -3718,6 +3735,38 @@ public void remoteProcedureFailed(long procId, RemoteProcedureException error) {
}
}

/**
* Reopen regions provided in the argument
*
* @param tableName The current table name
* @param regionNames The region names of the regions to reopen
* @param nonceGroup Identifier for the source of the request, a client or process
* @param nonce A unique identifier for this operation from the client or process identified by
* <code>nonceGroup</code> (the source must ensure each operation gets a unique id).
* @return procedure Id
* @throws IOException if reopening region fails while running procedure
*/
long reopenRegions(final TableName tableName, final List<byte[]> regionNames,
final long nonceGroup, final long nonce)
throws IOException {

return MasterProcedureUtil
.submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {

@Override
protected void run() throws IOException {
submitProcedure(new ReopenTableRegionsProcedure(tableName, regionNames));
}

@Override
protected String getDescription() {
return "ReopenTableRegionsProcedure";
}

});

}

@Override
public ReplicationPeerManager getReplicationPeerManager() {
return replicationPeerManager;
Expand Down
Loading

0 comments on commit 86a1644

Please sign in to comment.