Skip to content

Commit

Permalink
HBASE-27036 Displays the number of decommissioned region server for s…
Browse files Browse the repository at this point in the history
…tatus command (#4431)

add decommissionedServers to cluster metric

Signed-off-by: Pankaj Kumar <[email protected]>
Signed-off-by: Duo Zhang <[email protected]>
Signed-off-by: Viraj Jasani <[email protected]>
  • Loading branch information
tomscut authored Jun 2, 2022
1 parent 15002fc commit ac8b517
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ public interface ClusterMetrics {
*/
List<ServerName> getDeadServerNames();

/**
 * Lists the region servers that are on the decommissioned list.
 * @return the names of region servers on the decommissioned list
 */
List<ServerName> getDecommissionedServerNames();

/**
* @return the names of region servers on the live list
*/
Expand Down Expand Up @@ -222,5 +227,9 @@ enum Option {
* metrics about monitored tasks
*/
TASKS,
/**
 * metrics about decommissioned region servers, i.e. the names reported by
 * {@link ClusterMetrics#getDecommissionedServerNames()}
 */
DECOMMISSIONED_SERVERS,
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ public static ClusterStatusProtos.ClusterStatus toClusterStatus(ClusterMetrics m
.map(status -> ClusterStatusProtos.TableRegionStatesCount.newBuilder()
.setTableName(ProtobufUtil.toProtoTableName((status.getKey())))
.setRegionStatesCount(ProtobufUtil.toTableRegionStatesCount(status.getValue())).build())
.collect(Collectors.toList()));
.collect(Collectors.toList()))
.addAllDecommissionedServers(metrics.getDecommissionedServerNames().stream()
.map(ProtobufUtil::toServerName).collect(Collectors.toList()));
if (metrics.getMasterName() != null) {
builder.setMaster(ProtobufUtil.toServerName((metrics.getMasterName())));
}
Expand Down Expand Up @@ -111,7 +113,9 @@ public static ClusterMetrics toClusterMetrics(ClusterStatusProtos.ClusterStatus
.collect(Collectors.toMap(e -> ProtobufUtil.toTableName(e.getTableName()),
e -> ProtobufUtil.toTableRegionStatesCount(e.getRegionStatesCount()))))
.setMasterTasks(proto.getMasterTasksList().stream().map(t -> ProtobufUtil.getServerTask(t))
.collect(Collectors.toList()));
.collect(Collectors.toList()))
.setDecommissionedServerNames(proto.getDecommissionedServersList().stream()
.map(ProtobufUtil::toServerName).collect(Collectors.toList()));
if (proto.hasClusterId()) {
builder.setClusterId(ClusterId.convert(proto.getClusterId()).toString());
}
Expand Down Expand Up @@ -167,6 +171,8 @@ public static ClusterMetrics.Option toOption(ClusterStatusProtos.Option option)
return ClusterMetrics.Option.TABLE_TO_REGIONS_COUNT;
case TASKS:
return ClusterMetrics.Option.TASKS;
case DECOMMISSIONED_SERVERS:
return ClusterMetrics.Option.DECOMMISSIONED_SERVERS;
// should not reach here
default:
throw new IllegalArgumentException("Invalid option: " + option);
Expand Down Expand Up @@ -206,6 +212,8 @@ public static ClusterStatusProtos.Option toOption(ClusterMetrics.Option option)
return ClusterStatusProtos.Option.TABLE_TO_REGIONS_COUNT;
case TASKS:
return ClusterStatusProtos.Option.TASKS;
case DECOMMISSIONED_SERVERS:
return ClusterStatusProtos.Option.DECOMMISSIONED_SERVERS;
// should not reach here
default:
throw new IllegalArgumentException("Invalid option: " + option);
Expand Down Expand Up @@ -253,6 +261,7 @@ public static ClusterMetricsBuilder newBuilder() {
private Map<TableName, RegionStatesCount> tableRegionStatesCount = Collections.emptyMap();
@Nullable
private List<ServerTask> masterTasks;
private List<ServerName> decommissionedServerNames = Collections.emptyList();

private ClusterMetricsBuilder() {
}
Expand Down Expand Up @@ -317,6 +326,11 @@ public ClusterMetricsBuilder setMasterTasks(List<ServerTask> masterTasks) {
return this;
}

/**
 * Records the decommissioned region server names that the built {@link ClusterMetrics} reports.
 * The list is stored as given (no copy is made here); a {@code null} list is rejected later,
 * when the metrics object is constructed.
 * @param value the names of the decommissioned region servers
 * @return this builder, so that calls can be chained
 */
public ClusterMetricsBuilder setDecommissionedServerNames(List<ServerName> value) {
  decommissionedServerNames = value;
  return this;
}

public ClusterMetricsBuilder
setTableRegionStatesCount(Map<TableName, RegionStatesCount> tableRegionStatesCount) {
this.tableRegionStatesCount = tableRegionStatesCount;
Expand All @@ -326,13 +340,14 @@ public ClusterMetricsBuilder setMasterTasks(List<ServerTask> masterTasks) {
public ClusterMetrics build() {
return new ClusterMetricsImpl(hbaseVersion, deadServerNames, liveServerMetrics, masterName,
backupMasterNames, regionsInTransition, clusterId, masterCoprocessorNames, balancerOn,
masterInfoPort, serversName, tableRegionStatesCount, masterTasks);
masterInfoPort, serversName, tableRegionStatesCount, masterTasks, decommissionedServerNames);
}

private static class ClusterMetricsImpl implements ClusterMetrics {
@Nullable
private final String hbaseVersion;
private final List<ServerName> deadServerNames;
private final List<ServerName> decommissionedServerNames;
private final Map<ServerName, ServerMetrics> liveServerMetrics;
@Nullable
private final ServerName masterName;
Expand All @@ -353,9 +368,10 @@ private static class ClusterMetricsImpl implements ClusterMetrics {
List<ServerName> backupMasterNames, List<RegionState> regionsInTransition, String clusterId,
List<String> masterCoprocessorNames, Boolean balancerOn, int masterInfoPort,
List<ServerName> serversName, Map<TableName, RegionStatesCount> tableRegionStatesCount,
List<ServerTask> masterTasks) {
List<ServerTask> masterTasks, List<ServerName> decommissionedServerNames) {
this.hbaseVersion = hbaseVersion;
this.deadServerNames = Preconditions.checkNotNull(deadServerNames);
this.decommissionedServerNames = Preconditions.checkNotNull(decommissionedServerNames);
this.liveServerMetrics = Preconditions.checkNotNull(liveServerMetrics);
this.masterName = masterName;
this.backupMasterNames = Preconditions.checkNotNull(backupMasterNames);
Expand All @@ -379,6 +395,11 @@ public List<ServerName> getDeadServerNames() {
return Collections.unmodifiableList(deadServerNames);
}

/**
 * Exposes the decommissioned region server names through an unmodifiable wrapper, so callers
 * cannot add to or remove from the list held by this instance.
 * @return an unmodifiable list of the decommissioned region server names
 */
@Override
public List<ServerName> getDecommissionedServerNames() {
  return Collections.unmodifiableList(this.decommissionedServerNames);
}

@Override
public Map<ServerName, ServerMetrics> getLiveServerMetrics() {
return Collections.unmodifiableMap(liveServerMetrics);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ message ClusterStatus {
repeated ServerName servers_name = 11;
repeated TableRegionStatesCount table_region_states_count = 12;
repeated ServerTask master_tasks = 13;
repeated ServerName decommissioned_servers = 14;
}

enum Option {
Expand All @@ -365,4 +366,5 @@ enum Option {
SERVERS_NAME = 10;
TABLE_TO_REGIONS_COUNT = 11;
TASKS = 12;
DECOMMISSIONED_SERVERS = 13;
}
Original file line number Diff line number Diff line change
Expand Up @@ -2903,6 +2903,12 @@ public ClusterMetrics getClusterMetricsWithoutCoprocessor(EnumSet<Option> option
}
break;
}
case DECOMMISSIONED_SERVERS: {
if (serverManager != null) {
builder.setDecommissionedServerNames(serverManager.getDrainingServersList());
}
break;
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,12 @@ public void testDefaults() throws Exception {
Assert.assertEquals(origin.getMasterInfoPort(), defaults.getMasterInfoPort());
Assert.assertEquals(origin.getServersName().size(), defaults.getServersName().size());
Assert.assertEquals(ADMIN.getRegionServers().size(), defaults.getServersName().size());
// We decommission the first online region server and verify the metrics.
List<ServerName> serverNames = origin.getServersName().subList(0, 1);
ADMIN.decommissionRegionServers(serverNames, false);
Assert.assertEquals(1, ADMIN.getClusterMetrics().getDecommissionedServerNames().size());
Assert.assertEquals(ADMIN.getClusterMetrics().getDecommissionedServerNames().get(0),
serverNames.get(0));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,11 @@ public List<ServerName> getDeadServerNames() {
return null;
}

@Override
public List<ServerName> getDecommissionedServerNames() {
// Always returns null (not an empty list), like the getDeadServerNames() implementation
// directly above. NOTE(review): this looks like a placeholder for callers that never read
// the decommissioned list -- confirm before relying on the return value.
return null;
}

@Override
public Map<ServerName, ServerMetrics> getLiveServerMetrics() {
Map<ServerName, ServerMetrics> liveServerMetrics = new HashMap<>();
Expand Down
1 change: 1 addition & 0 deletions hbase-shell/src/main/ruby/hbase/admin.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1043,6 +1043,7 @@ def status(format, type)
else
puts "1 active master, #{cluster_metrics.getBackupMasterNames.size} backup masters,
#{cluster_metrics.getLiveServerMetrics.size} servers,
#{cluster_metrics.getDecommissionedServerNames.size} decommissioned,
#{cluster_metrics.getDeadServerNames.size} dead,
#{format('%.4f', cluster_metrics.getAverageLoad)} average load"
end
Expand Down

0 comments on commit ac8b517

Please sign in to comment.