From 4b6ce0fcbf68f570831c56ba8dcb4fe8fdbf2b28 Mon Sep 17 00:00:00 2001 From: stack Date: Thu, 12 Dec 2019 13:51:35 -0800 Subject: [PATCH] HBASE-23572 In 'HBCK Report', distinguish between live, dead, and unknown servers Signed-off-by: Sean Busbey --- .../resources/hbase-webapps/master/hbck.jsp | 70 ++++++++++++------- 1 file changed, 46 insertions(+), 24 deletions(-) diff --git a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp index f89aac8bce6f..e9a8658286af 100644 --- a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp +++ b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp @@ -31,13 +31,14 @@ <%@ page import="org.apache.hadoop.hbase.client.RegionInfo" %> <%@ page import="org.apache.hadoop.hbase.master.HbckChore" %> <%@ page import="org.apache.hadoop.hbase.master.HMaster" %> +<%@ page import="org.apache.hadoop.hbase.master.ServerManager" %> <%@ page import="org.apache.hadoop.hbase.ServerName" %> <%@ page import="org.apache.hadoop.hbase.util.Bytes" %> <%@ page import="org.apache.hadoop.hbase.util.Pair" %> <%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor" %> <%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.Report" %> <% - HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER); + final HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER); pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName()); HbckChore hbckChore = master.getHbckChore(); Map>> inconsistentRegions = null; @@ -60,13 +61,13 @@ String iso8601end = startTimestamp == 0? "-1": zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); CatalogJanitor cj = master.getCatalogJanitor(); CatalogJanitor.Report report = cj == null? null: cj.getLastReport(); + final ServerManager serverManager = master.getServerManager(); %>
- <% if (!master.isInitialized()) { %>
- - <% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %>

- There are three cases: 1. Master thought this region opened, but no regionserver reported it (Fix: use assigns + There are three cases: 1. Master thought this region opened, but no regionserver reported it (Fix: use assign command); 2. Master thought this region opened on Server1, but regionserver reported Server2 (Fix: - need to check the server is still exist. If not, schedule SCP for it. If exist, restart Server2 and Server1): - 3. More than one regionservers reported opened this region (Fix: restart the RegionServers). + need to check the server still exists. If not, schedule ServerCrashProcedure for it. If it exists, + restart Server2 and Server1); + 3. More than one regionserver reports this region as open (Fix: restart the RegionServers). Notice: the reported online regionservers may be not right when there are regions in transition. Please check them in regionserver's web UI. @@ -123,15 +123,14 @@ Location in META Reported Online RegionServers - <% for (Map.Entry>> entry : inconsistentRegions.entrySet()) { %> + <% for (Map.Entry>> entry : inconsistentRegions.entrySet()) {%> <%= entry.getKey() %> - <%= entry.getValue().getFirst() %> - <%= entry.getValue().getSecond().stream().map(ServerName::getServerName) - .collect(Collectors.joining(", ")) %> + <%= formatServerName(master, serverManager, entry.getValue().getFirst()) %> + <%= entry.getValue().getSecond().stream().map(s -> formatServerName(master, serverManager, s)). + collect(Collectors.joining(", ")) %> <% } %> -

<%= inconsistentRegions.size() %> region(s) in set.

<% } %> @@ -142,14 +141,6 @@

Orphan Regions on RegionServer

-

- - The below are Regions we've lost account of. To be safe, run bulk load of any data found in these Region orphan directories back into the HBase cluster. - First make sure hbase:meta is in healthy state; run 'hbkc2 fixMeta' to be sure. Once this is done, per Region below, run a bulk - load -- '$ hbase completebulkload REGION_DIR_PATH TABLE_NAME' -- and then delete the desiccated directory content (HFiles are removed upon successful load; all that is left are empty directories - and occasionally a seqid marking file). - -

@@ -159,10 +150,9 @@ <% for (Map.Entry entry : orphanRegionsOnRS.entrySet()) { %> - + <% } %> -

<%= orphanRegionsOnRS.size() %> region(s) in set.

<%= entry.getKey() %><%= entry.getValue() %><%= formatServerName(master, serverManager, entry.getValue()) %>
<% } %> @@ -173,7 +163,14 @@

Orphan Regions on FileSystem

- +

+ + The below are Regions we've lost account of. To be safe, run bulk load of any data found in these Region orphan directories back into the HBase cluster. + First make sure hbase:meta is in a healthy state; run hbck2 fixMeta to be sure. Once this is done, per Region below, run a bulk + load -- $ hbase completebulkload REGION_DIR_PATH TABLE_NAME -- and then delete the desiccated directory content (HFiles are removed upon + successful load; all that is left are empty directories and occasionally a seqid marking file). + +

@@ -301,3 +298,28 @@
+
+<%!
+/**
+ * Returns serverName as an HTML fragment: a live server is a scheme-relative link to its
+ * rs-status page (bold when no info port is known), a dead server is italic, else plain.
+ * @param master source of the RegionServer info port
+ * @param serverManager classifies the server as online or dead
+ * @param serverName server to render
+ */
+private static String formatServerName(HMaster master,
+    ServerManager serverManager, ServerName serverName) {
+  String sn = serverName.toString();
+  if (serverManager.isServerOnline(serverName)) {
+    int infoPort = master.getRegionServerInfoPort(serverName);
+    if (infoPort > 0) {
+      return "<a href=\"//" + serverName.getHostname() + ":" + infoPort + "/rs-status\">"
+        + sn + "</a>";
+    }
+    return "<b>" + sn + "</b>";
+  } else if (serverManager.isServerDead(serverName)) {
+    return "<i>" + sn + "</i>";
+  }
+  return sn;
+}
+%>
Region Encoded Name