diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/tool/CanaryStatusTmpl.jamon b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/tool/CanaryStatusTmpl.jamon new file mode 100644 index 000000000000..e2d29eef2803 --- /dev/null +++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/tool/CanaryStatusTmpl.jamon @@ -0,0 +1,156 @@ + +<%doc> + +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +<%args> +RegionStdOutSink sink; + +<%import> +java.util.Map; +java.util.concurrent.atomic.LongAdder; +org.apache.hadoop.hbase.ServerName; +org.apache.hadoop.hbase.tool.CanaryTool.RegionStdOutSink; + + + + + + + + Canary + + + + + + + + + + + +
+
+

Failed Servers

+ <%java> + Map perServerFailuresCount = sink.getPerServerFailuresCount(); + + + + + + + <%if (perServerFailuresCount != null && perServerFailuresCount.size() > 0)%> + <%for Map.Entry entry : perServerFailuresCount.entrySet() %> + + + + + + + +
ServerFailures Count
<& serverNameLink ; serverName = entry.getKey() &><% entry.getValue() %>
Total Failed Servers: <% (perServerFailuresCount != null) ? perServerFailuresCount.size() : 0 %>
+
+
+

Failed Tables

+ <%java> + Map perTableFailuresCount = sink.getPerTableFailuresCount(); + + + + + + + <%if (perTableFailuresCount != null && perTableFailuresCount.size() > 0)%> + <%for Map.Entry entry : perTableFailuresCount.entrySet()%> + + + + + + + +
TableFailures Count
<% entry.getKey() %><% entry.getValue() %>
Total Failed Tables: <% (perTableFailuresCount != null) ? perTableFailuresCount.size() : 0 %>
+
+ +
+

Software Attributes

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Attribute NameValueDescription
HBase Version<% org.apache.hadoop.hbase.util.VersionInfo.getVersion() %>, r<% org.apache.hadoop.hbase.util.VersionInfo.getRevision() %>HBase version and revision
HBase Compiled<% org.apache.hadoop.hbase.util.VersionInfo.getDate() %>, <% org.apache.hadoop.hbase.util.VersionInfo.getUser() %>When HBase version was compiled and by whom
Hadoop Version<% org.apache.hadoop.util.VersionInfo.getVersion() %>, r<% org.apache.hadoop.util.VersionInfo.getRevision() %>Hadoop version and revision
Hadoop Compiled<% org.apache.hadoop.util.VersionInfo.getDate() %>, <% org.apache.hadoop.util.VersionInfo.getUser() %>When Hadoop version was compiled and by whom
+
+
+ + + + + + + + +<%def serverNameLink> + <%args> + ServerName serverName; + + <%java> + int infoPort = serverName.getPort() + 1; + String url = "//" + serverName.getHostname() + ":" + infoPort + "/"; + + + <%if (infoPort > 0) %> + <% serverName.getServerName() %> + <%else> + <% serverName.getServerName() %> + + diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryStatusServlet.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryStatusServlet.java new file mode 100644 index 000000000000..ce214a7a2973 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryStatusServlet.java @@ -0,0 +1,49 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.tool; + +import java.io.IOException; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import org.apache.hadoop.hbase.tmpl.tool.CanaryStatusTmpl; +import org.apache.yetus.audience.InterfaceAudience; + +/** Servlet behind the Canary web UI: renders CanaryStatusTmpl with the RegionStdOutSink found in the servlet context under the "sink" attribute. */ +@InterfaceAudience.Private +public class CanaryStatusServlet extends HttpServlet { + /** Handles GET: reads the "sink" context attribute, throws ServletException when it is missing, otherwise sets the content type to text/html and renders the status template into the response writer. */ @Override + protected void doGet(HttpServletRequest req, HttpServletResponse resp) + throws ServletException, IOException { + CanaryTool.RegionStdOutSink sink = + (CanaryTool.RegionStdOutSink) getServletContext().getAttribute( + "sink"); + if (sink == null) { + throw new ServletException( + "RegionStdOutSink is null! The CanaryTool's InfoServer is not initialized correctly"); + } + + resp.setContentType("text/html"); + + CanaryStatusTmpl tmpl = new CanaryStatusTmpl(); + tmpl.render(resp.getWriter(), sink); + } + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java index ef68a082f871..ed59ab70d6ba 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/CanaryTool.java @@ -24,6 +24,7 @@ import java.io.Closeable; import java.io.IOException; +import java.net.BindException; import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Arrays; @@ -39,6 +40,7 @@ import java.util.TreeSet; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; @@ -81,6 +83,7 @@ import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import
org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter; +import org.apache.hadoop.hbase.http.InfoServer; import org.apache.hadoop.hbase.tool.CanaryTool.RegionTask.TaskType; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; @@ -122,6 +125,37 @@ */ @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS) public class CanaryTool implements Tool, Canary { + /** Configuration key for the Canary info server port; putUpWebUI treats a negative value as "web UI disabled". */ public static final String HBASE_CANARY_INFO_PORT = "hbase.canary.info.port"; + /** Port used by putUpWebUI when HBASE_CANARY_INFO_PORT is not configured. */ + public static final int DEFAULT_CANARY_INFOPORT = 16050; + /** Configuration key for the info server bind address; putUpWebUI falls back to "0.0.0.0". */ + public static final String HBASE_CANARY_INFO_BINDADDRESS = "hbase.canary.info.bindAddress"; + /** Info server hosting the web UI; assigned in putUpWebUI. */ + private InfoServer infoServer; + /** Starts the Canary web UI unless it is disabled or unsupported. Reads the port from HBASE_CANARY_INFO_PORT (default DEFAULT_CANARY_INFOPORT) and returns immediately when it is negative. In ZooKeeper or RegionServer mode only an info message is logged. Otherwise an InfoServer named "canary" is created on the configured bind address, CanaryStatusServlet is registered at /canary-status, this.sink is published as the "sink" attribute and the server is started. A BindException is logged as a warning and not rethrown. NOTE(review): if the BindException is raised after the assignment, infoServer stays non-null although the server is not running, and no stop of the server is visible in this hunk - confirm the intended lifecycle. */ + private void putUpWebUI() throws IOException { + int port = conf.getInt(HBASE_CANARY_INFO_PORT, DEFAULT_CANARY_INFOPORT); + // -1 is for disabling info server + if (port < 0) { + return; + } + if (zookeeperMode) { + LOG.info("WebUI is not supported in Zookeeper mode"); + } else if (regionServerMode) { + LOG.info("WebUI is not supported in RegionServer mode"); + } else { + String addr = conf.get(HBASE_CANARY_INFO_BINDADDRESS, "0.0.0.0"); + try { + infoServer = new InfoServer("canary", addr, port, false, conf); + infoServer.addUnprivilegedServlet("canary", "/canary-status", CanaryStatusServlet.class); + infoServer.setAttribute("sink", this.sink); + infoServer.start(); + LOG.info("Bind Canary http info server to {}:{} ", addr, port); + } catch (BindException e) { + LOG.warn("Failed binding Canary http info server to {}:{}", addr, port, e); + } + } + } @Override public int checkRegions(String[] targets) throws Exception { @@ -273,10 +307,45 @@ public void publishReadTiming(String znode, String server, long msTime) { public static class RegionStdOutSink extends StdOutSink { private Map perTableReadLatency = new HashMap<>(); private LongAdder writeLatency = new LongAdder(); - private final Map> regionMap = new ConcurrentHashMap<>(); + private final ConcurrentMap> regionMap = + new
ConcurrentHashMap<>(); + /** Failure counters keyed by the ServerName passed to incFailuresCountDetails. */ private ConcurrentMap perServerFailuresCount = + new ConcurrentHashMap<>(); + /** Failure counters keyed by the table name string of the failing region. */ private ConcurrentMap perTableFailuresCount = new ConcurrentHashMap<>(); + /** Returns the per-server failure map itself (the field, not a copy). */ + public ConcurrentMap getPerServerFailuresCount() { + return perServerFailuresCount; + } + /** Returns the per-table failure map itself (the field, not a copy). */ + public ConcurrentMap getPerTableFailuresCount() { + return perTableFailuresCount; + } + /** Clears both the per-server and the per-table failure maps. */ + public void resetFailuresCountDetails() { + perServerFailuresCount.clear(); + perTableFailuresCount.clear(); + } + /** Records one failure: increments the LongAdder stored under serverName and the one stored under region.getTable().getNameAsString(), creating either adder inside compute() when the key has no entry yet. Called from the publishReadFailure and publishWriteFailure overloads visible in this diff. */ + private void incFailuresCountDetails(ServerName serverName, RegionInfo region) { + perServerFailuresCount.compute(serverName, (server, count) -> { + if (count == null) { + count = new LongAdder(); + } + count.increment(); + return count; + }); + perTableFailuresCount.compute(region.getTable().getNameAsString(), (tableName, count) -> { + if (count == null) { + count = new LongAdder(); + } + count.increment(); + return count; + }); + } public void publishReadFailure(ServerName serverName, RegionInfo region, Exception e) { incReadFailureCount(); + incFailuresCountDetails(serverName, region); LOG.error("Read from {} on serverName={} failed", region.getRegionNameAsString(), serverName, e); } @@ -284,6 +353,7 @@ public void publishReadFailure(ServerName serverName, RegionInfo region, Excepti public void publishReadFailure(ServerName serverName, RegionInfo region, ColumnFamilyDescriptor column, Exception e) { incReadFailureCount(); + incFailuresCountDetails(serverName, region); LOG.error("Read from {} on serverName={}, columnFamily={} failed", region.getRegionNameAsString(), serverName, column.getNameAsString(), e); @@ -304,12 +374,14 @@ public void publishReadTiming(ServerName serverName, RegionInfo region, public void publishWriteFailure(ServerName serverName, RegionInfo region, Exception e) { incWriteFailureCount(); + incFailuresCountDetails(serverName, region); LOG.error("Write to {} on {} failed", region.getRegionNameAsString(), serverName, e); } public void publishWriteFailure(ServerName
serverName, RegionInfo region, ColumnFamilyDescriptor column, Exception e) { incWriteFailureCount(); + incFailuresCountDetails(serverName, region); LOG.error("Write to {} on {} {} failed", region.getRegionNameAsString(), serverName, column.getNameAsString(), e); } @@ -345,7 +417,7 @@ public LongAdder getWriteLatency() { return this.writeLatency; } - public Map> getRegionMap() { + public ConcurrentMap> getRegionMap() { return this.regionMap; } @@ -908,6 +980,7 @@ public int run(String[] args) throws Exception { System.arraycopy(args, index, monitorTargets, 0, length); } + putUpWebUI(); if (zookeeperMode) { return checkZooKeeper(); } else if (regionServerMode) { @@ -1352,6 +1425,7 @@ public void run() { try { List> taskFutures = new LinkedList<>(); RegionStdOutSink regionSink = this.getSink(); + regionSink.resetFailuresCountDetails(); if (this.targets != null && this.targets.length > 0) { String[] tables = generateMonitorTables(this.targets); // Check to see that each table name passed in the -readTableTimeouts argument is also diff --git a/hbase-server/src/main/resources/hbase-webapps/canary/canary.jsp b/hbase-server/src/main/resources/hbase-webapps/canary/canary.jsp new file mode 100644 index 000000000000..2648ddd8dc50 --- /dev/null +++ b/hbase-server/src/main/resources/hbase-webapps/canary/canary.jsp @@ -0,0 +1,20 @@ +<%-- +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +--%> + diff --git a/hbase-server/src/main/resources/hbase-webapps/canary/index.html b/hbase-server/src/main/resources/hbase-webapps/canary/index.html new file mode 100644 index 000000000000..7e03fdd764a7 --- /dev/null +++ b/hbase-server/src/main/resources/hbase-webapps/canary/index.html @@ -0,0 +1,20 @@ + + diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryStatusServlet.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryStatusServlet.java new file mode 100644 index 000000000000..56c02a52fad0 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestCanaryStatusServlet.java @@ -0,0 +1,116 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.tool; + +import java.io.IOException; +import java.io.StringWriter; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.tmpl.tool.CanaryStatusTmpl; +import org.junit.Assert; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + + +@Category({ SmallTests.class }) +public class TestCanaryStatusServlet { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestCanaryStatusServlet.class); + + @Test + public void testFailures() throws IOException { + CanaryTool.RegionStdOutSink regionStdOutSink = new CanaryTool.RegionStdOutSink(); + + ServerName serverName1 = ServerName.valueOf("staging-st04.server:22600", + 1584180761635L); + TableName fakeTableName1 = TableName.valueOf("fakeTableName1"); + RegionInfo regionInfo1 = RegionInfoBuilder.newBuilder(fakeTableName1).build(); + + ServerName serverName2 = ServerName.valueOf("staging-st05.server:22600", + 1584180761636L); + TableName fakeTableName2 = TableName.valueOf("fakeTableName2"); + RegionInfo regionInfo2 = RegionInfoBuilder.newBuilder(fakeTableName2).build(); + + regionStdOutSink.publishReadFailure(serverName1, regionInfo1, new IOException()); + regionStdOutSink.publishWriteFailure(serverName2, regionInfo2, new IOException()); + CanaryStatusTmpl tmpl = new CanaryStatusTmpl(); + 
StringWriter renderResultWriter = new StringWriter(); + tmpl.render(renderResultWriter, regionStdOutSink); + String renderResult = renderResultWriter.toString(); + Assert.assertTrue(renderResult.contains("staging-st04.server,22600")); + Assert.assertTrue(renderResult.contains("fakeTableName1")); + Assert.assertTrue(renderResult.contains("staging-st05.server,22600")); + Assert.assertTrue(renderResult.contains("fakeTableName2")); + + } + + @Test + public void testReadFailuresOnly() throws IOException { + CanaryTool.RegionStdOutSink regionStdOutSink = new CanaryTool.RegionStdOutSink(); + + ServerName serverName1 = ServerName.valueOf("staging-st04.server:22600", + 1584180761635L); + TableName fakeTableName1 = TableName.valueOf("fakeTableName1"); + RegionInfo regionInfo1 = RegionInfoBuilder.newBuilder(fakeTableName1).build(); + + regionStdOutSink.publishReadFailure(serverName1, regionInfo1, new IOException()); + CanaryStatusTmpl tmpl = new CanaryStatusTmpl(); + StringWriter renderResultWriter = new StringWriter(); + tmpl.render(renderResultWriter, regionStdOutSink); + String renderResult = renderResultWriter.toString(); + Assert.assertTrue(renderResult.contains("staging-st04.server,22600")); + Assert.assertTrue(renderResult.contains("fakeTableName1")); + } + + @Test + public void testWriteFailuresOnly() throws IOException { + CanaryTool.RegionStdOutSink regionStdOutSink = new CanaryTool.RegionStdOutSink(); + + ServerName serverName2 = ServerName.valueOf("staging-st05.server:22600", + 1584180761636L); + TableName fakeTableName2 = TableName.valueOf("fakeTableName2"); + RegionInfo regionInfo2 = RegionInfoBuilder.newBuilder(fakeTableName2).build(); + + regionStdOutSink.publishReadFailure(serverName2, regionInfo2, new IOException()); + CanaryStatusTmpl tmpl = new CanaryStatusTmpl(); + StringWriter renderResultWriter = new StringWriter(); + tmpl.render(renderResultWriter, regionStdOutSink); + String renderResult = renderResultWriter.toString(); + 
Assert.assertTrue(renderResult.contains("staging-st05.server,22600")); + Assert.assertTrue(renderResult.contains("fakeTableName2")); + + } + + @Test + public void testNoFailures() throws IOException { + CanaryTool.RegionStdOutSink regionStdOutSink = new CanaryTool.RegionStdOutSink(); + CanaryStatusTmpl tmpl = new CanaryStatusTmpl(); + StringWriter renderResultWriter = new StringWriter(); + tmpl.render(renderResultWriter, regionStdOutSink); + String renderResult = renderResultWriter.toString(); + Assert.assertTrue(renderResult.contains("Total Failed Servers: 0")); + Assert.assertTrue(renderResult.contains("Total Failed Tables: 0")); + } + +}