From 7f7fe55943c1f4bbfc10b651aee3b0d0f5b909b0 Mon Sep 17 00:00:00 2001 From: Victor Date: Wed, 21 Dec 2022 05:49:43 -0800 Subject: [PATCH] HBASE-27540 add client side counter metrics for failed rpc calls (#4929) Signed-off-by: Bryan Beaudreault --- .../hbase/client/MetricsConnection.java | 6 +++- .../hadoop/hbase/ipc/AbstractRpcClient.java | 10 +++--- .../hbase/client/TestMetricsConnection.java | 33 ++++++++++++++----- 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java index b78bb03a16ec..dd6a00c22593 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MetricsConnection.java @@ -57,6 +57,7 @@ public class MetricsConnection implements StatisticTrackable { public static final String CLIENT_SIDE_METRICS_ENABLED_KEY = "hbase.client.metrics.enable"; private static final String CNT_BASE = "rpcCount_"; + private static final String FAILURE_CNT_BASE = "rpcFailureCount_"; private static final String DRTN_BASE = "rpcCallDurationMs_"; private static final String REQ_BASE = "rpcCallRequestSizeBytes_"; private static final String RESP_BASE = "rpcCallResponseSizeBytes_"; @@ -434,7 +435,7 @@ private void updateRpcGeneric(String methodName, CallStats stats) { } /** Report RPC context to metrics system. */ - public void updateRpc(MethodDescriptor method, Message param, CallStats stats) { + public void updateRpc(MethodDescriptor method, Message param, CallStats stats, boolean failed) { int callsPerServer = stats.getConcurrentCallsPerServer(); if (callsPerServer > 0) { concurrentCallsPerServerHist.update(callsPerServer); @@ -442,6 +443,9 @@ public void updateRpc(MethodDescriptor method, Message param, CallStats stats) { // Update the counter that tracks RPCs by type. final String methodName = method.getService().getName() + "_" + method.getName(); getMetric(CNT_BASE + methodName, rpcCounters, counterFactory).inc(); + if (failed) { + getMetric(FAILURE_CNT_BASE + methodName, rpcCounters, counterFactory).inc(); + } // this implementation is tied directly to protobuf implementation details. would be better // if we could dispatch based on something static, ie, request Message type. if (method.getService() == ClientService.getDescriptor()) { diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AbstractRpcClient.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AbstractRpcClient.java index bd1b7c106e3d..443a05b9f4a1 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AbstractRpcClient.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AbstractRpcClient.java @@ -369,14 +369,16 @@ private T getConnection(ConnectionId remoteId) throws IOException { private void onCallFinished(Call call, HBaseRpcController hrc, InetSocketAddress addr, RpcCallback callback) { call.callStats.setCallTimeMs(EnvironmentEdgeManager.currentTime() - call.getStartTime()); + final boolean failed = (call.error != null) ? true : false; if (metrics != null) { - metrics.updateRpc(call.md, call.param, call.callStats); + metrics.updateRpc(call.md, call.param, call.callStats, failed); } if (LOG.isTraceEnabled()) { - LOG.trace("CallId: {}, call: {}, startTime: {}ms, callTime: {}ms", call.id, call.md.getName(), - call.getStartTime(), call.callStats.getCallTimeMs()); + LOG.trace("CallId: {}, call: {}, startTime: {}ms, callTime: {}ms, status: {}", call.id, + call.md.getName(), call.getStartTime(), call.callStats.getCallTimeMs(), + failed ? "failed" : "successful"); } - if (call.error != null) { + if (failed) { if (call.error instanceof RemoteException) { call.error.fillInStackTrace(); hrc.setFailed(call.error); diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestMetricsConnection.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestMetricsConnection.java index 280e2107aeba..b27e7022715e 100644 --- a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestMetricsConnection.java +++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestMetricsConnection.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import com.codahale.metrics.Counter; import com.codahale.metrics.RatioGauge; import com.codahale.metrics.RatioGauge.Ratio; import java.io.IOException; @@ -77,37 +78,51 @@ public void testStaticMetrics() throws IOException { for (int i = 0; i < loop; i++) { METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Get"), - GetRequest.getDefaultInstance(), MetricsConnection.newCallStats()); + GetRequest.getDefaultInstance(), MetricsConnection.newCallStats(), false); METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Scan"), - ScanRequest.getDefaultInstance(), MetricsConnection.newCallStats()); + ScanRequest.getDefaultInstance(), MetricsConnection.newCallStats(), false); METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Multi"), - MultiRequest.getDefaultInstance(), MetricsConnection.newCallStats()); + MultiRequest.getDefaultInstance(), MetricsConnection.newCallStats(), true); METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"), MutateRequest.newBuilder() .setMutation(ProtobufUtil.toMutation(MutationType.APPEND, new Append(foo))) .setRegion(region).build(), - MetricsConnection.newCallStats()); + MetricsConnection.newCallStats(), false); METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"), MutateRequest.newBuilder() .setMutation(ProtobufUtil.toMutation(MutationType.DELETE, new Delete(foo))) .setRegion(region).build(), - MetricsConnection.newCallStats()); + MetricsConnection.newCallStats(), false); METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"), MutateRequest.newBuilder() .setMutation(ProtobufUtil.toMutation(MutationType.INCREMENT, new Increment(foo))) .setRegion(region).build(), - MetricsConnection.newCallStats()); + MetricsConnection.newCallStats(), false); METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"), MutateRequest.newBuilder() .setMutation(ProtobufUtil.toMutation(MutationType.PUT, new Put(foo))).setRegion(region) .build(), - MetricsConnection.newCallStats()); + MetricsConnection.newCallStats(), false); } + final String rpcCountPrefix = "rpcCount_" + ClientService.getDescriptor().getName() + "_"; + final String rpcFailureCountPrefix = + "rpcFailureCount_" + ClientService.getDescriptor().getName() + "_"; + String metricKey; + long metricVal; + Counter counter; for (String method : new String[] { "Get", "Scan", "Mutate" }) { - final String metricKey = "rpcCount_" + ClientService.getDescriptor().getName() + "_" + method; - final long metricVal = METRICS.rpcCounters.get(metricKey).getCount(); + metricKey = rpcCountPrefix + method; + metricVal = METRICS.rpcCounters.get(metricKey).getCount(); assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal >= loop); + metricKey = rpcFailureCountPrefix + method; + counter = METRICS.rpcCounters.get(metricKey); + metricVal = (counter != null) ? counter.getCount() : 0; + assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == 0); } + metricKey = rpcFailureCountPrefix + "Multi"; + counter = METRICS.rpcCounters.get(metricKey); + metricVal = (counter != null) ? counter.getCount() : 0; + assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == loop); for (MetricsConnection.CallTracker t : new MetricsConnection.CallTracker[] { METRICS.getTracker, METRICS.scanTracker, METRICS.multiTracker, METRICS.appendTracker, METRICS.deleteTracker, METRICS.incrementTracker, METRICS.putTracker }) {