Skip to content

Commit

Permalink
HBASE-15242: add client side metrics for timeout and remote exceptions. (#5023)
Browse files Browse the repository at this point in the history

Signed-off-by: Andrew Purtell <[email protected]>
  • Loading branch information
vli02 authored Feb 25, 2023
1 parent 22dbb7a commit 36bb0d1
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.yetus.audience.InterfaceAudience;

import org.apache.hbase.thirdparty.com.google.protobuf.Descriptors.MethodDescriptor;
Expand Down Expand Up @@ -118,6 +119,9 @@ static String getScope(Configuration conf, String clusterId, Object connectionOb

private static final String CNT_BASE = "rpcCount_";
private static final String FAILURE_CNT_BASE = "rpcFailureCount_";
private static final String TOTAL_EXCEPTION_CNT = "rpcTotalExceptions";
private static final String LOCAL_EXCEPTION_CNT_BASE = "rpcLocalExceptions_";
private static final String REMOTE_EXCEPTION_CNT_BASE = "rpcRemoteExceptions_";
private static final String DRTN_BASE = "rpcCallDurationMs_";
private static final String REQ_BASE = "rpcCallRequestSizeBytes_";
private static final String RESP_BASE = "rpcCallResponseSizeBytes_";
Expand Down Expand Up @@ -638,16 +642,27 @@ private void shutdown() {
}

/** Report RPC context to metrics system. */
public void updateRpc(MethodDescriptor method, Message param, CallStats stats, boolean failed) {
public void updateRpc(MethodDescriptor method, Message param, CallStats stats, Throwable e) {
int callsPerServer = stats.getConcurrentCallsPerServer();
if (callsPerServer > 0) {
concurrentCallsPerServerHist.update(callsPerServer);
}
// Update the counter that tracks RPCs by type.
final String methodName = method.getService().getName() + "_" + method.getName();
getMetric(CNT_BASE + methodName, rpcCounters, counterFactory).inc();
if (failed) {
if (e != null) {
getMetric(FAILURE_CNT_BASE + methodName, rpcCounters, counterFactory).inc();
getMetric(TOTAL_EXCEPTION_CNT, rpcCounters, counterFactory).inc();
if (e instanceof RemoteException) {
String fullClassName = ((RemoteException) e).getClassName();
String simpleClassName = (fullClassName != null)
? fullClassName.substring(fullClassName.lastIndexOf(".") + 1)
: "unknown";
getMetric(REMOTE_EXCEPTION_CNT_BASE + simpleClassName, rpcCounters, counterFactory).inc();
} else {
getMetric(LOCAL_EXCEPTION_CNT_BASE + e.getClass().getSimpleName(), rpcCounters,
counterFactory).inc();
}
}
// this implementation is tied directly to protobuf implementation details. would be better
// if we could dispatch based on something static, ie, request Message type.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -375,16 +375,15 @@ private T getConnection(ConnectionId remoteId) throws IOException {
private void onCallFinished(Call call, HBaseRpcController hrc, Address addr,
RpcCallback<Message> callback) {
call.callStats.setCallTimeMs(EnvironmentEdgeManager.currentTime() - call.getStartTime());
final boolean failed = (call.error != null) ? true : false;
if (metrics != null) {
metrics.updateRpc(call.md, call.param, call.callStats, failed);
metrics.updateRpc(call.md, call.param, call.callStats, call.error);
}
if (LOG.isTraceEnabled()) {
LOG.trace("CallId: {}, call: {}, startTime: {}ms, callTime: {}ms, status: {}", call.id,
call.md.getName(), call.getStartTime(), call.callStats.getCallTimeMs(),
failed ? "failed" : "successful");
call.error != null ? "failed" : "successful");
}
if (failed) {
if (call.error != null) {
if (call.error instanceof RemoteException) {
call.error.fillInStackTrace();
hrc.setFailed(call.error);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import java.util.concurrent.ThreadPoolExecutor;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.ipc.CallTimeoutException;
import org.apache.hadoop.hbase.ipc.RemoteWithExtrasException;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.testclassification.ClientTests;
import org.apache.hadoop.hbase.testclassification.MetricsTests;
Expand Down Expand Up @@ -150,51 +152,77 @@ public void testStaticMetrics() throws IOException {

for (int i = 0; i < loop; i++) {
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Get"),
GetRequest.getDefaultInstance(), MetricsConnection.newCallStats(), false);
GetRequest.getDefaultInstance(), MetricsConnection.newCallStats(), null);
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Scan"),
ScanRequest.getDefaultInstance(), MetricsConnection.newCallStats(), false);
ScanRequest.getDefaultInstance(), MetricsConnection.newCallStats(),
new RemoteWithExtrasException("java.io.IOException", null, false, false));
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Multi"),
MultiRequest.getDefaultInstance(), MetricsConnection.newCallStats(), true);
MultiRequest.getDefaultInstance(), MetricsConnection.newCallStats(),
new CallTimeoutException("test with CallTimeoutException"));
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
MutateRequest.newBuilder()
.setMutation(ProtobufUtil.toMutation(MutationType.APPEND, new Append(foo)))
.setRegion(region).build(),
MetricsConnection.newCallStats(), false);
MetricsConnection.newCallStats(), null);
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
MutateRequest.newBuilder()
.setMutation(ProtobufUtil.toMutation(MutationType.DELETE, new Delete(foo)))
.setRegion(region).build(),
MetricsConnection.newCallStats(), false);
MetricsConnection.newCallStats(), null);
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
MutateRequest.newBuilder()
.setMutation(ProtobufUtil.toMutation(MutationType.INCREMENT, new Increment(foo)))
.setRegion(region).build(),
MetricsConnection.newCallStats(), false);
MetricsConnection.newCallStats(), null);
METRICS.updateRpc(ClientService.getDescriptor().findMethodByName("Mutate"),
MutateRequest.newBuilder()
.setMutation(ProtobufUtil.toMutation(MutationType.PUT, new Put(foo))).setRegion(region)
.build(),
MetricsConnection.newCallStats(), false);
MetricsConnection.newCallStats(), null);
}

final String rpcCountPrefix = "rpcCount_" + ClientService.getDescriptor().getName() + "_";
final String rpcFailureCountPrefix =
"rpcFailureCount_" + ClientService.getDescriptor().getName() + "_";
String metricKey;
long metricVal;
Counter counter;
for (String method : new String[] { "Get", "Scan", "Mutate" }) {

for (String method : new String[] { "Get", "Scan", "Multi", "Mutate" }) {
metricKey = rpcCountPrefix + method;
metricVal = METRICS.getRpcCounters().get(metricKey).getCount();
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal >= loop);

metricKey = rpcFailureCountPrefix + method;
counter = METRICS.getRpcCounters().get(metricKey);
metricVal = (counter != null) ? counter.getCount() : 0;
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == 0);
if (method.equals("Get") || method.equals("Mutate")) {
// no failure
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == 0);
} else {
// has failure
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == loop);
}
}
metricKey = rpcFailureCountPrefix + "Multi";

// remote exception
metricKey = "rpcRemoteExceptions_IOException";
counter = METRICS.getRpcCounters().get(metricKey);
metricVal = (counter != null) ? counter.getCount() : 0;
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == loop);

// local exception
metricKey = "rpcLocalExceptions_CallTimeoutException";
counter = METRICS.getRpcCounters().get(metricKey);
metricVal = (counter != null) ? counter.getCount() : 0;
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == loop);

// total exception
metricKey = "rpcTotalExceptions";
counter = METRICS.getRpcCounters().get(metricKey);
metricVal = (counter != null) ? counter.getCount() : 0;
assertTrue("metric: " + metricKey + " val: " + metricVal, metricVal == loop * 2);

for (MetricsConnection.CallTracker t : new MetricsConnection.CallTracker[] {
METRICS.getGetTracker(), METRICS.getScanTracker(), METRICS.getMultiTracker(),
METRICS.getAppendTracker(), METRICS.getDeleteTracker(), METRICS.getIncrementTracker(),
Expand Down

0 comments on commit 36bb0d1

Please sign in to comment.