Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HBASE-27684: add client metrics related to user region lock. #5081

Merged
merged 9 commits into from
Mar 21, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -1003,6 +1003,7 @@ private RegionLocations locateRegionInMeta(TableName tableName, byte[] row, bool
// Query the meta region
long pauseBase = connectionConfig.getPauseMillis();
takeUserRegionLock();
final long lockStartTime = EnvironmentEdgeManager.currentTime();
try {
// We don't need to check if useCache is enabled or not. Even if useCache is false
// we already cleared the cache for this row before acquiring userRegion lock so if this
Expand Down Expand Up @@ -1113,6 +1114,10 @@ rpcControllerFactory, getMetaLookupPool(), connectionConfig.getMetaReadRpcTimeou
}
} finally {
userRegionLock.unlock();
// update duration of the lock being held
vli02 marked this conversation as resolved.
Show resolved Hide resolved
if (metrics != null) {
metrics.updateUserRegionLockHeld(EnvironmentEdgeManager.currentTime() - lockStartTime);
}
}
try {
Thread.sleep(ConnectionUtils.getPauseTime(pauseBase, tries));
Expand All @@ -1126,9 +1131,19 @@ rpcControllerFactory, getMetaLookupPool(), connectionConfig.getMetaReadRpcTimeou
void takeUserRegionLock() throws IOException {
try {
long waitTime = connectionConfig.getMetaOperationTimeout();
if (metrics != null) {
shahrs87 marked this conversation as resolved.
Show resolved Hide resolved
metrics.updateUserRegionLockQueue(userRegionLock.getQueueLength());
}
final long waitStartTime = EnvironmentEdgeManager.currentTime();
if (!userRegionLock.tryLock(waitTime, TimeUnit.MILLISECONDS)) {
if (metrics != null) {
metrics.incrUserRegionLockTimeout();
}
throw new LockTimeoutException("Failed to get user region lock in" + waitTime + " ms. "
+ " for accessing meta region server.");
} else if (metrics != null) {
vli02 marked this conversation as resolved.
Show resolved Hide resolved
// successfully grabbed the lock, start timer of holding the lock
metrics.updateUserRegionLockWaiting(EnvironmentEdgeManager.currentTime() - waitStartTime);
}
} catch (InterruptedException ie) {
LOG.error("Interrupted while waiting for a lock", ie);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,10 @@ public Counter newMetric(Class<?> clazz, String name, String scope) {
private final Counter nsLookups;
private final Counter nsLookupsFailed;
private final Timer overloadedBackoffTimer;
private final Counter userRegionLockTimeoutCount;
private final Timer userRegionLockWaitingTimer;
private final Timer userRegionLockHeldTimer;
private final Histogram userRegionLockQueueHist;

// dynamic metrics

Expand Down Expand Up @@ -443,6 +447,15 @@ protected Ratio getRatio() {
this.nsLookups = registry.counter(name(this.getClass(), NS_LOOKUPS, scope));
this.nsLookupsFailed = registry.counter(name(this.getClass(), NS_LOOKUPS_FAILED, scope));

this.userRegionLockTimeoutCount =
registry.counter(name(this.getClass(), "userRegionLockTimeoutCount", scope));
this.userRegionLockWaitingTimer =
registry.timer(name(this.getClass(), "userRegionLockWaitingDuration", scope));
this.userRegionLockHeldTimer =
registry.timer(name(this.getClass(), "userRegionLockHeldDuration", scope));
this.userRegionLockQueueHist =
registry.histogram(name(MetricsConnection.class, "userRegionLockQueueLength", scope));

this.overloadedBackoffTimer =
registry.timer(name(this.getClass(), "overloadedBackoffDurationMs", scope));

Expand Down Expand Up @@ -598,6 +611,41 @@ public void incrementServerOverloadedBackoffTime(long time, TimeUnit timeUnit) {
overloadedBackoffTimer.update(time, timeUnit);
}

/**
 * Increments by one the counter of user region lock timeouts. The visible caller invokes this
 * when a timed {@code tryLock} on the user region lock returns {@code false}.
 */
vli02 marked this conversation as resolved.
Show resolved Hide resolved
public void incrUserRegionLockTimeout() {
userRegionLockTimeoutCount.inc();
}

/**
 * Returns the counter of user region lock timeouts, i.e. the counter incremented by
 * {@link #incrUserRegionLockTimeout()}.
 * @return the live {@link Counter} instance held by this object (not a copy)
 */
public Counter getUserRegionLockTimeout() {
return userRegionLockTimeoutCount;
}

/**
 * Returns the timer that {@link #updateUserRegionLockWaiting(long)} records into.
 * @return the live {@link Timer} instance held by this object (not a copy)
 */
public Timer getUserRegionLockWaitingTimer() {
return userRegionLockWaitingTimer;
}

/**
 * Returns the timer that {@link #updateUserRegionLockHeld(long)} records into.
 * @return the live {@link Timer} instance held by this object (not a copy)
 */
public Timer getUserRegionLockHeldTimer() {
return userRegionLockHeldTimer;
}

/**
 * Returns the histogram that {@link #updateUserRegionLockQueue(int)} records into.
 * @return the live {@link Histogram} instance held by this object (not a copy)
 */
public Histogram getUserRegionLockQueue() {
return userRegionLockQueueHist;
}

/**
 * Records one sample in the user region lock waiting timer.
 * @param duration elapsed time to record, interpreted as milliseconds
 */
public void updateUserRegionLockWaiting(long duration) {
userRegionLockWaitingTimer.update(duration, TimeUnit.MILLISECONDS);
}

/**
 * Records one sample in the user region lock held timer.
 * @param duration elapsed time to record, interpreted as milliseconds
 */
public void updateUserRegionLockHeld(long duration) {
userRegionLockHeldTimer.update(duration, TimeUnit.MILLISECONDS);
}

/**
 * Records one sample in the user region lock queue histogram.
 * @param count value to record; the visible caller passes the lock's current queue length
 */
public void updateUserRegionLockQueue(int count) {
userRegionLockQueueHist.update(count);
}

/** Return the connection count of the metrics within a scope */
public long getConnectionCount() {
return connectionCount.getCount();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ public void testUserRegionLockThrowsException() throws IOException, InterruptedE
conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 0);
conf.setLong(HConstants.HBASE_CLIENT_META_OPERATION_TIMEOUT, 2000);
conf.setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 2000);
conf.setBoolean(MetricsConnection.CLIENT_SIDE_METRICS_ENABLED_KEY, true);

try (ConnectionImplementation conn =
(ConnectionImplementation) ConnectionFactory.createConnection(conf)) {
Expand All @@ -459,6 +460,28 @@ public void testUserRegionLockThrowsException() throws IOException, InterruptedE

assertTrue(client1.getException() instanceof LockTimeoutException
^ client2.getException() instanceof LockTimeoutException);

// obtain the client metrics
MetricsConnection metrics = conn.getConnectionMetrics();
long queueCount = metrics.getUserRegionLockQueue().getCount();
assertEquals("Queue of userRegionLock should be updated twice. queueCount: " + queueCount,
vli02 marked this conversation as resolved.
Show resolved Hide resolved
queueCount, 2);

long timeoutCount = metrics.getUserRegionLockTimeout().getCount();
assertEquals("Timeout of userRegionLock should happen once. timeoutCount: " + timeoutCount,
timeoutCount, 1);

long waitingTimerCount = metrics.getUserRegionLockWaitingTimer().getCount();
assertEquals("userRegionLock should be grabbed successfully once. waitingTimerCount: "
+ waitingTimerCount, waitingTimerCount, 1);

long heldTimerCount = metrics.getUserRegionLockHeldTimer().getCount();
vli02 marked this conversation as resolved.
Show resolved Hide resolved
assertEquals(
"userRegionLock should be held successfully once. heldTimerCount: " + heldTimerCount,
heldTimerCount, 1);
double heldTime = metrics.getUserRegionLockHeldTimer().getSnapshot().getMax();
assertTrue("Max held time should be greater than 2 seconds. heldTime: " + heldTime,
heldTime >= 2E9);
}
}

Expand Down