Skip to content

Commit

Permalink
HBASE-27684: add client metrics related to user region lock. (apache#…
Browse files Browse the repository at this point in the history
…5081) (apache#5133)

Signed-off-by: Andrew Purtell <[email protected]>
Signed-off-by: David Manning <[email protected]>
Signed-off-by: Rushabh Shah <[email protected]>
Signed-off-by: Tanuj Khurana <[email protected]>
(cherry picked from commit de375bc)
Change-Id: Ib1d2bedf8849ac64c97500dc78a5e2944776fbf6
  • Loading branch information
vli02 authored Mar 23, 2023
1 parent 01d937e commit 93d1813
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -967,6 +967,7 @@ private RegionLocations locateRegionInMeta(TableName tableName, byte[] row, bool
// Query the meta region
long pauseBase = this.pause;
takeUserRegionLock();
final long lockStartTime = EnvironmentEdgeManager.currentTime();
try {
// We don't need to check if useCache is enabled or not. Even if useCache is false
// we already cleared the cache for this row before acquiring userRegion lock so if this
Expand Down Expand Up @@ -1067,6 +1068,10 @@ rpcControllerFactory, getMetaLookupPool(), metaReplicaCallTimeoutScanInMicroSeco
!(e instanceof RegionOfflineException || e instanceof NoServerForRegionException);
} finally {
userRegionLock.unlock();
// update duration of the lock being held
if (metrics != null) {
metrics.updateUserRegionLockHeld(EnvironmentEdgeManager.currentTime() - lockStartTime);
}
}
try {
Thread.sleep(ConnectionUtils.getPauseTime(pauseBase, tries));
Expand All @@ -1080,9 +1085,19 @@ rpcControllerFactory, getMetaLookupPool(), metaReplicaCallTimeoutScanInMicroSeco
void takeUserRegionLock() throws IOException {
try {
long waitTime = connectionConfig.getMetaOperationTimeout();
if (metrics != null) {
metrics.updateUserRegionLockQueue(userRegionLock.getQueueLength());
}
final long waitStartTime = EnvironmentEdgeManager.currentTime();
if (!userRegionLock.tryLock(waitTime, TimeUnit.MILLISECONDS)) {
if (metrics != null) {
metrics.incrUserRegionLockTimeout();
}
throw new LockTimeoutException("Failed to get user region lock in" + waitTime + " ms. "
+ " for accessing meta region server.");
} else if (metrics != null) {
// successfully grabbed the lock, record how long we waited to acquire it
metrics.updateUserRegionLockWaiting(EnvironmentEdgeManager.currentTime() - waitStartTime);
}
} catch (InterruptedException ie) {
LOG.error("Interrupted while waiting for a lock", ie);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,10 @@ public Counter newMetric(Class<?> clazz, String name, String scope) {
protected final Counter hedgedReadWin;
protected final Histogram concurrentCallsPerServerHist;
protected final Histogram numActionsPerServerHist;
protected final Counter userRegionLockTimeoutCount;
protected final Timer userRegionLockWaitingTimer;
protected final Timer userRegionLockHeldTimer;
protected final Histogram userRegionLockQueueHist;

// dynamic metrics

Expand Down Expand Up @@ -348,6 +352,14 @@ protected Ratio getRatio() {
registry.histogram(name(MetricsConnection.class, "concurrentCallsPerServer", scope));
this.numActionsPerServerHist =
registry.histogram(name(MetricsConnection.class, "numActionsPerServer", scope));
this.userRegionLockTimeoutCount =
registry.counter(name(this.getClass(), "userRegionLockTimeoutCount", scope));
this.userRegionLockWaitingTimer =
registry.timer(name(this.getClass(), "userRegionLockWaitingDuration", scope));
this.userRegionLockHeldTimer =
registry.timer(name(this.getClass(), "userRegionLockHeldDuration", scope));
this.userRegionLockQueueHist =
registry.histogram(name(MetricsConnection.class, "userRegionLockQueueLength", scope));

this.reporter = JmxReporter.forRegistry(this.registry).build();
this.reporter.start();
Expand Down Expand Up @@ -425,6 +437,24 @@ public void incrDelayRunnersAndUpdateDelayInterval(long interval) {
this.runnerStats.updateDelayInterval(interval);
}

/**
 * Increments the counter of user region lock timeouts by one. The caller invokes this when an
 * attempt to acquire the user region lock does not succeed within the wait time.
 */
public void incrUserRegionLockTimeout() {
  this.userRegionLockTimeoutCount.inc();
}

/**
 * Records how long a caller waited before it obtained the user region lock.
 * @param duration the waiting time, in milliseconds
 */
public void updateUserRegionLockWaiting(long duration) {
  this.userRegionLockWaitingTimer.update(duration, TimeUnit.MILLISECONDS);
}

/**
 * Records how long the user region lock was held before being released.
 * @param duration the time the lock was held, in milliseconds
 */
public void updateUserRegionLockHeld(long duration) {
  this.userRegionLockHeldTimer.update(duration, TimeUnit.MILLISECONDS);
}

/**
 * Adds one sample to the histogram of user region lock queue lengths.
 * @param count the queue length observed by the caller
 */
public void updateUserRegionLockQueue(int count) {
  this.userRegionLockQueueHist.update(count);
}

/**
* Get a metric for {@code key} from {@code map}, or create it with {@code factory}.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,7 @@ public void testUserRegionLockThrowsException() throws IOException, InterruptedE
conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 0);
conf.setLong(HConstants.HBASE_CLIENT_META_OPERATION_TIMEOUT, 2000);
conf.setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 2000);
conf.setBoolean(MetricsConnection.CLIENT_SIDE_METRICS_ENABLED_KEY, true);

try (ConnectionImplementation conn =
(ConnectionImplementation) ConnectionFactory.createConnection(conf)) {
Expand All @@ -587,6 +588,28 @@ public void testUserRegionLockThrowsException() throws IOException, InterruptedE

assertTrue(client1.getException() instanceof LockTimeoutException
^ client2.getException() instanceof LockTimeoutException);

// obtain the client metrics
MetricsConnection metrics = conn.getConnectionMetrics();
long queueCount = metrics.userRegionLockQueueHist.getCount();
assertEquals("Queue of userRegionLock should be updated twice. queueCount: " + queueCount,
queueCount, 2);

long timeoutCount = metrics.userRegionLockTimeoutCount.getCount();
assertEquals("Timeout of userRegionLock should happen once. timeoutCount: " + timeoutCount,
timeoutCount, 1);

long waitingTimerCount = metrics.userRegionLockWaitingTimer.getCount();
assertEquals("userRegionLock should be grabbed successfully once. waitingTimerCount: "
+ waitingTimerCount, waitingTimerCount, 1);

long heldTimerCount = metrics.userRegionLockHeldTimer.getCount();
assertEquals(
"userRegionLock should be held successfully once. heldTimerCount: " + heldTimerCount,
heldTimerCount, 1);
double heldTime = metrics.userRegionLockHeldTimer.getSnapshot().getMax();
assertTrue("Max held time should be greater than 2 seconds. heldTime: " + heldTime,
heldTime >= 2E9);
}
}

Expand Down

0 comments on commit 93d1813

Please sign in to comment.