Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HBASE-27684: add client metrics related to user region lock. (#5081) #5133

Merged
merged 1 commit into from
Mar 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,7 @@ private RegionLocations locateRegionInMeta(TableName tableName, byte[] row, bool
// Query the meta region
long pauseBase = this.pause;
takeUserRegionLock();
final long lockStartTime = EnvironmentEdgeManager.currentTime();
try {
// We don't need to check if useCache is enabled or not. Even if useCache is false
// we already cleared the cache for this row before acquiring userRegion lock so if this
Expand Down Expand Up @@ -1063,6 +1064,10 @@ rpcControllerFactory, getMetaLookupPool(), metaReplicaCallTimeoutScanInMicroSeco
!(e instanceof RegionOfflineException || e instanceof NoServerForRegionException);
} finally {
userRegionLock.unlock();
// update duration of the lock being held
if (metrics != null) {
metrics.updateUserRegionLockHeld(EnvironmentEdgeManager.currentTime() - lockStartTime);
}
}
try {
Thread.sleep(ConnectionUtils.getPauseTime(pauseBase, tries));
Expand All @@ -1076,9 +1081,19 @@ rpcControllerFactory, getMetaLookupPool(), metaReplicaCallTimeoutScanInMicroSeco
void takeUserRegionLock() throws IOException {
try {
long waitTime = connectionConfig.getMetaOperationTimeout();
if (metrics != null) {
metrics.updateUserRegionLockQueue(userRegionLock.getQueueLength());
}
final long waitStartTime = EnvironmentEdgeManager.currentTime();
if (!userRegionLock.tryLock(waitTime, TimeUnit.MILLISECONDS)) {
if (metrics != null) {
metrics.incrUserRegionLockTimeout();
}
throw new LockTimeoutException("Failed to get user region lock in" + waitTime + " ms. "
+ " for accessing meta region server.");
} else if (metrics != null) {
// successfully grabbed the lock, start timer of holding the lock
metrics.updateUserRegionLockWaiting(EnvironmentEdgeManager.currentTime() - waitStartTime);
}
} catch (InterruptedException ie) {
LOG.error("Interrupted while waiting for a lock", ie);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,10 @@ public Counter newMetric(Class<?> clazz, String name, String scope) {
protected final Counter hedgedReadWin;
protected final Histogram concurrentCallsPerServerHist;
protected final Histogram numActionsPerServerHist;
protected final Counter userRegionLockTimeoutCount;
protected final Timer userRegionLockWaitingTimer;
protected final Timer userRegionLockHeldTimer;
protected final Histogram userRegionLockQueueHist;

// dynamic metrics

Expand Down Expand Up @@ -348,6 +352,14 @@ protected Ratio getRatio() {
registry.histogram(name(MetricsConnection.class, "concurrentCallsPerServer", scope));
this.numActionsPerServerHist =
registry.histogram(name(MetricsConnection.class, "numActionsPerServer", scope));
this.userRegionLockTimeoutCount =
registry.counter(name(this.getClass(), "userRegionLockTimeoutCount", scope));
this.userRegionLockWaitingTimer =
registry.timer(name(this.getClass(), "userRegionLockWaitingDuration", scope));
this.userRegionLockHeldTimer =
registry.timer(name(this.getClass(), "userRegionLockHeldDuration", scope));
this.userRegionLockQueueHist =
registry.histogram(name(MetricsConnection.class, "userRegionLockQueueLength", scope));

this.reporter = JmxReporter.forRegistry(this.registry).build();
this.reporter.start();
Expand Down Expand Up @@ -425,6 +437,24 @@ public void incrDelayRunnersAndUpdateDelayInterval(long interval) {
this.runnerStats.updateDelayInterval(interval);
}

/** incr */
public void incrUserRegionLockTimeout() {
userRegionLockTimeoutCount.inc();
}

/** update */
public void updateUserRegionLockWaiting(long duration) {
userRegionLockWaitingTimer.update(duration, TimeUnit.MILLISECONDS);
}

public void updateUserRegionLockHeld(long duration) {
userRegionLockHeldTimer.update(duration, TimeUnit.MILLISECONDS);
}

public void updateUserRegionLockQueue(int count) {
userRegionLockQueueHist.update(count);
}

/**
* Get a metric for {@code key} from {@code map}, or create it with {@code factory}.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,7 @@ public void testUserRegionLockThrowsException() throws IOException, InterruptedE
conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 0);
conf.setLong(HConstants.HBASE_CLIENT_META_OPERATION_TIMEOUT, 2000);
conf.setLong(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 2000);
conf.setBoolean(MetricsConnection.CLIENT_SIDE_METRICS_ENABLED_KEY, true);

try (ConnectionImplementation conn =
(ConnectionImplementation) ConnectionFactory.createConnection(conf)) {
Expand All @@ -587,6 +588,28 @@ public void testUserRegionLockThrowsException() throws IOException, InterruptedE

assertTrue(client1.getException() instanceof LockTimeoutException
^ client2.getException() instanceof LockTimeoutException);

// obtain the client metrics
MetricsConnection metrics = conn.getConnectionMetrics();
long queueCount = metrics.userRegionLockQueueHist.getCount();
assertEquals("Queue of userRegionLock should be updated twice. queueCount: " + queueCount,
queueCount, 2);

long timeoutCount = metrics.userRegionLockTimeoutCount.getCount();
assertEquals("Timeout of userRegionLock should happen once. timeoutCount: " + timeoutCount,
timeoutCount, 1);

long waitingTimerCount = metrics.userRegionLockWaitingTimer.getCount();
assertEquals("userRegionLock should be grabbed successfully once. waitingTimerCount: "
+ waitingTimerCount, waitingTimerCount, 1);

long heldTimerCount = metrics.userRegionLockHeldTimer.getCount();
assertEquals(
"userRegionLock should be held successfully once. heldTimerCount: " + heldTimerCount,
heldTimerCount, 1);
double heldTime = metrics.userRegionLockHeldTimer.getSnapshot().getMax();
assertTrue("Max held time should be greater than 2 seconds. heldTime: " + heldTime,
heldTime >= 2E9);
}
}

Expand Down