From 199d2da1bd9094411d6d73e14e3c972a14c4c065 Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Tue, 26 Jul 2022 09:19:19 -0400 Subject: [PATCH 1/2] fix: fix race condition in BuiltinMetricsTracer --- .../data/v2/stub/metrics/BuiltinMetricsTracer.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracer.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracer.java index 2148c674e3..b6da36c32e 100644 --- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracer.java +++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracer.java @@ -58,7 +58,7 @@ class BuiltinMetricsTracer extends BigtableTracer { // Stopwatch is not thread safe so this is a workaround to check if the stopwatch changes is // flushed to memory. private final Stopwatch serverLatencyTimer = Stopwatch.createUnstarted(); - private final AtomicBoolean serverLatencyTimerIsRunning = new AtomicBoolean(); + private final Object timerLock = new Object(); private boolean flowControlIsDisabled = false; @@ -117,7 +117,7 @@ public void attemptStarted(Object request, int attemptNumber) { this.tableId = Util.extractTableId(request); } if (!flowControlIsDisabled) { - if (serverLatencyTimerIsRunning.compareAndSet(false, true)) { + synchronized (timerLock) { serverLatencyTimer.start(); } } @@ -144,7 +144,7 @@ public void onRequest(int requestCount) { if (flowControlIsDisabled) { // On request is only called when auto flow control is disabled. When auto flow control is // disabled, server latency is measured between onRequest and onResponse. - if (serverLatencyTimerIsRunning.compareAndSet(false, true)) { + synchronized (timerLock) { serverLatencyTimer.start(); } } @@ -159,7 +159,7 @@ public void responseReceived() { // When auto flow control is disabled and application requested multiple responses, server // latency is measured between afterResponse and responseReceived. // In all the cases, we want to stop the serverLatencyTimer here. - if (serverLatencyTimerIsRunning.compareAndSet(true, false)) { + synchronized (timerLock) { totalServerLatency.addAndGet(serverLatencyTimer.elapsed(TimeUnit.MILLISECONDS)); serverLatencyTimer.reset(); } @@ -172,7 +172,7 @@ public void afterResponse(long applicationLatency) { // measured between after the last response is processed and before the next response is // received. If flow control is disabled but requestLeft is greater than 0, // also start the timer to count the time between afterResponse and responseReceived. - if (serverLatencyTimerIsRunning.compareAndSet(false, true)) { + synchronized (timerLock) { serverLatencyTimer.start(); } } @@ -235,7 +235,7 @@ private void recordOperationCompletion(@Nullable Throwable status) { private void recordAttemptCompletion(@Nullable Throwable status) { // If the attempt failed, the time spent in retry should be counted in application latency. // Stop the stopwatch and decrement requestLeft. - if (serverLatencyTimerIsRunning.compareAndSet(true, false)) { + synchronized (timerLock) { requestLeft.decrementAndGet(); totalServerLatency.addAndGet(serverLatencyTimer.elapsed(TimeUnit.MILLISECONDS)); serverLatencyTimer.reset(); From 8ee2ffe9565dbfd005b868c7149757e35ad1d907 Mon Sep 17 00:00:00 2001 From: Mattie Fu Date: Tue, 26 Jul 2022 09:31:43 -0400 Subject: [PATCH 2/2] update --- .../v2/stub/metrics/BuiltinMetricsTracer.java | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracer.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracer.java index b6da36c32e..6289951ded 100644 --- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracer.java +++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/metrics/BuiltinMetricsTracer.java @@ -58,6 +58,7 @@ class BuiltinMetricsTracer extends BigtableTracer { // Stopwatch is not thread safe so this is a workaround to check if the stopwatch changes is // flushed to memory. private final Stopwatch serverLatencyTimer = Stopwatch.createUnstarted(); + private boolean serverLatencyTimerIsRunning = false; private final Object timerLock = new Object(); private boolean flowControlIsDisabled = false; @@ -118,7 +119,10 @@ public void attemptStarted(Object request, int attemptNumber) { } if (!flowControlIsDisabled) { synchronized (timerLock) { - serverLatencyTimer.start(); + if (!serverLatencyTimerIsRunning) { + serverLatencyTimer.start(); + serverLatencyTimerIsRunning = true; + } } } } @@ -145,7 +149,10 @@ public void onRequest(int requestCount) { // On request is only called when auto flow control is disabled. When auto flow control is // disabled, server latency is measured between onRequest and onResponse. synchronized (timerLock) { - serverLatencyTimer.start(); + if (!serverLatencyTimerIsRunning) { + serverLatencyTimer.start(); + serverLatencyTimerIsRunning = true; + } } } } @@ -160,8 +167,11 @@ public void responseReceived() { // latency is measured between afterResponse and responseReceived. // In all the cases, we want to stop the serverLatencyTimer here. synchronized (timerLock) { - totalServerLatency.addAndGet(serverLatencyTimer.elapsed(TimeUnit.MILLISECONDS)); - serverLatencyTimer.reset(); + if (serverLatencyTimerIsRunning) { + totalServerLatency.addAndGet(serverLatencyTimer.elapsed(TimeUnit.MILLISECONDS)); + serverLatencyTimer.reset(); + serverLatencyTimerIsRunning = false; + } } } @@ -173,7 +183,10 @@ public void afterResponse(long applicationLatency) { // received. If flow control is disabled but requestLeft is greater than 0, // also start the timer to count the time between afterResponse and responseReceived. synchronized (timerLock) { - serverLatencyTimer.start(); + if (!serverLatencyTimerIsRunning) { + serverLatencyTimer.start(); + serverLatencyTimerIsRunning = true; + } } } } @@ -236,9 +249,12 @@ private void recordAttemptCompletion(@Nullable Throwable status) { // If the attempt failed, the time spent in retry should be counted in application latency. // Stop the stopwatch and decrement requestLeft. synchronized (timerLock) { - requestLeft.decrementAndGet(); - totalServerLatency.addAndGet(serverLatencyTimer.elapsed(TimeUnit.MILLISECONDS)); - serverLatencyTimer.reset(); + if (serverLatencyTimerIsRunning) { + requestLeft.decrementAndGet(); + totalServerLatency.addAndGet(serverLatencyTimer.elapsed(TimeUnit.MILLISECONDS)); + serverLatencyTimer.reset(); + serverLatencyTimerIsRunning = false; + } } recorder.putAttemptLatencies(attemptTimer.elapsed(TimeUnit.MILLISECONDS)); recorder.record(Util.extractStatus(status), tableId, zone, cluster);