From d1ae87b7a25e20608fc10bb467bd2244f31ae4f4 Mon Sep 17 00:00:00 2001 From: bsglz <18031031@qq.com> Date: Wed, 23 Jun 2021 20:31:04 +0800 Subject: [PATCH 1/5] HBASE-26027 The calling of HTable.batch blocked at AsyncRequestFutureImpl.waitUntilDone caused by ArrayStoreException --- .../org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java index 8cfcf0c1dbb1..d21ef57f5920 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java @@ -1132,7 +1132,8 @@ private String buildDetailedErrorMsg(String string, int index) { @Override public void waitUntilDone() throws InterruptedIOException { try { - waitUntilDone(Long.MAX_VALUE); + long cutoff = (EnvironmentEdgeManager.currentTime() + this.operationTimeout) * 1000L; + waitUntilDone(cutoff); } catch (InterruptedException iex) { throw new InterruptedIOException(iex.getMessage()); } finally { From 4e515e00ea14dbf275b724da403fb152a93c0bc8 Mon Sep 17 00:00:00 2001 From: bsglz <18031031@qq.com> Date: Mon, 28 Jun 2021 10:48:38 +0800 Subject: [PATCH 2/5] fix review issue --- .../hadoop/hbase/client/AsyncRequestFutureImpl.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java index d21ef57f5920..36d9c8871cd3 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java @@ -1132,8 +1132,15 @@ private String buildDetailedErrorMsg(String 
string, int index) { @Override public void waitUntilDone() throws InterruptedIOException { try { - long cutoff = (EnvironmentEdgeManager.currentTime() + this.operationTimeout) * 1000L; - waitUntilDone(cutoff); + if (this.operationTimeout > 0) { + // the worker thread may terminate on an exception without decrementing actionsInProgress, + // then the guarantee of operationTimeout will be broken, so we should set cutoff to avoid + // stuck here forever + long cutoff = (EnvironmentEdgeManager.currentTime() + this.operationTimeout) * 1000L; + waitUntilDone(cutoff); + } else { + waitUntilDone(Long.MAX_VALUE); + } } catch (InterruptedException iex) { throw new InterruptedIOException(iex.getMessage()); } finally { From 6c5c6fcd8c4c0822aa39e8ce14cdd2f6e11610c4 Mon Sep 17 00:00:00 2001 From: bsglz <18031031@qq.com> Date: Tue, 7 Dec 2021 23:26:54 +0800 Subject: [PATCH 3/5] fix ut failure --- .../hadoop/hbase/client/AsyncRequestFutureImpl.java | 8 ++++++-- .../apache/hadoop/hbase/TestClientOperationTimeout.java | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java index 36d9c8871cd3..7059202876a5 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.InterruptedIOException; +import java.net.SocketTimeoutException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -93,6 +94,9 @@ public void run() { } catch (InterruptedException ex) { LOG.error("Replica thread interrupted - no replica calls {}", ex.getMessage()); return; + } catch (SocketTimeoutException ex) { + LOG.error("Replica thread time out"); + return; } } if (done) return; // Done within primary 
timeout @@ -1152,14 +1156,14 @@ public void waitUntilDone() throws InterruptedIOException { } } - private boolean waitUntilDone(long cutoff) throws InterruptedException { + private boolean waitUntilDone(long cutoff) throws InterruptedException, SocketTimeoutException { boolean hasWait = cutoff != Long.MAX_VALUE; long lastLog = EnvironmentEdgeManager.currentTime(); long currentInProgress; while (0 != (currentInProgress = actionsInProgress.get())) { long now = EnvironmentEdgeManager.currentTime(); if (hasWait && (now * 1000L) > cutoff) { - return false; + throw new SocketTimeoutException("time out before the actionsInProgress changed to zero"); } if (!hasWait) { // Only log if wait is infinite. if (now > lastLog + 10000) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClientOperationTimeout.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClientOperationTimeout.java index 09bdf7af7a66..5f80bf5e0fb4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClientOperationTimeout.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClientOperationTimeout.java @@ -158,7 +158,7 @@ public void testPutTimeout() { } /** - * Tests that a batch mutate on a table throws {@link RetriesExhaustedException} when the + * Tests that a batch mutate on a table throws {@link SocketTimeoutException} when the * operation takes longer than 'hbase.client.operation.timeout'. 
*/ @Test @@ -175,7 +175,7 @@ public void testMultiPutsTimeout() { TABLE.batch(puts, new Object[2]); Assert.fail("should not reach here"); } catch (Exception e) { - Assert.assertTrue(e instanceof RetriesExhaustedWithDetailsException); + Assert.assertTrue(e instanceof SocketTimeoutException); } } From c9c57834ce80b1bd3f89c241e41edd40335f9b7a Mon Sep 17 00:00:00 2001 From: bsglz <18031031@qq.com> Date: Wed, 8 Dec 2021 12:36:50 +0800 Subject: [PATCH 4/5] fix ut failure --- .../hadoop/hbase/client/AsyncRequestFutureImpl.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java index 7059202876a5..ca6d5342d57a 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncRequestFutureImpl.java @@ -94,9 +94,6 @@ public void run() { } catch (InterruptedException ex) { LOG.error("Replica thread interrupted - no replica calls {}", ex.getMessage()); return; - } catch (SocketTimeoutException ex) { - LOG.error("Replica thread time out"); - return; } } if (done) return; // Done within primary timeout @@ -1141,7 +1138,9 @@ public void waitUntilDone() throws InterruptedIOException { // then the guarantee of operationTimeout will be broken, so we should set cutoff to avoid // stuck here forever long cutoff = (EnvironmentEdgeManager.currentTime() + this.operationTimeout) * 1000L; - waitUntilDone(cutoff); + if (!waitUntilDone(cutoff)) { + throw new SocketTimeoutException("time out before the actionsInProgress changed to zero"); + } } else { waitUntilDone(Long.MAX_VALUE); } @@ -1156,14 +1155,14 @@ public void waitUntilDone() throws InterruptedIOException { } } - private boolean waitUntilDone(long cutoff) throws InterruptedException, SocketTimeoutException { + private boolean 
waitUntilDone(long cutoff) throws InterruptedException{ boolean hasWait = cutoff != Long.MAX_VALUE; long lastLog = EnvironmentEdgeManager.currentTime(); long currentInProgress; while (0 != (currentInProgress = actionsInProgress.get())) { long now = EnvironmentEdgeManager.currentTime(); if (hasWait && (now * 1000L) > cutoff) { - throw new SocketTimeoutException("time out before the actionsInProgress changed to zero"); + return false; } if (!hasWait) { // Only log if wait is infinite. if (now > lastLog + 10000) { From af803b341aab5ec188d3c7fbc0d1157bd8254ce9 Mon Sep 17 00:00:00 2001 From: bsglz <18031031@qq.com> Date: Wed, 8 Dec 2021 16:58:58 +0800 Subject: [PATCH 5/5] fix ut failure --- .../java/org/apache/hadoop/hbase/TestClientOperationTimeout.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClientOperationTimeout.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClientOperationTimeout.java index 5f80bf5e0fb4..20424c99d2b1 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClientOperationTimeout.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestClientOperationTimeout.java @@ -29,7 +29,6 @@ import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.RetriesExhaustedException; -import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.TableDescriptorBuilder;