-
Notifications
You must be signed in to change notification settings - Fork 3.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HBASE-28085 Configurably use scanner timeout as rpc timeout for scanner next calls #5402
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,6 +57,7 @@ class ScannerCallableWithReplicas implements RetryingCallable<Result[]> { | |
AtomicBoolean replicaSwitched = new AtomicBoolean(false); | ||
private final ClusterConnection cConnection; | ||
protected final ExecutorService pool; | ||
private final boolean useScannerTimeoutForNextCalls; | ||
protected final int timeBeforeReplicas; | ||
private final Scan scan; | ||
private final int retries; | ||
|
@@ -72,11 +73,12 @@ class ScannerCallableWithReplicas implements RetryingCallable<Result[]> { | |
|
||
public ScannerCallableWithReplicas(TableName tableName, ClusterConnection cConnection, | ||
ScannerCallable baseCallable, ExecutorService pool, int timeBeforeReplicas, Scan scan, | ||
int retries, int readRpcTimeout, int scannerTimeout, int caching, Configuration conf, | ||
RpcRetryingCaller<Result[]> caller) { | ||
int retries, int readRpcTimeout, int scannerTimeout, boolean useScannerTimeoutForNextCalls, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it better to thread through the instance of Probably this is more of a refactor than you were ready to bite off, and all this code gets tossed as of 3.0, yes? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, it's not so simple here. The readRpcTimeout and scannerTimeout both can be overridden on a per-Table basis via TableBuilder. Caching can be overridden by the Scan. So really just retries and my new boolean could be unified in this way. I could do it, but yea trying not to refactor too much. Also I could imagine making retries configurable via TableBuilder in the future which would negate this change... not necessarily somethign we should design for, just saying. Yes, this code goes away in 3.0, but I'm sure we'll be maintaining it for years to come. So I think it's worth doing useful refactors, but not sure this one qualifies given the special cases above. |
||
int caching, Configuration conf, RpcRetryingCaller<Result[]> caller) { | ||
this.currentScannerCallable = baseCallable; | ||
this.cConnection = cConnection; | ||
this.pool = pool; | ||
this.useScannerTimeoutForNextCalls = useScannerTimeoutForNextCalls; | ||
if (timeBeforeReplicas < 0) { | ||
throw new IllegalArgumentException("Invalid value of operation timeout on the primary"); | ||
} | ||
|
@@ -187,9 +189,12 @@ public Result[] call(int timeout) throws IOException { | |
pool, regionReplication * 5); | ||
|
||
AtomicBoolean done = new AtomicBoolean(false); | ||
// make sure we use the same rpcTimeout for current and other replicas | ||
int rpcTimeoutForCall = getRpcTimeout(); | ||
|
||
replicaSwitched.set(false); | ||
// submit call for the primary replica or user specified replica | ||
addCallsForCurrentReplica(cs); | ||
addCallsForCurrentReplica(cs, rpcTimeoutForCall); | ||
int startIndex = 0; | ||
|
||
try { | ||
|
@@ -234,7 +239,7 @@ public Result[] call(int timeout) throws IOException { | |
endIndex = 1; | ||
} else { | ||
// TODO: this may be an overkill for large region replication | ||
addCallsForOtherReplicas(cs, 0, regionReplication - 1); | ||
addCallsForOtherReplicas(cs, 0, regionReplication - 1, rpcTimeoutForCall); | ||
} | ||
|
||
try { | ||
|
@@ -326,15 +331,41 @@ public Cursor getCursor() { | |
return currentScannerCallable != null ? currentScannerCallable.getCursor() : null; | ||
} | ||
|
||
private void | ||
addCallsForCurrentReplica(ResultBoundedCompletionService<Pair<Result[], ScannerCallable>> cs) { | ||
private void addCallsForCurrentReplica( | ||
ResultBoundedCompletionService<Pair<Result[], ScannerCallable>> cs, int rpcTimeout) { | ||
RetryingRPC retryingOnReplica = new RetryingRPC(currentScannerCallable); | ||
outstandingCallables.add(currentScannerCallable); | ||
cs.submit(retryingOnReplica, readRpcTimeout, scannerTimeout, currentScannerCallable.id); | ||
cs.submit(retryingOnReplica, rpcTimeout, scannerTimeout, currentScannerCallable.id); | ||
} | ||
|
||
/** | ||
* As we have a call sequence for scan, it is useless to have a different rpc timeout which is | ||
* less than the scan timeout. If the server does not respond in time(usually this will not happen | ||
* as we have heartbeat now), we will get an OutOfOrderScannerNextException when resending the | ||
* next request and the only way to fix this is to close the scanner and open a new one. | ||
* <p> | ||
* The legacy behavior of ScannerCallable has been to use readRpcTimeout despite the above. If | ||
* using legacy behavior, we always use that. | ||
* <p> | ||
* If new behavior is enabled, we determine the rpc timeout to use based on whether the scanner is | ||
* open. If scanner is open, use scannerTimeout otherwise use readRpcTimeout. | ||
*/ | ||
private int getRpcTimeout() { | ||
if (useScannerTimeoutForNextCalls) { | ||
return isNextCall() ? scannerTimeout : readRpcTimeout; | ||
} else { | ||
return readRpcTimeout; | ||
} | ||
} | ||
|
||
private boolean isNextCall() { | ||
return currentScannerCallable != null && currentScannerCallable.scannerId != -1 | ||
&& !currentScannerCallable.renew && !currentScannerCallable.closed; | ||
} | ||
|
||
private void addCallsForOtherReplicas( | ||
ResultBoundedCompletionService<Pair<Result[], ScannerCallable>> cs, int min, int max) { | ||
ResultBoundedCompletionService<Pair<Result[], ScannerCallable>> cs, int min, int max, | ||
int rpcTimeout) { | ||
|
||
for (int id = min; id <= max; id++) { | ||
if (currentScannerCallable.id == id) { | ||
|
@@ -344,7 +375,7 @@ private void addCallsForOtherReplicas( | |
setStartRowForReplicaCallable(s); | ||
outstandingCallables.add(s); | ||
RetryingRPC retryingOnReplica = new RetryingRPC(s); | ||
cs.submit(retryingOnReplica, readRpcTimeout, scannerTimeout, id); | ||
cs.submit(retryingOnReplica, rpcTimeout, scannerTimeout, id); | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While I was here and needed a new config, I cleaned these up. Configuration getter calls introduce synchronization and parse cost, so it's better to use what we already parsed in ConnectionConfiguration.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍