Skip to content

Commit

Permalink
Limit RTG exceptions to retry on (#105003)
Browse files Browse the repository at this point in the history
Limit the exceptions on which a real-time get (RTG) is retried, and do not throw in
`getCurrentNodeOfPrimary`. Follow-up to
#104579 (comment).

Relates ES-5727
  • Loading branch information
pxsalehi authored Feb 1, 2024
1 parent 149ec37 commit e2d68ae
Showing 1 changed file with 9 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import org.elasticsearch.action.ActionRunnable;
import org.elasticsearch.action.ActionType;
import org.elasticsearch.action.NoShardAvailableActionException;
import org.elasticsearch.action.UnavailableShardsException;
import org.elasticsearch.action.admin.indices.refresh.TransportShardRefreshAction;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.replication.BasicReplicationRequest;
Expand Down Expand Up @@ -194,6 +193,10 @@ private void handleGetOnUnpromotableShard(GetRequest request, IndexShard indexSh
ShardId shardId = indexShard.shardId();
if (request.refresh()) {
var node = getCurrentNodeOfPrimary(clusterService.state(), shardId);
if (node == null) {
listener.onFailure(new NoShardAvailableActionException(shardId, "primary shard is not active"));
return;
}
logger.trace("send refresh action for shard {} to node {}", shardId, node.getId());
var refreshRequest = new BasicReplicationRequest(shardId);
refreshRequest.setParentTask(request.getParentTask());
Expand Down Expand Up @@ -230,10 +233,7 @@ private void getFromTranslog(
tryGetFromTranslog(request, indexShard, state, listener.delegateResponse((l, e) -> {
final var cause = ExceptionsHelper.unwrapCause(e);
logger.debug("get_from_translog failed", cause);
if (cause instanceof ShardNotFoundException
|| cause instanceof IndexNotFoundException
|| cause instanceof NoShardAvailableActionException
|| cause instanceof UnavailableShardsException) {
if (cause instanceof ShardNotFoundException || cause instanceof IndexNotFoundException) {
logger.debug("retrying get_from_translog");
observer.waitForNextChange(new ClusterStateObserver.Listener() {
@Override
Expand All @@ -260,6 +260,10 @@ public void onTimeout(TimeValue timeout) {
private void tryGetFromTranslog(GetRequest request, IndexShard indexShard, ClusterState state, ActionListener<GetResponse> listener) {
ShardId shardId = indexShard.shardId();
var node = getCurrentNodeOfPrimary(state, shardId);
if (node == null) {
listener.onFailure(new NoShardAvailableActionException(shardId, "primary shard is not active"));
return;
}
TransportGetFromTranslogAction.Request getFromTranslogRequest = new TransportGetFromTranslogAction.Request(request, shardId);
getFromTranslogRequest.setParentTask(request.getParentTask());
transportService.sendRequest(
Expand Down

0 comments on commit e2d68ae

Please sign in to comment.