From c41357baff43ee6c2bc3266b7b9c3313027b99d5 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Thu, 13 Aug 2020 14:56:50 -0400 Subject: [PATCH] Log more info when search ops higher than expected (#61108) We have seen a situation where the total search operations are higher than expected. Unfortunately, we did not have enough info to figure it out. This commit adds the failures to the error to provide more context and adjusts the log level in case of failure to debug. --- .../search/AbstractSearchAsyncAction.java | 34 +++++-------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java index 1c1b4ad9eeef6..94499f60fc408 100644 --- a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java @@ -377,36 +377,20 @@ private void onShardFailure(final int shardIndex, @Nullable ShardRouting shard, onShardFailure(shardIndex, shardTarget, e); final ShardRouting nextShard = shardIt.nextOrNull(); final boolean lastShard = nextShard == null; + logger.debug(() -> new ParameterizedMessage("{}: Failed to execute [{}] lastShard [{}]", + shard != null ? shard.shortSummary() : shardIt.shardId(), request, lastShard), e); if (lastShard) { onShardGroupFailure(shardIndex, shardTarget, e); } - - if (totalOps.incrementAndGet() == expectedTotalOps) { - if (logger.isDebugEnabled()) { - if (e != null && !TransportActions.isShardNotAvailableException(e)) { - logger.debug(new ParameterizedMessage( - "{}: Failed to execute [{}]", shard != null ? shard.shortSummary() : shardIt.shardId(), request), e); - } else if (logger.isTraceEnabled()) { - logger.trace(new ParameterizedMessage("{}: Failed to execute [{}]", shard, request), e); - } - } + final int totalOps = this.totalOps.incrementAndGet(); + if (totalOps == expectedTotalOps) { onPhaseDone(); + } else if (totalOps > expectedTotalOps) { + throw new AssertionError("unexpected higher total ops [" + totalOps + "] compared to expected [" + expectedTotalOps + "]", + new SearchPhaseExecutionException(getName(), "Shard failures", null, buildShardFailures())); } else { - // trace log this exception - logger.trace(() -> new ParameterizedMessage( - "{}: Failed to execute [{}] lastShard [{}]", - shard != null ? shard.shortSummary() : shardIt.shardId(), request, lastShard), e); if (lastShard == false) { performPhaseOnShard(shardIndex, shardIt, nextShard); - } else { - // no more shards active, add a failure - if (logger.isDebugEnabled() && !logger.isTraceEnabled()) { // do not double log this exception - if (e != null && !TransportActions.isShardNotAvailableException(e)) { - logger.debug(new ParameterizedMessage( - "{}: Failed to execute [{}] lastShard [{}]", - shard != null ? shard.shortSummary() : shardIt.shardId(), request, lastShard), e); - } - } } } } @@ -505,8 +489,8 @@ private void successfulShardExecution(SearchShardIterator shardsIt) { if (xTotalOps == expectedTotalOps) { onPhaseDone(); } else if (xTotalOps > expectedTotalOps) { - throw new AssertionError("unexpected higher total ops [" + xTotalOps + "] compared to expected [" - + expectedTotalOps + "]"); + throw new AssertionError("unexpected higher total ops [" + xTotalOps + "] compared to expected [" + expectedTotalOps + "]", + new SearchPhaseExecutionException(getName(), "Shard failures", null, buildShardFailures())); } }