Skip to content

Commit

Permalink
Remove exception wrapping in BatchedRerouteService (#97224)
Browse files Browse the repository at this point in the history
Today the `BatchedRerouteService` wraps exceptions from the reroute
process in an additional `ElasticsearchException`. Most callers do not
care about this wrapping, they simply log the exception. The only places
that do care are:

- `TransportMigrateToDataTiersAction` (only affects the log level)
- `TransportClusterUpdateSettingsAction`

Both of these places expect to receive an exception matching
`isPublishFailureException` on a failure to publish the rerouted state,
but this never happens because of the extra wrapping.

This commit removes the unnecessary wrapping so that these callers see
the exception semantics they expect.
  • Loading branch information
DaveCTurner authored Jun 29, 2023
1 parent e35d9e7 commit 69ff7df
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 2 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/97224.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 97224
summary: Remove exception wrapping in `BatchedRerouteService`
area: Allocation
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ public void onFailure(Exception e) {
e
);
}
ActionListener.onFailure(currentListeners, new ElasticsearchException("delayed reroute [" + reason + "] failed", e));
ActionListener.onFailure(currentListeners, e);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,22 @@
*/
package org.elasticsearch.cluster.routing;

import org.apache.logging.log4j.Level;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateUpdateTask;
import org.elasticsearch.cluster.NotMasterException;
import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.Randomness;
import org.elasticsearch.common.util.concurrent.ThreadContext;
import org.elasticsearch.test.ClusterServiceUtils;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.MockLogAppender;
import org.elasticsearch.test.junit.annotations.TestLogging;
import org.elasticsearch.threadpool.TestThreadPool;
import org.elasticsearch.threadpool.ThreadPool;
import org.junit.After;
Expand All @@ -28,6 +33,7 @@
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
Expand Down Expand Up @@ -229,6 +235,102 @@ public void testNotifiesOnFailure() throws InterruptedException {
}
}

assertTrue(countDownLatch.await(10, TimeUnit.SECONDS)); // i.e. it doesn't leak any listeners
safeAwait(countDownLatch); // i.e. it doesn't leak any listeners
}

@TestLogging(reason = "testing log output", value = "org.elasticsearch.cluster.routing.BatchedRerouteService:DEBUG")
public void testExceptionFidelity() {

final var mockLogAppender = new MockLogAppender();
try (var ignored = mockLogAppender.capturing(BatchedRerouteService.class)) {

clusterService.getMasterService()
.setClusterStatePublisher(
(event, publishListener, ackListener) -> publishListener.onFailure(new FailedToCommitClusterStateException("simulated"))
);

// Case 1: an exception thrown from within the reroute itself

mockLogAppender.addExpectation(
new MockLogAppender.SeenEventExpectation(
"failure within reroute",
BatchedRerouteService.class.getCanonicalName(),
Level.ERROR,
"unexpected failure"
)
);

final BatchedRerouteService failingRerouteService = new BatchedRerouteService(clusterService, (s, r, l) -> {
throw new ElasticsearchException("simulated");
});
final var rerouteFailureFuture = new PlainActionFuture<Void>();
failingRerouteService.reroute("publish failure", randomFrom(EnumSet.allOf(Priority.class)), rerouteFailureFuture);
assertThat(
expectThrows(ExecutionException.class, ElasticsearchException.class, () -> rerouteFailureFuture.get(10, TimeUnit.SECONDS))
.getMessage(),
equalTo("simulated")
);
mockLogAppender.assertAllExpectationsMatched();

// None of the other cases should yield any log messages by default

mockLogAppender.addExpectation(
new MockLogAppender.UnseenEventExpectation("no errors", BatchedRerouteService.class.getCanonicalName(), Level.ERROR, "*")
);
mockLogAppender.addExpectation(
new MockLogAppender.UnseenEventExpectation("no warnings", BatchedRerouteService.class.getCanonicalName(), Level.WARN, "*")
);
mockLogAppender.addExpectation(
new MockLogAppender.UnseenEventExpectation("no info", BatchedRerouteService.class.getCanonicalName(), Level.INFO, "*")
);

// Case 2: a FailedToCommitClusterStateException (see the call to setClusterStatePublisher above)

final BatchedRerouteService batchedRerouteService = new BatchedRerouteService(clusterService, (s, r, l) -> {
l.onResponse(null);
return ClusterState.builder(s).build();
});

mockLogAppender.addExpectation(
new MockLogAppender.SeenEventExpectation(
"publish failure",
BatchedRerouteService.class.getCanonicalName(),
Level.DEBUG,
"unexpected failure"
)
);

final var publishFailureFuture = new PlainActionFuture<Void>();
batchedRerouteService.reroute("publish failure", randomFrom(EnumSet.allOf(Priority.class)), publishFailureFuture);
expectThrows(
ExecutionException.class,
FailedToCommitClusterStateException.class,
() -> publishFailureFuture.get(10, TimeUnit.SECONDS)
);
mockLogAppender.assertAllExpectationsMatched();

// Case 3: a NotMasterException

PlainActionFuture.<Void, RuntimeException>get(future -> {
clusterService.getClusterApplierService().onNewClusterState("simulated", () -> {
final var state = clusterService.state();
return ClusterState.builder(state).nodes(state.nodes().withMasterNodeId(null)).build();
}, future);
}, 10, TimeUnit.SECONDS);

mockLogAppender.addExpectation(
new MockLogAppender.SeenEventExpectation(
"not-master failure",
BatchedRerouteService.class.getCanonicalName(),
Level.DEBUG,
"unexpected failure"
)
);
final var notMasterFuture = new PlainActionFuture<Void>();
batchedRerouteService.reroute("not-master failure", randomFrom(EnumSet.allOf(Priority.class)), notMasterFuture);
expectThrows(ExecutionException.class, NotMasterException.class, () -> notMasterFuture.get(10, TimeUnit.SECONDS));

mockLogAppender.assertAllExpectationsMatched();
}
}
}

0 comments on commit 69ff7df

Please sign in to comment.