Skip to content

Commit

Permalink
Fix ref count handling in Engine.failEngine (#48639) (#48646)
Browse files Browse the repository at this point in the history
We can run into an already-closed store here, in which case
store.incRef() throws when trying to increment the ref count.
Switch to the guarded ref count increment (store.tryIncRef()) instead.

closes #48625
  • Loading branch information
original-brownbear authored and dnhatn committed Nov 3, 2019
1 parent 34013d0 commit b05996e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 9 deletions.
21 changes: 13 additions & 8 deletions server/src/main/java/org/elasticsearch/index/engine/Engine.java
Original file line number Diff line number Diff line change
Expand Up @@ -1162,7 +1162,6 @@ public void failEngine(String reason, @Nullable Exception failure) {
maybeDie(reason, failure);
}
if (failEngineLock.tryLock()) {
store.incRef();
try {
if (failedEngine.get() != null) {
logger.warn(() ->
Expand All @@ -1184,11 +1183,19 @@ public void failEngine(String reason, @Nullable Exception failure) {
// on the same node that we don't see the corrupted marker file when
// the shard is initializing
if (Lucene.isCorruptionException(failure)) {
try {
store.markStoreCorrupted(new IOException("failed engine (reason: [" + reason + "])",
ExceptionsHelper.unwrapCorruption(failure)));
} catch (IOException e) {
logger.warn("Couldn't mark store corrupted", e);
if (store.tryIncRef()) {
try {
store.markStoreCorrupted(new IOException("failed engine (reason: [" + reason + "])",
ExceptionsHelper.unwrapCorruption(failure)));
} catch (IOException e) {
logger.warn("Couldn't mark store corrupted", e);
} finally {
store.decRef();
}
} else {
logger.warn(() ->
new ParameterizedMessage("tried to mark store as corrupted but store is already closed. [{}]", reason),
failure);
}
}
eventListener.onFailedEngine(reason, failure);
Expand All @@ -1197,8 +1204,6 @@ public void failEngine(String reason, @Nullable Exception failure) {
if (failure != null) inner.addSuppressed(failure);
// don't bubble up these exceptions up
logger.warn("failEngine threw exception", inner);
} finally {
store.decRef();
}
} else {
logger.debug(() -> new ParameterizedMessage("tried to fail engine but could not acquire lock - engine should " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,6 @@ public void testIndexAndRelocateConcurrently() throws Exception {
docs[i] = client().prepareIndex("test", "type1", id).setSource("field1", English.intToEnglish(numDocs + i));
}
indexRandom(true, docs);
numDocs *= 2;

logger.info(" --> waiting for relocation to complete");
ensureGreen(TimeValue.timeValueSeconds(60), "test"); // move all shards to the new nodes (it waits on relocation)
Expand Down

0 comments on commit b05996e

Please sign in to comment.