Skip to content

Commit

Permalink
Shard CLI tool always check shards (elastic#41480)
Browse files Browse the repository at this point in the history
The shard CLI tool would not do anything if a corruption marker was not
present. But a corruption marker is only added if a corruption is
detected during indexing/writing, not if a search or other read fails.

Changed the tool to always check shards regardless of corruption marker
presence.

Related to elastic#41298
  • Loading branch information
henningandersen authored and Gurkan Kaymak committed May 27, 2019
1 parent 7a0316d commit f846bd5
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,7 @@ public Tuple<RemoveCorruptedShardDataCommand.CleanStatus, String> getCleanStatus
Lock writeLock,
PrintStream printStream,
boolean verbose) throws IOException {
if (RemoveCorruptedShardDataCommand.isCorruptMarkerFileIsPresent(indexDirectory) == false) {
return Tuple.tuple(RemoveCorruptedShardDataCommand.CleanStatus.CLEAN, null);
}
boolean markedCorrupted = RemoveCorruptedShardDataCommand.isCorruptMarkerFileIsPresent(indexDirectory);

final CheckIndex.Status status;
try (CheckIndex checker = new CheckIndex(indexDirectory, writeLock)) {
Expand All @@ -55,7 +53,9 @@ public Tuple<RemoveCorruptedShardDataCommand.CleanStatus, String> getCleanStatus
}

return status.clean
? Tuple.tuple(RemoveCorruptedShardDataCommand.CleanStatus.CLEAN_WITH_CORRUPTED_MARKER, null)
? Tuple.tuple(markedCorrupted
? RemoveCorruptedShardDataCommand.CleanStatus.CLEAN_WITH_CORRUPTED_MARKER
: RemoveCorruptedShardDataCommand.CleanStatus.CLEAN, null)
: Tuple.tuple(RemoveCorruptedShardDataCommand.CleanStatus.CORRUPTED,
"Corrupted Lucene index segments found - " + status.totLoseDocCount + " documents will be lost.");
}
Expand All @@ -67,8 +67,6 @@ public void execute(Terminal terminal,
Lock writeLock,
PrintStream printStream,
boolean verbose) throws IOException {
checkCorruptMarkerFileIsPresent(indexDirectory);

final CheckIndex.Status status;
try (CheckIndex checker = new CheckIndex(indexDirectory, writeLock)) {

Expand All @@ -90,11 +88,4 @@ public void execute(Terminal terminal,
}
}
}

protected void checkCorruptMarkerFileIsPresent(Directory directory) throws IOException {
if (RemoveCorruptedShardDataCommand.isCorruptMarkerFileIsPresent(directory) == false) {
throw new ElasticsearchException("There is no corruption file marker");
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
private Path translogPath;
private Path indexPath;

private static final Pattern NUM_CORRUPT_DOCS_PATTERN =
Pattern.compile("Corrupted Lucene index segments found -\\s+(?<docs>\\d+) documents will be lost.");

@Before
public void setup() throws IOException {
shardId = new ShardId("index0", "_na_", 0);
Expand Down Expand Up @@ -154,11 +157,13 @@ public void testCorruptedIndex() throws Exception {
final boolean corruptSegments = randomBoolean();
CorruptionUtils.corruptIndex(random(), indexPath, corruptSegments);

// test corrupted shard
final IndexShard corruptedShard = reopenIndexShard(true);
allowShardFailures();
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
closeShards(corruptedShard);
if (randomBoolean()) {
// test corrupted shard and add corruption marker
final IndexShard corruptedShard = reopenIndexShard(true);
allowShardFailures();
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
closeShards(corruptedShard);
}

final RemoveCorruptedShardDataCommand command = new RemoveCorruptedShardDataCommand();
final MockTerminal t = new MockTerminal();
Expand Down Expand Up @@ -196,8 +201,7 @@ public void testCorruptedIndex() throws Exception {

final Set<String> shardDocUIDs = getShardDocUIDs(newShard);

final Pattern pattern = Pattern.compile("Corrupted Lucene index segments found -\\s+(?<docs>\\d+) documents will be lost.");
final Matcher matcher = pattern.matcher(output);
final Matcher matcher = NUM_CORRUPT_DOCS_PATTERN.matcher(output);
assertThat(matcher.find(), equalTo(true));
final int expectedNumDocs = numDocs - Integer.parseInt(matcher.group("docs"));

Expand Down Expand Up @@ -272,12 +276,13 @@ public void testCorruptedBothIndexAndTranslog() throws Exception {

CorruptionUtils.corruptIndex(random(), indexPath, false);

// test corrupted shard
final IndexShard corruptedShard = reopenIndexShard(true);
allowShardFailures();
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
closeShards(corruptedShard);

if (randomBoolean()) {
// test corrupted shard and add corruption marker
final IndexShard corruptedShard = reopenIndexShard(true);
allowShardFailures();
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
closeShards(corruptedShard);
}
TestTranslog.corruptRandomTranslogFile(logger, random(), Arrays.asList(translogPath));

final RemoveCorruptedShardDataCommand command = new RemoveCorruptedShardDataCommand();
Expand Down Expand Up @@ -313,8 +318,7 @@ public void testCorruptedBothIndexAndTranslog() throws Exception {

final Set<String> shardDocUIDs = getShardDocUIDs(newShard);

final Pattern pattern = Pattern.compile("Corrupted Lucene index segments found -\\s+(?<docs>\\d+) documents will be lost.");
final Matcher matcher = pattern.matcher(output);
final Matcher matcher = NUM_CORRUPT_DOCS_PATTERN.matcher(output);
assertThat(matcher.find(), equalTo(true));
final int expectedNumDocs = numDocsToKeep - Integer.parseInt(matcher.group("docs"));

Expand Down Expand Up @@ -347,6 +351,62 @@ public void testResolveIndexDirectory() throws Exception {
shardPath -> assertThat(shardPath.resolveIndex(), equalTo(indexPath)));
}

public void testCleanWithCorruptionMarker() throws Exception {
// index some docs in several segments
final int numDocs = indexDocs(indexShard, true);

indexShard.store().markStoreCorrupted(null);

closeShards(indexShard);

allowShardFailures();
final IndexShard corruptedShard = reopenIndexShard(true);
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
closeShards(corruptedShard);

final RemoveCorruptedShardDataCommand command = new RemoveCorruptedShardDataCommand();
final MockTerminal t = new MockTerminal();
final OptionParser parser = command.getParser();

final OptionSet options = parser.parse("-d", translogPath.toString());
// run command with dry-run
t.addTextInput("n"); // mean dry run
t.addTextInput("n"); // mean dry run
t.setVerbosity(Terminal.Verbosity.VERBOSE);
try {
command.execute(t, options, environment);
fail();
} catch (ElasticsearchException e) {
assertThat(e.getMessage(), containsString("aborted by user"));
assertThat(t.getOutput(), containsString("Continue and remove corrupted data from the shard ?"));
assertThat(t.getOutput(), containsString("Lucene index is marked corrupted, but no corruption detected"));
}

logger.info("--> output:\n{}", t.getOutput());

// run command without dry-run
t.reset();
t.addTextInput("y");
t.addTextInput("y");
command.execute(t, options, environment);

final String output = t.getOutput();
logger.info("--> output:\n{}", output);

failOnShardFailures();
final IndexShard newShard = newStartedShard(p -> reopenIndexShard(false), true);

final Set<String> shardDocUIDs = getShardDocUIDs(newShard);
assertEquals(numDocs, shardDocUIDs.size());

assertThat(t.getOutput(), containsString("This shard has been marked as corrupted but no corruption can now be detected."));

final Matcher matcher = NUM_CORRUPT_DOCS_PATTERN.matcher(output);
assertFalse(matcher.find());

closeShards(newShard);
}

private IndexShard reopenIndexShard(boolean corrupted) throws IOException {
// open shard with the same location
final ShardRouting shardRouting = ShardRoutingHelper.initWithSameId(indexShard.routingEntry(),
Expand Down

0 comments on commit f846bd5

Please sign in to comment.