From db09e803ddbdea3dd7f3a03b2e5a7093a1a35805 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 8 Jun 2020 17:14:45 +0200 Subject: [PATCH] Fix Broken Numeric Shard Generations in RepositoryData (#57813) Fix broken numeric shard generations when reading them from the wire or physically from the physical repository. This should be the cheapest way to clean up broken shard generations in a BwC and safe-to-backport manner for now. We can potentially further optimize this by also not doing the checks on the generations based on the versions we see in the `RepositoryData` but I don't think it matters much since we will read `RepositoryData` from cache in almost all cases. Closes #57798 --- .../CorruptedBlobStoreRepositoryIT.java | 73 ++++++++++++++++++- .../repositories/RepositoryData.java | 14 +++- .../repositories/ShardGenerations.java | 20 +++++ .../blobstore/BlobStoreRepository.java | 8 +- .../repositories/RepositoryDataTests.java | 12 +-- .../blobstore/BlobStoreTestUtil.java | 2 +- 6 files changed, 116 insertions(+), 13 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java index e9a5fbb734de9..8483dc198de86 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java @@ -53,6 +53,7 @@ import java.nio.file.StandardOpenOption; import java.util.Collections; import java.util.Locale; +import java.util.Map; import java.util.function.Function; import java.util.stream.Collectors; @@ -332,7 +333,7 @@ public void testHandleSnapshotErrorWithBwCFormat() throws IOException { NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, Strings.toString(jsonBuilder).replace(Version.CURRENT.toString(), SnapshotsService.OLD_SNAPSHOT_FORMAT.toString())), - repositoryData.getGenId()); + repositoryData.getGenId(), randomBoolean()); Files.write(repoPath.resolve(BlobStoreRepository.INDEX_FILE_PREFIX + repositoryData.getGenId()), BytesReference.toBytes(BytesReference.bytes( downgradedRepoData.snapshotsToXContent(XContentFactory.jsonBuilder(), SnapshotsService.OLD_SNAPSHOT_FORMAT))), @@ -362,6 +363,76 @@ public void testHandleSnapshotErrorWithBwCFormat() throws IOException { assertCreateSnapshotSuccess(repoName, "snapshot-2"); } + public void testRepairBrokenShardGenerations() throws IOException { + final String repoName = "test-repo"; + final Path repoPath = randomRepoPath(); + createRepository(repoName, "fs", repoPath); + + // Workaround to simulate BwC situation: taking a snapshot without indices here so that we don't create any new version shard + // generations (the existence of which would short-circuit checks for the repo containing old version snapshots) + final String oldVersionSnapshot = "old-version-snapshot"; + final CreateSnapshotResponse createSnapshotResponse = client().admin().cluster() + .prepareCreateSnapshot(repoName, oldVersionSnapshot).setIndices().setWaitForCompletion(true).get(); + assertThat(createSnapshotResponse.getSnapshotInfo().totalShards(), is(0)); + + logger.info("--> writing downgraded RepositoryData"); + final RepositoryData repositoryData = getRepositoryData(repoName); + final XContentBuilder jsonBuilder = JsonXContent.contentBuilder(); + repositoryData.snapshotsToXContent(jsonBuilder, SnapshotsService.OLD_SNAPSHOT_FORMAT); + final RepositoryData downgradedRepoData = RepositoryData.snapshotsFromXContent(JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + Strings.toString(jsonBuilder).replace(Version.CURRENT.toString(), SnapshotsService.OLD_SNAPSHOT_FORMAT.toString())), + repositoryData.getGenId(), randomBoolean()); + Files.write(repoPath.resolve(BlobStoreRepository.INDEX_FILE_PREFIX + repositoryData.getGenId()), + BytesReference.toBytes(BytesReference.bytes( + downgradedRepoData.snapshotsToXContent(XContentFactory.jsonBuilder(), SnapshotsService.OLD_SNAPSHOT_FORMAT))), + StandardOpenOption.TRUNCATE_EXISTING); + + logger.info("--> recreating repository to clear caches"); + client().admin().cluster().prepareDeleteRepository(repoName).get(); + createRepository(repoName, "fs", repoPath); + + final String indexName = "test-index"; + createIndex(indexName); + + assertCreateSnapshotSuccess(repoName, "snapshot-1"); + + logger.info("--> delete old version snapshot"); + client().admin().cluster().prepareDeleteSnapshot(repoName, oldVersionSnapshot).get(); + + logger.info("--> move shard level metadata to new generation and make RepositoryData point at an older generation"); + final IndexId indexId = getRepositoryData(repoName).resolveIndexId(indexName); + final Path shardPath = repoPath.resolve("indices").resolve(indexId.getId()).resolve("0"); + final Path initialShardMetaPath = shardPath.resolve(BlobStoreRepository.INDEX_FILE_PREFIX + "0"); + assertFileExists(initialShardMetaPath); + Files.move(initialShardMetaPath, shardPath.resolve(BlobStoreRepository.INDEX_FILE_PREFIX + randomIntBetween(1, 1000))); + + final RepositoryData repositoryData1 = getRepositoryData(repoName); + final Map snapshotIds = + repositoryData1.getSnapshotIds().stream().collect(Collectors.toMap(SnapshotId::getUUID, Function.identity())); + final RepositoryData brokenRepoData = new RepositoryData( + repositoryData1.getGenId(), snapshotIds, snapshotIds.values().stream().collect( + Collectors.toMap(SnapshotId::getUUID, repositoryData1::getSnapshotState)), + snapshotIds.values().stream().collect( + Collectors.toMap(SnapshotId::getUUID, repositoryData1::getVersion)), + repositoryData1.getIndices().values().stream().collect( + Collectors.toMap(Function.identity(), repositoryData1::getSnapshots) + ), ShardGenerations.builder().putAll(repositoryData1.shardGenerations()).put(indexId, 0, "0").build(), + repositoryData1.indexMetaDataGenerations() + ); + Files.write(repoPath.resolve(BlobStoreRepository.INDEX_FILE_PREFIX + repositoryData1.getGenId()), + BytesReference.toBytes(BytesReference.bytes( + brokenRepoData.snapshotsToXContent(XContentFactory.jsonBuilder(), Version.CURRENT))), + StandardOpenOption.TRUNCATE_EXISTING); + + logger.info("--> recreating repository to clear caches"); + client().admin().cluster().prepareDeleteRepository(repoName).get(); + createRepository(repoName, "fs", repoPath); + + assertCreateSnapshotSuccess(repoName, "snapshot-2"); + } + private void assertCreateSnapshotSuccess(String repoName, String snapshotName) { logger.info("--> create another snapshot"); final SnapshotInfo snapshotInfo = client().admin().cluster().prepareCreateSnapshot(repoName, snapshotName) diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java b/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java index 967f32bd40fc7..d660088b374fc 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java @@ -502,8 +502,12 @@ public IndexMetaDataGenerations indexMetaDataGenerations() { /** * Reads an instance of {@link RepositoryData} from x-content, loading the snapshots and indices metadata. + * + * @param fixBrokenShardGens set to {@code true} to filter out broken shard generations read from the {@code parser} via + * {@link ShardGenerations#fixShardGeneration}. Used to disable fixing broken generations when reading + * from cached bytes that we trust to not contain broken generations. */ - public static RepositoryData snapshotsFromXContent(final XContentParser parser, long genId) throws IOException { + public static RepositoryData snapshotsFromXContent(XContentParser parser, long genId, boolean fixBrokenShardGens) throws IOException { final Map snapshots = new HashMap<>(); final Map snapshotStates = new HashMap<>(); final Map snapshotVersions = new HashMap<>(); @@ -615,7 +619,13 @@ public static RepositoryData snapshotsFromXContent(final XContentParser parser, assert indexId != null; indexSnapshots.put(indexId, Collections.unmodifiableList(snapshotIds)); for (int i = 0; i < gens.size(); i++) { - shardGenerations.put(indexId, i, gens.get(i)); + String parsedGen = gens.get(i); + if (fixBrokenShardGens) { + parsedGen = ShardGenerations.fixShardGeneration(parsedGen); + } + if (parsedGen != null) { + shardGenerations.put(indexId, i, parsedGen); + } } } } else if (INDEX_METADATA_IDENTIFIERS.equals(field)) { diff --git a/server/src/main/java/org/elasticsearch/repositories/ShardGenerations.java b/server/src/main/java/org/elasticsearch/repositories/ShardGenerations.java index 8b7f799d0e7c2..4115435c1e075 100644 --- a/server/src/main/java/org/elasticsearch/repositories/ShardGenerations.java +++ b/server/src/main/java/org/elasticsearch/repositories/ShardGenerations.java @@ -20,6 +20,7 @@ package org.elasticsearch.repositories; import org.elasticsearch.common.Nullable; +import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus; import java.util.Arrays; import java.util.Collection; @@ -30,6 +31,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.regex.Pattern; import java.util.stream.Collectors; public final class ShardGenerations { @@ -54,6 +56,24 @@ private ShardGenerations(Map> shardGenerations) { this.shardGenerations = shardGenerations; } + private static final Pattern IS_NUMBER = Pattern.compile("^\\d+$"); + + /** + * Filters out unreliable numeric shard generations read from {@link RepositoryData} or {@link IndexShardSnapshotStatus}, returning + * {@code null} in their place. + * @see Issue #57988 + * + * @param shardGeneration shard generation to fix + * @return given shard generation or {@code null} if it was filtered out or {@code null} was passed + */ + @Nullable + public static String fixShardGeneration(@Nullable String shardGeneration) { + if (shardGeneration == null) { + return null; + } + return IS_NUMBER.matcher(shardGeneration).matches() ? null : shardGeneration; + } + /** * Returns the total number of shards tracked by this instance. */ diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index 1d9182c0356c1..74950e34be634 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -1340,7 +1340,7 @@ private RepositoryData repositoryDataFromCachedEntry(Tuple return RepositoryData.snapshotsFromXContent( XContentType.JSON.xContent().createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, - CompressorFactory.COMPRESSOR.streamInput(cacheEntry.v2().streamInput())), cacheEntry.v1()); + CompressorFactory.COMPRESSOR.streamInput(cacheEntry.v2().streamInput())), cacheEntry.v1(), false); } private RepositoryException corruptedStateException(@Nullable Exception cause) { @@ -1405,7 +1405,7 @@ private RepositoryData getRepositoryData(long indexGen) { try (InputStream blob = blobContainer().readBlob(snapshotsIndexBlobName); XContentParser parser = XContentType.JSON.xContent().createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, blob)) { - return RepositoryData.snapshotsFromXContent(parser, indexGen); + return RepositoryData.snapshotsFromXContent(parser, indexGen, true); } } catch (IOException ioe) { if (bestEffortConsistency) { @@ -1697,7 +1697,7 @@ public void snapshotShard(Store store, MapperService mapperService, SnapshotId s final ShardId shardId = store.shardId(); final long startTime = threadPool.absoluteTimeInMillis(); try { - final String generation = snapshotStatus.generation(); + final String generation = ShardGenerations.fixShardGeneration(snapshotStatus.generation()); logger.debug("[{}] [{}] snapshot to [{}] [{}] ...", shardId, snapshotId, metadata.name(), generation); final BlobContainer shardContainer = shardContainer(indexId, shardId); final Set blobs; @@ -2171,6 +2171,8 @@ public BlobStoreIndexShardSnapshot loadShardSnapshot(BlobContainer shardContaine private Tuple buildBlobStoreIndexShardSnapshots(Set blobs, BlobContainer shardContainer, @Nullable String generation) throws IOException { + assert ShardGenerations.fixShardGeneration(generation) == generation + : "Generation must not be numeric but received [" + generation + "]"; if (generation != null) { if (generation.equals(ShardGenerations.NEW_SHARD_GEN)) { return new Tuple<>(BlobStoreIndexShardSnapshots.EMPTY, ShardGenerations.NEW_SHARD_GEN); diff --git a/server/src/test/java/org/elasticsearch/repositories/RepositoryDataTests.java b/server/src/test/java/org/elasticsearch/repositories/RepositoryDataTests.java index 3952c6eb9e275..3bedd479d7020 100644 --- a/server/src/test/java/org/elasticsearch/repositories/RepositoryDataTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/RepositoryDataTests.java @@ -80,7 +80,7 @@ public void testXContent() throws IOException { repositoryData.snapshotsToXContent(builder, Version.CURRENT); try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) { long gen = (long) randomIntBetween(0, 500); - RepositoryData fromXContent = RepositoryData.snapshotsFromXContent(parser, gen); + RepositoryData fromXContent = RepositoryData.snapshotsFromXContent(parser, gen, randomBoolean()); assertEquals(repositoryData, fromXContent); assertEquals(gen, fromXContent.getGenId()); } @@ -100,14 +100,14 @@ public void testAddSnapshots() { IndexId indexId = new IndexId(randomAlphaOfLength(7), UUIDs.randomBase64UUID()); newIndices.add(indexId); indices.add(indexId); - builder.put(indexId, 0, "1"); + builder.put(indexId, 0, UUIDs.randomBase64UUID(random())); } int numOld = randomIntBetween(1, indexIdMap.size()); List indexNames = new ArrayList<>(indexIdMap.keySet()); for (int i = 0; i < numOld; i++) { final IndexId indexId = indexIdMap.get(indexNames.get(i)); indices.add(indexId); - builder.put(indexId, 0, "2"); + builder.put(indexId, 0, UUIDs.randomBase64UUID(random())); } final ShardGenerations shardGenerations = builder.build(); final Map indexLookup = @@ -197,7 +197,7 @@ public void testIndexThatReferencesAnUnknownSnapshot() throws IOException { repositoryData.snapshotsToXContent(builder, Version.CURRENT); RepositoryData parsedRepositoryData; try (XContentParser xParser = createParser(builder)) { - parsedRepositoryData = RepositoryData.snapshotsFromXContent(xParser, repositoryData.getGenId()); + parsedRepositoryData = RepositoryData.snapshotsFromXContent(xParser, repositoryData.getGenId(), randomBoolean()); } assertEquals(repositoryData, parsedRepositoryData); @@ -236,7 +236,7 @@ public void testIndexThatReferencesAnUnknownSnapshot() throws IOException { try (XContentParser xParser = createParser(corruptedBuilder)) { ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> - RepositoryData.snapshotsFromXContent(xParser, corruptedRepositoryData.getGenId())); + RepositoryData.snapshotsFromXContent(xParser, corruptedRepositoryData.getGenId(), randomBoolean())); assertThat(e.getMessage(), equalTo("Detected a corrupted repository, index " + corruptedIndexId + " references an unknown " + "snapshot uuid [_does_not_exist]")); } @@ -273,7 +273,7 @@ public void testIndexThatReferenceANullSnapshot() throws IOException { try (XContentParser xParser = createParser(builder)) { ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> - RepositoryData.snapshotsFromXContent(xParser, randomNonNegativeLong())); + RepositoryData.snapshotsFromXContent(xParser, randomNonNegativeLong(), randomBoolean())); assertThat(e.getMessage(), equalTo("Detected a corrupted repository, " + "index [docs/_id] references an unknown snapshot uuid [null]")); } diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreTestUtil.java b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreTestUtil.java index 138db71fc282c..8a5d4e29c001d 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreTestUtil.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreTestUtil.java @@ -129,7 +129,7 @@ public static void assertConsistency(BlobStoreRepository repository, Executor ex try (InputStream blob = blobContainer.readBlob("index-" + latestGen); XContentParser parser = XContentType.JSON.xContent().createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, blob)) { - repositoryData = RepositoryData.snapshotsFromXContent(parser, latestGen); + repositoryData = RepositoryData.snapshotsFromXContent(parser, latestGen, false); } assertIndexUUIDs(repository, repositoryData); assertSnapshotUUIDs(repository, repositoryData);