From 61b60867517b54cc892ebc1b230cced965ab2e2b Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 8 Jun 2020 14:33:42 +0200 Subject: [PATCH 1/2] Fix Broken Numeric Shard Generations in RepositoryData Fix broken numeric shard generations when reading them from the wire or physically from the physical repository. This should be the cheapest way to clean up broken shard generations in a BwC and safe-to-backport manner for now. We can potentially further optimize this by also not doing the checks on the generations based on the versions we see in the `RepositoryData` but I don't think it matters much since we will read `RepositoryData` from cache in almost all cases. Closes #57798 --- .../CorruptedBlobStoreRepositoryIT.java | 73 ++++++++++++++++++- .../repositories/RepositoryData.java | 10 ++- .../repositories/ShardGenerations.java | 20 +++++ .../blobstore/BlobStoreRepository.java | 8 +- .../repositories/RepositoryDataTests.java | 12 +-- .../blobstore/BlobStoreTestUtil.java | 2 +- 6 files changed, 112 insertions(+), 13 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java index e9a5fbb734de9..8483dc198de86 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/CorruptedBlobStoreRepositoryIT.java @@ -53,6 +53,7 @@ import java.nio.file.StandardOpenOption; import java.util.Collections; import java.util.Locale; +import java.util.Map; import java.util.function.Function; import java.util.stream.Collectors; @@ -332,7 +333,7 @@ public void testHandleSnapshotErrorWithBwCFormat() throws IOException { NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, Strings.toString(jsonBuilder).replace(Version.CURRENT.toString(), SnapshotsService.OLD_SNAPSHOT_FORMAT.toString())), - repositoryData.getGenId()); + repositoryData.getGenId(), randomBoolean()); Files.write(repoPath.resolve(BlobStoreRepository.INDEX_FILE_PREFIX + repositoryData.getGenId()), BytesReference.toBytes(BytesReference.bytes( downgradedRepoData.snapshotsToXContent(XContentFactory.jsonBuilder(), SnapshotsService.OLD_SNAPSHOT_FORMAT))), @@ -362,6 +363,76 @@ public void testHandleSnapshotErrorWithBwCFormat() throws IOException { assertCreateSnapshotSuccess(repoName, "snapshot-2"); } + public void testRepairBrokenShardGenerations() throws IOException { + final String repoName = "test-repo"; + final Path repoPath = randomRepoPath(); + createRepository(repoName, "fs", repoPath); + + // Workaround to simulate BwC situation: taking a snapshot without indices here so that we don't create any new version shard + // generations (the existence of which would short-circuit checks for the repo containing old version snapshots) + final String oldVersionSnapshot = "old-version-snapshot"; + final CreateSnapshotResponse createSnapshotResponse = client().admin().cluster() + .prepareCreateSnapshot(repoName, oldVersionSnapshot).setIndices().setWaitForCompletion(true).get(); + assertThat(createSnapshotResponse.getSnapshotInfo().totalShards(), is(0)); + + logger.info("--> writing downgraded RepositoryData"); + final RepositoryData repositoryData = getRepositoryData(repoName); + final XContentBuilder jsonBuilder = JsonXContent.contentBuilder(); + repositoryData.snapshotsToXContent(jsonBuilder, SnapshotsService.OLD_SNAPSHOT_FORMAT); + final RepositoryData downgradedRepoData = RepositoryData.snapshotsFromXContent(JsonXContent.jsonXContent.createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + Strings.toString(jsonBuilder).replace(Version.CURRENT.toString(), SnapshotsService.OLD_SNAPSHOT_FORMAT.toString())), + repositoryData.getGenId(), randomBoolean()); + Files.write(repoPath.resolve(BlobStoreRepository.INDEX_FILE_PREFIX + repositoryData.getGenId()), + BytesReference.toBytes(BytesReference.bytes( + downgradedRepoData.snapshotsToXContent(XContentFactory.jsonBuilder(), SnapshotsService.OLD_SNAPSHOT_FORMAT))), + StandardOpenOption.TRUNCATE_EXISTING); + + logger.info("--> recreating repository to clear caches"); + client().admin().cluster().prepareDeleteRepository(repoName).get(); + createRepository(repoName, "fs", repoPath); + + final String indexName = "test-index"; + createIndex(indexName); + + assertCreateSnapshotSuccess(repoName, "snapshot-1"); + + logger.info("--> delete old version snapshot"); + client().admin().cluster().prepareDeleteSnapshot(repoName, oldVersionSnapshot).get(); + + logger.info("--> move shard level metadata to new generation and make RepositoryData point at an older generation"); + final IndexId indexId = getRepositoryData(repoName).resolveIndexId(indexName); + final Path shardPath = repoPath.resolve("indices").resolve(indexId.getId()).resolve("0"); + final Path initialShardMetaPath = shardPath.resolve(BlobStoreRepository.INDEX_FILE_PREFIX + "0"); + assertFileExists(initialShardMetaPath); + Files.move(initialShardMetaPath, shardPath.resolve(BlobStoreRepository.INDEX_FILE_PREFIX + randomIntBetween(1, 1000))); + + final RepositoryData repositoryData1 = getRepositoryData(repoName); + final Map snapshotIds = + repositoryData1.getSnapshotIds().stream().collect(Collectors.toMap(SnapshotId::getUUID, Function.identity())); + final RepositoryData brokenRepoData = new RepositoryData( + repositoryData1.getGenId(), snapshotIds, snapshotIds.values().stream().collect( + Collectors.toMap(SnapshotId::getUUID, repositoryData1::getSnapshotState)), + snapshotIds.values().stream().collect( + Collectors.toMap(SnapshotId::getUUID, repositoryData1::getVersion)), + repositoryData1.getIndices().values().stream().collect( + Collectors.toMap(Function.identity(), repositoryData1::getSnapshots) + ), ShardGenerations.builder().putAll(repositoryData1.shardGenerations()).put(indexId, 0, "0").build(), + repositoryData1.indexMetaDataGenerations() + ); + Files.write(repoPath.resolve(BlobStoreRepository.INDEX_FILE_PREFIX + repositoryData1.getGenId()), + BytesReference.toBytes(BytesReference.bytes( + brokenRepoData.snapshotsToXContent(XContentFactory.jsonBuilder(), Version.CURRENT))), + StandardOpenOption.TRUNCATE_EXISTING); + + logger.info("--> recreating repository to clear caches"); + client().admin().cluster().prepareDeleteRepository(repoName).get(); + createRepository(repoName, "fs", repoPath); + + assertCreateSnapshotSuccess(repoName, "snapshot-2"); + } + private void assertCreateSnapshotSuccess(String repoName, String snapshotName) { logger.info("--> create another snapshot"); final SnapshotInfo snapshotInfo = client().admin().cluster().prepareCreateSnapshot(repoName, snapshotName) diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java b/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java index 967f32bd40fc7..8d685db005dd2 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java @@ -502,8 +502,12 @@ public IndexMetaDataGenerations indexMetaDataGenerations() { /** * Reads an instance of {@link RepositoryData} from x-content, loading the snapshots and indices metadata. + * + * @param fixBrokenShardGens set to {@code true} to filter out broken shard generations read from the {@code parser} via + * {@link ShardGenerations#fixShardGeneration}. Used to disable fixing broken generations when reading + * from cached bytes that we trust to not contain broken generations. */ - public static RepositoryData snapshotsFromXContent(final XContentParser parser, long genId) throws IOException { + public static RepositoryData snapshotsFromXContent(XContentParser parser, long genId, boolean fixBrokenShardGens) throws IOException { final Map snapshots = new HashMap<>(); final Map snapshotStates = new HashMap<>(); final Map snapshotVersions = new HashMap<>(); @@ -615,7 +619,9 @@ public static RepositoryData snapshotsFromXContent(final XContentParser parser, assert indexId != null; indexSnapshots.put(indexId, Collections.unmodifiableList(snapshotIds)); for (int i = 0; i < gens.size(); i++) { - shardGenerations.put(indexId, i, gens.get(i)); + final String parsedGen = gens.get(i); + shardGenerations.put( + indexId, i, fixBrokenShardGens ? ShardGenerations.fixShardGeneration(parsedGen) : parsedGen); } } } else if (INDEX_METADATA_IDENTIFIERS.equals(field)) { diff --git a/server/src/main/java/org/elasticsearch/repositories/ShardGenerations.java b/server/src/main/java/org/elasticsearch/repositories/ShardGenerations.java index 8b7f799d0e7c2..4115435c1e075 100644 --- a/server/src/main/java/org/elasticsearch/repositories/ShardGenerations.java +++ b/server/src/main/java/org/elasticsearch/repositories/ShardGenerations.java @@ -20,6 +20,7 @@ package org.elasticsearch.repositories; import org.elasticsearch.common.Nullable; +import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus; import java.util.Arrays; import java.util.Collection; @@ -30,6 +31,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.regex.Pattern; import java.util.stream.Collectors; public final class ShardGenerations { @@ -54,6 +56,24 @@ private ShardGenerations(Map> shardGenerations) { this.shardGenerations = shardGenerations; } + private static final Pattern IS_NUMBER = Pattern.compile("^\\d+$"); + + /** + * Filters out unreliable numeric shard generations read from {@link RepositoryData} or {@link IndexShardSnapshotStatus}, returning + * {@code null} in their place. + * @see Issue #57988 + * + * @param shardGeneration shard generation to fix + * @return given shard generation or {@code null} if it was filtered out or {@code null} was passed + */ + @Nullable + public static String fixShardGeneration(@Nullable String shardGeneration) { + if (shardGeneration == null) { + return null; + } + return IS_NUMBER.matcher(shardGeneration).matches() ? null : shardGeneration; + } + /** * Returns the total number of shards tracked by this instance. */ diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index 1d9182c0356c1..74950e34be634 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -1340,7 +1340,7 @@ private RepositoryData repositoryDataFromCachedEntry(Tuple return RepositoryData.snapshotsFromXContent( XContentType.JSON.xContent().createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, - CompressorFactory.COMPRESSOR.streamInput(cacheEntry.v2().streamInput())), cacheEntry.v1()); + CompressorFactory.COMPRESSOR.streamInput(cacheEntry.v2().streamInput())), cacheEntry.v1(), false); } private RepositoryException corruptedStateException(@Nullable Exception cause) { @@ -1405,7 +1405,7 @@ private RepositoryData getRepositoryData(long indexGen) { try (InputStream blob = blobContainer().readBlob(snapshotsIndexBlobName); XContentParser parser = XContentType.JSON.xContent().createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, blob)) { - return RepositoryData.snapshotsFromXContent(parser, indexGen); + return RepositoryData.snapshotsFromXContent(parser, indexGen, true); } } catch (IOException ioe) { if (bestEffortConsistency) { @@ -1697,7 +1697,7 @@ public void snapshotShard(Store store, MapperService mapperService, SnapshotId s final ShardId shardId = store.shardId(); final long startTime = threadPool.absoluteTimeInMillis(); try { - final String generation = snapshotStatus.generation(); + final String generation = ShardGenerations.fixShardGeneration(snapshotStatus.generation()); logger.debug("[{}] [{}] snapshot to [{}] [{}] ...", shardId, snapshotId, metadata.name(), generation); final BlobContainer shardContainer = shardContainer(indexId, shardId); final Set blobs; @@ -2171,6 +2171,8 @@ public BlobStoreIndexShardSnapshot loadShardSnapshot(BlobContainer shardContaine private Tuple buildBlobStoreIndexShardSnapshots(Set blobs, BlobContainer shardContainer, @Nullable String generation) throws IOException { + assert ShardGenerations.fixShardGeneration(generation) == generation + : "Generation must not be numeric but received [" + generation + "]"; if (generation != null) { if (generation.equals(ShardGenerations.NEW_SHARD_GEN)) { return new Tuple<>(BlobStoreIndexShardSnapshots.EMPTY, ShardGenerations.NEW_SHARD_GEN); diff --git a/server/src/test/java/org/elasticsearch/repositories/RepositoryDataTests.java b/server/src/test/java/org/elasticsearch/repositories/RepositoryDataTests.java index 3952c6eb9e275..3bedd479d7020 100644 --- a/server/src/test/java/org/elasticsearch/repositories/RepositoryDataTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/RepositoryDataTests.java @@ -80,7 +80,7 @@ public void testXContent() throws IOException { repositoryData.snapshotsToXContent(builder, Version.CURRENT); try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) { long gen = (long) randomIntBetween(0, 500); - RepositoryData fromXContent = RepositoryData.snapshotsFromXContent(parser, gen); + RepositoryData fromXContent = RepositoryData.snapshotsFromXContent(parser, gen, randomBoolean()); assertEquals(repositoryData, fromXContent); assertEquals(gen, fromXContent.getGenId()); } @@ -100,14 +100,14 @@ public void testAddSnapshots() { IndexId indexId = new IndexId(randomAlphaOfLength(7), UUIDs.randomBase64UUID()); newIndices.add(indexId); indices.add(indexId); - builder.put(indexId, 0, "1"); + builder.put(indexId, 0, UUIDs.randomBase64UUID(random())); } int numOld = randomIntBetween(1, indexIdMap.size()); List indexNames = new ArrayList<>(indexIdMap.keySet()); for (int i = 0; i < numOld; i++) { final IndexId indexId = indexIdMap.get(indexNames.get(i)); indices.add(indexId); - builder.put(indexId, 0, "2"); + builder.put(indexId, 0, UUIDs.randomBase64UUID(random())); } final ShardGenerations shardGenerations = builder.build(); final Map indexLookup = @@ -197,7 +197,7 @@ public void testIndexThatReferencesAnUnknownSnapshot() throws IOException { repositoryData.snapshotsToXContent(builder, Version.CURRENT); RepositoryData parsedRepositoryData; try (XContentParser xParser = createParser(builder)) { - parsedRepositoryData = RepositoryData.snapshotsFromXContent(xParser, repositoryData.getGenId()); + parsedRepositoryData = RepositoryData.snapshotsFromXContent(xParser, repositoryData.getGenId(), randomBoolean()); } assertEquals(repositoryData, parsedRepositoryData); @@ -236,7 +236,7 @@ public void testIndexThatReferencesAnUnknownSnapshot() throws IOException { try (XContentParser xParser = createParser(corruptedBuilder)) { ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> - RepositoryData.snapshotsFromXContent(xParser, corruptedRepositoryData.getGenId())); + RepositoryData.snapshotsFromXContent(xParser, corruptedRepositoryData.getGenId(), randomBoolean())); assertThat(e.getMessage(), equalTo("Detected a corrupted repository, index " + corruptedIndexId + " references an unknown " + "snapshot uuid [_does_not_exist]")); } @@ -273,7 +273,7 @@ public void testIndexThatReferenceANullSnapshot() throws IOException { try (XContentParser xParser = createParser(builder)) { ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> - RepositoryData.snapshotsFromXContent(xParser, randomNonNegativeLong())); + RepositoryData.snapshotsFromXContent(xParser, randomNonNegativeLong(), randomBoolean())); assertThat(e.getMessage(), equalTo("Detected a corrupted repository, " + "index [docs/_id] references an unknown snapshot uuid [null]")); } diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreTestUtil.java b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreTestUtil.java index 138db71fc282c..5749f448d4842 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreTestUtil.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreTestUtil.java @@ -129,7 +129,7 @@ public static void assertConsistency(BlobStoreRepository repository, Executor ex try (InputStream blob = blobContainer.readBlob("index-" + latestGen); XContentParser parser = XContentType.JSON.xContent().createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, blob)) { - repositoryData = RepositoryData.snapshotsFromXContent(parser, latestGen); + repositoryData = RepositoryData.snapshotsFromXContent(parser, latestGen, true); } assertIndexUUIDs(repository, repositoryData); assertSnapshotUUIDs(repository, repositoryData); From 655c7f3d9f3753a68f2c98718d51ea812fd0f0fd Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 8 Jun 2020 15:52:10 +0200 Subject: [PATCH 2/2] CR: comments --- .../org/elasticsearch/repositories/RepositoryData.java | 10 +++++++--- .../repositories/blobstore/BlobStoreTestUtil.java | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java b/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java index 8d685db005dd2..d660088b374fc 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java @@ -619,9 +619,13 @@ public static RepositoryData snapshotsFromXContent(XContentParser parser, long g assert indexId != null; indexSnapshots.put(indexId, Collections.unmodifiableList(snapshotIds)); for (int i = 0; i < gens.size(); i++) { - final String parsedGen = gens.get(i); - shardGenerations.put( - indexId, i, fixBrokenShardGens ? ShardGenerations.fixShardGeneration(parsedGen) : parsedGen); + String parsedGen = gens.get(i); + if (fixBrokenShardGens) { + parsedGen = ShardGenerations.fixShardGeneration(parsedGen); + } + if (parsedGen != null) { + shardGenerations.put(indexId, i, parsedGen); + } } } } else if (INDEX_METADATA_IDENTIFIERS.equals(field)) { diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreTestUtil.java b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreTestUtil.java index 5749f448d4842..8a5d4e29c001d 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreTestUtil.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreTestUtil.java @@ -129,7 +129,7 @@ public static void assertConsistency(BlobStoreRepository repository, Executor ex try (InputStream blob = blobContainer.readBlob("index-" + latestGen); XContentParser parser = XContentType.JSON.xContent().createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE, blob)) { - repositoryData = RepositoryData.snapshotsFromXContent(parser, latestGen, true); + repositoryData = RepositoryData.snapshotsFromXContent(parser, latestGen, false); } assertIndexUUIDs(repository, repositoryData); assertSnapshotUUIDs(repository, repositoryData);