Fail snapshot operations early on repository corruption (#30140)

A NullPointerException is thrown when trying to create or delete a snapshot in a repository that has been written to by an older Elasticsearch after writing to it with a newer Elasticsearch version. This is because the way snapshots are formatted in the repository snapshots index file changed in #24477. This commit changes the parsing of the repository index file so that it now detects a corrupted index file and fails early the snapshot operation. closes #29052
elastic · Apr 27, 2018 · 63148dd · 63148dd
1 parent a7e69b0
commit 63148dd
Show file tree

Hide file tree

Showing 6 changed files with 121 additions and 29 deletions.
diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc
@@ -24,6 +24,9 @@
 
 === Bug Fixes
 
+Fail snapshot operations early when creating or deleting a snapshot on a repository that has been
+written to by an older Elasticsearch after writing to it with a newer Elasticsearch version. ({pull}30140[#30140])
+
 === Regressions
 
 === Known Issues

diff --git a/docs/reference/modules/snapshots.asciidoc b/docs/reference/modules/snapshots.asciidoc
@@ -44,12 +44,12 @@ If you register same snapshot repository with multiple clusters, only
 one cluster should have write access to the repository. All other clusters
 connected to that repository should set the repository to `readonly` mode.
 
-NOTE: The snapshot format can change across major versions, so if you have
-clusters on different major versions trying to write the same repository,
-new snapshots written by one version will not be visible to the other. While
-setting the repository to `readonly` on all but one of the clusters should work
-with multiple clusters differing by one major version, it is not a supported
-configuration.
+IMPORTANT: The snapshot format can change across major versions, so if you have
+clusters on different versions trying to write the same repository, snapshots
+written by one version may not be visible to the other and the repository could
+be corrupted. While setting the repository to `readonly` on all but one of the
+clusters should work with multiple clusters differing by one major version, it
+is not a supported configuration.
 
 [source,js]
 -----------------------------------

diff --git a/...g/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java b/...g/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java
@@ -230,9 +230,9 @@ private SnapshotsStatusResponse buildResponse(SnapshotsStatusRequest request, Li
                 SnapshotInfo snapshotInfo = snapshotsService.snapshot(repositoryName, snapshotId);
                 List<SnapshotIndexShardStatus> shardStatusBuilder = new ArrayList<>();
                 if (snapshotInfo.state().completed()) {
-                    Map<ShardId, IndexShardSnapshotStatus> shardStatues =
-                        snapshotsService.snapshotShards(request.repository(), snapshotInfo);
-                    for (Map.Entry<ShardId, IndexShardSnapshotStatus> shardStatus : shardStatues.entrySet()) {
+                    Map<ShardId, IndexShardSnapshotStatus> shardStatuses =
+                        snapshotsService.snapshotShards(repositoryName, repositoryData, snapshotInfo);
+                    for (Map.Entry<ShardId, IndexShardSnapshotStatus> shardStatus : shardStatuses.entrySet()) {
                         IndexShardSnapshotStatus.Copy lastSnapshotStatus = shardStatus.getValue().asCopy();
                         shardStatusBuilder.add(new SnapshotIndexShardStatus(shardStatus.getKey(), lastSnapshotStatus));
                     }

diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java b/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java
@@ -230,13 +230,6 @@ public Set<SnapshotId> getSnapshots(final IndexId indexId) {
         return snapshotIds;
     }
 
-    /**
-     * Initializes the indices in the repository metadata; returns a new instance.
-     */
-    public RepositoryData initIndices(final Map<IndexId, Set<SnapshotId>> indexSnapshots) {
-        return new RepositoryData(genId, snapshotIds, snapshotStates, indexSnapshots, incompatibleSnapshotIds);
-    }
-
     @Override
     public boolean equals(Object obj) {
         if (this == obj) {
@@ -352,9 +345,10 @@ public XContentBuilder snapshotsToXContent(final XContentBuilder builder, final
      * Reads an instance of {@link RepositoryData} from x-content, loading the snapshots and indices metadata.
      */
     public static RepositoryData snapshotsFromXContent(final XContentParser parser, long genId) throws IOException {
-        Map<String, SnapshotId> snapshots = new HashMap<>();
-        Map<String, SnapshotState> snapshotStates = new HashMap<>();
-        Map<IndexId, Set<SnapshotId>> indexSnapshots = new HashMap<>();
+        final Map<String, SnapshotId> snapshots = new HashMap<>();
+        final Map<String, SnapshotState> snapshotStates = new HashMap<>();
+        final Map<IndexId, Set<SnapshotId>> indexSnapshots = new HashMap<>();
+
         if (parser.nextToken() == XContentParser.Token.START_OBJECT) {
             while (parser.nextToken() == XContentParser.Token.FIELD_NAME) {
                 String field = parser.currentName();
@@ -397,17 +391,18 @@ public static RepositoryData snapshotsFromXContent(final XContentParser parser,
                         throw new ElasticsearchParseException("start object expected [indices]");
                     }
                     while (parser.nextToken() != XContentParser.Token.END_OBJECT) {
-                        String indexName = parser.currentName();
-                        String indexId = null;
-                        Set<SnapshotId> snapshotIds = new LinkedHashSet<>();
+                        final String indexName = parser.currentName();
+                        final Set<SnapshotId> snapshotIds = new LinkedHashSet<>();
+
+                        IndexId indexId = null;
                         if (parser.nextToken() != XContentParser.Token.START_OBJECT) {
                             throw new ElasticsearchParseException("start object expected index[" + indexName + "]");
                         }
                         while (parser.nextToken() != XContentParser.Token.END_OBJECT) {
-                            String indexMetaFieldName = parser.currentName();
+                            final String indexMetaFieldName = parser.currentName();
                             parser.nextToken();
                             if (INDEX_ID.equals(indexMetaFieldName)) {
-                                indexId = parser.text();
+                                indexId = new IndexId(indexName, parser.text());
                             } else if (SNAPSHOTS.equals(indexMetaFieldName)) {
                                 if (parser.currentToken() != XContentParser.Token.START_ARRAY) {
                                     throw new ElasticsearchParseException("start array expected [snapshots]");
@@ -428,12 +423,22 @@ public static RepositoryData snapshotsFromXContent(final XContentParser parser,
                                         // since we already have the name/uuid combo in the snapshots array
                                         uuid = parser.text();
                                     }
-                                    snapshotIds.add(snapshots.get(uuid));
+
+                                    SnapshotId snapshotId = snapshots.get(uuid);
+                                    if (snapshotId != null) {
+                                        snapshotIds.add(snapshotId);
+                                    } else {
+                                        // A snapshotted index references a snapshot which does not exist in
+                                        // the list of snapshots. This can happen when multiple clusters in
+                                        // different versions create or delete snapshot in the same repository.
+                                        throw new ElasticsearchParseException("Detected a corrupted repository, index " + indexId
+                                            + " references an unknown snapshot uuid [" + uuid + "]");
+                                    }
                                 }
                             }
                         }
                         assert indexId != null;
-                        indexSnapshots.put(new IndexId(indexName, indexId), snapshotIds);
+                        indexSnapshots.put(indexId, snapshotIds);
                     }
                 } else {
                     throw new ElasticsearchParseException("unknown field name  [" + field + "]");

diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java
@@ -592,10 +592,9 @@ public List<SnapshotsInProgress.Entry> currentSnapshots(final String repository,
      * @return map of shard id to snapshot status
      */
     public Map<ShardId, IndexShardSnapshotStatus> snapshotShards(final String repositoryName,
+                                                                 final RepositoryData repositoryData,
                                                                  final SnapshotInfo snapshotInfo) throws IOException {
         final Repository repository = repositoriesService.repository(repositoryName);
-        final RepositoryData repositoryData = repository.getRepositoryData();
-
         final Map<ShardId, IndexShardSnapshotStatus> shardStatus = new HashMap<>();
         for (String index : snapshotInfo.indices()) {
             IndexId indexId = repositoryData.resolveIndexId(index);

diff --git a/server/src/test/java/org/elasticsearch/repositories/RepositoryDataTests.java b/server/src/test/java/org/elasticsearch/repositories/RepositoryDataTests.java
@@ -19,11 +19,14 @@
 
 package org.elasticsearch.repositories;
 
+import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.common.UUIDs;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.xcontent.ToXContent;
+import org.elasticsearch.common.xcontent.XContent;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.common.xcontent.json.JsonXContent;
 import org.elasticsearch.snapshots.SnapshotId;
 import org.elasticsearch.snapshots.SnapshotState;
@@ -39,7 +42,11 @@
 import java.util.Map;
 import java.util.Set;
 
+import static java.util.Collections.emptySet;
+import static java.util.Collections.singleton;
 import static org.elasticsearch.repositories.RepositoryData.EMPTY_REPO_GEN;
+import static org.hamcrest.Matchers.empty;
+import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
 
 /**
@@ -101,15 +108,18 @@ public void testAddSnapshots() {
     public void testInitIndices() {
         final int numSnapshots = randomIntBetween(1, 30);
         final Map<String, SnapshotId> snapshotIds = new HashMap<>(numSnapshots);
+        final Map<String, SnapshotState> snapshotStates = new HashMap<>(numSnapshots);
         for (int i = 0; i < numSnapshots; i++) {
             final SnapshotId snapshotId = new SnapshotId(randomAlphaOfLength(8), UUIDs.randomBase64UUID());
             snapshotIds.put(snapshotId.getUUID(), snapshotId);
+            snapshotStates.put(snapshotId.getUUID(), randomFrom(SnapshotState.values()));
         }
         RepositoryData repositoryData = new RepositoryData(EMPTY_REPO_GEN, snapshotIds,
             Collections.emptyMap(), Collections.emptyMap(), Collections.emptyList());
         // test that initializing indices works
         Map<IndexId, Set<SnapshotId>> indices = randomIndices(snapshotIds);
-        RepositoryData newRepoData = repositoryData.initIndices(indices);
+        RepositoryData newRepoData =  new RepositoryData(repositoryData.getGenId(), snapshotIds, snapshotStates, indices,
+            new ArrayList<>(repositoryData.getIncompatibleSnapshotIds()));
         List<SnapshotId> expected = new ArrayList<>(repositoryData.getSnapshotIds());
         Collections.sort(expected);
         List<SnapshotId> actual = new ArrayList<>(newRepoData.getSnapshotIds());
@@ -153,6 +163,81 @@ public void testGetSnapshotState() {
         assertNull(repositoryData.getSnapshotState(new SnapshotId(randomAlphaOfLength(8), UUIDs.randomBase64UUID())));
     }
 
+    public void testIndexThatReferencesAnUnknownSnapshot() throws IOException {
+        final XContent xContent = randomFrom(XContentType.values()).xContent();
+        final RepositoryData repositoryData = generateRandomRepoData();
+
+        XContentBuilder builder = XContentBuilder.builder(xContent);
+        repositoryData.snapshotsToXContent(builder, ToXContent.EMPTY_PARAMS);
+        RepositoryData parsedRepositoryData = RepositoryData.snapshotsFromXContent(createParser(builder), repositoryData.getGenId());
+        assertEquals(repositoryData, parsedRepositoryData);
+
+        Map<String, SnapshotId> snapshotIds = new HashMap<>();
+        Map<String, SnapshotState> snapshotStates = new HashMap<>();
+        for (SnapshotId snapshotId : parsedRepositoryData.getSnapshotIds()) {
+            snapshotIds.put(snapshotId.getUUID(), snapshotId);
+            snapshotStates.put(snapshotId.getUUID(), parsedRepositoryData.getSnapshotState(snapshotId));
+        }
+
+        final IndexId corruptedIndexId = randomFrom(parsedRepositoryData.getIndices().values());
+
+        Map<IndexId, Set<SnapshotId>> indexSnapshots = new HashMap<>();
+        for (Map.Entry<String, IndexId> snapshottedIndex : parsedRepositoryData.getIndices().entrySet()) {
+            IndexId indexId = snapshottedIndex.getValue();
+            Set<SnapshotId> snapshotsIds = new LinkedHashSet<>(parsedRepositoryData.getSnapshots(indexId));
+            if (corruptedIndexId.equals(indexId)) {
+                snapshotsIds.add(new SnapshotId("_uuid", "_does_not_exist"));
+            }
+            indexSnapshots.put(indexId, snapshotsIds);
+        }
+        assertNotNull(corruptedIndexId);
+
+        RepositoryData corruptedRepositoryData = new RepositoryData(parsedRepositoryData.getGenId(), snapshotIds, snapshotStates,
+            indexSnapshots, new ArrayList<>(parsedRepositoryData.getIncompatibleSnapshotIds()));
+
+        final XContentBuilder corruptedBuilder = XContentBuilder.builder(xContent);
+        corruptedRepositoryData.snapshotsToXContent(corruptedBuilder, ToXContent.EMPTY_PARAMS);
+
+        ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () ->
+            RepositoryData.snapshotsFromXContent(createParser(corruptedBuilder), corruptedRepositoryData.getGenId()));
+        assertThat(e.getMessage(), equalTo("Detected a corrupted repository, index " + corruptedIndexId + " references an unknown " +
+            "snapshot uuid [_does_not_exist]"));
+    }
+
+    public void testIndexThatReferenceANullSnapshot() throws IOException {
+        final XContentBuilder builder = XContentBuilder.builder(randomFrom(XContentType.JSON).xContent());
+        builder.startObject();
+        {
+            builder.startArray("snapshots");
+            builder.value(new SnapshotId("_name", "_uuid"));
+            builder.endArray();
+
+            builder.startObject("indices");
+            {
+                builder.startObject("docs");
+                {
+                    builder.field("id", "_id");
+                    builder.startArray("snapshots");
+                    {
+                        builder.startObject();
+                        if (randomBoolean()) {
+                            builder.field("name", "_name");
+                        }
+                        builder.endObject();
+                    }
+                    builder.endArray();
+                }
+                builder.endObject();
+            }
+            builder.endObject();
+        }
+        builder.endObject();
+
+        ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () ->
+            RepositoryData.snapshotsFromXContent(createParser(builder), randomNonNegativeLong()));
+        assertThat(e.getMessage(), equalTo("Detected a corrupted repository, index [docs/_id] references an unknown snapshot uuid [null]"));
+    }
+
     public static RepositoryData generateRandomRepoData() {
         final int numIndices = randomIntBetween(1, 30);
         final List<IndexId> indices = new ArrayList<>(numIndices);