From 15228a3f1a1fbced33568ff662f1f962bdaaf7d6 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 10 Jul 2024 07:14:03 +0100 Subject: [PATCH 1/2] Docs links from repo analysis failures We still get too many cases about snapshot repositories which claim to be S3-compatible but then fail repository analysis because of genuine incompatibilities or implementation bugs. The info is all there in the manual but it's not very easy to find. This commit adds more detail to the response message, including docs links, to help direct users to the information they need. --- .../repositories/s3/S3Repository.java | 16 ++++++++ .../repositories/s3/S3RepositoryTests.java | 22 ++++++++++ .../elasticsearch/common/ReferenceDocs.java | 2 + .../blobstore/BlobStoreRepository.java | 12 ++++++ .../common/reference-docs-links.json | 4 +- .../testkit/RepositoryAnalysisFailureIT.java | 24 +++++++++-- .../testkit/RepositoryAnalyzeAction.java | 40 ++++++++++++------- 7 files changed, 102 insertions(+), 18 deletions(-) diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java index d53c379a37644..72b48c5903629 100644 --- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java +++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java @@ -14,6 +14,7 @@ import org.elasticsearch.action.ActionRunnable; import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.ReferenceDocs; import org.elasticsearch.common.Strings; import org.elasticsearch.common.blobstore.BlobPath; import org.elasticsearch.common.blobstore.BlobStore; @@ -443,4 +444,19 @@ protected void doClose() { } super.doClose(); } + + @Override + public String getAnalysisFailureExtraDetail() { + return Strings.format( + """ + Elasticsearch observed the storage system underneath this repository behaved incorrectly which indicates it is not \ + suitable for use with Elasticsearch snapshots. Typically this happens when using storage other than AWS S3 which \ + incorrectly claims to be S3-compatible. If so, please report this incompatibility to your storage supplier. Do not report \ + Elasticsearch issues involving storage systems which claim to be S3-compatible unless you can demonstrate that the same \ + issue exists when using a genuine AWS S3 repository. See [%s] for further information about repository analysis, and [%s] \ + for further information about support for S3-compatible repository implementations.""", + ReferenceDocs.SNAPSHOT_REPOSITORY_ANALYSIS, + ReferenceDocs.S3_COMPATIBLE_REPOSITORIES + ); + } } diff --git a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java index fcb0e82505dac..4bbc791e5fe21 100644 --- a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java +++ b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java @@ -11,6 +11,7 @@ import com.amazonaws.services.s3.AbstractAmazonS3; import org.elasticsearch.cluster.metadata.RepositoryMetadata; +import org.elasticsearch.common.ReferenceDocs; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeUnit; @@ -28,6 +29,7 @@ import java.util.Map; +import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; @@ -152,4 +154,24 @@ private S3Repository createS3Repo(RepositoryMetadata metadata) { ); } + public void testAnalysisFailureDetail() { + try ( + S3Repository s3repo = createS3Repo( + new RepositoryMetadata("dummy-repo", "mock", Settings.builder().put(S3Repository.BUCKET_SETTING.getKey(), "bucket").build()) + ) + ) { + assertThat( + s3repo.getAnalysisFailureExtraDetail(), + allOf( + containsString("storage system underneath this repository behaved incorrectly"), + containsString("incorrectly claims to be S3-compatible"), + containsString("report this incompatibility to your storage supplier"), + containsString("unless you can demonstrate that the same issue exists when using a genuine AWS S3 repository"), + containsString(ReferenceDocs.SNAPSHOT_REPOSITORY_ANALYSIS.toString()), + containsString(ReferenceDocs.S3_COMPATIBLE_REPOSITORIES.toString()) + ) + ); + } + } + } diff --git a/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java b/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java index 1953c1680040a..770ed4d213c55 100644 --- a/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java +++ b/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java @@ -75,6 +75,8 @@ public enum ReferenceDocs { NETWORK_THREADING_MODEL, ALLOCATION_EXPLAIN_API, NETWORK_BINDING_AND_PUBLISHING, + SNAPSHOT_REPOSITORY_ANALYSIS, + S3_COMPATIBLE_REPOSITORIES, // this comment keeps the ';' on the next line so every entry above has a trailing ',' which makes the diff for adding new links cleaner ; diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index 8f55bf16c1674..5b7a11969973d 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -3946,4 +3946,16 @@ public boolean hasAtomicOverwrites() { public int getReadBufferSizeInBytes() { return bufferSize; } + + /** + * @return extra information to be included in the exception message emitted on failure of a repository analysis. + */ + public String getAnalysisFailureExtraDetail() { + return Strings.format( + """ + Elasticsearch observed the storage system underneath this repository behaved incorrectly which indicates it is not \ + suitable for use with Elasticsearch snapshots. See [%s] for further information.""", + ReferenceDocs.SNAPSHOT_REPOSITORY_ANALYSIS + ); + } } diff --git a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json index 303ae22f16269..febcaec1ba057 100644 --- a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json +++ b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json @@ -35,5 +35,7 @@ "EXECUTABLE_JNA_TMPDIR": "executable-jna-tmpdir.html", "NETWORK_THREADING_MODEL": "modules-network.html#modules-network-threading-model", "ALLOCATION_EXPLAIN_API": "cluster-allocation-explain.html", - "NETWORK_BINDING_AND_PUBLISHING": "modules-network.html#modules-network-binding-publishing" + "NETWORK_BINDING_AND_PUBLISHING": "modules-network.html#modules-network-binding-publishing", + "SNAPSHOT_REPOSITORY_ANALYSIS": "repo-analysis-api.html", + "S3_COMPATIBLE_REPOSITORIES": "repository-s3.html#repository-s3-compatible-services" } diff --git a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/RepositoryAnalysisFailureIT.java b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/RepositoryAnalysisFailureIT.java index 7715b9e8d42b8..2ca5685c83db3 100644 --- a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/RepositoryAnalysisFailureIT.java +++ b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/RepositoryAnalysisFailureIT.java @@ -11,6 +11,7 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.ReferenceDocs; import org.elasticsearch.common.blobstore.BlobContainer; import org.elasticsearch.common.blobstore.BlobPath; import org.elasticsearch.common.blobstore.BlobStore; @@ -363,6 +364,17 @@ public BytesReference onContendedCompareAndExchange(BytesRegister register, Byte } } + private static void assertAnalysisFailureMessage(String message) { + assertThat( + message, + allOf( + containsString("Elasticsearch observed the storage system underneath this repository behaved incorrectly"), + containsString("not suitable for use with Elasticsearch snapshots"), + containsString(ReferenceDocs.SNAPSHOT_REPOSITORY_ANALYSIS.toString()) + ) + ); + } + public void testTimesOutSpinningRegisterAnalysis() { final RepositoryAnalyzeAction.Request request = new RepositoryAnalyzeAction.Request("test-repo"); request.timeout(TimeValue.timeValueMillis(between(1, 1000))); @@ -375,7 +387,13 @@ public boolean compareAndExchangeReturnsWitness(String key) { } }); final var exception = expectThrows(RepositoryVerificationException.class, () -> analyseRepository(request)); - assertThat(exception.getMessage(), containsString("analysis failed")); + assertThat( + exception.getMessage(), + allOf( + containsString("Repository analysis timed out. Consider specifying a longer timeout"), + containsString(ReferenceDocs.SNAPSHOT_REPOSITORY_ANALYSIS.toString()) + ) + ); assertThat( asInstanceOf(RepositoryVerificationException.class, exception.getCause()).getMessage(), containsString("analysis timed out") @@ -391,7 +409,7 @@ public boolean compareAndExchangeReturnsWitness(String key) { } }); final var exception = expectThrows(RepositoryVerificationException.class, () -> analyseRepository(request)); - assertThat(exception.getMessage(), containsString("analysis failed")); + assertAnalysisFailureMessage(exception.getMessage()); assertThat( asInstanceOf(RepositoryVerificationException.class, ExceptionsHelper.unwrapCause(exception.getCause())).getMessage(), allOf(containsString("uncontended register operation failed"), containsString("did not observe any value")) @@ -407,7 +425,7 @@ public boolean acceptsEmptyRegister() { } }); final var exception = expectThrows(RepositoryVerificationException.class, () -> analyseRepository(request)); - assertThat(exception.getMessage(), containsString("analysis failed")); + assertAnalysisFailureMessage(exception.getMessage()); final var cause = ExceptionsHelper.unwrapCause(exception.getCause()); if (cause instanceof IOException ioException) { assertThat(ioException.getMessage(), containsString("empty register update rejected")); diff --git a/x-pack/plugin/snapshot-repo-test-kit/src/main/java/org/elasticsearch/repositories/blobstore/testkit/RepositoryAnalyzeAction.java b/x-pack/plugin/snapshot-repo-test-kit/src/main/java/org/elasticsearch/repositories/blobstore/testkit/RepositoryAnalyzeAction.java index 7b82b69a682fa..494d1d3fedcd9 100644 --- a/x-pack/plugin/snapshot-repo-test-kit/src/main/java/org/elasticsearch/repositories/blobstore/testkit/RepositoryAnalyzeAction.java +++ b/x-pack/plugin/snapshot-repo-test-kit/src/main/java/org/elasticsearch/repositories/blobstore/testkit/RepositoryAnalyzeAction.java @@ -28,6 +28,7 @@ import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.ReferenceDocs; import org.elasticsearch.common.Strings; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.blobstore.BlobContainer; @@ -387,6 +388,9 @@ public static class AsyncAction { private final List responses; private final RepositoryPerformanceSummary.Builder summary = new RepositoryPerformanceSummary.Builder(); + private final RepositoryVerificationException analysisCancelledException; + private final RepositoryVerificationException analysisTimedOutException; + public AsyncAction( TransportService transportService, BlobStoreRepository repository, @@ -410,6 +414,12 @@ public AsyncAction( this.listener = ActionListener.runBefore(listener, () -> cancellationListener.onResponse(null)); responses = new ArrayList<>(request.blobCount); + + this.analysisCancelledException = new RepositoryVerificationException(request.repositoryName, "analysis cancelled"); + this.analysisTimedOutException = new RepositoryVerificationException( + request.repositoryName, + "analysis timed out after [" + request.getTimeout() + "]" + ); } private boolean setFirstFailure(Exception e) { @@ -453,12 +463,7 @@ public void onFailure(Exception e) { assert e instanceof ElasticsearchTimeoutException : e; if (isRunning()) { // if this CAS fails then we're already failing for some other reason, nbd - setFirstFailure( - new RepositoryVerificationException( - request.repositoryName, - "analysis timed out after [" + request.getTimeout() + "]" - ) - ); + setFirstFailure(analysisTimedOutException); } } } @@ -472,7 +477,7 @@ public void run() { cancellationListener.addTimeout(request.getTimeout(), repository.threadPool(), EsExecutors.DIRECT_EXECUTOR_SERVICE); cancellationListener.addListener(new CheckForCancelListener()); - task.addListener(() -> setFirstFailure(new RepositoryVerificationException(request.repositoryName, "analysis cancelled"))); + task.addListener(() -> setFirstFailure(analysisCancelledException)); final Random random = new Random(request.getSeed()); final List nodes = getSnapshotNodes(discoveryNodes); @@ -873,13 +878,20 @@ private void sendResponse(final long listingStartTimeNanos, final long deleteSta ); } else { logger.debug(() -> "analysis of repository [" + request.repositoryName + "] failed", exception); - listener.onFailure( - new RepositoryVerificationException( - request.getRepositoryName(), - "analysis failed, you may need to manually remove [" + blobPath + "]", - exception - ) - ); + + final String failureDetail; + if (exception == analysisCancelledException) { + failureDetail = "Repository analysis was cancelled."; + } else if (exception == analysisTimedOutException) { + failureDetail = Strings.format(""" + Repository analysis timed out. Consider specifying a longer timeout using the [?timeout] request parameter. See \ + [%s] for more information.""", ReferenceDocs.SNAPSHOT_REPOSITORY_ANALYSIS); + } else { + failureDetail = repository.getAnalysisFailureExtraDetail(); + } + listener.onFailure(new RepositoryVerificationException(request.getRepositoryName(), Strings.format(""" + %s Elasticsearch attempted to remove the data it wrote at [%s] but may have left some behind. If so, \ + please now remove this data manually.""", failureDetail, blobPath), exception)); } } } From dce9cb5fdc88034c75048e3c35b897241fad40be Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 10 Jul 2024 08:30:50 +0100 Subject: [PATCH 2/2] Fix YAML test --- .../yamlRestTest/resources/rest-api-spec/test/10_analyze.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/snapshot-repo-test-kit/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/10_analyze.yml b/x-pack/plugin/snapshot-repo-test-kit/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/10_analyze.yml index e5babad76eb05..bcee1691e033c 100644 --- a/x-pack/plugin/snapshot-repo-test-kit/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/10_analyze.yml +++ b/x-pack/plugin/snapshot-repo-test-kit/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/10_analyze.yml @@ -175,6 +175,6 @@ setup: - match: { status: 500 } - match: { error.type: repository_verification_exception } - - match: { error.reason: "/.*test_repo_slow..analysis.failed.*/" } + - match: { error.reason: "/.*test_repo_slow..Repository.analysis.timed.out.*/" } - match: { error.root_cause.0.type: repository_verification_exception } - match: { error.root_cause.0.reason: "/.*test_repo_slow..analysis.timed.out.after..1s.*/" }