Docs links from repo analysis failures (elastic#110681)
We still get too many cases about snapshot repositories which claim to
be S3-compatible but then fail repository analysis because of genuine
incompatibilities or implementation bugs. The info is all there in the
manual but it's not very easy to find. This commit adds more detail to
the response message, including docs links, to help direct users to the
information they need.
DaveCTurner authored Jul 10, 2024
1 parent 6a50c45 commit 8d3b0ad
Showing 8 changed files with 103 additions and 19 deletions.
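
Before the diffs, a minimal, self-contained sketch of how the pieces added here fit together may help: the repository supplies an implementation-specific getAnalysisFailureExtraDetail() string carrying the docs links, and the analysis action prepends it to a clean-up hint before raising the verification failure. This is illustrative only; the class below, its method names, the example blob path, and the docs base URL are assumptions rather than actual Elasticsearch code. The real classes and resources appear in the diffs that follow.

// Simplified, standalone sketch of the message composition introduced in this commit.
public class AnalysisFailureMessageSketch {

    // Stand-in for Elasticsearch's ReferenceDocs enum: each constant resolves to a docs
    // URL. The base URL used here is an assumption made for illustration.
    enum Docs {
        SNAPSHOT_REPOSITORY_ANALYSIS("repo-analysis-api.html"),
        S3_COMPATIBLE_REPOSITORIES("repository-s3.html#repository-s3-compatible-services");

        private final String page;

        Docs(String page) {
            this.page = page;
        }

        @Override
        public String toString() {
            return "https://www.elastic.co/guide/en/elasticsearch/reference/current/" + page;
        }
    }

    // Mirrors the default extra detail added to the blob-store repository in this commit.
    static String defaultExtraDetail() {
        return String.format(
            "Elasticsearch observed the storage system underneath this repository behaved incorrectly "
                + "which indicates it is not suitable for use with Elasticsearch snapshots. "
                + "See [%s] for further information.",
            Docs.SNAPSHOT_REPOSITORY_ANALYSIS
        );
    }

    // Mirrors how the analysis action now prepends the repository-specific detail
    // to the clean-up hint before failing the request.
    static String failureMessage(String extraDetail, String blobPath) {
        return String.format(
            "%s Elasticsearch attempted to remove the data it wrote at [%s] but may have left some behind. "
                + "If so, please now remove this data manually.",
            extraDetail,
            blobPath
        );
    }

    public static void main(String[] args) {
        // Hypothetical blob path, purely for demonstration.
        System.out.println(failureMessage(defaultExtraDetail(), "temp-analysis-1234"));
    }
}
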
@@ -14,6 +14,7 @@
import org.elasticsearch.action.ActionRunnable;
import org.elasticsearch.cluster.metadata.RepositoryMetadata;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.ReferenceDocs;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.blobstore.BlobPath;
import org.elasticsearch.common.blobstore.BlobStore;
@@ -443,4 +444,19 @@ protected void doClose() {
}
super.doClose();
}

@Override
public String getAnalysisFailureExtraDetail() {
return Strings.format(
"""
Elasticsearch observed the storage system underneath this repository behaved incorrectly which indicates it is not \
suitable for use with Elasticsearch snapshots. Typically this happens when using storage other than AWS S3 which \
incorrectly claims to be S3-compatible. If so, please report this incompatibility to your storage supplier. Do not report \
Elasticsearch issues involving storage systems which claim to be S3-compatible unless you can demonstrate that the same \
issue exists when using a genuine AWS S3 repository. See [%s] for further information about repository analysis, and [%s] \
for further information about support for S3-compatible repository implementations.""",
ReferenceDocs.SNAPSHOT_REPOSITORY_ANALYSIS,
ReferenceDocs.S3_COMPATIBLE_REPOSITORIES
);
}
}
@@ -11,6 +11,7 @@
import com.amazonaws.services.s3.AbstractAmazonS3;

import org.elasticsearch.cluster.metadata.RepositoryMetadata;
import org.elasticsearch.common.ReferenceDocs;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeUnit;
@@ -28,6 +29,7 @@

import java.util.Map;

import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.not;
@@ -152,4 +154,24 @@ private S3Repository createS3Repo(RepositoryMetadata metadata) {
);
}

public void testAnalysisFailureDetail() {
try (
S3Repository s3repo = createS3Repo(
new RepositoryMetadata("dummy-repo", "mock", Settings.builder().put(S3Repository.BUCKET_SETTING.getKey(), "bucket").build())
)
) {
assertThat(
s3repo.getAnalysisFailureExtraDetail(),
allOf(
containsString("storage system underneath this repository behaved incorrectly"),
containsString("incorrectly claims to be S3-compatible"),
containsString("report this incompatibility to your storage supplier"),
containsString("unless you can demonstrate that the same issue exists when using a genuine AWS S3 repository"),
containsString(ReferenceDocs.SNAPSHOT_REPOSITORY_ANALYSIS.toString()),
containsString(ReferenceDocs.S3_COMPATIBLE_REPOSITORIES.toString())
)
);
}
}

}
@@ -75,6 +75,8 @@ public enum ReferenceDocs {
NETWORK_THREADING_MODEL,
ALLOCATION_EXPLAIN_API,
NETWORK_BINDING_AND_PUBLISHING,
SNAPSHOT_REPOSITORY_ANALYSIS,
S3_COMPATIBLE_REPOSITORIES,
// this comment keeps the ';' on the next line so every entry above has a trailing ',' which makes the diff for adding new links cleaner
;

@@ -3946,4 +3946,16 @@ public boolean hasAtomicOverwrites() {
public int getReadBufferSizeInBytes() {
return bufferSize;
}

/**
* @return extra information to be included in the exception message emitted on failure of a repository analysis.
*/
public String getAnalysisFailureExtraDetail() {
return Strings.format(
"""
Elasticsearch observed the storage system underneath this repository behaved incorrectly which indicates it is not \
suitable for use with Elasticsearch snapshots. See [%s] for further information.""",
ReferenceDocs.SNAPSHOT_REPOSITORY_ANALYSIS
);
}
}
@@ -35,5 +35,7 @@
"EXECUTABLE_JNA_TMPDIR": "executable-jna-tmpdir.html",
"NETWORK_THREADING_MODEL": "modules-network.html#modules-network-threading-model",
"ALLOCATION_EXPLAIN_API": "cluster-allocation-explain.html",
"NETWORK_BINDING_AND_PUBLISHING": "modules-network.html#modules-network-binding-publishing"
"NETWORK_BINDING_AND_PUBLISHING": "modules-network.html#modules-network-binding-publishing",
"SNAPSHOT_REPOSITORY_ANALYSIS": "repo-analysis-api.html",
"S3_COMPATIBLE_REPOSITORIES": "repository-s3.html#repository-s3-compatible-services"
}
@@ -175,6 +175,6 @@ setup:

- match: { status: 500 }
- match: { error.type: repository_verification_exception }
- match: { error.reason: "/.*test_repo_slow..analysis.failed.*/" }
- match: { error.reason: "/.*test_repo_slow..Repository.analysis.timed.out.*/" }
- match: { error.root_cause.0.type: repository_verification_exception }
- match: { error.root_cause.0.reason: "/.*test_repo_slow..analysis.timed.out.after..1s.*/" }
@@ -11,6 +11,7 @@
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.cluster.metadata.RepositoryMetadata;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.ReferenceDocs;
import org.elasticsearch.common.blobstore.BlobContainer;
import org.elasticsearch.common.blobstore.BlobPath;
import org.elasticsearch.common.blobstore.BlobStore;
@@ -363,6 +364,17 @@ public BytesReference onContendedCompareAndExchange(BytesRegister register, Byte
}
}

private static void assertAnalysisFailureMessage(String message) {
assertThat(
message,
allOf(
containsString("Elasticsearch observed the storage system underneath this repository behaved incorrectly"),
containsString("not suitable for use with Elasticsearch snapshots"),
containsString(ReferenceDocs.SNAPSHOT_REPOSITORY_ANALYSIS.toString())
)
);
}

public void testTimesOutSpinningRegisterAnalysis() {
final RepositoryAnalyzeAction.Request request = new RepositoryAnalyzeAction.Request("test-repo");
request.timeout(TimeValue.timeValueMillis(between(1, 1000)));
@@ -375,7 +387,13 @@ public boolean compareAndExchangeReturnsWitness(String key) {
}
});
final var exception = expectThrows(RepositoryVerificationException.class, () -> analyseRepository(request));
assertThat(exception.getMessage(), containsString("analysis failed"));
assertThat(
exception.getMessage(),
allOf(
containsString("Repository analysis timed out. Consider specifying a longer timeout"),
containsString(ReferenceDocs.SNAPSHOT_REPOSITORY_ANALYSIS.toString())
)
);
assertThat(
asInstanceOf(RepositoryVerificationException.class, exception.getCause()).getMessage(),
containsString("analysis timed out")
@@ -391,7 +409,7 @@ public boolean compareAndExchangeReturnsWitness(String key) {
}
});
final var exception = expectThrows(RepositoryVerificationException.class, () -> analyseRepository(request));
assertThat(exception.getMessage(), containsString("analysis failed"));
assertAnalysisFailureMessage(exception.getMessage());
assertThat(
asInstanceOf(RepositoryVerificationException.class, ExceptionsHelper.unwrapCause(exception.getCause())).getMessage(),
allOf(containsString("uncontended register operation failed"), containsString("did not observe any value"))
@@ -407,7 +425,7 @@ public boolean acceptsEmptyRegister() {
}
});
final var exception = expectThrows(RepositoryVerificationException.class, () -> analyseRepository(request));
assertThat(exception.getMessage(), containsString("analysis failed"));
assertAnalysisFailureMessage(exception.getMessage());
final var cause = ExceptionsHelper.unwrapCause(exception.getCause());
if (cause instanceof IOException ioException) {
assertThat(ioException.getMessage(), containsString("empty register update rejected"));
@@ -28,6 +28,7 @@
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.ReferenceDocs;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.blobstore.BlobContainer;
@@ -387,6 +388,9 @@ public static class AsyncAction {
private final List<BlobAnalyzeAction.Response> responses;
private final RepositoryPerformanceSummary.Builder summary = new RepositoryPerformanceSummary.Builder();

private final RepositoryVerificationException analysisCancelledException;
private final RepositoryVerificationException analysisTimedOutException;

public AsyncAction(
TransportService transportService,
BlobStoreRepository repository,
@@ -410,6 +414,12 @@ public AsyncAction(
this.listener = ActionListener.runBefore(listener, () -> cancellationListener.onResponse(null));

responses = new ArrayList<>(request.blobCount);

this.analysisCancelledException = new RepositoryVerificationException(request.repositoryName, "analysis cancelled");
this.analysisTimedOutException = new RepositoryVerificationException(
request.repositoryName,
"analysis timed out after [" + request.getTimeout() + "]"
);
}

private boolean setFirstFailure(Exception e) {
@@ -453,12 +463,7 @@ public void onFailure(Exception e) {
assert e instanceof ElasticsearchTimeoutException : e;
if (isRunning()) {
// if this CAS fails then we're already failing for some other reason, nbd
setFirstFailure(
new RepositoryVerificationException(
request.repositoryName,
"analysis timed out after [" + request.getTimeout() + "]"
)
);
setFirstFailure(analysisTimedOutException);
}
}
}
@@ -472,7 +477,7 @@ public void run() {
cancellationListener.addTimeout(request.getTimeout(), repository.threadPool(), EsExecutors.DIRECT_EXECUTOR_SERVICE);
cancellationListener.addListener(new CheckForCancelListener());

task.addListener(() -> setFirstFailure(new RepositoryVerificationException(request.repositoryName, "analysis cancelled")));
task.addListener(() -> setFirstFailure(analysisCancelledException));

final Random random = new Random(request.getSeed());
final List<DiscoveryNode> nodes = getSnapshotNodes(discoveryNodes);
@@ -873,13 +878,20 @@ private void sendResponse(final long listingStartTimeNanos, final long deleteSta
);
} else {
logger.debug(() -> "analysis of repository [" + request.repositoryName + "] failed", exception);
listener.onFailure(
new RepositoryVerificationException(
request.getRepositoryName(),
"analysis failed, you may need to manually remove [" + blobPath + "]",
exception
)
);

final String failureDetail;
if (exception == analysisCancelledException) {
failureDetail = "Repository analysis was cancelled.";
} else if (exception == analysisTimedOutException) {
failureDetail = Strings.format("""
Repository analysis timed out. Consider specifying a longer timeout using the [?timeout] request parameter. See \
[%s] for more information.""", ReferenceDocs.SNAPSHOT_REPOSITORY_ANALYSIS);
} else {
failureDetail = repository.getAnalysisFailureExtraDetail();
}
listener.onFailure(new RepositoryVerificationException(request.getRepositoryName(), Strings.format("""
%s Elasticsearch attempted to remove the data it wrote at [%s] but may have left some behind. If so, \
please now remove this data manually.""", failureDetail, blobPath), exception));
}
}
}
