From 77704412c8de5842d5dc7618fc003202c2d26eb5 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 5 Nov 2024 10:37:50 +0000 Subject: [PATCH] Handle status code 0 in S3 CMU response (#116212) (#116232) A `CompleteMultipartUpload` action may fail after sending the `200 OK` response line. In this case the response body describes the error, and the SDK translates this situation to an exception with status code 0 but with the `ErrorCode` string set appropriately. This commit enhances the exception handling in `S3BlobContainer` to handle this possibility. Closes #102294 Co-authored-by: Pat Patterson --- docs/changelog/116212.yaml | 6 ++++++ .../repositories/s3/S3BlobContainer.java | 9 +++++++-- .../src/main/java/fixture/s3/S3HttpHandler.java | 17 ++++++++++++++++- .../analyze/S3RepositoryAnalysisRestIT.java | 7 ------- 4 files changed, 29 insertions(+), 10 deletions(-) create mode 100644 docs/changelog/116212.yaml diff --git a/docs/changelog/116212.yaml b/docs/changelog/116212.yaml new file mode 100644 index 0000000000000..7c8756f4054cd --- /dev/null +++ b/docs/changelog/116212.yaml @@ -0,0 +1,6 @@ +pr: 116212 +summary: Handle status code 0 in S3 CMU response +area: Snapshot/Restore +type: bug +issues: + - 102294 diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3BlobContainer.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3BlobContainer.java index 8e71cb9959044..c1d5dd0c13887 100644 --- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3BlobContainer.java +++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3BlobContainer.java @@ -882,8 +882,13 @@ public void compareAndExchangeRegister( final var clientReference = blobStore.clientReference(); ActionListener.run(ActionListener.releaseAfter(listener.delegateResponse((delegate, e) -> { logger.trace(() -> Strings.format("[%s]: compareAndExchangeRegister failed", key), e); - if (e instanceof AmazonS3Exception amazonS3Exception && amazonS3Exception.getStatusCode() == 404) { - // an uncaught 404 means that our multipart upload was aborted by a concurrent operation before we could complete it + if (e instanceof AmazonS3Exception amazonS3Exception + && (amazonS3Exception.getStatusCode() == 404 + || amazonS3Exception.getStatusCode() == 0 && "NoSuchUpload".equals(amazonS3Exception.getErrorCode()))) { + // An uncaught 404 means that our multipart upload was aborted by a concurrent operation before we could complete it. + // Also (rarely) S3 can start processing the request during a concurrent abort and this can result in a 200 OK with an + // NoSuchUpload... in the response, which the SDK translates to status code 0. Either way, this means + // that our write encountered contention: delegate.onResponse(OptionalBytesReference.MISSING); } else { delegate.onFailure(e); diff --git a/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpHandler.java b/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpHandler.java index eddce6aae298a..56d3454aa5544 100644 --- a/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpHandler.java +++ b/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpHandler.java @@ -13,6 +13,7 @@ import com.sun.net.httpserver.HttpHandler; import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.common.Randomness; import org.elasticsearch.common.Strings; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.bytes.BytesReference; @@ -168,7 +169,21 @@ public void handle(final HttpExchange exchange) throws IOException { RestUtils.decodeQueryString(request, request.indexOf('?') + 1, params); final var upload = uploads.remove(params.get("uploadId")); if (upload == null) { - exchange.sendResponseHeaders(RestStatus.NOT_FOUND.getStatus(), -1); + if (Randomness.get().nextBoolean()) { + exchange.sendResponseHeaders(RestStatus.NOT_FOUND.getStatus(), -1); + } else { + byte[] response = (""" + + + NoSuchUpload + No such upload + test-request-id + test-host-id + """).getBytes(StandardCharsets.UTF_8); + exchange.getResponseHeaders().add("Content-Type", "application/xml"); + exchange.sendResponseHeaders(RestStatus.OK.getStatus(), response.length); + exchange.getResponseBody().write(response); + } } else { final var blobContents = upload.complete(extractPartEtags(Streams.readFully(exchange.getRequestBody()))); blobs.put(requestComponents.path, blobContents); diff --git a/x-pack/plugin/snapshot-repo-test-kit/qa/s3/src/javaRestTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/S3RepositoryAnalysisRestIT.java b/x-pack/plugin/snapshot-repo-test-kit/qa/s3/src/javaRestTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/S3RepositoryAnalysisRestIT.java index 8986cf1059191..c0f2b40f5a10f 100644 --- a/x-pack/plugin/snapshot-repo-test-kit/qa/s3/src/javaRestTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/S3RepositoryAnalysisRestIT.java +++ b/x-pack/plugin/snapshot-repo-test-kit/qa/s3/src/javaRestTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/S3RepositoryAnalysisRestIT.java @@ -31,13 +31,6 @@ public class S3RepositoryAnalysisRestIT extends AbstractRepositoryAnalysisRestTe .setting("s3.client.repo_test_kit.protocol", () -> "http", (n) -> USE_FIXTURE) .setting("s3.client.repo_test_kit.endpoint", s3Fixture::getAddress, (n) -> USE_FIXTURE) .setting("xpack.security.enabled", "false") - // Additional tracing related to investigation into https://github.com/elastic/elasticsearch/issues/102294 - .setting("logger.org.elasticsearch.repositories.s3", "TRACE") - .setting("logger.org.elasticsearch.repositories.blobstore.testkit", "TRACE") - .setting("logger.com.amazonaws.request", "DEBUG") - .setting("logger.org.apache.http.wire", "DEBUG") - // Necessary to permit setting the above two restricted loggers to DEBUG - .jvmArg("-Des.insecure_network_trace_enabled=true") .build(); @ClassRule