Skip to content

Commit

Permalink
fix: retry specific internal errors (#3565)
Browse files Browse the repository at this point in the history
* chore: make internal auth backend errors retryable

Spanner occasionally returns INTERNAL errors regarding the auth backend server.
These errors should be regarded as retryable.

* fix: retry specific internal errors

Some specific internal errors should be retrid. Instead of adding INTERNAL
as a standard retryable error code, we use an interceptor to catch and
translate those specific errors.

See also b/375684610

* chore: address review comments

* fix: wait for session pool to initialize

* fix: register errors before creating the client
  • Loading branch information
olavloite authored Dec 18, 2024
1 parent 3dc290a commit b9ce1a6
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,35 @@

import com.google.api.gax.rpc.InternalException;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import io.grpc.Status;
import io.grpc.Status.Code;
import io.grpc.StatusRuntimeException;

public class IsRetryableInternalError implements Predicate<Throwable> {
public static final IsRetryableInternalError INSTANCE = new IsRetryableInternalError();

private static final String HTTP2_ERROR_MESSAGE = "HTTP/2 error code: INTERNAL_ERROR";
private static final String CONNECTION_CLOSED_ERROR_MESSAGE =
"Connection closed with unknown cause";
private static final String EOS_ERROR_MESSAGE =
"Received unexpected EOS on DATA frame from server";
private static final ImmutableList<String> RETRYABLE_ERROR_MESSAGES =
ImmutableList.of(
"HTTP/2 error code: INTERNAL_ERROR",
"Connection closed with unknown cause",
"Received unexpected EOS on DATA frame from server",
"stream terminated by RST_STREAM",
"Authentication backend internal server error. Please retry.");

private static final String RST_STREAM_ERROR_MESSAGE = "stream terminated by RST_STREAM";
public boolean isRetryableInternalError(Status status) {
return status.getCode() == Code.INTERNAL
&& status.getDescription() != null
&& isRetryableErrorMessage(status.getDescription());
}

@Override
public boolean apply(Throwable cause) {
if (isInternalError(cause)) {
if (cause.getMessage().contains(HTTP2_ERROR_MESSAGE)) {
return true;
} else if (cause.getMessage().contains(CONNECTION_CLOSED_ERROR_MESSAGE)) {
return true;
} else if (cause.getMessage().contains(EOS_ERROR_MESSAGE)) {
return true;
} else if (cause.getMessage().contains(RST_STREAM_ERROR_MESSAGE)) {
return true;
}
}
return false;
return isInternalError(cause) && isRetryableErrorMessage(cause.getMessage());
}

private boolean isRetryableErrorMessage(String errorMessage) {
return RETRYABLE_ERROR_MESSAGES.stream().anyMatch(errorMessage::contains);
}

private boolean isInternalError(Throwable cause) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package com.google.cloud.spanner.spi.v1;

import com.google.cloud.spanner.IsRetryableInternalError;
import com.google.rpc.BadRequest;
import com.google.rpc.Help;
import com.google.rpc.LocalizedMessage;
Expand All @@ -32,6 +33,7 @@
import io.grpc.Metadata;
import io.grpc.MethodDescriptor;
import io.grpc.Status;
import io.grpc.Status.Code;
import io.grpc.protobuf.ProtoUtils;
import java.util.logging.Level;
import java.util.logging.Logger;
Expand Down Expand Up @@ -69,6 +71,11 @@ public void start(Listener<RespT> responseListener, Metadata headers) {
@Override
public void onClose(Status status, Metadata trailers) {
try {
// Translate INTERNAL errors that should be retried to a retryable error code.
if (IsRetryableInternalError.INSTANCE.isRetryableInternalError(status)) {
status =
Status.fromCode(Code.UNAVAILABLE).withDescription(status.getDescription());
}
if (trailers.containsKey(LOCALIZED_MESSAGE_KEY)) {
status =
Status.fromCodeValue(status.getCode().value())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,17 @@ public void rstStreamInternalExceptionIsRetryable() {
assertTrue(predicate.apply(e));
}

@Test
public void testAuthenticationBackendInternalServerErrorIsRetryable() {
final StatusRuntimeException exception =
new StatusRuntimeException(
Status.fromCode(Code.INTERNAL)
.withDescription(
"INTERNAL: Authentication backend internal server error. Please retry."));

assertTrue(predicate.apply(exception));
}

@Test
public void genericInternalExceptionIsNotRetryable() {
final InternalException e =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.cloud.spanner;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import com.google.cloud.NoCredentials;
import com.google.cloud.spanner.MockSpannerServiceImpl.SimulatedExecutionTime;
import com.google.cloud.spanner.connection.AbstractMockServerTest;
import com.google.spanner.v1.BatchCreateSessionsRequest;
import com.google.spanner.v1.ExecuteSqlRequest;
import io.grpc.ManagedChannelBuilder;
import io.grpc.Status;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.threeten.bp.Duration;

@RunWith(JUnit4.class)
public class RetryableInternalErrorTest extends AbstractMockServerTest {
@Test
public void testTranslateInternalException() {
mockSpanner.setBatchCreateSessionsExecutionTime(
SimulatedExecutionTime.ofException(
Status.INTERNAL
.withDescription("Authentication backend internal server error. Please retry.")
.asRuntimeException()));
mockSpanner.setExecuteStreamingSqlExecutionTime(
SimulatedExecutionTime.ofException(
Status.INTERNAL
.withDescription("Authentication backend internal server error. Please retry.")
.asRuntimeException()));

try (Spanner spanner =
SpannerOptions.newBuilder()
.setProjectId("my-project")
.setHost(String.format("http://localhost:%d", getPort()))
.setChannelConfigurator(ManagedChannelBuilder::usePlaintext)
.setCredentials(NoCredentials.getInstance())
.setSessionPoolOption(
SessionPoolOptions.newBuilder()
.setMinSessions(1)
.setMaxSessions(1)
.setWaitForMinSessions(Duration.ofSeconds(5))
.build())
.build()
.getService()) {

DatabaseClient client = spanner.getDatabaseClient(DatabaseId.of("p", "i", "d"));
// Execute a query. This will block until a BatchCreateSessions call has finished and then
// invoke ExecuteStreamingSql. Both of these RPCs should be retried.
try (ResultSet resultSet = client.singleUse().executeQuery(SELECT1_STATEMENT)) {
assertTrue(resultSet.next());
assertFalse(resultSet.next());
}
// Verify that both the BatchCreateSessions call and the ExecuteStreamingSql call were
// retried.
assertEquals(2, mockSpanner.countRequestsOfType(BatchCreateSessionsRequest.class));
assertEquals(2, mockSpanner.countRequestsOfType(ExecuteSqlRequest.class));
// Clear the requests before the next test.
mockSpanner.clearRequests();

// Execute a DML statement. This uses the ExecuteSql RPC.
assertEquals(0, mockSpanner.countRequestsOfType(ExecuteSqlRequest.class));
mockSpanner.setExecuteSqlExecutionTime(
SimulatedExecutionTime.ofException(
Status.INTERNAL
.withDescription("Authentication backend internal server error. Please retry.")
.asRuntimeException()));
assertEquals(
Long.valueOf(1L),
client
.readWriteTransaction()
.run(transaction -> transaction.executeUpdate(INSERT_STATEMENT)));
// Verify that also this request was retried.
assertEquals(2, mockSpanner.countRequestsOfType(ExecuteSqlRequest.class));
}
}
}

0 comments on commit b9ce1a6

Please sign in to comment.