From 2042afee003100c7cb59b2e5f68771eb541d986a Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Wed, 9 Aug 2023 10:24:58 -0700 Subject: [PATCH 1/2] sql: fix recently introduced minor bug around PREPARE In just merged 4a3e78753b9c043c51d2a483b8b591f3abc1457e, we had a minor bug that in case `addPreparedStmt` call fails, we don't restore the original placeholders, which can then lead to panics. Release note: None --- pkg/sql/conn_executor_exec.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pkg/sql/conn_executor_exec.go b/pkg/sql/conn_executor_exec.go index fc892e541487..3345e4d7b439 100644 --- a/pkg/sql/conn_executor_exec.go +++ b/pkg/sql/conn_executor_exec.go @@ -903,16 +903,18 @@ func (ex *connExecutor) execStmtInOpenState( // PREPARE statement itself. oldPlaceholders := p.extendedEvalCtx.Placeholders p.extendedEvalCtx.Placeholders = nil + defer func() { + // The call to addPreparedStmt changed the planner stmt to the + // statement being prepared. Set it back to the PREPARE statement, + // so that it's logged correctly. + p.stmt = stmt + p.extendedEvalCtx.Placeholders = oldPlaceholders + }() if _, err := ex.addPreparedStmt( ctx, name, prepStmt, typeHints, rawTypeHints, PreparedStatementOriginSQL, ); err != nil { return makeErrEvent(err) } - // The call to addPreparedStmt changed the planner stmt to the statement - // being prepared. Set it back to the PREPARE statement, so that it's - // logged correctly. - p.stmt = stmt - p.extendedEvalCtx.Placeholders = oldPlaceholders return nil, nil, nil } From 0a1be2f1bb43c68028b37bfe2d8768be6200bddb Mon Sep 17 00:00:00 2001 From: Faizan Qazi Date: Tue, 1 Aug 2023 16:13:12 -0400 Subject: [PATCH 2/2] sql: release save point did not retry for two version invariant errors Previously, when release save point was issued for cockroach_restart this would drive a commit, which could run into retryable errors. Like the two version invariant error, which is not tagged as a user facing re triable error, so the client doesn't know to retry. To address this, this patch converts two version invariant errors in this case into user facing retryable errors; Fixes: #107171 Release note (bug fix): Release save point could incorrectly emit "cannot publish new versions for descriptors" instead of a retryable error to applications. --- pkg/sql/catalog/lease/lease_test.go | 115 ++++++++++++++++++++++++++++ pkg/sql/conn_executor.go | 22 ++++++ pkg/sql/conn_executor_exec.go | 14 ++-- pkg/sql/conn_executor_savepoints.go | 6 ++ 4 files changed, 149 insertions(+), 8 deletions(-) diff --git a/pkg/sql/catalog/lease/lease_test.go b/pkg/sql/catalog/lease/lease_test.go index 7a968dc97cd2..92a40607a930 100644 --- a/pkg/sql/catalog/lease/lease_test.go +++ b/pkg/sql/catalog/lease/lease_test.go @@ -1479,6 +1479,121 @@ CREATE TABLE t.kv (k CHAR PRIMARY KEY, v CHAR); } } +// Tests that when a transaction has already returned results +// to the user and the transaction continues on to make a schema change, +// whenever the table lease two version invariant is violated and the +// transaction needs to be restarted, a retryable error is returned to the +// user. This specific version validations that release savepoint does the +// same with cockroach_restart, which commits on release. +func TestTwoVersionInvariantRetryErrorWitSavePoint(t *testing.T) { + defer leaktest.AfterTest(t)() + var violations int64 + var params base.TestServerArgs + params.Knobs = base.TestingKnobs{ + // Disable execution of schema changers after the schema change + // transaction commits. This is to prevent executing the default + // WaitForOneVersion() code that holds up a schema change + // transaction until the new version has been published to the + // entire cluster. + SQLSchemaChanger: &sql.SchemaChangerTestingKnobs{ + SchemaChangeJobNoOp: func() bool { + return true + }, + TwoVersionLeaseViolation: func() { + atomic.AddInt64(&violations, 1) + }, + }, + } + s, sqlDB, kvDB := serverutils.StartServer(t, params) + defer s.Stopper().Stop(context.Background()) + + if _, err := sqlDB.Exec(` +CREATE DATABASE t; +CREATE TABLE t.kv (k CHAR PRIMARY KEY, v CHAR); +INSERT INTO t.kv VALUES ('a', 'b'); +`); err != nil { + t.Fatal(err) + } + + tableDesc := desctestutils.TestingGetPublicTableDescriptor(kvDB, keys.SystemSQLCodec, "t", "kv") + if tableDesc.GetVersion() != 1 { + t.Fatalf("invalid version %d", tableDesc.GetVersion()) + } + + tx, err := sqlDB.Begin() + if err != nil { + t.Fatal(err) + } + + // Grab a lease on the table. + rows, err := tx.Query("SELECT * FROM t.kv") + if err != nil { + t.Fatal(err) + } + if err := rows.Close(); err != nil { + t.Fatal(err) + } + + // Modify the table descriptor increments the version. + if _, err := sqlDB.Exec(`ALTER TABLE t.kv RENAME to t.kv1`); err != nil { + t.Fatal(err) + } + + txRetry, err := sqlDB.Begin() + if err != nil { + t.Fatal(err) + } + _, err = txRetry.Exec("SAVEPOINT cockroach_restart;") + if err != nil { + t.Fatal(err) + } + + // Read some data using the transaction so that it cannot be + // retried internally + rows, err = txRetry.Query(`SELECT 1`) + if err != nil { + t.Fatal(err) + } + if err := rows.Close(); err != nil { + t.Fatal(err) + } + + if _, err := txRetry.Exec(`ALTER TABLE t.kv1 RENAME TO t.kv2`); err != nil { + t.Fatal(err) + } + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + // This can hang waiting for one version before tx.Commit() is + // called below, so it is executed in another goroutine. + _, err := txRetry.Exec("RELEASE SAVEPOINT cockroach_restart;") + if !testutils.IsError(err, + fmt.Sprintf(`TransactionRetryWithProtoRefreshError: cannot publish new versions for descriptors: \[\{kv1 %d 1\}\], old versions still in use`, tableDesc.GetID()), + ) { + t.Errorf("err = %v", err) + } + err = txRetry.Rollback() + if err != nil { + t.Errorf("err = %v", err) + } + }() + + // Make sure that txRetry does violate the two version lease invariant. + testutils.SucceedsSoon(t, func() error { + if atomic.LoadInt64(&violations) == 0 { + return errors.Errorf("didnt retry schema change") + } + return nil + }) + // Commit the first transaction, unblocking txRetry. + if err := tx.Commit(); err != nil { + t.Fatal(err) + } + wg.Wait() +} + // Tests that when a transaction has already returned results // to the user and the transaction continues on to make a schema change, // whenever the table lease two version invariant is violated and the diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go index b43acb8d094c..4cd4ef5ade77 100644 --- a/pkg/sql/conn_executor.go +++ b/pkg/sql/conn_executor.go @@ -3284,6 +3284,28 @@ func errIsRetriable(err error) bool { descs.IsTwoVersionInvariantViolationError(err) } +// convertRetriableErrorIntoUserVisibleError converts internal retriable +// errors into external, so that the client goes and retries this +// transaction. One example of this is two version invariant errors, which +// happens when a schema change is waiting for a schema change transition to +// propagate. When this happens, we either need to retry externally or internally, +// depending on if we are in an explicit transaction. +func (ex *connExecutor) convertRetriableErrorIntoUserVisibleError( + ctx context.Context, origErr error, +) (modifiedErr error, err error) { + if descs.IsTwoVersionInvariantViolationError(origErr) { + if resetErr := ex.resetTransactionOnSchemaChangeRetry(ctx); resetErr != nil { + return nil, resetErr + } + // Generating a forced retry error here, right after resetting the + // transaction is not exactly necessary, but it's a sound way to + // generate the only type of ClientVisibleRetryError we have. + return ex.state.mu.txn.GenerateForcedRetryableError(ctx, redact.Sprint(origErr)), nil + } + // Return the original error, this error will not be surfaced to the user. + return origErr, nil +} + // makeErrEvent takes an error and returns either an eventRetriableErr or an // eventNonRetriableErr, depending on the error type. func (ex *connExecutor) makeErrEvent(err error, stmt tree.Statement) (fsm.Event, fsm.EventPayload) { diff --git a/pkg/sql/conn_executor_exec.go b/pkg/sql/conn_executor_exec.go index fc892e541487..1e85b05a2978 100644 --- a/pkg/sql/conn_executor_exec.go +++ b/pkg/sql/conn_executor_exec.go @@ -1307,14 +1307,12 @@ func (ex *connExecutor) commitSQLTransaction( } ex.phaseTimes.SetSessionPhaseTime(sessionphase.SessionStartTransactionCommit, timeutil.Now()) if err := commitFn(ctx); err != nil { - if descs.IsTwoVersionInvariantViolationError(err) { - if resetErr := ex.resetTransactionOnSchemaChangeRetry(ctx); resetErr != nil { - return ex.makeErrEvent(err, ast) - } - // Generating a forced retry error here, right after resetting the - // transaction is not exactly necessary, but it's a sound way to - // generate the only type of ClientVisibleRetryError we have. - err = ex.state.mu.txn.GenerateForcedRetryableError(ctx, redact.Sprint(err)) + // For certain retryable errors, we should turn them into client visible + // errors, since the client needs to retry now. + var conversionError error + err, conversionError = ex.convertRetriableErrorIntoUserVisibleError(ctx, err) + if conversionError != nil { + return ex.makeErrEvent(conversionError, ast) } return ex.makeErrEvent(err, ast) } diff --git a/pkg/sql/conn_executor_savepoints.go b/pkg/sql/conn_executor_savepoints.go index bda453e9f42e..3cf497851060 100644 --- a/pkg/sql/conn_executor_savepoints.go +++ b/pkg/sql/conn_executor_savepoints.go @@ -152,6 +152,12 @@ func (ex *connExecutor) execRelease( // Committing the transaction failed. We'll go to state RestartWait if // it's a retriable error, or to state RollbackWait otherwise. if errIsRetriable(err) { + // For certain retryable errors, we should turn them into client visible + // errors, since the client needs to retry now. + var conversionError error + if err, conversionError = ex.convertRetriableErrorIntoUserVisibleError(ctx, err); conversionError != nil { + return ex.makeErrEvent(conversionError, s) + } // Add the savepoint back. We want to allow a ROLLBACK TO SAVEPOINT // cockroach_restart (that's the whole point of commitOnRelease). env.push(*entry)