From 8654b02bc25da9ace78ee2055cebca992ce843c6 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Fri, 13 Mar 2020 12:09:12 -0400 Subject: [PATCH] sql: resolve schema in high-priority transactions This patch makes reads of the system.namespace table and of table descriptors run in high-priority transactions. The transactions will push locks owned by schema changes out of their way. The idea is that regular SQL transactions reading schema don't want to wait for the transactions performing DDL statements. Instead, those DDLs will be pushed and forced to refresh. Besides the benefit to regular transactions, this patch also prevents deadlocks for the transactions performing DDL. Before this patch, the final select in the following sequence would deadlock: begin; savepoint s; create table t(x int); rollback to savepoint s; select * from t; The select is reading the namespace table, using a different txn from its own. That read would block on the intent laid down by the prior create. With this patch, the transaction effectively pushes itself, but gets to otherwise run. Fixes #24885 Release justification: Fix for an old bug that became more prominent when we introduced SAVEPOINTs recently. Release note: A rare bug causing transactions that have performed schema changes to deadlock after they restart has been fixed. --- pkg/sql/lease.go | 17 ++++ pkg/sql/lease_test.go | 97 +++++++++++++++++++ .../testdata/logic_test/schema_change_in_txn | 17 ++++ 3 files changed, 131 insertions(+) diff --git a/pkg/sql/lease.go b/pkg/sql/lease.go index 94b8dfc620b1..c86ed253cd87 100644 --- a/pkg/sql/lease.go +++ b/pkg/sql/lease.go @@ -181,6 +181,14 @@ func (s LeaseStore) acquire( ) (*tableVersionState, error) { var table *tableVersionState err := s.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { + // Run the descriptor read as high-priority, thereby pushing any intents out + // of its way. 
We don't want schema changes to prevent lease acquisitions; + // we'd rather force them to refresh. Also this prevents deadlocks in cases + // where the name resolution is triggered by the transaction doing the + // schema change itself. + if err := txn.SetUserPriority(roachpb.MaxUserPriority); err != nil { + return err + } expiration := txn.ReadTimestamp() expiration.WallTime += int64(s.jitteredLeaseDuration()) if expiration.LessEq(minExpiration) { @@ -211,6 +219,7 @@ func (s LeaseStore) acquire( ImmutableTableDescriptor: *sqlbase.NewImmutableTableDescriptor(*tableDesc), expiration: expiration, } + log.VEventf(ctx, 2, "LeaseStore acquired lease %+v", storedLease) table.mu.lease = storedLease // ValidateTable instead of Validate, even though we have a txn available, @@ -1607,6 +1616,14 @@ func (m *LeaseManager) resolveName( ) (sqlbase.ID, error) { id := sqlbase.InvalidID if err := m.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { + // Run the name lookup as high-priority, thereby pushing any intents out of + // its way. We don't want schema changes to prevent name resolution/lease + // acquisitions; we'd rather force them to refresh. Also this prevents + // deadlocks in cases where the name resolution is triggered by the + // transaction doing the schema change itself. + if err := txn.SetUserPriority(roachpb.MaxUserPriority); err != nil { + return err + } txn.SetFixedTimestamp(ctx, timestamp) var found bool var err error diff --git a/pkg/sql/lease_test.go b/pkg/sql/lease_test.go index 4ea8483c2ca3..d56996d9245e 100644 --- a/pkg/sql/lease_test.go +++ b/pkg/sql/lease_test.go @@ -1986,3 +1986,100 @@ CREATE TABLE t.after (k CHAR PRIMARY KEY, v CHAR); t.expectLeases(beforeDesc.ID, "") t.expectLeases(afterDesc.ID, "/1/1") } + +// Test that acquiring a lease doesn't block on other transactions performing +// schema changes. Lease acquisitions run in high-priority transactions, thereby +// pushing any locks held by schema-changing transactions out of their way. 
+func TestLeaseAcquisitionDoesntBlock(t *testing.T) { + defer leaktest.AfterTest(t)() + ctx := context.Background() + params, _ := tests.CreateTestServerParams() + s, db, _ := serverutils.StartServer(t, params) + defer s.Stopper().Stop(ctx) + + _, err := db.Exec(`CREATE DATABASE t; CREATE TABLE t.test(k CHAR PRIMARY KEY, v CHAR);`) + require.NoError(t, err) + + // Figure out the table ID. + row := db.QueryRow("SELECT id FROM system.namespace WHERE name='test'") + var descID sqlbase.ID + require.NoError(t, row.Scan(&descID)) + + // Spin up another goroutine performing a schema change. We'll suspend its + // execution until the main goroutine is able to acquire its lease. + schemaCh := make(chan error) + schemaUnblock := make(chan struct{}) + go func() { + tx, err := db.BeginTx(ctx, nil) + if err != nil { + schemaCh <- err + return + } + _, err = tx.Exec("ALTER TABLE t.test ADD COLUMN v2 CHAR") + schemaCh <- err + if err != nil { + return + } + + <-schemaUnblock + schemaCh <- tx.Commit() + }() + + require.NoError(t, <-schemaCh) + lease, _, err := s.LeaseManager().(*sql.LeaseManager).Acquire(ctx, s.Clock().Now(), descID) + require.NoError(t, err) + + // Release the lease so that the schema change can proceed. + require.NoError(t, s.LeaseManager().(*sql.LeaseManager).Release(lease)) + // Unblock the schema change. + close(schemaUnblock) + + // Wait for the schema change to finish. + require.NoError(t, <-schemaCh) +} + +// Test that acquiring a lease doesn't block on other transactions performing +// schema changes. This is similar to the previous test, except it acquires a +// lease by table name instead of ID, and correspondingly the schema change +// touches the namespace table. 
+func TestLeaseAcquisitionByNameDoesntBlock(t *testing.T) { + defer leaktest.AfterTest(t)() + ctx := context.Background() + params, _ := tests.CreateTestServerParams() + s, db, _ := serverutils.StartServer(t, params) + defer s.Stopper().Stop(ctx) + + _, err := db.Exec(`CREATE DATABASE t`) + require.NoError(t, err) + + // Spin up another goroutine performing a schema change - creating a table. + // We'll suspend its execution until the main goroutine is able to acquire its + // lease. The idea is that, before being suspended, this transaction has put + // down locks on the system.namespace table. The point of the test is to check + // that a lease acquisition pushes these locks out of its way. + schemaCh := make(chan error) + schemaUnblock := make(chan struct{}) + go func() { + tx, err := db.BeginTx(ctx, nil) + if err != nil { + schemaCh <- err + return + } + _, err = tx.Exec("CREATE TABLE t.test()") + schemaCh <- err + if err != nil { + return + } + + <-schemaUnblock + schemaCh <- tx.Commit() + }() + + require.NoError(t, <-schemaCh) + _, err = db.Exec("SELECT * from t.test") + require.Error(t, err, `pq: relation "t.test" does not exist`) + close(schemaUnblock) + + // Wait for the schema change to finish. + require.NoError(t, <-schemaCh) +} diff --git a/pkg/sql/logictest/testdata/logic_test/schema_change_in_txn b/pkg/sql/logictest/testdata/logic_test/schema_change_in_txn index 57e8115409e0..dbdf7b92f0db 100644 --- a/pkg/sql/logictest/testdata/logic_test/schema_change_in_txn +++ b/pkg/sql/logictest/testdata/logic_test/schema_change_in_txn @@ -1770,3 +1770,20 @@ ALTER TABLE t42508 ADD COLUMN y INT DEFAULT nextval('s42508') statement error pgcode XXA00 unimplemented: cannot evaluate scalar expressions containing sequence operations.*\nHINT.*\n.*42508 COMMIT + +# Test that rolling back to a savepoint past a schema change does not result in +# a deadlock. This is a regression test for #24885. 
Rolling back past a schema +# change used to have a problem because leaving locks behind on descriptors or +# namespace entries could block the schema resolution after the rollback (schema +# resolution uses different transactions to do its reads). We've fixed it by having those +# other transactions run at high priority, thus pushing the intents out of their way. +subtest no_schemachange_deadlock_after_savepoint_rollback + +statement ok +begin; savepoint s; create table t(x int); rollback to savepoint s; + +query error relation "t" does not exist +select * from t; + +statement ok +commit;