Skip to content

Commit

Permalink
sql: resolve schema in high-priority transactions
Browse files Browse the repository at this point in the history
This patch makes reads of the system.namespace table and of table
descriptors run in high-priority transactions. These transactions will push
locks owned by schema changes out of their way. The idea is that regular
SQL transactions reading schema don't want to wait for the transactions
performing DDL statements. Instead, those DDLs will be pushed and forced
to refresh.

Besides the benefit to regular transactions, this patch also prevents
deadlocks for the transactions performing DDL. Before this patch, the
final select in the following sequence would deadlock:

begin; savepoint s; create table t(x int); rollback to savepoint s;
select * from t;

The select is reading the namespace table, using a different txn from
its own. That read would block on the intent laid down by the prior
create. With this patch, the transaction effectively pushes itself, but
gets to otherwise run.

Fixes cockroachdb#24885

Release justification: Fix for an old bug that became more prominent
when we introduced SAVEPOINTs recently.

Release note: A rare bug causing transactions that have performed schema
changes to deadlock after they restart has been fixed.
  • Loading branch information
andreimatei committed Mar 16, 2020
1 parent a06be62 commit b9a63f2
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 0 deletions.
17 changes: 17 additions & 0 deletions pkg/sql/lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,14 @@ func (s LeaseStore) acquire(
) (*tableVersionState, error) {
var table *tableVersionState
err := s.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
// Run the descriptor read as high-priority, thereby pushing any intents out
// of its way. We don't want schema changes to prevent lease acquisitions;
// we'd rather force them to refresh. Also this prevents deadlocks in cases
// where the name resolution is triggered by the transaction doing the
// schema change itself.
if err := txn.SetUserPriority(roachpb.MaxUserPriority); err != nil {
return err
}
expiration := txn.ReadTimestamp()
expiration.WallTime += int64(s.jitteredLeaseDuration())
if expiration.LessEq(minExpiration) {
Expand Down Expand Up @@ -211,6 +219,7 @@ func (s LeaseStore) acquire(
ImmutableTableDescriptor: *sqlbase.NewImmutableTableDescriptor(*tableDesc),
expiration: expiration,
}
log.VEventf(ctx, 2, "LeaseStore acquired lease %+v", storedLease)
table.mu.lease = storedLease

// ValidateTable instead of Validate, even though we have a txn available,
Expand Down Expand Up @@ -1607,6 +1616,14 @@ func (m *LeaseManager) resolveName(
) (sqlbase.ID, error) {
id := sqlbase.InvalidID
if err := m.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
// Run the name lookup as high-priority, thereby pushing any intents out of
// its way. We don't want schema changes to prevent name resolution/lease
// acquisitions; we'd rather force them to refresh. Also this prevents
// deadlocks in cases where the name resolution is triggered by the
// transaction doing the schema change itself.
if err := txn.SetUserPriority(roachpb.MaxUserPriority); err != nil {
return err
}
txn.SetFixedTimestamp(ctx, timestamp)
var found bool
var err error
Expand Down
97 changes: 97 additions & 0 deletions pkg/sql/lease_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1990,3 +1990,100 @@ CREATE TABLE t.after (k CHAR PRIMARY KEY, v CHAR);
t.expectLeases(beforeDesc.ID, "")
t.expectLeases(afterDesc.ID, "/1/1")
}

// TestLeaseAcquisitionDoesntBlock tests that acquiring a lease doesn't block
// on other transactions performing schema changes. Lease acquisitions run in
// high-priority transactions, thereby pushing any locks held by
// schema-changing transactions out of their way.
func TestLeaseAcquisitionDoesntBlock(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := context.Background()
	params, _ := tests.CreateTestServerParams()
	s, db, _ := serverutils.StartServer(t, params)
	defer s.Stopper().Stop(ctx)

	_, err := db.Exec(`CREATE DATABASE t; CREATE TABLE t.test(k CHAR PRIMARY KEY, v CHAR);`)
	require.NoError(t, err)

	// Figure out the table ID.
	row := db.QueryRow("SELECT id FROM system.namespace WHERE name='test'")
	var descID sqlbase.ID
	require.NoError(t, row.Scan(&descID))

	// Spin up another goroutine performing a schema change. We'll suspend its
	// execution until the main goroutine is able to acquire its lease.
	//
	// schemaCh is buffered with room for both results the goroutine can send
	// (the ALTER result and the COMMIT result), so the goroutine never blocks
	// on a send if the main goroutine bails out early on a failed assertion.
	schemaCh := make(chan error, 2)
	schemaUnblock := make(chan struct{})
	go func() {
		tx, err := db.BeginTx(ctx, nil)
		if err != nil {
			schemaCh <- err
			return
		}
		_, err = tx.Exec("ALTER TABLE t.test ADD COLUMN v2 CHAR")
		schemaCh <- err
		if err != nil {
			return
		}

		<-schemaUnblock
		schemaCh <- tx.Commit()
	}()

	// Wait until the schema change has laid down its locks, then acquire a
	// lease. The acquisition must not block on the still-open transaction.
	require.NoError(t, <-schemaCh)
	lease, _, err := s.LeaseManager().(*sql.LeaseManager).Acquire(ctx, s.Clock().Now(), descID)
	require.NoError(t, err)

	// Release the lease so that the schema change can proceed.
	releaseErr := s.LeaseManager().(*sql.LeaseManager).Release(lease)
	// Unblock the schema change before asserting on the release result, so the
	// goroutine is not left parked on schemaUnblock if the assertion fails.
	close(schemaUnblock)
	require.NoError(t, releaseErr)

	// Wait for the schema change to finish.
	require.NoError(t, <-schemaCh)
}

// TestLeaseAcquisitionByNameDoesntBlock tests that acquiring a lease doesn't
// block on other transactions performing schema changes. This is similar to
// TestLeaseAcquisitionDoesntBlock, except it acquires a lease by table name
// instead of ID, and correspondingly the schema change touches the namespace
// table.
func TestLeaseAcquisitionByNameDoesntBlock(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := context.Background()
	params, _ := tests.CreateTestServerParams()
	s, db, _ := serverutils.StartServer(t, params)
	defer s.Stopper().Stop(ctx)

	_, err := db.Exec(`CREATE DATABASE t`)
	require.NoError(t, err)

	// Spin up another goroutine performing a schema change - creating a table.
	// We'll suspend its execution until the main goroutine is able to acquire its
	// lease. The idea is that, before being suspended, this transaction has put
	// down locks on the system.namespace table. The point of the test is to check
	// that a lease acquisition pushes these locks out of its way.
	//
	// schemaCh is buffered with room for both results the goroutine can send
	// (the CREATE result and the COMMIT result), so the goroutine never blocks
	// on a send if the main goroutine bails out early on a failed assertion.
	schemaCh := make(chan error, 2)
	schemaUnblock := make(chan struct{})
	go func() {
		tx, err := db.BeginTx(ctx, nil)
		if err != nil {
			schemaCh <- err
			return
		}
		_, err = tx.Exec("CREATE TABLE t.test()")
		schemaCh <- err
		if err != nil {
			return
		}

		<-schemaUnblock
		schemaCh <- tx.Commit()
	}()

	require.NoError(t, <-schemaCh)
	// The table is not yet committed, so name resolution must fail promptly
	// with "does not exist" rather than block on the creating transaction.
	_, err = db.Exec("SELECT * from t.test")
	// The query has already returned; unblock the schema change before
	// asserting so the goroutine is not left parked if an assertion fails.
	close(schemaUnblock)
	// require.Error's trailing arguments are only a failure message, so the
	// error text has to be checked explicitly.
	require.Error(t, err)
	require.Contains(t, err.Error(), `relation "t.test" does not exist`)

	// Wait for the schema change to finish.
	require.NoError(t, <-schemaCh)
}
17 changes: 17 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/schema_change_in_txn
Original file line number Diff line number Diff line change
Expand Up @@ -1765,3 +1765,20 @@ ALTER TABLE t42508 ADD COLUMN y INT DEFAULT nextval('s42508')

statement error pgcode XXA00 unimplemented: cannot evaluate scalar expressions containing sequence operations.*\nHINT.*\n.*42508
COMMIT

# Test that rolling back to a savepoint past a schema change does not result in
# a deadlock. This is a regression test for #24885. Rolling back past a schema
# change used to have a problem because leaving locks behind on descriptors or
# namespace entries could block the schema resolution after the rollback (schema
# resolution uses different transactions to do its reads). We've fixed it by having those
# other transactions run at high priority, thus pushing the intents out of their way.
subtest no_schemachange_deadlock_after_savepoint_rollback

statement ok
begin; savepoint s; create table t(x int); rollback to savepoint s;

query error relation "t" does not exist
select * from t;

statement ok
commit;

0 comments on commit b9a63f2

Please sign in to comment.