diff --git a/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go b/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go index 0af11926fbaf..5ef453cac7ba 100644 --- a/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go +++ b/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go @@ -1501,6 +1501,13 @@ func TestTenantLogic_save_table( runLogicTest(t, "save_table") } +func TestTenantLogic_savepoints( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "savepoints") +} + func TestTenantLogic_scale( t *testing.T, ) { diff --git a/pkg/kv/kvserver/replica_read.go b/pkg/kv/kvserver/replica_read.go index ed39fc05d42a..04b86024d0b4 100644 --- a/pkg/kv/kvserver/replica_read.go +++ b/pkg/kv/kvserver/replica_read.go @@ -303,13 +303,13 @@ func (r *Replica) canDropLatchesBeforeEval( // or if any of them need to use an intent interleaving iterator. for _, req := range ba.Requests { reqHeader := req.GetInner().Header() - start, end, seq := reqHeader.Key, reqHeader.EndKey, reqHeader.Sequence + start, end := reqHeader.Key, reqHeader.EndKey var txnID uuid.UUID if ba.Txn != nil { txnID = ba.Txn.ID } needsIntentInterleavingForThisRequest, err := storage.ScanConflictingIntentsForDroppingLatchesEarly( - ctx, rw, txnID, ba.Header.Timestamp, start, end, seq, &intents, maxIntents, + ctx, rw, txnID, ba.Header.Timestamp, start, end, &intents, maxIntents, ) if err != nil { return false /* ok */, true /* stillNeedsIntentInterleaving */, kvpb.NewError( diff --git a/pkg/sql/logictest/testdata/logic_test/savepoints b/pkg/sql/logictest/testdata/logic_test/savepoints new file mode 100644 index 000000000000..a9786e9ddcbd --- /dev/null +++ b/pkg/sql/logictest/testdata/logic_test/savepoints @@ -0,0 +1,35 @@ +# Regression test for https://github.com/cockroachdb/cockroach/issues/94337. +# The linked issue only manifests itself if the read request is able to drop +# latches early. Before this bug was fixed, one of the conditions required for +# a read request to drop its latches early was that it needed to have a higher +# read timestamp compared to the timestamp at which it had written all intents +# the read overlapped with. Adding the sleeps here bumps the transaction's read +# timestamp, by virtue of the closed timestamp interval. + +statement ok +CREATE TABLE a (id INT); + +statement ok +BEGIN + +statement ok +SELECT pg_sleep(1.5) + +statement ok +SAVEPOINT s + +statement ok +SELECT pg_sleep(1.5) + +statement ok +INSERT INTO a(id) VALUES (0); + +statement ok +ROLLBACK TO SAVEPOINT s + +query I +SELECT * FROM a +---- + +statement ok +COMMIT diff --git a/pkg/sql/logictest/tests/fakedist-disk/generated_test.go b/pkg/sql/logictest/tests/fakedist-disk/generated_test.go index 5b12a170ac8e..60e9c080d7f9 100644 --- a/pkg/sql/logictest/tests/fakedist-disk/generated_test.go +++ b/pkg/sql/logictest/tests/fakedist-disk/generated_test.go @@ -1472,6 +1472,13 @@ func TestLogic_save_table( runLogicTest(t, "save_table") } +func TestLogic_savepoints( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "savepoints") +} + func TestLogic_scale( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go b/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go index c69d7a82a9de..496b46e5502c 100644 --- a/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go +++ b/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go @@ -1472,6 +1472,13 @@ func TestLogic_save_table( runLogicTest(t, "save_table") } +func TestLogic_savepoints( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "savepoints") +} + func TestLogic_scale( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/fakedist/generated_test.go b/pkg/sql/logictest/tests/fakedist/generated_test.go index 7e7306cd4092..370cb806df7a 100644 --- a/pkg/sql/logictest/tests/fakedist/generated_test.go +++ b/pkg/sql/logictest/tests/fakedist/generated_test.go @@ -1486,6 +1486,13 @@ func TestLogic_save_table( runLogicTest(t, "save_table") } +func TestLogic_savepoints( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "savepoints") +} + func TestLogic_scale( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go b/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go index 4db9511ec006..ecbf90f63086 100644 --- a/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go +++ b/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go @@ -1451,6 +1451,13 @@ func TestLogic_save_table( runLogicTest(t, "save_table") } +func TestLogic_savepoints( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "savepoints") +} + func TestLogic_scale( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local-vec-off/generated_test.go b/pkg/sql/logictest/tests/local-vec-off/generated_test.go index 8026e88eda6a..64df77d1252d 100644 --- a/pkg/sql/logictest/tests/local-vec-off/generated_test.go +++ b/pkg/sql/logictest/tests/local-vec-off/generated_test.go @@ -1479,6 +1479,13 @@ func TestLogic_save_table( runLogicTest(t, "save_table") } +func TestLogic_savepoints( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "savepoints") +} + func TestLogic_scale( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local/generated_test.go b/pkg/sql/logictest/tests/local/generated_test.go index 1c7629e5f9a1..f83880ccbb4c 100644 --- a/pkg/sql/logictest/tests/local/generated_test.go +++ b/pkg/sql/logictest/tests/local/generated_test.go @@ -1619,6 +1619,13 @@ func TestLogic_save_table( runLogicTest(t, "save_table") } +func TestLogic_savepoints( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "savepoints") +} + func TestLogic_scale( t *testing.T, ) { diff --git a/pkg/storage/engine.go b/pkg/storage/engine.go index c05b77682735..a4faca7fe423 100644 --- a/pkg/storage/engine.go +++ b/pkg/storage/engine.go @@ -1731,14 +1731,10 @@ func assertMVCCIteratorInvariants(iter MVCCIterator) error { // proceed until the intents are resolved. Intents that don't conflict with the // transaction referenced by txnID[1] at the supplied `ts` are ignored. // -// The caller must supply the sequence number of the request on behalf of which -// the intents are being scanned. This is used to determine if the caller needs -// to consult intent history when performing a scan over the MVCC keyspace -// (indicated by the `needIntentHistory` return parameter). Intent history is -// required to read the correct provisional value when scanning if we encounter -// an intent written by the `txn` at a higher sequence number than the one -// supplied or at a higher timestamp than the `ts` supplied (regardless of the -// sequence number of the intent). +// The `needsIntentHistory` return value indicates whether the caller needs to +// consult intent history when performing a scan over the MVCC keyspace to +// read correct provisional values for at least one of the keys being scanned. +// Typically, this applies to all transactions that read their own writes. // // [1] The supplied txnID may be empty (uuid.Nil) if the request on behalf of // which the scan is being performed is non-transactional. @@ -1748,7 +1744,6 @@ func ScanConflictingIntentsForDroppingLatchesEarly( txnID uuid.UUID, ts hlc.Timestamp, start, end roachpb.Key, - seq enginepb.TxnSeq, intents *[]roachpb.Intent, maxIntents int64, ) (needIntentHistory bool, err error) { @@ -1791,14 +1786,36 @@ func ScanConflictingIntentsForDroppingLatchesEarly( } ownIntent := txnID != uuid.Nil && txnID == meta.Txn.ID if ownIntent { - // If we ran into one of our own intents, check whether the intent has a - // higher (or equal) sequence number or a higher (or equal) timestamp. If - // either of these conditions is true, a corresponding scan over the MVCC - // key space will need access to the key's intent history in order to read - // the correct provisional value. So we set `needIntentHistory` to true. - if seq <= meta.Txn.Sequence || ts.LessEq(meta.Timestamp.ToTimestamp()) { - needIntentHistory = true - } + // If we ran into one of our own intents, a corresponding scan over the + // MVCC keyspace will need access to the key's intent history in order to + // read the correct provisional value. As such, we set `needsIntentHistory` + // to be true. + // + // This determination is more restrictive than it needs to be. A read + // request needs access to the intent history when performing a scan over + // the MVCC keyspace only if: + // 1. The request is reading at a lower sequence number than the intent's + // sequence number. + // 2. OR the request is reading at a (strictly) lower timestamp than the + // intent timestamp[1]. This can happen if the intent was pushed for some + // reason. + // 3. OR the found intent should be ignored because it was written as part + // of a savepoint which was subsequently rolled back. + // 4. OR the found intent and read request belong to different txn epochs. + // + // The conditions above mirror special case handling for intents by + // pebbleMVCCScanner's getOne method. If we find scanning the lock table + // twice (once during conflict resolution, and once when interleaving + // intents during the MVCC read) is too expensive for transactions that + // read their own writes, there's some optimizations to be had here by + // being smarter about when we decide to interleave intents or not to. + // + // [1] Only relevant if the intent has a sequence number less than or + // equal to the read request's sequence number. Otherwise, we need access + // to the intent history to read the correct provisional value -- one + // written at a lower or equal sequence number compared to the read + // request's. + needIntentHistory = true continue } if conflictingIntent := meta.Timestamp.ToTimestamp().LessEq(ts); !conflictingIntent { diff --git a/pkg/storage/engine_test.go b/pkg/storage/engine_test.go index 199a3f2e3670..746170f14400 100644 --- a/pkg/storage/engine_test.go +++ b/pkg/storage/engine_test.go @@ -1905,6 +1905,366 @@ func TestEngineIteratorVisibility(t *testing.T) { } } +// TestScanConflictingIntentsForDroppingLatchesEarly tests +// ScanConflictingIntentsForDroppingLatchesEarly for all non read-your-own-write +// cases. Read-your-own-write cases are tested separately in +// TestScanConflictingIntentsForDroppingLatchesEarlyReadYourOwnWrites. +func TestScanConflictingIntentsForDroppingLatchesEarly(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ctx := context.Background() + + ts := func(nanos int) hlc.Timestamp { + return hlc.Timestamp{ + WallTime: int64(nanos), + } + } + newTxn := func(ts hlc.Timestamp) *roachpb.Transaction { + txnID := uuid.MakeV4() + return &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ + ID: txnID, + Epoch: 1, + WriteTimestamp: ts, + MinTimestamp: ts, + }, + ReadTimestamp: ts, + } + } + + belowTxnTS := ts(1) + txnTS := ts(2) + aboveTxnTS := ts(3) + + keyA := roachpb.Key("a") + keyB := roachpb.Key("b") + keyC := roachpb.Key("c") + val := roachpb.Value{RawBytes: []byte{'v'}} + + testCases := []struct { + name string + setup func(t *testing.T, rw ReadWriter, txn *roachpb.Transaction) + start roachpb.Key + end roachpb.Key + expNeedsIntentHistory bool + expErr string + expNumFoundIntents int + }{ + { + name: "invalid end key", + start: keyC, + end: keyA, + expErr: "start key must be less than end key", + expNumFoundIntents: 0, + }, + { + name: "no end key", + start: keyA, + end: nil, + expNeedsIntentHistory: false, + expNumFoundIntents: 0, + }, + { + name: "no intents", + start: keyB, + end: keyC, + expNeedsIntentHistory: false, + expNumFoundIntents: 0, + }, + { + name: "conflicting txn intent at lower timestamp", + setup: func(t *testing.T, rw ReadWriter, txn *roachpb.Transaction) { + conflictingTxn := newTxn(belowTxnTS) // test txn should see this intent + err := MVCCPut( + ctx, rw, nil, keyA, conflictingTxn.WriteTimestamp, hlc.ClockTimestamp{}, val, conflictingTxn, + ) + require.NoError(t, err) + }, + start: keyA, + end: keyC, + expNeedsIntentHistory: false, + expNumFoundIntents: 1, + }, + { + name: "conflicting txn intent at higher timestamp", + setup: func(t *testing.T, rw ReadWriter, txn *roachpb.Transaction) { + conflictingTxn := newTxn(aboveTxnTS) // test txn shouldn't see this intent + err := MVCCPut( + ctx, rw, nil, keyA, conflictingTxn.WriteTimestamp, hlc.ClockTimestamp{}, val, conflictingTxn, + ) + require.NoError(t, err) + }, + start: keyA, + end: keyC, + expNeedsIntentHistory: false, + expNumFoundIntents: 0, + }, + { + name: "bounds do not include (latest) own write", + setup: func(t *testing.T, rw ReadWriter, txn *roachpb.Transaction) { + err := MVCCPut(ctx, rw, nil, keyA, txn.WriteTimestamp, hlc.ClockTimestamp{}, val, txn) + require.NoError(t, err) + }, + start: keyB, + end: keyC, + expNeedsIntentHistory: false, + expNumFoundIntents: 0, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + eng := NewDefaultInMemForTesting() + defer eng.Close() + + var intents []roachpb.Intent + txn := newTxn(txnTS) + if tc.setup != nil { + tc.setup(t, eng, txn) + } + needsIntentHistory, err := ScanConflictingIntentsForDroppingLatchesEarly( + ctx, + eng, + txn.ID, + txn.ReadTimestamp, + tc.start, + tc.end, + &intents, + 0, /* maxIntents */ + ) + if tc.expErr != "" { + require.Error(t, err) + testutils.IsError(err, tc.expErr) + return // none of the other fields need to be tested. + } + + require.NoError(t, err) + require.Equal(t, tc.expNeedsIntentHistory, needsIntentHistory) + require.Equal(t, tc.expNumFoundIntents, len(intents)) + }) + } +} + +// TestScanConflictingIntentsForDroppingLatchesEarlyReadYourOwnWrites constructs +// various read-your-own-write cases and ensures we correctly determine whether +// callers of ScanConflictingIntentsForDroppingLatchesEarly need to consult +// intent history or not when performing a scan over the MVCC keyspace. Factors +// that go into this determination are: +// 1. Sequence numbers of the intent/read op. +// 2. Timestamps of the intent/read op. +// 3. Epochs of the intent/read op. +// 4. Whether any savepoints have been rolled back. +// +// NB: When scanning for conflicting intents to determine if latches can be +// dropped early, we fallback to using the intent interleaving iterator in all +// read-your-own-write cases. However, doing so is more restrictive than it +// needs to be -- the in-line test expectations correspond to what an optimized +// determination for `needsIntentHistory` would look like. However, for the +// purposes of this test, we assert that we always fall back to using the intent +// interleaving iterator in ALL read-your-own-write cases. +func TestScanConflictingIntentsForDroppingLatchesEarlyReadYourOwnWrites(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + alwaysFallbackToIntentInterleavingIteratorForReadYourOwnWrites := true + + ctx := context.Background() + ts := func(nanos int) hlc.Timestamp { + return hlc.Timestamp{ + WallTime: int64(nanos), + } + } + belowReadTS := ts(1) + readTS := ts(2) + aboveReadTS := ts(3) + + belowReadTxnEpoch := enginepb.TxnEpoch(1) + readTxnEpoch := enginepb.TxnEpoch(3) + aboveReadTxnEpoch := enginepb.TxnEpoch(2) + + belowReadSeqNumber := enginepb.TxnSeq(1) + readSeqNumber := enginepb.TxnSeq(2) + aboveReadSeqNumber := enginepb.TxnSeq(3) + + keyA := roachpb.Key("a") + val := roachpb.Value{RawBytes: []byte{'v'}} + + testCases := []struct { + name string + intentTS hlc.Timestamp + intentSequenceNumber enginepb.TxnSeq + intentEpoch enginepb.TxnEpoch + expNeedsIntentHistory bool // currently unused when testing + ignoredSeqNumbers enginepb.IgnoredSeqNumRange + }{ + { + name: "equal {timestamp, seq number, epoch}", + intentTS: readTS, + intentSequenceNumber: readSeqNumber, + intentEpoch: readTxnEpoch, + expNeedsIntentHistory: false, + }, + { + name: "higher {intent seq number} equal {timestamp, epoch}", + intentTS: readTS, + intentSequenceNumber: aboveReadSeqNumber, + intentEpoch: readTxnEpoch, + expNeedsIntentHistory: true, + }, + { + name: "higher {intent seq number} equal {epoch} lower {intent timestamp}", + intentTS: belowReadTS, + intentSequenceNumber: aboveReadSeqNumber, + intentEpoch: readTxnEpoch, + expNeedsIntentHistory: true, + }, + { + name: "higher {intent seq number, intent timestamp} equal {epoch}", + intentTS: aboveReadTS, + intentSequenceNumber: aboveReadSeqNumber, + intentEpoch: readTxnEpoch, + expNeedsIntentHistory: true, + }, + { + // Naively scanning at the read's timestamp, without accounting for the + // intent, would result in us missing our own write as the writeTS is + // higher than the readTS. + name: "higher {intent timestamp} equal {epoch} lower {intent seq number}", + intentTS: aboveReadTS, + intentSequenceNumber: belowReadSeqNumber, + intentEpoch: readTxnEpoch, + expNeedsIntentHistory: true, + }, + { + // Naively scanning at the read's timestamp, without accounting for the + // intent, would result in us missing our own write as the writeTS is + // higher than the readTS. + name: "higher {intent timestamp} equal {seq number, epoch}", + intentTS: aboveReadTS, + intentSequenceNumber: readSeqNumber, + intentEpoch: readTxnEpoch, + expNeedsIntentHistory: true, + }, + { + name: "equal {timestamp, epoch} lower {intent seq number}", + intentTS: readTS, + intentSequenceNumber: belowReadSeqNumber, + intentEpoch: readTxnEpoch, + expNeedsIntentHistory: false, + }, + { + name: "equal {epoch} lower {intent timestamp, intent seq number}", + intentTS: belowReadTS, + intentSequenceNumber: belowReadSeqNumber, + intentEpoch: readTxnEpoch, + expNeedsIntentHistory: false, + }, + { + name: "equal {epoch, seq number} lower {intent timestamp}", + intentTS: belowReadTS, + intentSequenceNumber: readSeqNumber, + intentEpoch: readTxnEpoch, + expNeedsIntentHistory: false, + }, + + // lower/higher epoch test cases aren't exhaustive. + { + name: "equal {timestamp, seq number} lower {epoch}", + intentTS: readTS, + intentSequenceNumber: readSeqNumber, + intentEpoch: belowReadTxnEpoch, + expNeedsIntentHistory: true, + }, + { + name: "higher{epoch} equal {timestamp, seq number}", + intentTS: readTS, + intentSequenceNumber: readSeqNumber, + intentEpoch: aboveReadTxnEpoch, + expNeedsIntentHistory: true, + }, + { + name: "lower {epoch, intent timestamp, intent seq number}", + intentTS: belowReadTS, + intentSequenceNumber: belowReadSeqNumber, + intentEpoch: belowReadTxnEpoch, + expNeedsIntentHistory: true, + }, + { + name: "higher {epoch} lower {intent timestamp, intent seq number}", + intentTS: belowReadTS, + intentSequenceNumber: belowReadSeqNumber, + intentEpoch: aboveReadTxnEpoch, + expNeedsIntentHistory: true, + }, + // Savepoint related tests. + { + // Scenario from https://github.com/cockroachdb/cockroach/issues/94337. + name: "intent part of rolled back savepoint", + intentTS: belowReadTS, + intentSequenceNumber: belowReadSeqNumber, + intentEpoch: readTxnEpoch, + ignoredSeqNumbers: enginepb.IgnoredSeqNumRange{Start: belowReadSeqNumber, End: belowReadSeqNumber}, + expNeedsIntentHistory: true, + }, + { + name: "intent not part of rolled back savepoint", + intentTS: readTS, + intentSequenceNumber: readSeqNumber, + intentEpoch: readTxnEpoch, + ignoredSeqNumbers: enginepb.IgnoredSeqNumRange{Start: belowReadSeqNumber, End: belowReadSeqNumber}, + expNeedsIntentHistory: true, // should be false + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + eng := NewDefaultInMemForTesting() + defer eng.Close() + + txnID := uuid.MakeV4() + txn := &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ + ID: txnID, + }, + } + + // Write the intent as dictated by the test case. + txn.Epoch = tc.intentEpoch + txn.Sequence = tc.intentSequenceNumber + txn.ReadTimestamp = tc.intentTS + txn.WriteTimestamp = tc.intentTS + err := MVCCPut(ctx, eng, nil, keyA, txn.WriteTimestamp, hlc.ClockTimestamp{}, val, txn) + require.NoError(t, err) + + // Set up the read. + txn.Epoch = readTxnEpoch + txn.Sequence = readSeqNumber + txn.ReadTimestamp = readTS + txn.WriteTimestamp = readTS + txn.IgnoredSeqNums = []enginepb.IgnoredSeqNumRange{tc.ignoredSeqNumbers} + + var intents []roachpb.Intent + needsIntentHistory, err := ScanConflictingIntentsForDroppingLatchesEarly( + ctx, + eng, + txn.ID, + txn.ReadTimestamp, + keyA, + nil, + &intents, + 0, /* maxIntents */ + ) + require.NoError(t, err) + if alwaysFallbackToIntentInterleavingIteratorForReadYourOwnWrites { + require.Equal(t, true, needsIntentHistory) + } else { + require.Equal(t, tc.expNeedsIntentHistory, needsIntentHistory) + } + }) + } +} + // TestEngineRangeKeyMutations tests that range key mutations work as expected, // both for the engine directly and for batches. func TestEngineRangeKeyMutations(t *testing.T) {