diff --git a/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go b/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go index 0af11926fbaf..5ef453cac7ba 100644 --- a/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go +++ b/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go @@ -1501,6 +1501,13 @@ func TestTenantLogic_save_table( runLogicTest(t, "save_table") } +func TestTenantLogic_savepoints( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "savepoints") +} + func TestTenantLogic_scale( t *testing.T, ) { diff --git a/pkg/kv/kvserver/replica_read.go b/pkg/kv/kvserver/replica_read.go index ed39fc05d42a..04b86024d0b4 100644 --- a/pkg/kv/kvserver/replica_read.go +++ b/pkg/kv/kvserver/replica_read.go @@ -303,13 +303,13 @@ func (r *Replica) canDropLatchesBeforeEval( // or if any of them need to use an intent interleaving iterator. for _, req := range ba.Requests { reqHeader := req.GetInner().Header() - start, end, seq := reqHeader.Key, reqHeader.EndKey, reqHeader.Sequence + start, end := reqHeader.Key, reqHeader.EndKey var txnID uuid.UUID if ba.Txn != nil { txnID = ba.Txn.ID } needsIntentInterleavingForThisRequest, err := storage.ScanConflictingIntentsForDroppingLatchesEarly( - ctx, rw, txnID, ba.Header.Timestamp, start, end, seq, &intents, maxIntents, + ctx, rw, txnID, ba.Header.Timestamp, start, end, &intents, maxIntents, ) if err != nil { return false /* ok */, true /* stillNeedsIntentInterleaving */, kvpb.NewError( diff --git a/pkg/sql/logictest/testdata/logic_test/savepoints b/pkg/sql/logictest/testdata/logic_test/savepoints new file mode 100644 index 000000000000..a9786e9ddcbd --- /dev/null +++ b/pkg/sql/logictest/testdata/logic_test/savepoints @@ -0,0 +1,35 @@ +# Regression test for https://github.com/cockroachdb/cockroach/issues/94337. +# The linked issue only manifests itself if the read request is able to drop +# latches early. Before this bug was fixed, one of the conditions required for +# a read request to drop its latches early was that it needed to have a higher +# read timestamp compared to the timestamp at which it had written all intents +# the read overlapped with. Adding the sleeps here bumps the transaction's read +# timestamp, by virtue of the closed timestamp interval. + +statement ok +CREATE TABLE a (id INT); + +statement ok +BEGIN + +statement ok +SELECT pg_sleep(1.5) + +statement ok +SAVEPOINT s + +statement ok +SELECT pg_sleep(1.5) + +statement ok +INSERT INTO a(id) VALUES (0); + +statement ok +ROLLBACK TO SAVEPOINT s + +query I +SELECT * FROM a +---- + +statement ok +COMMIT diff --git a/pkg/sql/logictest/tests/fakedist-disk/generated_test.go b/pkg/sql/logictest/tests/fakedist-disk/generated_test.go index 5b12a170ac8e..60e9c080d7f9 100644 --- a/pkg/sql/logictest/tests/fakedist-disk/generated_test.go +++ b/pkg/sql/logictest/tests/fakedist-disk/generated_test.go @@ -1472,6 +1472,13 @@ func TestLogic_save_table( runLogicTest(t, "save_table") } +func TestLogic_savepoints( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "savepoints") +} + func TestLogic_scale( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go b/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go index c69d7a82a9de..496b46e5502c 100644 --- a/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go +++ b/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go @@ -1472,6 +1472,13 @@ func TestLogic_save_table( runLogicTest(t, "save_table") } +func TestLogic_savepoints( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "savepoints") +} + func TestLogic_scale( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/fakedist/generated_test.go b/pkg/sql/logictest/tests/fakedist/generated_test.go index 7e7306cd4092..370cb806df7a 100644 --- a/pkg/sql/logictest/tests/fakedist/generated_test.go +++ b/pkg/sql/logictest/tests/fakedist/generated_test.go @@ -1486,6 +1486,13 @@ func TestLogic_save_table( runLogicTest(t, "save_table") } +func TestLogic_savepoints( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "savepoints") +} + func TestLogic_scale( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go b/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go index 4db9511ec006..ecbf90f63086 100644 --- a/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go +++ b/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go @@ -1451,6 +1451,13 @@ func TestLogic_save_table( runLogicTest(t, "save_table") } +func TestLogic_savepoints( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "savepoints") +} + func TestLogic_scale( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local-vec-off/generated_test.go b/pkg/sql/logictest/tests/local-vec-off/generated_test.go index 8026e88eda6a..64df77d1252d 100644 --- a/pkg/sql/logictest/tests/local-vec-off/generated_test.go +++ b/pkg/sql/logictest/tests/local-vec-off/generated_test.go @@ -1479,6 +1479,13 @@ func TestLogic_save_table( runLogicTest(t, "save_table") } +func TestLogic_savepoints( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "savepoints") +} + func TestLogic_scale( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local/generated_test.go b/pkg/sql/logictest/tests/local/generated_test.go index 1c7629e5f9a1..f83880ccbb4c 100644 --- a/pkg/sql/logictest/tests/local/generated_test.go +++ b/pkg/sql/logictest/tests/local/generated_test.go @@ -1619,6 +1619,13 @@ func TestLogic_save_table( runLogicTest(t, "save_table") } +func TestLogic_savepoints( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "savepoints") +} + func TestLogic_scale( t *testing.T, ) { diff --git a/pkg/storage/engine.go b/pkg/storage/engine.go index c05b77682735..a4faca7fe423 100644 --- a/pkg/storage/engine.go +++ b/pkg/storage/engine.go @@ -1731,14 +1731,10 @@ func assertMVCCIteratorInvariants(iter MVCCIterator) error { // proceed until the intents are resolved. Intents that don't conflict with the // transaction referenced by txnID[1] at the supplied `ts` are ignored. // -// The caller must supply the sequence number of the request on behalf of which -// the intents are being scanned. This is used to determine if the caller needs -// to consult intent history when performing a scan over the MVCC keyspace -// (indicated by the `needIntentHistory` return parameter). Intent history is -// required to read the correct provisional value when scanning if we encounter -// an intent written by the `txn` at a higher sequence number than the one -// supplied or at a higher timestamp than the `ts` supplied (regardless of the -// sequence number of the intent). +// The `needsIntentHistory` return value indicates whether the caller needs to +// consult intent history when performing a scan over the MVCC keyspace to +// read correct provisional values for at least one of the keys being scanned. +// Typically, this applies to all transactions that read their own writes. // // [1] The supplied txnID may be empty (uuid.Nil) if the request on behalf of // which the scan is being performed is non-transactional. @@ -1748,7 +1744,6 @@ func ScanConflictingIntentsForDroppingLatchesEarly( txnID uuid.UUID, ts hlc.Timestamp, start, end roachpb.Key, - seq enginepb.TxnSeq, intents *[]roachpb.Intent, maxIntents int64, ) (needIntentHistory bool, err error) { @@ -1791,14 +1786,36 @@ func ScanConflictingIntentsForDroppingLatchesEarly( } ownIntent := txnID != uuid.Nil && txnID == meta.Txn.ID if ownIntent { - // If we ran into one of our own intents, check whether the intent has a - // higher (or equal) sequence number or a higher (or equal) timestamp. If - // either of these conditions is true, a corresponding scan over the MVCC - // key space will need access to the key's intent history in order to read - // the correct provisional value. So we set `needIntentHistory` to true. - if seq <= meta.Txn.Sequence || ts.LessEq(meta.Timestamp.ToTimestamp()) { - needIntentHistory = true - } + // If we ran into one of our own intents, a corresponding scan over the + // MVCC keyspace will need access to the key's intent history in order to + // read the correct provisional value. As such, we set `needsIntentHistory` + // to be true. + // + // This determination is more restrictive than it needs to be. A read + // request needs access to the intent history when performing a scan over + // the MVCC keyspace only if: + // 1. The request is reading at a lower sequence number than the intent's + // sequence number. + // 2. OR the request is reading at a (strictly) lower timestamp than the + // intent timestamp[1]. This can happen if the intent was pushed for some + // reason. + // 3. OR the found intent should be ignored because it was written as part + // of a savepoint which was subsequently rolled back. + // 4. OR the found intent and read request belong to different txn epochs. + // + // The conditions above mirror special case handling for intents by + // pebbleMVCCScanner's getOne method. If we find scanning the lock table + // twice (once during conflict resolution, and once when interleaving + // intents during the MVCC read) is too expensive for transactions that + // read their own writes, there's some optimizations to be had here by + // being smarter about when we decide to interleave intents or not to. + // + // [1] Only relevant if the intent has a sequence number less than or + // equal to the read request's sequence number. Otherwise, we need access + // to the intent history to read the correct provisional value -- one + // written at a lower or equal sequence number compared to the read + // request's. + needIntentHistory = true continue } if conflictingIntent := meta.Timestamp.ToTimestamp().LessEq(ts); !conflictingIntent { diff --git a/pkg/storage/engine_test.go b/pkg/storage/engine_test.go index b0eb2d455e46..b40178849d58 100644 --- a/pkg/storage/engine_test.go +++ b/pkg/storage/engine_test.go @@ -1914,6 +1914,14 @@ func TestEngineIteratorVisibility(t *testing.T) { // 2. Timestamps of the intent/read op. // 3. Epochs of the intent/read op. // 4. Whether any savepoints have been rolled back. +// +// NB: When scanning for conflicting intents to determine if latches can be +// dropped early, we fallback to using the intent interleaving iterator in all +// read-your-own-write cases. However, doing so is more restrictive than it +// needs to be -- the in-line test expectations correspond to what an optimized +// determination for `needsIntentHistory` would look like. However, for the +// purposes of this test, we assert that we always fall back to using the intent +// interleaving iterator in ALL read-your-own-write cases. func TestScanConflictingIntentsForDroppingLatchesEarlyReadYourOwnWrites(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -1944,7 +1952,7 @@ func TestScanConflictingIntentsForDroppingLatchesEarlyReadYourOwnWrites(t *testi intentTS hlc.Timestamp intentSequenceNumber enginepb.TxnSeq intentEpoch enginepb.TxnEpoch - expNeedsIntentHistory bool + expNeedsIntentHistory bool // unused while testing ignoredSeqNumbers enginepb.IgnoredSeqNumRange }{ { @@ -1952,7 +1960,7 @@ func TestScanConflictingIntentsForDroppingLatchesEarlyReadYourOwnWrites(t *testi intentTS: readTS, intentSequenceNumber: readSeqNumber, intentEpoch: readTxnEpoch, - expNeedsIntentHistory: false, // fails + expNeedsIntentHistory: false, }, { name: "higher {intent seq number} equal {timestamp, epoch}", @@ -2000,7 +2008,7 @@ func TestScanConflictingIntentsForDroppingLatchesEarlyReadYourOwnWrites(t *testi intentTS: readTS, intentSequenceNumber: belowReadSeqNumber, intentEpoch: readTxnEpoch, - expNeedsIntentHistory: false, // fails + expNeedsIntentHistory: false, }, { name: "equal {epoch} lower {intent timestamp, intent seq number}", @@ -2014,7 +2022,7 @@ func TestScanConflictingIntentsForDroppingLatchesEarlyReadYourOwnWrites(t *testi intentTS: belowReadTS, intentSequenceNumber: readSeqNumber, intentEpoch: readTxnEpoch, - expNeedsIntentHistory: false, // fails + expNeedsIntentHistory: false, }, // lower/higher epoch test cases aren't exhaustive. @@ -2037,14 +2045,14 @@ func TestScanConflictingIntentsForDroppingLatchesEarlyReadYourOwnWrites(t *testi intentTS: belowReadTS, intentSequenceNumber: belowReadSeqNumber, intentEpoch: belowReadTxnEpoch, - expNeedsIntentHistory: true, // fails + expNeedsIntentHistory: true, }, { name: "higher {epoch} lower {intent timestamp, intent seq number}", intentTS: belowReadTS, intentSequenceNumber: belowReadSeqNumber, intentEpoch: aboveReadTxnEpoch, - expNeedsIntentHistory: true, // fails + expNeedsIntentHistory: true, }, // Savepoint related tests. { @@ -2054,7 +2062,7 @@ func TestScanConflictingIntentsForDroppingLatchesEarlyReadYourOwnWrites(t *testi intentSequenceNumber: belowReadSeqNumber, intentEpoch: readTxnEpoch, ignoredSeqNumbers: enginepb.IgnoredSeqNumRange{Start: belowReadSeqNumber, End: belowReadSeqNumber}, - expNeedsIntentHistory: true, // fails + expNeedsIntentHistory: true, }, { name: "intent not part of rolled back savepoint", @@ -2101,12 +2109,11 @@ func TestScanConflictingIntentsForDroppingLatchesEarlyReadYourOwnWrites(t *testi txn.ReadTimestamp, keyA, nil, - txn.Sequence, &intents, 0, /* maxIntents */ ) require.NoError(t, err) - require.Equal(t, tc.expNeedsIntentHistory, needsIntentHistory) + require.Equal(t, true, needsIntentHistory) }) } } @@ -2231,7 +2238,6 @@ func TestScanConflictingIntentsForDroppingLatchesEarly(t *testing.T) { txn.ReadTimestamp, tc.start, tc.end, - txn.Sequence, &intents, 0, /* maxIntents */ )