diff --git a/pkg/ccl/storageccl/engineccl/mvcc_test.go b/pkg/ccl/storageccl/engineccl/mvcc_test.go index 1bbb5e634d9e..c0a322c94539 100644 --- a/pkg/ccl/storageccl/engineccl/mvcc_test.go +++ b/pkg/ccl/storageccl/engineccl/mvcc_test.go @@ -161,25 +161,31 @@ func TestMVCCIterateIncremental(t *testing.T) { // Exercise intent handling. txn1ID := uuid.MakeV4() - txn1 := roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ - Key: testKey1, - ID: txn1ID, - Epoch: 1, - Timestamp: ts4, - }} + txn1 := roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ + Key: testKey1, + ID: txn1ID, + Epoch: 1, + Timestamp: ts4, + }, + OrigTimestamp: ts4, + } txn1Val := roachpb.Value{RawBytes: testValue4} - if err := engine.MVCCPut(ctx, e, nil, txn1.TxnMeta.Key, txn1.TxnMeta.Timestamp, txn1Val, &txn1); err != nil { + if err := engine.MVCCPut(ctx, e, nil, txn1.TxnMeta.Key, txn1.OrigTimestamp, txn1Val, &txn1); err != nil { t.Fatal(err) } txn2ID := uuid.MakeV4() - txn2 := roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ - Key: testKey2, - ID: txn2ID, - Epoch: 1, - Timestamp: ts4, - }} + txn2 := roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ + Key: testKey2, + ID: txn2ID, + Epoch: 1, + Timestamp: ts4, + }, + OrigTimestamp: ts4, + } txn2Val := roachpb.Value{RawBytes: testValue4} - if err := engine.MVCCPut(ctx, e, nil, txn2.TxnMeta.Key, txn2.TxnMeta.Timestamp, txn2Val, &txn2); err != nil { + if err := engine.MVCCPut(ctx, e, nil, txn2.TxnMeta.Key, txn2.OrigTimestamp, txn2Val, &txn2); err != nil { t.Fatal(err) } t.Run("intents1", @@ -233,25 +239,31 @@ func TestMVCCIterateIncremental(t *testing.T) { // Exercise intent handling. txn1ID := uuid.MakeV4() - txn1 := roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ - Key: testKey1, - ID: txn1ID, - Epoch: 1, - Timestamp: ts4, - }} + txn1 := roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ + Key: testKey1, + ID: txn1ID, + Epoch: 1, + Timestamp: ts4, + }, + OrigTimestamp: ts4, + } txn1Val := roachpb.Value{RawBytes: testValue4} - if err := engine.MVCCPut(ctx, e, nil, txn1.TxnMeta.Key, txn1.TxnMeta.Timestamp, txn1Val, &txn1); err != nil { + if err := engine.MVCCPut(ctx, e, nil, txn1.TxnMeta.Key, txn1.OrigTimestamp, txn1Val, &txn1); err != nil { t.Fatal(err) } txn2ID := uuid.MakeV4() - txn2 := roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ - Key: testKey2, - ID: txn2ID, - Epoch: 1, - Timestamp: ts4, - }} + txn2 := roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ + Key: testKey2, + ID: txn2ID, + Epoch: 1, + Timestamp: ts4, + }, + OrigTimestamp: ts4, + } txn2Val := roachpb.Value{RawBytes: testValue4} - if err := engine.MVCCPut(ctx, e, nil, txn2.TxnMeta.Key, txn2.TxnMeta.Timestamp, txn2Val, &txn2); err != nil { + if err := engine.MVCCPut(ctx, e, nil, txn2.TxnMeta.Key, txn2.OrigTimestamp, txn2Val, &txn2); err != nil { t.Fatal(err) } t.Run("intents1", @@ -370,6 +382,7 @@ func TestMVCCIncrementalIteratorIntentStraddlesSStables(t *testing.T) { Epoch: 1, Timestamp: hlc.Timestamp{WallTime: 2}, }, + OrigTimestamp: hlc.Timestamp{WallTime: 2}, }) // Create a second DB in which we'll create a specific SSTable structure: the @@ -473,6 +486,7 @@ func TestMVCCIncrementalIteratorIntentDeletion(t *testing.T) { Epoch: 1, Timestamp: ts, }, + OrigTimestamp: ts, } } intent := func(txn *roachpb.Transaction) roachpb.Intent { @@ -542,9 +556,9 @@ func TestMVCCIncrementalIteratorIntentDeletion(t *testing.T) { // kA:3 -> vA3 // kA:2 -> vA2 // kB -> (intent deletion) - require.NoError(t, engine.MVCCPut(ctx, db, nil, kA, txnA1.Timestamp, vA1, txnA1)) - require.NoError(t, engine.MVCCPut(ctx, db, nil, kB, txnB1.Timestamp, vB1, txnB1)) - require.NoError(t, engine.MVCCPut(ctx, db, nil, kC, txnC1.Timestamp, vC1, txnC1)) + require.NoError(t, engine.MVCCPut(ctx, db, nil, kA, txnA1.OrigTimestamp, vA1, txnA1)) + require.NoError(t, engine.MVCCPut(ctx, db, nil, kB, txnB1.OrigTimestamp, vB1, txnB1)) + require.NoError(t, engine.MVCCPut(ctx, db, nil, kC, txnC1.OrigTimestamp, vC1, txnC1)) require.NoError(t, db.Flush()) require.NoError(t, db.Compact()) require.NoError(t, engine.MVCCResolveWriteIntent(ctx, db, nil, intent(txnA1))) diff --git a/pkg/roachpb/batch.go b/pkg/roachpb/batch.go index 2e6ea0b7000d..42b61967350d 100644 --- a/pkg/roachpb/batch.go +++ b/pkg/roachpb/batch.go @@ -38,9 +38,22 @@ func (ba *BatchRequest) SetActiveTimestamp(nowFn func() hlc.Timestamp) error { return errors.New("transactional request must not set batch timestamp") } - // Always use the original timestamp for reads and writes, even - // though some intents may be written at higher timestamps in the - // event of a WriteTooOldError. + // The batch timestamp is the timestamp at which reads are performed. We set + // this to the txn's original timestamp, even if the txn's provisional + // commit timestamp has been forwarded, so that all reads within a txn + // observe the same snapshot of the database. + // + // In other words, we want to preserve the invariant that reading the same + // key multiple times in the same transaction will always return the same + // value. If we were to read at the latest provisional commit timestamp, + // txn.Timestamp, instead, reading the same key twice in the same txn might + // yield different results, e.g., if an intervening write caused the + // provisional commit timestamp to be advanced. Such a txn would fail to + // commit, as its reads would not successfully be refreshed, but only after + // confusing the client with spurious data. + // + // Note that writes will be performed at the provisional commit timestamp, + // txn.Timestamp, regardless of the batch timestamp. ba.Timestamp = txn.OrigTimestamp // If a refreshed timestamp is set for the transaction, forward // the batch timestamp to it. The refreshed timestamp indicates a diff --git a/pkg/roachpb/data.pb.go b/pkg/roachpb/data.pb.go index 74a802676fe1..277257d0838e 100644 --- a/pkg/roachpb/data.pb.go +++ b/pkg/roachpb/data.pb.go @@ -352,42 +352,11 @@ type Transaction struct { // transaction will retry unless we manage to "refresh the reads" - see // refreshed_timestamp. // - // This timestamp is the one at which all transactions will read, unless - // refreshed_timestamp is set. It is also, surprisingly, the timestamp at - // which transactions will provisionally _write_ (i.e. intents are written at - // this orig_timestamp and, after commit, when the intents are resolved, - // their timestamps are bumped to the to the commit timestamp), if - // refreshed_timestamp isn't set. - // This is ultimately because of correctness concerns around SNAPSHOT - // transactions. - // - // Intuitively, one could think that the timestamp at which intents should be - // written should be the provisional commit timestamp, and while this is - // morally true, consider the following scenario, where txn1 is a SNAPSHOT - // txn: - // - // - txn1 at orig_timestamp=5 reads key1: (value) 1. - // - txn1 writes elsewhere, has its commit timestamp increased to 20. - // - txn2 at orig_timestamp=10 reads key1: 1 - // - txn2 increases the value by 5: key1: 6 and commits - // - txn1 increases the value by 1: key1: 2, attempts commit + // This timestamp is the one at which all reads occur, unless + // refreshed_timestamp is set. // - // If txn1 uses its orig_timestamp for updating key1 (as it does), it - // conflicts with txn2's committed value (which is at timestamp 10, in the - // future of 5), and restarts. - // Using instead its candidate commit timestamp, it wouldn't see a conflict - // and commit, but this is not the expected outcome (the expected outcome is - // {key1: 6} (since txn1 is not expected to commit)) and we would be - // experiencing the Lost Update Anomaly. - // - // Note that in practice, before restarting, txn1 would still lay down an - // intent (just above the committed value) not with the intent to commit it, - // but to avoid being starved by short-lived transactions on that key which - // would otherwise not have to go through conflict resolution with txn1. - // - // Again, keep in mind that, when the transaction commits, all the intents are - // bumped to the commit timestamp (otherwise, pushing a transaction wouldn't - // achieve anything). + // Note that writes do not occur at this timestamp; they instead occur at the + // provisional commit timestamp, meta.Timestamp. OrigTimestamp cockroach_util_hlc.Timestamp `protobuf:"bytes,6,opt,name=orig_timestamp,json=origTimestamp" json:"orig_timestamp"` // Initial Timestamp + clock skew. Reads which encounter values with // timestamps between timestamp and max_timestamp trigger a txn @@ -400,9 +369,10 @@ type Transaction struct { // can commit without necessitating a serializable restart. This // value is forwarded to the transaction's current timestamp (meta.timestamp) // if the transaction coordinator is able to refresh all refreshable spans - // encountered during the course of the txn. If set, this take precedence - // over orig_timestamp and is the timestamp at which the transaction both - // reads and writes going forward. + // encountered during the course of the txn. If set, this takes precedence + // over orig_timestamp and is the timestamp at which the transaction reads + // going forward. + // // We need to keep track of both refresh_timestamp and orig_timestamp (instead // of simply overwriting the orig_timestamp after refreshes) because the // orig_timestamp needs to be used as a lower bound timestamp for the diff --git a/pkg/roachpb/data.proto b/pkg/roachpb/data.proto index ae37430748b0..0f17585a4ddb 100644 --- a/pkg/roachpb/data.proto +++ b/pkg/roachpb/data.proto @@ -262,42 +262,11 @@ message Transaction { // transaction will retry unless we manage to "refresh the reads" - see // refreshed_timestamp. // - // This timestamp is the one at which all transactions will read, unless - // refreshed_timestamp is set. It is also, surprisingly, the timestamp at - // which transactions will provisionally _write_ (i.e. intents are written at - // this orig_timestamp and, after commit, when the intents are resolved, - // their timestamps are bumped to the to the commit timestamp), if - // refreshed_timestamp isn't set. - // This is ultimately because of correctness concerns around SNAPSHOT - // transactions. - // - // Intuitively, one could think that the timestamp at which intents should be - // written should be the provisional commit timestamp, and while this is - // morally true, consider the following scenario, where txn1 is a SNAPSHOT - // txn: - // - // - txn1 at orig_timestamp=5 reads key1: (value) 1. - // - txn1 writes elsewhere, has its commit timestamp increased to 20. - // - txn2 at orig_timestamp=10 reads key1: 1 - // - txn2 increases the value by 5: key1: 6 and commits - // - txn1 increases the value by 1: key1: 2, attempts commit + // This timestamp is the one at which all reads occur, unless + // refreshed_timestamp is set. // - // If txn1 uses its orig_timestamp for updating key1 (as it does), it - // conflicts with txn2's committed value (which is at timestamp 10, in the - // future of 5), and restarts. - // Using instead its candidate commit timestamp, it wouldn't see a conflict - // and commit, but this is not the expected outcome (the expected outcome is - // {key1: 6} (since txn1 is not expected to commit)) and we would be - // experiencing the Lost Update Anomaly. - // - // Note that in practice, before restarting, txn1 would still lay down an - // intent (just above the committed value) not with the intent to commit it, - // but to avoid being starved by short-lived transactions on that key which - // would otherwise not have to go through conflict resolution with txn1. - // - // Again, keep in mind that, when the transaction commits, all the intents are - // bumped to the commit timestamp (otherwise, pushing a transaction wouldn't - // achieve anything). + // Note that writes do not occur at this timestamp; they instead occur at the + // provisional commit timestamp, meta.Timestamp. util.hlc.Timestamp orig_timestamp = 6 [(gogoproto.nullable) = false]; // Initial Timestamp + clock skew. Reads which encounter values with // timestamps between timestamp and max_timestamp trigger a txn @@ -310,9 +279,10 @@ message Transaction { // can commit without necessitating a serializable restart. This // value is forwarded to the transaction's current timestamp (meta.timestamp) // if the transaction coordinator is able to refresh all refreshable spans - // encountered during the course of the txn. If set, this take precedence - // over orig_timestamp and is the timestamp at which the transaction both - // reads and writes going forward. + // encountered during the course of the txn. If set, this takes precedence + // over orig_timestamp and is the timestamp at which the transaction reads + // going forward. + // // We need to keep track of both refresh_timestamp and orig_timestamp (instead // of simply overwriting the orig_timestamp after refreshes) because the // orig_timestamp needs to be used as a lower bound timestamp for the diff --git a/pkg/sql/upsert_test.go b/pkg/sql/upsert_test.go index 19cd8f310e70..3f7716843526 100644 --- a/pkg/sql/upsert_test.go +++ b/pkg/sql/upsert_test.go @@ -129,7 +129,7 @@ func TestUpsertFastPath(t *testing.T) { } } -func TestConcurrentUpsertWithSnapshotIsolation(t *testing.T) { +func TestConcurrentUpsert(t *testing.T) { defer leaktest.AfterTest(t)() s, conn, _ := serverutils.StartServer(t, base.TestServerArgs{}) @@ -138,9 +138,6 @@ func TestConcurrentUpsertWithSnapshotIsolation(t *testing.T) { sqlDB.Exec(t, `CREATE DATABASE d`) sqlDB.Exec(t, `CREATE TABLE d.t (a INT PRIMARY KEY, b INT, INDEX b_idx (b))`) - // TODO(andrei): This test is probably broken: it's setting a default - // isolation on a random connection, not on all connections. - sqlDB.Exec(t, `SET DEFAULT_TRANSACTION_ISOLATION TO SNAPSHOT`) testCases := []struct { name string diff --git a/pkg/storage/batcheval/cmd_refresh_range_test.go b/pkg/storage/batcheval/cmd_refresh_range_test.go index 8c5a39e75da9..43fe8af2441e 100644 --- a/pkg/storage/batcheval/cmd_refresh_range_test.go +++ b/pkg/storage/batcheval/cmd_refresh_range_test.go @@ -69,8 +69,9 @@ func TestRefreshRangeTimeBoundIterator(t *testing.T) { Epoch: 1, Timestamp: ts1, }, + OrigTimestamp: ts1, } - if err := engine.MVCCPut(ctx, db, nil, k, txn.Timestamp, v, txn); err != nil { + if err := engine.MVCCPut(ctx, db, nil, k, txn.OrigTimestamp, v, txn); err != nil { t.Fatal(err) } if err := engine.MVCCPut(ctx, db, nil, roachpb.Key("unused1"), ts4, v, nil); err != nil { diff --git a/pkg/storage/engine/bench_test.go b/pkg/storage/engine/bench_test.go index b7955989d4a5..0d102169f2f1 100644 --- a/pkg/storage/engine/bench_test.go +++ b/pkg/storage/engine/bench_test.go @@ -118,7 +118,8 @@ func setupMVCCData( var txn *roachpb.Transaction if opts.transactional { - txn = txn1Commit + txnCopy := *txn1Commit + txn = &txnCopy } writeKey := func(batch Batch, idx int) { @@ -127,6 +128,10 @@ func setupMVCCData( value.InitChecksum(key) counts[idx]++ ts := hlc.Timestamp{WallTime: int64(counts[idx] * 5)} + if txn != nil { + txn.OrigTimestamp = ts + txn.Timestamp = ts + } if err := MVCCPut(ctx, batch, nil /* ms */, key, ts, value, txn); err != nil { b.Fatal(err) } diff --git a/pkg/storage/engine/enginepb/mvcc3.pb.go b/pkg/storage/engine/enginepb/mvcc3.pb.go index 137bab92c9f5..fb5921e68a46 100644 --- a/pkg/storage/engine/enginepb/mvcc3.pb.go +++ b/pkg/storage/engine/enginepb/mvcc3.pb.go @@ -33,15 +33,61 @@ type TxnMeta struct { Key []byte `protobuf:"bytes,3,opt,name=key,proto3" json:"key,omitempty"` // Incremented on txn retry. Epoch uint32 `protobuf:"varint,4,opt,name=epoch,proto3" json:"epoch,omitempty"` - // The proposed timestamp for the transaction. This starts as the - // current wall time on the txn coordinator. This is the timestamp - // at which all of the transaction's writes are performed: even if - // intents have been laid down at different timestamps, the process - // of resolving them (e.g. when the txn commits) will bump them to - // this timestamp. SERIALIZABLE transactions only commit when - // timestamp == orig_timestamp. SNAPSHOT transactions can commit - // even when they've performed their reads (at orig_timestamp) at a - // different timestamp than their writes (at timestamp). + // The proposed timestamp for the transaction. This starts as the current wall + // time on the txn coordinator, and is forwarded by the timestamp cache if the + // txn attempts to write "beneath" another txn's writes. + // + // Writes within the txn are performed using the most up-to-date value of this + // timestamp that is available. For example, suppose a txn starts at some + // timestamp, writes a key/value, and has its timestamp forwarded while doing + // so because a later version already exists at that key. As soon as the txn + // coordinator learns of the updated timestamp, it will begin performing + // writes at the updated timestamp. The coordinator may, however, continue + // issuing writes at the original timestamp before it learns about the + // forwarded timestamp. The process of resolving the intents when the txn + // commits will bump any intents written at an older timestamp to the final + // commit timestamp. + // + // Note that reads do not occur at this timestamp; they instead occur at + // OrigTimestamp, which is tracked in the containing roachpb.Transaction. + // + // Writes used to be performed at the txn's original timestamp, which was + // necessary to avoid lost update anomalies in snapshot isolation mode. We no + // longer support snapshot isolation mode, and there are now several important + // reasons that writes are performed at this timestamp instead of the txn's + // original timestamp: + // + // 1. This timestamp is forwarded by the timestamp cache when this + // transaction attempts to write beneath a more recent read. Leaving the + // intent at the original timestamp would write beneath that read, which + // would violate an invariant that time-bound iterators rely on. + // + // For example, consider a client that uses a time-bound iterator to + // poll for changes to a key. The client reads (ts5, ts10], sees no + // writes, and reports that no changes have occurred up to t10. Then a + // txn writes an intent at its original timestamp ts7. The txn's + // timestamp is forwarded to ts11 by the timestamp cache thanks to the + // client's read. Meanwhile, the client reads (ts10, ts15] and, again + // seeing no intents, reports that no changes have occurred to the key + // up to t15. Now the txn commits at ts11 and bumps the intent to ts11. + // But the client thinks it has seen all changes up to t15, and so never + // sees the intent! We avoid this problem by writing intents at the + // provisional commit timestamp insteadr. In this example, the intent + // would instead be written at ts11 and picked up by the client's next + // read from (ts10, ts15]. + // + // 2. Unnecessary PushTxn roundtrips are avoided. If a transaction is + // forwarded from ts5 to ts10, the rest of its intents will be written + // at ts10. Reads at t < ts10 that encounter these intents can ignore + // them; if the intents had instead been left at ts5, these reads would + // have needed to send PushTxn requests just to find out that the txn + // had, in fact, been forwarded to a non-conflicting time. + // + // 3. Unnecessary intent rewriting is avoided. Writing at the original + // timestamp when this timestamp has been forwarded guarantees that the + // value will need to be rewritten at the forwarded timestamp if the + // transaction commits. + // Timestamp cockroach_util_hlc.Timestamp `protobuf:"bytes,5,opt,name=timestamp" json:"timestamp"` Priority int32 `protobuf:"varint,6,opt,name=priority,proto3" json:"priority,omitempty"` // A one-indexed sequence number which is increased on each request diff --git a/pkg/storage/engine/enginepb/mvcc3.proto b/pkg/storage/engine/enginepb/mvcc3.proto index 043fb27cfd05..c965bdbcf6b5 100644 --- a/pkg/storage/engine/enginepb/mvcc3.proto +++ b/pkg/storage/engine/enginepb/mvcc3.proto @@ -36,15 +36,61 @@ message TxnMeta { bytes key = 3; // TODO(tschottdorf): [(gogoproto.casttype) = "Key"]; // Incremented on txn retry. uint32 epoch = 4; - // The proposed timestamp for the transaction. This starts as the - // current wall time on the txn coordinator. This is the timestamp - // at which all of the transaction's writes are performed: even if - // intents have been laid down at different timestamps, the process - // of resolving them (e.g. when the txn commits) will bump them to - // this timestamp. SERIALIZABLE transactions only commit when - // timestamp == orig_timestamp. SNAPSHOT transactions can commit - // even when they've performed their reads (at orig_timestamp) at a - // different timestamp than their writes (at timestamp). + // The proposed timestamp for the transaction. This starts as the current wall + // time on the txn coordinator, and is forwarded by the timestamp cache if the + // txn attempts to write "beneath" another txn's writes. + // + // Writes within the txn are performed using the most up-to-date value of this + // timestamp that is available. For example, suppose a txn starts at some + // timestamp, writes a key/value, and has its timestamp forwarded while doing + // so because a later version already exists at that key. As soon as the txn + // coordinator learns of the updated timestamp, it will begin performing + // writes at the updated timestamp. The coordinator may, however, continue + // issuing writes at the original timestamp before it learns about the + // forwarded timestamp. The process of resolving the intents when the txn + // commits will bump any intents written at an older timestamp to the final + // commit timestamp. + // + // Note that reads do not occur at this timestamp; they instead occur at + // OrigTimestamp, which is tracked in the containing roachpb.Transaction. + // + // Writes used to be performed at the txn's original timestamp, which was + // necessary to avoid lost update anomalies in snapshot isolation mode. We no + // longer support snapshot isolation mode, and there are now several important + // reasons that writes are performed at this timestamp instead of the txn's + // original timestamp: + // + // 1. This timestamp is forwarded by the timestamp cache when this + // transaction attempts to write beneath a more recent read. Leaving the + // intent at the original timestamp would write beneath that read, which + // would violate an invariant that time-bound iterators rely on. + // + // For example, consider a client that uses a time-bound iterator to + // poll for changes to a key. The client reads (ts5, ts10], sees no + // writes, and reports that no changes have occurred up to t10. Then a + // txn writes an intent at its original timestamp ts7. The txn's + // timestamp is forwarded to ts11 by the timestamp cache thanks to the + // client's read. Meanwhile, the client reads (ts10, ts15] and, again + // seeing no intents, reports that no changes have occurred to the key + // up to t15. Now the txn commits at ts11 and bumps the intent to ts11. + // But the client thinks it has seen all changes up to t15, and so never + // sees the intent! We avoid this problem by writing intents at the + // provisional commit timestamp insteadr. In this example, the intent + // would instead be written at ts11 and picked up by the client's next + // read from (ts10, ts15]. + // + // 2. Unnecessary PushTxn roundtrips are avoided. If a transaction is + // forwarded from ts5 to ts10, the rest of its intents will be written + // at ts10. Reads at t < ts10 that encounter these intents can ignore + // them; if the intents had instead been left at ts5, these reads would + // have needed to send PushTxn requests just to find out that the txn + // had, in fact, been forwarded to a non-conflicting time. + // + // 3. Unnecessary intent rewriting is avoided. Writing at the original + // timestamp when this timestamp has been forwarded guarantees that the + // value will need to be rewritten at the forwarded timestamp if the + // transaction commits. + // util.hlc.Timestamp timestamp = 5 [(gogoproto.nullable) = false]; int32 priority = 6; // A one-indexed sequence number which is increased on each request diff --git a/pkg/storage/engine/mvcc.go b/pkg/storage/engine/mvcc.go index 146f4a669b90..495b767d38d3 100644 --- a/pkg/storage/engine/mvcc.go +++ b/pkg/storage/engine/mvcc.go @@ -999,6 +999,10 @@ func (b *putBuffer) putMeta( // key metadata. The timestamp must be passed as a parameter; using // the Timestamp field on the value results in an error. // +// Note that, when writing transactionally, the txn's timestamps +// dictate the timestamp of the operation, and the timestamp paramater is +// confusing and redundant. See the comment on mvccPutInternal for details. +// // If the timestamp is specified as hlc.Timestamp{}, the value is // inlined instead of being written as a timestamp-versioned value. A // zero timestamp write to a key precludes a subsequent write using a @@ -1032,6 +1036,10 @@ func MVCCPut( // exist in order for stats to be updated properly. If a previous version of // the key does exist it is up to the caller to properly account for their // existence in updating the stats. +// +// Note that, when writing transactionally, the txn's timestamps +// dictate the timestamp of the operation, and the timestamp paramater is +// confusing and redundant. See the comment on mvccPutInternal for details. func MVCCBlindPut( ctx context.Context, engine Writer, @@ -1046,6 +1054,10 @@ func MVCCBlindPut( // MVCCDelete marks the key deleted so that it will not be returned in // future get responses. +// +// Note that, when writing transactionally, the txn's timestamps +// dictate the timestamp of the operation, and the timestamp paramater is +// confusing and redundant. See the comment on mvccPutInternal for details. func MVCCDelete( ctx context.Context, engine ReadWriter, @@ -1232,6 +1244,28 @@ func replayTransactionalWrite( // to write or an error. If valueFn is supplied, value should be nil // and vice versa. valueFn can delete by returning nil. Returning // []byte{} will write an empty value, not delete. +// +// Note that, when writing transactionally, the txn's timestamps +// dictate the timestamp of the operation, and the timestamp parameter +// is redundant. Specifically, the intent is written at the txn's +// provisional commit timestamp, txn.Timestamp, unless it is +// forwarded by an existing committed value beneath that timestamp. +// However, reads (e.g., for a ConditionalPut) are performed at the +// txn's original timestamp, txn.OrigTimestamp, to ensure that the +// client sees a consistent snapshot of the database. Any existing +// committed writes that are newer than txn.OrigTimestamp will thus +// generate a WriteTooOld error. If txn.RefreshedTimestamp is set, +// it is used in place of txn.OrigTimestamp. +// +// In an attempt to reduce confusion about which timestamp applies, +// when writing transactionally, the timestamp parameter must be +// equal to txn.OrigTimestamp. (One could imagine instead requiring +// that the timestamp parameter be set to hlc.Timestamp{} when writing +// transactionally, but hlc.Timestamp{} is already used as a sentinel +// for inline puts.) +// +// TODO(andrei): explore alternate function signatures that reduce +// confusion about which timestamp applies. func mvccPutInternal( ctx context.Context, engine Writer, @@ -1289,6 +1323,19 @@ func mvccPutInternal( return err } + writeTimestamp := timestamp + if txn != nil { + txnReadTimestamp := txn.OrigTimestamp + txnReadTimestamp.Forward(txn.RefreshedTimestamp) + if txnReadTimestamp != timestamp { + return errors.Errorf("mvccPutInternal: txn's read timestamp %s does not match timestamp %s", + txn.OrigTimestamp, timestamp) + } + // A txn writes intents at its provisional commit timestamp. See the + // comment on the txn.Timestamp field definition for rationale. + writeTimestamp = txn.Timestamp + } + // Determine what the logical operation is. Are we writing an intent // or a value directly? logicalOp := MVCCWriteValueOpType @@ -1361,7 +1408,7 @@ func mvccPutInternal( // overwrite the existing intent; otherwise we must manually // delete the old intent, taking care with MVCC stats. logicalOp = MVCCUpdateIntentOpType - if metaTimestamp.Less(timestamp) { + if metaTimestamp.Less(writeTimestamp) { { // If the older write intent has a version underneath it, we need to // read its size because its GCBytesAge contribution may change as we @@ -1382,13 +1429,13 @@ func mvccPutInternal( if err := engine.Clear(versionKey); err != nil { return err } - } else if timestamp.Less(metaTimestamp) { + } else if writeTimestamp.Less(metaTimestamp) { // This case occurs when we're writing a key twice within a // txn, and our timestamp has been pushed forward because of // a write-too-old error on this key. For this case, we want // to continue writing at the higher timestamp or else the // MVCCMetadata could end up pointing *under* the newer write. - timestamp = metaTimestamp + writeTimestamp = metaTimestamp } // Since an intent with a smaller sequence number exists for the // same transaction, we must add the previous value and sequence @@ -1417,8 +1464,8 @@ func mvccPutInternal( // error indicating what the timestamp ended up being. This // timestamp can then be used to increment the txn timestamp and // be returned with the response. - actualTimestamp := metaTimestamp.Next() - maybeTooOldErr = &roachpb.WriteTooOldError{Timestamp: timestamp, ActualTimestamp: actualTimestamp} + writeTimestamp = metaTimestamp.Next() + maybeTooOldErr = &roachpb.WriteTooOldError{Timestamp: timestamp, ActualTimestamp: writeTimestamp} // If we're in a transaction, always get the value at the orig // timestamp. if txn != nil { @@ -1431,11 +1478,10 @@ func mvccPutInternal( // the write timestamp to the latest value's timestamp + 1. The // new timestamp is returned to the caller in maybeTooOldErr. if value, err = maybeGetValue( - ctx, iter, metaKey, value, ok, actualTimestamp, txn, buf, valueFn); err != nil { + ctx, iter, metaKey, value, ok, writeTimestamp, txn, buf, valueFn); err != nil { return err } } - timestamp = actualTimestamp } else { if value, err = maybeGetValue( ctx, iter, metaKey, value, ok, timestamp, txn, buf, valueFn); err != nil { @@ -1458,7 +1504,7 @@ func mvccPutInternal( txnMeta = &txn.TxnMeta } buf.newMeta.Txn = txnMeta - buf.newMeta.Timestamp = hlc.LegacyTimestamp(timestamp) + buf.newMeta.Timestamp = hlc.LegacyTimestamp(writeTimestamp) } newMeta := &buf.newMeta @@ -1492,7 +1538,7 @@ func mvccPutInternal( // RocksDB's skiplist memtable implementation includes a fast-path for // sequential insertion patterns. versionKey := metaKey - versionKey.Timestamp = timestamp + versionKey.Timestamp = writeTimestamp if err := engine.Put(versionKey, value); err != nil { return err } @@ -1506,16 +1552,11 @@ func mvccPutInternal( // Log the logical MVCC operation. logicalOpDetails := MVCCLogicalOpDetails{ Key: key, - Timestamp: timestamp, + Timestamp: writeTimestamp, Safe: true, } if txn := buf.newMeta.Txn; txn != nil { logicalOpDetails.Txn = *txn - // The intent may be at a lower timestamp than the transaction's - // current timestamp, meaning that it will never actually commit - // at the timestamp it's written at. In that case, we can forward - // the timestamp of the logical operation to the txn's timestamp. - logicalOpDetails.Timestamp.Forward(txn.Timestamp) } engine.LogLogicalOp(logicalOp, logicalOpDetails) @@ -1528,6 +1569,10 @@ func mvccPutInternal( // // An initial value is read from the key using the same operational // timestamp as we use to write a value. +// +// Note that, when writing transactionally, the txn's timestamps +// dictate the timestamp of the operation, and the timestamp paramater is +// confusing and redundant. See the comment on mvccPutInternal for details. func MVCCIncrement( ctx context.Context, engine ReadWriter, @@ -1577,6 +1622,10 @@ func MVCCIncrement( // // The condition check reads a value from the key using the same operational // timestamp as we use to write a value. +// +// Note that, when writing transactionally, the txn's timestamps +// dictate the timestamp of the operation, and the timestamp paramater is +// confusing and redundant. See the comment on mvccPutInternal for details. func MVCCConditionalPut( ctx context.Context, engine ReadWriter, @@ -1598,6 +1647,10 @@ func MVCCConditionalPut( // semantics. MVCCBlindConditionalPut skips retrieving the existing metadata // for the key requiring the caller to guarantee no versions for the key // currently exist. +// +// Note that, when writing transactionally, the txn's timestamps +// dictate the timestamp of the operation, and the timestamp paramater is +// confusing and redundant. See the comment on mvccPutInternal for details. func MVCCBlindConditionalPut( ctx context.Context, engine Writer, @@ -1646,6 +1699,10 @@ func mvccConditionalPutUsingIter( // an existing value that is different from the supplied value. If // failOnTombstones is set to true, tombstones count as mismatched values and // will cause a ConditionFailedError. +// +// Note that, when writing transactionally, the txn's timestamps +// dictate the timestamp of the operation, and the timestamp paramater is +// confusing and redundant. See the comment on mvccPutInternal for details. func MVCCInitPut( ctx context.Context, engine ReadWriter, @@ -1665,6 +1722,10 @@ func MVCCInitPut( // comments for details of the semantics. MVCCBlindInitPut skips // retrieving the existing metadata for the key requiring the caller // to guarantee no version for the key currently exist. +// +// Note that, when writing transactionally, the txn's timestamps +// dictate the timestamp of the operation, and the timestamp paramater is +// confusing and redundant. See the comment on mvccPutInternal for details. func MVCCBlindInitPut( ctx context.Context, engine ReadWriter, diff --git a/pkg/storage/engine/mvcc_logical_ops_test.go b/pkg/storage/engine/mvcc_logical_ops_test.go index adfec07e9e89..00c614ec76ed 100644 --- a/pkg/storage/engine/mvcc_logical_ops_test.go +++ b/pkg/storage/engine/mvcc_logical_ops_test.go @@ -41,7 +41,8 @@ func TestMVCCOpLogWriter(t *testing.T) { if err := MVCCPut(ctx, ol, nil, testKey1, hlc.Timestamp{Logical: 1}, value1, nil); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, ol, nil, testKey1, hlc.Timestamp{Logical: 2}, value2, txn1); err != nil { + txn1ts := makeTxn(*txn1, hlc.Timestamp{Logical: 2}) + if err := MVCCPut(ctx, ol, nil, testKey1, txn1ts.OrigTimestamp, value2, txn1ts); err != nil { t.Fatal(err) } @@ -50,24 +51,24 @@ func TestMVCCOpLogWriter(t *testing.T) { if err := MVCCPut(ctx, ol, nil, localKey, hlc.Timestamp{Logical: 1}, value1, nil); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, ol, nil, localKey, hlc.Timestamp{Logical: 2}, value2, txn1); err != nil { + if err := MVCCPut(ctx, ol, nil, localKey, txn1ts.OrigTimestamp, value2, txn1ts); err != nil { t.Fatal(err) } // Update the intents and write another. Use a distinct batch. olDist := ol.Distinct() - txn1Seq := *txn1 - txn1Seq.Sequence++ - if err := MVCCPut(ctx, olDist, nil, testKey1, hlc.Timestamp{Logical: 3}, value2, &txn1Seq); err != nil { + txn1ts.Sequence++ + txn1ts.Timestamp = hlc.Timestamp{Logical: 3} + if err := MVCCPut(ctx, olDist, nil, testKey1, txn1ts.OrigTimestamp, value2, txn1ts); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, olDist, nil, localKey, hlc.Timestamp{Logical: 3}, value2, &txn1Seq); err != nil { + if err := MVCCPut(ctx, olDist, nil, localKey, txn1ts.OrigTimestamp, value2, txn1ts); err != nil { t.Fatal(err) } // Set the txn timestamp to a larger value than the intent. - txn1LargerTS := *txn1 + txn1LargerTS := makeTxn(*txn1, hlc.Timestamp{Logical: 4}) txn1LargerTS.Timestamp = hlc.Timestamp{Logical: 4} - if err := MVCCPut(ctx, olDist, nil, testKey2, hlc.Timestamp{Logical: 3}, value3, &txn1LargerTS); err != nil { + if err := MVCCPut(ctx, olDist, nil, testKey2, txn1LargerTS.OrigTimestamp, value3, txn1LargerTS); err != nil { t.Fatal(err) } olDist.Close() @@ -91,7 +92,8 @@ func TestMVCCOpLogWriter(t *testing.T) { } // Write another intent, push it, then abort it. - if err := MVCCPut(ctx, ol, nil, testKey3, hlc.Timestamp{Logical: 5}, value4, txn2); err != nil { + txn2ts := makeTxn(*txn2, hlc.Timestamp{Logical: 5}) + if err := MVCCPut(ctx, ol, nil, testKey3, txn2ts.OrigTimestamp, value4, txn2ts); err != nil { t.Fatal(err) } txn2Pushed := *txn2 diff --git a/pkg/storage/engine/mvcc_stats_test.go b/pkg/storage/engine/mvcc_stats_test.go index 79211c7662af..37915ba982c5 100644 --- a/pkg/storage/engine/mvcc_stats_test.go +++ b/pkg/storage/engine/mvcc_stats_test.go @@ -102,9 +102,11 @@ func TestMVCCStatsDeleteCommitMovesTimestamp(t *testing.T) { // Delete the value at ts=3. We'll commit this at ts=4 later. ts3 := hlc.Timestamp{WallTime: 3 * 1E9} - txn := &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts3}} - txn.Timestamp.Forward(ts3) - if err := MVCCDelete(ctx, engine, aggMS, key, ts3, txn); err != nil { + txn := &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts3}, + OrigTimestamp: ts3, + } + if err := MVCCDelete(ctx, engine, aggMS, key, txn.OrigTimestamp, txn); err != nil { t.Fatal(err) } @@ -151,7 +153,10 @@ func TestMVCCStatsPutCommitMovesTimestamp(t *testing.T) { key := roachpb.Key("a") ts1 := hlc.Timestamp{WallTime: 1E9} - txn := &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}} + txn := &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}, + OrigTimestamp: ts1, + } // Write an intent at t=1s. value := roachpb.MakeValueFromString("value") if err := MVCCPut(ctx, engine, aggMS, key, ts1, value, txn); err != nil { @@ -223,10 +228,13 @@ func TestMVCCStatsPutPushMovesTimestamp(t *testing.T) { key := roachpb.Key("a") ts1 := hlc.Timestamp{WallTime: 1E9} - txn := &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}} + txn := &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}, + OrigTimestamp: ts1, + } // Write an intent. value := roachpb.MakeValueFromString("value") - if err := MVCCPut(ctx, engine, aggMS, key, ts1, value, txn); err != nil { + if err := MVCCPut(ctx, engine, aggMS, key, txn.OrigTimestamp, value, txn); err != nil { t.Fatal(err) } @@ -299,11 +307,14 @@ func TestMVCCStatsDeleteMovesTimestamp(t *testing.T) { ts2 := hlc.Timestamp{WallTime: 2 * 1E9} key := roachpb.Key("a") - txn := &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}} + txn := &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}, + OrigTimestamp: ts1, + } // Write an intent. value := roachpb.MakeValueFromString("value") - if err := MVCCPut(ctx, engine, aggMS, key, ts1, value, txn); err != nil { + if err := MVCCPut(ctx, engine, aggMS, key, txn.OrigTimestamp, value, txn); err != nil { t.Fatal(err) } @@ -363,7 +374,7 @@ func TestMVCCStatsDeleteMovesTimestamp(t *testing.T) { }).Size()) require.EqualValues(t, m2ValSize, 62) - if err := MVCCDelete(ctx, engine, aggMS, key, ts2, txn); err != nil { + if err := MVCCDelete(ctx, engine, aggMS, key, txn.OrigTimestamp, txn); err != nil { t.Fatal(err) } @@ -404,10 +415,13 @@ func TestMVCCStatsPutMovesDeletionTimestamp(t *testing.T) { ts2 := hlc.Timestamp{WallTime: 2 * 1E9} key := roachpb.Key("a") - txn := &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}} + txn := &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}, + OrigTimestamp: ts1, + } // Write a deletion tombstone intent. - if err := MVCCDelete(ctx, engine, aggMS, key, ts1, txn); err != nil { + if err := MVCCDelete(ctx, engine, aggMS, key, txn.OrigTimestamp, txn); err != nil { t.Fatal(err) } @@ -470,7 +484,7 @@ func TestMVCCStatsPutMovesDeletionTimestamp(t *testing.T) { }).Size()) require.EqualValues(t, m2ValSize, 52) - if err := MVCCPut(ctx, engine, aggMS, key, ts2, value, txn); err != nil { + if err := MVCCPut(ctx, engine, aggMS, key, txn.OrigTimestamp, value, txn); err != nil { t.Fatal(err) } @@ -535,9 +549,12 @@ func TestMVCCStatsDelDelCommitMovesTimestamp(t *testing.T) { assertEq(t, engine, "after non-transactional delete", aggMS, &expMS) - // Write an tombstone intent at t=2s (anchored at ts=1s, just for fun). - txn := &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}} - if err := MVCCDelete(ctx, engine, aggMS, key, ts2, txn); err != nil { + // Write an tombstone intent at t=2s. + txn := &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts2}, + OrigTimestamp: ts2, + } + if err := MVCCDelete(ctx, engine, aggMS, key, txn.OrigTimestamp, txn); err != nil { t.Fatal(err) } @@ -673,9 +690,12 @@ func TestMVCCStatsPutDelPutMovesTimestamp(t *testing.T) { assertEq(t, engine, "after non-transactional put", aggMS, &expMS) - // Write a tombstone intent at t=2s (anchored at ts=1s, just for fun). - txn := &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}} - if err := MVCCDelete(ctx, engine, aggMS, key, ts2, txn); err != nil { + // Write a tombstone intent at t=2s. + txn := &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts2}, + OrigTimestamp: ts2, + } + if err := MVCCDelete(ctx, engine, aggMS, key, txn.OrigTimestamp, txn); err != nil { t.Fatal(err) } @@ -753,7 +773,8 @@ func TestMVCCStatsPutDelPutMovesTimestamp(t *testing.T) { vVal2Size := int64(len(val2.RawBytes)) require.EqualValues(t, vVal2Size, 14) - if err := MVCCPut(ctx, engine, &aggMS, key, ts3, val2, txn); err != nil { + txn.Timestamp.Forward(ts3) + if err := MVCCPut(ctx, engine, &aggMS, key, txn.OrigTimestamp, val2, txn); err != nil { t.Fatal(err) } @@ -876,10 +897,13 @@ func TestMVCCStatsPutIntentTimestampNotPutTimestamp(t *testing.T) { key := roachpb.Key("a") ts201 := hlc.Timestamp{WallTime: 2E9 + 1} ts099 := hlc.Timestamp{WallTime: 1E9 - 1} - txn := &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts201}} + txn := &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts201}, + OrigTimestamp: ts099, + } // Write an intent at 2s+1. value := roachpb.MakeValueFromString("value") - if err := MVCCPut(ctx, engine, aggMS, key, ts201, value, txn); err != nil { + if err := MVCCPut(ctx, engine, aggMS, key, txn.OrigTimestamp, value, txn); err != nil { t.Fatal(err) } @@ -911,6 +935,7 @@ func TestMVCCStatsPutIntentTimestampNotPutTimestamp(t *testing.T) { // MVCCPut (which is where the intent will actually end up being written at, // and which usually corresponds to txn.OrigTimestamp). txn.Sequence++ + txn.Timestamp = ts099 // Annoyingly, the new meta value is actually a little larger thanks to the // sequence number. @@ -921,7 +946,7 @@ func TestMVCCStatsPutIntentTimestampNotPutTimestamp(t *testing.T) { {Sequence: 0, Value: value.RawBytes}, }, }).Size()) - if err := MVCCPut(ctx, engine, aggMS, key, ts099, value, txn); err != nil { + if err := MVCCPut(ctx, engine, aggMS, key, txn.OrigTimestamp, value, txn); err != nil { t.Fatal(err) } @@ -1110,11 +1135,14 @@ func TestMVCCStatsTxnSysPutPut(t *testing.T) { ts1 := hlc.Timestamp{WallTime: 1E9} ts2 := hlc.Timestamp{WallTime: 2E9} - txn := &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}} + txn := &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}, + OrigTimestamp: ts1, + } // Write an intent at ts1. val1 := roachpb.MakeValueFromString("value") - if err := MVCCPut(ctx, engine, aggMS, key, ts1, val1, txn); err != nil { + if err := MVCCPut(ctx, engine, aggMS, key, txn.OrigTimestamp, val1, txn); err != nil { t.Fatal(err) } @@ -1162,7 +1190,7 @@ func TestMVCCStatsTxnSysPutPut(t *testing.T) { }).Size()) require.EqualValues(t, mVal2Size, 62) - if err := MVCCPut(ctx, engine, aggMS, key, ts2, val2, txn); err != nil { + if err := MVCCPut(ctx, engine, aggMS, key, txn.OrigTimestamp, val2, txn); err != nil { t.Fatal(err) } diff --git a/pkg/storage/engine/mvcc_test.go b/pkg/storage/engine/mvcc_test.go index 2f023743c02b..94eb627ad975 100644 --- a/pkg/storage/engine/mvcc_test.go +++ b/pkg/storage/engine/mvcc_test.go @@ -56,13 +56,13 @@ var ( testKey4 = roachpb.Key("/db4") testKey5 = roachpb.Key("/db5") testKey6 = roachpb.Key("/db6") - txn1 = &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{Key: roachpb.Key("a"), ID: txn1ID, Epoch: 1, Timestamp: hlc.Timestamp{Logical: 1}}} - txn1Commit = &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{Key: roachpb.Key("a"), ID: txn1ID, Epoch: 1, Timestamp: hlc.Timestamp{Logical: 1}}, Status: roachpb.COMMITTED} + txn1 = &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{Key: roachpb.Key("a"), ID: txn1ID, Epoch: 1, Timestamp: hlc.Timestamp{Logical: 1}}, OrigTimestamp: hlc.Timestamp{Logical: 1}} + txn1Commit = &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{Key: roachpb.Key("a"), ID: txn1ID, Epoch: 1, Timestamp: hlc.Timestamp{Logical: 1}}, OrigTimestamp: hlc.Timestamp{Logical: 1}, Status: roachpb.COMMITTED} txn1Abort = &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{Key: roachpb.Key("a"), ID: txn1ID, Epoch: 1}, Status: roachpb.ABORTED} - txn1e2 = &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{Key: roachpb.Key("a"), ID: txn1ID, Epoch: 2, Timestamp: hlc.Timestamp{Logical: 1}}} - txn1e2Commit = &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{Key: roachpb.Key("a"), ID: txn1ID, Epoch: 2, Timestamp: hlc.Timestamp{Logical: 1}}, Status: roachpb.COMMITTED} - txn2 = &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{Key: roachpb.Key("a"), ID: txn2ID, Timestamp: hlc.Timestamp{Logical: 1}}} - txn2Commit = &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{Key: roachpb.Key("a"), ID: txn2ID, Timestamp: hlc.Timestamp{Logical: 1}}, Status: roachpb.COMMITTED} + txn1e2 = &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{Key: roachpb.Key("a"), ID: txn1ID, Epoch: 2, Timestamp: hlc.Timestamp{Logical: 1}}, OrigTimestamp: hlc.Timestamp{Logical: 1}} + txn1e2Commit = &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{Key: roachpb.Key("a"), ID: txn1ID, Epoch: 2, Timestamp: hlc.Timestamp{Logical: 1}}, OrigTimestamp: hlc.Timestamp{Logical: 1}, Status: roachpb.COMMITTED} + txn2 = &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{Key: roachpb.Key("a"), ID: txn2ID, Timestamp: hlc.Timestamp{Logical: 1}}, OrigTimestamp: hlc.Timestamp{Logical: 1}} + txn2Commit = &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{Key: roachpb.Key("a"), ID: txn2ID, Timestamp: hlc.Timestamp{Logical: 1}}, OrigTimestamp: hlc.Timestamp{Logical: 1}, Status: roachpb.COMMITTED} value1 = roachpb.MakeValueFromString("testValue1") value2 = roachpb.MakeValueFromString("testValue2") value3 = roachpb.MakeValueFromString("testValue3") @@ -88,6 +88,7 @@ func createTestEngine() Engine { // txn and timestamp. func makeTxn(baseTxn roachpb.Transaction, ts hlc.Timestamp) *roachpb.Transaction { txn := baseTxn.Clone() + txn.OrigTimestamp = ts txn.Timestamp = ts return &txn } @@ -372,7 +373,7 @@ func TestMVCCPutWithTxn(t *testing.T) { engine := createTestEngine() defer engine.Close() - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{Logical: 1}, value1, txn1); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn1.OrigTimestamp, value1, txn1); err != nil { t.Fatal(err) } @@ -421,14 +422,17 @@ func TestMVCCPutOutOfOrder(t *testing.T) { engine := createTestEngine() defer engine.Close() - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 2, Logical: 1}, value1, txn1); err != nil { + txn := *txn1 + txn.OrigTimestamp = hlc.Timestamp{WallTime: 1} + txn.Timestamp = hlc.Timestamp{WallTime: 2, Logical: 1} + if err := MVCCPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value1, &txn); err != nil { t.Fatal(err) } // Put operation with earlier wall time. Will NOT be ignored. - txn := *txn1 txn.Sequence++ - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value2, &txn); err != nil { + txn.Timestamp = hlc.Timestamp{WallTime: 1} + if err := MVCCPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value2, &txn); err != nil { t.Fatal(err) } @@ -445,7 +449,7 @@ func TestMVCCPutOutOfOrder(t *testing.T) { // Another put operation with earlier logical time. Will NOT be ignored. txn.Sequence++ - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 2}, value2, &txn); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value2, &txn); err != nil { t.Fatal(err) } @@ -473,13 +477,13 @@ func TestMVCCPutNewEpochLowerSequence(t *testing.T) { engine := createTestEngine() defer engine.Close() - txn := *txn1 + txn := makeTxn(*txn1, hlc.Timestamp{WallTime: 1}) txn.Sequence = 5 - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value1, &txn); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value1, txn); err != nil { t.Fatal(err) } value, _, err := MVCCGet(ctx, engine, testKey1, hlc.Timestamp{WallTime: 3}, MVCCGetOptions{ - Txn: &txn, + Txn: txn, }) if err != nil { t.Fatal(err) @@ -491,7 +495,7 @@ func TestMVCCPutNewEpochLowerSequence(t *testing.T) { txn.Sequence = 4 txn.Epoch++ - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value2, &txn); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value2, txn); err != nil { t.Fatal(err) } @@ -518,7 +522,7 @@ func TestMVCCPutNewEpochLowerSequence(t *testing.T) { } value, _, err = MVCCGet(ctx, engine, testKey1, hlc.Timestamp{WallTime: 3}, MVCCGetOptions{ - Txn: &txn, + Txn: txn, }) if err != nil { t.Fatal(err) @@ -695,12 +699,13 @@ func TestMVCCUpdateExistingKeyInTxn(t *testing.T) { defer engine.Close() txn := *txn1 - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{Logical: 1}, value1, &txn); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value1, &txn); err != nil { t.Fatal(err) } txn.Sequence++ - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value1, &txn); err != nil { + txn.Timestamp = hlc.Timestamp{WallTime: 1} + if err := MVCCPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value1, &txn); err != nil { t.Fatal(err) } } @@ -712,11 +717,11 @@ func TestMVCCUpdateExistingKeyDiffTxn(t *testing.T) { engine := createTestEngine() defer engine.Close() - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{Logical: 1}, value1, txn1); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn1.OrigTimestamp, value1, txn1); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value2, txn2); err == nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn2.OrigTimestamp, value2, txn2); err == nil { t.Fatal("expected error on uncommitted write intent") } } @@ -1041,14 +1046,14 @@ func TestMVCCGetAndDeleteInTxn(t *testing.T) { engine := createTestEngine() defer engine.Close() - txn := *txn1 + txn := makeTxn(*txn1, hlc.Timestamp{WallTime: 1}) txn.Sequence++ - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value1, &txn); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value1, txn); err != nil { t.Fatal(err) } if value, _, err := mvccGet(ctx, engine, testKey1, hlc.Timestamp{WallTime: 2}, MVCCGetOptions{ - Txn: &txn, + Txn: txn, }); err != nil { t.Fatal(err) } else if value == nil { @@ -1056,13 +1061,14 @@ func TestMVCCGetAndDeleteInTxn(t *testing.T) { } txn.Sequence++ - if err := MVCCDelete(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 3}, &txn); err != nil { + txn.Timestamp = hlc.Timestamp{WallTime: 3} + if err := MVCCDelete(ctx, engine, nil, testKey1, txn.OrigTimestamp, txn); err != nil { t.Fatal(err) } // Read the latest version which should be deleted. if value, _, err := mvccGet(ctx, engine, testKey1, hlc.Timestamp{WallTime: 4}, MVCCGetOptions{ - Txn: &txn, + Txn: txn, }); err != nil { t.Fatal(err) } else if value != nil { @@ -1071,7 +1077,7 @@ func TestMVCCGetAndDeleteInTxn(t *testing.T) { // Read the latest version with tombstone. if value, _, err := MVCCGet(ctx, engine, testKey1, hlc.Timestamp{WallTime: 4}, MVCCGetOptions{ Tombstones: true, - Txn: &txn, + Txn: txn, }); err != nil { t.Fatal(err) } else if value == nil || len(value.RawBytes) != 0 { @@ -1101,7 +1107,7 @@ func TestMVCCGetWriteIntentError(t *testing.T) { engine := createTestEngine() defer engine.Close() - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{Logical: 1}, value1, txn1); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn1.OrigTimestamp, value1, txn1); err != nil { t.Fatal(err) } @@ -1133,6 +1139,9 @@ func TestMVCCScanWriteIntentError(t *testing.T) { ts := []hlc.Timestamp{{Logical: 1}, {Logical: 2}, {Logical: 3}, {Logical: 4}, {Logical: 5}, {Logical: 6}} + txn1ts := makeTxn(*txn1, ts[2]) + txn2ts := makeTxn(*txn2, ts[5]) + fixtureKVs := []roachpb.KeyValue{ {Key: testKey1, Value: mkVal("testValue1 pre", ts[0])}, {Key: testKey4, Value: mkVal("testValue4 pre", ts[1])}, @@ -1144,9 +1153,9 @@ func TestMVCCScanWriteIntentError(t *testing.T) { for i, kv := range fixtureKVs { var txn *roachpb.Transaction if i == 2 { - txn = txn1 + txn = txn1ts } else if i == 5 { - txn = txn2 + txn = txn2ts } v := *protoutil.Clone(&kv.Value).(*roachpb.Value) v.Timestamp = hlc.Timestamp{} @@ -1165,25 +1174,25 @@ func TestMVCCScanWriteIntentError(t *testing.T) { consistent: true, txn: nil, expIntents: []roachpb.Intent{ - {Span: roachpb.Span{Key: testKey1}, Txn: txn1.TxnMeta}, - {Span: roachpb.Span{Key: testKey4}, Txn: txn2.TxnMeta}, + {Span: roachpb.Span{Key: testKey1}, Txn: txn1ts.TxnMeta}, + {Span: roachpb.Span{Key: testKey4}, Txn: txn2ts.TxnMeta}, }, // would be []roachpb.KeyValue{fixtureKVs[3], fixtureKVs[4]} without WriteIntentError expValues: nil, }, { consistent: true, - txn: txn1, + txn: txn1ts, expIntents: []roachpb.Intent{ - {Span: roachpb.Span{Key: testKey4}, Txn: txn2.TxnMeta}, + {Span: roachpb.Span{Key: testKey4}, Txn: txn2ts.TxnMeta}, }, expValues: nil, // []roachpb.KeyValue{fixtureKVs[2], fixtureKVs[3], fixtureKVs[4]}, }, { consistent: true, - txn: txn2, + txn: txn2ts, expIntents: []roachpb.Intent{ - {Span: roachpb.Span{Key: testKey1}, Txn: txn1.TxnMeta}, + {Span: roachpb.Span{Key: testKey1}, Txn: txn1ts.TxnMeta}, }, expValues: nil, // []roachpb.KeyValue{fixtureKVs[3], fixtureKVs[4], fixtureKVs[5]}, }, @@ -1191,8 +1200,8 @@ func TestMVCCScanWriteIntentError(t *testing.T) { consistent: false, txn: nil, expIntents: []roachpb.Intent{ - {Span: roachpb.Span{Key: testKey1}, Txn: txn1.TxnMeta}, - {Span: roachpb.Span{Key: testKey4}, Txn: txn2.TxnMeta}, + {Span: roachpb.Span{Key: testKey1}, Txn: txn1ts.TxnMeta}, + {Span: roachpb.Span{Key: testKey4}, Txn: txn2ts.TxnMeta}, }, expValues: []roachpb.KeyValue{fixtureKVs[0], fixtureKVs[3], fixtureKVs[4], fixtureKVs[1]}, }, @@ -1252,7 +1261,8 @@ func TestMVCCGetInconsistent(t *testing.T) { if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value1, nil); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 2}, value2, txn1); err != nil { + txn1ts := makeTxn(*txn1, hlc.Timestamp{WallTime: 2}) + if err := MVCCPut(ctx, engine, nil, testKey1, txn1ts.OrigTimestamp, value2, txn1ts); err != nil { t.Fatal(err) } @@ -1282,7 +1292,7 @@ func TestMVCCGetInconsistent(t *testing.T) { } // Write a single intent for key 2 and verify get returns empty. - if err := MVCCPut(ctx, engine, nil, testKey2, hlc.Timestamp{WallTime: 2}, value1, txn2); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey2, txn2.OrigTimestamp, value1, txn2); err != nil { t.Fatal(err) } val, intent, err := mvccGet(ctx, engine, testKey2, hlc.Timestamp{WallTime: 2}, @@ -1322,7 +1332,8 @@ func TestMVCCGetProtoInconsistent(t *testing.T) { if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, v1, nil); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 2}, v2, txn1); err != nil { + txn1ts := makeTxn(*txn1, hlc.Timestamp{WallTime: 2}) + if err := MVCCPut(ctx, engine, nil, testKey1, txn1ts.OrigTimestamp, v2, txn1ts); err != nil { t.Fatal(err) } @@ -1364,7 +1375,7 @@ func TestMVCCGetProtoInconsistent(t *testing.T) { { // Write a single intent for key 2 and verify get returns empty. - if err := MVCCPut(ctx, engine, nil, testKey2, hlc.Timestamp{WallTime: 2}, v1, txn2); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey2, txn2.OrigTimestamp, v1, txn2); err != nil { t.Fatal(err) } val := roachpb.Value{} @@ -1386,7 +1397,7 @@ func TestMVCCGetProtoInconsistent(t *testing.T) { if err := MVCCPut(ctx, engine, nil, testKey3, hlc.Timestamp{WallTime: 1}, value3, nil); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, engine, nil, testKey3, hlc.Timestamp{WallTime: 2}, v2, txn1); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey3, txn1ts.OrigTimestamp, v2, txn1ts); err != nil { t.Fatal(err) } val := roachpb.Value{} @@ -1701,7 +1712,8 @@ func TestMVCCScanInTxn(t *testing.T) { if err := MVCCPut(ctx, engine, nil, testKey2, hlc.Timestamp{WallTime: 1}, value2, nil); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, engine, nil, testKey3, hlc.Timestamp{WallTime: 1}, value3, txn1); err != nil { + txn := makeTxn(*txn1, hlc.Timestamp{WallTime: 1}) + if err := MVCCPut(ctx, engine, nil, testKey3, txn.OrigTimestamp, value3, txn); err != nil { t.Fatal(err) } if err := MVCCPut(ctx, engine, nil, testKey4, hlc.Timestamp{WallTime: 1}, value4, nil); err != nil { @@ -1754,7 +1766,8 @@ func TestMVCCScanInconsistent(t *testing.T) { if err := MVCCPut(ctx, engine, nil, testKey1, ts1, value1, nil); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, engine, nil, testKey1, ts2, value2, txn1); err != nil { + txn1ts2 := makeTxn(*txn1, ts2) + if err := MVCCPut(ctx, engine, nil, testKey1, txn1ts2.OrigTimestamp, value2, txn1ts2); err != nil { t.Fatal(err) } if err := MVCCPut(ctx, engine, nil, testKey2, ts3, value1, nil); err != nil { @@ -1763,7 +1776,8 @@ func TestMVCCScanInconsistent(t *testing.T) { if err := MVCCPut(ctx, engine, nil, testKey2, ts4, value2, nil); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, engine, nil, testKey3, ts5, value3, txn2); err != nil { + txn2ts5 := makeTxn(*txn2, ts5) + if err := MVCCPut(ctx, engine, nil, testKey3, txn2ts5.OrigTimestamp, value3, txn2ts5); err != nil { t.Fatal(err) } if err := MVCCPut(ctx, engine, nil, testKey4, ts6, value4, nil); err != nil { @@ -1771,8 +1785,8 @@ func TestMVCCScanInconsistent(t *testing.T) { } expIntents := []roachpb.Intent{ - {Span: roachpb.Span{Key: testKey1}, Txn: txn1.TxnMeta}, - {Span: roachpb.Span{Key: testKey3}, Txn: txn2.TxnMeta}, + {Span: roachpb.Span{Key: testKey1}, Txn: txn1ts2.TxnMeta}, + {Span: roachpb.Span{Key: testKey3}, Txn: txn2ts5.TxnMeta}, } kvs, _, intents, err := MVCCScan( ctx, engine, testKey1, testKey4.Next(), math.MaxInt64, hlc.Timestamp{WallTime: 7}, @@ -2122,16 +2136,16 @@ func TestMVCCDeleteRangeFailed(t *testing.T) { engine := createTestEngine() defer engine.Close() - txn := *txn1 + txn := makeTxn(*txn1, hlc.Timestamp{WallTime: 1}) if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value1, nil); err != nil { t.Fatal(err) } txn.Sequence++ - if err := MVCCPut(ctx, engine, nil, testKey2, hlc.Timestamp{WallTime: 1}, value2, &txn); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey2, txn.OrigTimestamp, value2, txn); err != nil { t.Fatal(err) } txn.Sequence++ - if err := MVCCPut(ctx, engine, nil, testKey3, hlc.Timestamp{WallTime: 1}, value3, &txn); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey3, txn.OrigTimestamp, value3, txn); err != nil { t.Fatal(err) } if err := MVCCPut(ctx, engine, nil, testKey4, hlc.Timestamp{WallTime: 1}, value4, nil); err != nil { @@ -2146,7 +2160,7 @@ func TestMVCCDeleteRangeFailed(t *testing.T) { txn.Sequence++ if _, _, _, err := MVCCDeleteRange( - ctx, engine, nil, testKey2, testKey4, math.MaxInt64, hlc.Timestamp{WallTime: 1}, &txn, false, + ctx, engine, nil, testKey2, testKey4, math.MaxInt64, txn.OrigTimestamp, txn, false, ); err != nil { t.Fatal(err) } @@ -2159,13 +2173,16 @@ func TestMVCCDeleteRangeConcurrentTxn(t *testing.T) { engine := createTestEngine() defer engine.Close() + txn1ts := makeTxn(*txn1, hlc.Timestamp{WallTime: 1}) + txn2ts := makeTxn(*txn2, hlc.Timestamp{WallTime: 2}) + if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value1, nil); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, engine, nil, testKey2, hlc.Timestamp{WallTime: 1}, value2, txn1); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey2, txn1ts.OrigTimestamp, value2, txn1ts); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, engine, nil, testKey3, hlc.Timestamp{WallTime: 2}, value3, txn2); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey3, txn2ts.OrigTimestamp, value3, txn2ts); err != nil { t.Fatal(err) } if err := MVCCPut(ctx, engine, nil, testKey4, hlc.Timestamp{WallTime: 1}, value4, nil); err != nil { @@ -2173,7 +2190,7 @@ func TestMVCCDeleteRangeConcurrentTxn(t *testing.T) { } if _, _, _, err := MVCCDeleteRange( - ctx, engine, nil, testKey2, testKey4, math.MaxInt64, hlc.Timestamp{WallTime: 1}, txn1, false, + ctx, engine, nil, testKey2, testKey4, math.MaxInt64, txn1ts.OrigTimestamp, txn1ts, false, ); err == nil { t.Fatal("expected error on uncommitted write intent") } @@ -2210,16 +2227,16 @@ func TestMVCCUncommittedDeleteRangeVisible(t *testing.T) { t.Fatal(err) } - txn := txn1.Clone() + txn := makeTxn(*txn1, hlc.Timestamp{WallTime: 2}) if _, _, _, err := MVCCDeleteRange( - ctx, engine, nil, testKey1, testKey4, math.MaxInt64, hlc.Timestamp{WallTime: 2}, &txn, false, + ctx, engine, nil, testKey1, testKey4, math.MaxInt64, txn.OrigTimestamp, txn, false, ); err != nil { t.Fatal(err) } txn.Epoch++ kvs, _, _, _ := MVCCScan(ctx, engine, testKey1, testKey4, math.MaxInt64, - hlc.Timestamp{WallTime: 3}, MVCCScanOptions{Txn: &txn}) + hlc.Timestamp{WallTime: 3}, MVCCScanOptions{Txn: txn}) if e := 2; len(kvs) != e { t.Fatalf("e = %d, got %d", e, len(kvs)) } @@ -2439,18 +2456,18 @@ func TestMVCCConditionalPutWithTxn(t *testing.T) { // Write value1. txn := *txn1 txn.Sequence++ - if err := MVCCConditionalPut(ctx, engine, nil, testKey1, clock.Now(), value1, nil, &txn); err != nil { + if err := MVCCConditionalPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value1, nil, &txn); err != nil { t.Fatal(err) } // Now, overwrite value1 with value2 from same txn; should see value1 as pre-existing value. txn.Sequence++ - if err := MVCCConditionalPut(ctx, engine, nil, testKey1, clock.Now(), value2, &value1, &txn); err != nil { + if err := MVCCConditionalPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value2, &value1, &txn); err != nil { t.Fatal(err) } // Writing value3 from a new epoch should see nil again. txn.Sequence++ txn.Epoch = 2 - if err := MVCCConditionalPut(ctx, engine, nil, testKey1, clock.Now(), value3, nil, &txn); err != nil { + if err := MVCCConditionalPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value3, nil, &txn); err != nil { t.Fatal(err) } // Commit value3. @@ -2575,14 +2592,14 @@ func TestMVCCInitPutWithTxn(t *testing.T) { txn := *txn1 txn.Sequence++ - err := MVCCInitPut(ctx, engine, nil, testKey1, clock.Now(), value1, false, &txn) + err := MVCCInitPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value1, false, &txn) if err != nil { t.Fatal(err) } // A repeat of the command will still succeed. txn.Sequence++ - err = MVCCInitPut(ctx, engine, nil, testKey1, clock.Now(), value1, false, &txn) + err = MVCCInitPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value1, false, &txn) if err != nil { t.Fatal(err) } @@ -2591,7 +2608,7 @@ func TestMVCCInitPutWithTxn(t *testing.T) { // will still succeed. txn.Sequence++ txn.Epoch = 2 - err = MVCCInitPut(ctx, engine, nil, testKey1, clock.Now(), value2, false, &txn) + err = MVCCInitPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value2, false, &txn) if err != nil { t.Fatal(err) } @@ -2651,12 +2668,13 @@ func TestMVCCConditionalPutWriteTooOld(t *testing.T) { t.Fatalf("expected WriteTooOldError with actual time = %s; got %s", expTS, err) } // Try a transactional put @t=1ns with expectation of value2; should fail. - err = MVCCConditionalPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value2, &value1, txn1) + txn := makeTxn(*txn1, hlc.Timestamp{WallTime: 1}) + err = MVCCConditionalPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value2, &value1, txn) if err == nil { t.Fatal("expected error on conditional put") } // Now do a transactional put @t=1ns with expectation of nil; will succeed @t=10,2. - err = MVCCConditionalPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value3, nil, txn1) + err = MVCCConditionalPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value3, nil, txn) expTS = hlc.Timestamp{WallTime: 10, Logical: 2} if wtoErr, ok := err.(*roachpb.WriteTooOldError); !ok || wtoErr.ActualTimestamp != expTS { t.Fatalf("expected WriteTooOldError with actual time = %s; got %s", expTS, err) @@ -2688,7 +2706,8 @@ func TestMVCCIncrementWriteTooOld(t *testing.T) { t.Fatalf("expected WriteTooOldError with actual time = %s; got %s", expTS, wtoErr) } // Try a transaction increment @t=1ns. - val, err = MVCCIncrement(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, txn1, 1) + txn := makeTxn(*txn1, hlc.Timestamp{WallTime: 1}) + val, err = MVCCIncrement(ctx, engine, nil, testKey1, txn.OrigTimestamp, txn, 1) if val != 1 || err == nil { t.Fatalf("expected val=1 (got %d) and nil error: %s", val, err) } @@ -2864,7 +2883,7 @@ func TestMVCCResolveTxn(t *testing.T) { engine := createTestEngine() defer engine.Close() - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{Logical: 1}, value1, txn1); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn1.OrigTimestamp, value1, txn1); err != nil { t.Fatal(err) } @@ -2917,7 +2936,7 @@ func TestMVCCResolveNewerIntent(t *testing.T) { } // Now, put down an intent which should return a write too old error // (but will still write the intent at tx1Commit.Timestmap+1. - err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{Logical: 1}, value2, txn1) + err := MVCCPut(ctx, engine, nil, testKey1, txn1.OrigTimestamp, value2, txn1) if _, ok := err.(*roachpb.WriteTooOldError); !ok { t.Fatalf("expected write too old error; got %s", err) } @@ -3057,14 +3076,14 @@ func TestMVCCMultiplePutOldTimestamp(t *testing.T) { // Verify the first txn Put returns a write too old error, but the // intent is written at the advanced timestamp. - txn := *txn1 + txn := makeTxn(*txn1, hlc.Timestamp{WallTime: 1}) txn.Sequence++ - err = MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value2, &txn) + err = MVCCPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value2, txn) if _, ok := err.(*roachpb.WriteTooOldError); !ok { t.Errorf("expected WriteTooOldError on Put; got %v", err) } // Verify new value was actually written at (3, 1). - value, _, err := MVCCGet(ctx, engine, testKey1, hlc.MaxTimestamp, MVCCGetOptions{Txn: &txn}) + value, _, err := MVCCGet(ctx, engine, testKey1, hlc.MaxTimestamp, MVCCGetOptions{Txn: txn}) if err != nil { t.Fatal(err) } @@ -3077,12 +3096,12 @@ func TestMVCCMultiplePutOldTimestamp(t *testing.T) { // Put again and verify no WriteTooOldError, but timestamp should continue // to be set to (3,1). txn.Sequence++ - err = MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value3, &txn) + err = MVCCPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value3, txn) if err != nil { t.Error(err) } // Verify new value was actually written at (3, 1). - value, _, err = MVCCGet(ctx, engine, testKey1, hlc.MaxTimestamp, MVCCGetOptions{Txn: &txn}) + value, _, err = MVCCGet(ctx, engine, testKey1, hlc.MaxTimestamp, MVCCGetOptions{Txn: txn}) if err != nil { t.Fatal(err) } @@ -3099,7 +3118,7 @@ func TestMVCCAbortTxn(t *testing.T) { engine := createTestEngine() defer engine.Close() - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{Logical: 1}, value1, txn1); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn1.OrigTimestamp, value1, txn1); err != nil { t.Fatal(err) } @@ -3141,7 +3160,8 @@ func TestMVCCAbortTxnWithPreviousVersion(t *testing.T) { if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value2, nil); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 2}, value3, txn1); err != nil { + txn1ts := makeTxn(*txn1, hlc.Timestamp{WallTime: 2}) + if err := MVCCPut(ctx, engine, nil, testKey1, txn1ts.OrigTimestamp, value3, txn1ts); err != nil { t.Fatal(err) } @@ -3184,28 +3204,30 @@ func TestMVCCWriteWithDiffTimestampsAndEpochs(t *testing.T) { // Start with epoch 1. txn := *txn1 txn.Sequence++ - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{Logical: 1}, value1, &txn); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value1, &txn); err != nil { t.Fatal(err) } // Now write with greater timestamp and epoch 2. txne2 := txn txne2.Sequence++ txne2.Epoch = 2 - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value2, &txne2); err != nil { + txne2.Timestamp = hlc.Timestamp{WallTime: 1} + if err := MVCCPut(ctx, engine, nil, testKey1, txne2.OrigTimestamp, value2, &txne2); err != nil { t.Fatal(err) } // Try a write with an earlier timestamp; this is just ignored. txne2.Sequence++ - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{Logical: 1}, value1, &txne2); err != nil { + txne2.Timestamp = hlc.Timestamp{WallTime: 1} + if err := MVCCPut(ctx, engine, nil, testKey1, txne2.OrigTimestamp, value1, &txne2); err != nil { t.Fatal(err) } // Try a write with an earlier epoch; again ignored. - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value1, &txn); err == nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn.OrigTimestamp, value1, &txn); err == nil { t.Fatal("unexpected success of a write with an earlier epoch") } // Try a write with different value using both later timestamp and epoch. txne2.Sequence++ - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value3, &txne2); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txne2.OrigTimestamp, value3, &txne2); err != nil { t.Fatal(err) } // Resolve the intent. @@ -3236,7 +3258,7 @@ func TestMVCCWriteWithDiffTimestampsAndEpochs(t *testing.T) { err, value.Timestamp, expTS, value4.RawBytes, value.RawBytes) } // Now write an intent with exactly the same timestamp--ties also get WriteTooOldError. - err = MVCCPut(ctx, engine, nil, testKey1, expTS, value5, txn2) + err = MVCCPut(ctx, engine, nil, testKey1, txn2.OrigTimestamp, value5, txn2) intentTS := expTS.Add(0, 1) if wtoErr, ok := err.(*roachpb.WriteTooOldError); !ok { t.Fatal("unexpected success") @@ -3283,7 +3305,8 @@ func TestMVCCReadWithDiffEpochs(t *testing.T) { t.Fatal(err) } // Now write using txn1, epoch 1. - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value2, txn1); err != nil { + txn1ts := makeTxn(*txn1, hlc.Timestamp{WallTime: 1}) + if err := MVCCPut(ctx, engine, nil, testKey1, txn1ts.OrigTimestamp, value2, txn1ts); err != nil { t.Fatal(err) } // Try reading using different txns & epochs. @@ -3326,7 +3349,7 @@ func TestMVCCReadWithOldEpoch(t *testing.T) { engine := createTestEngine() defer engine.Close() - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value2, txn1e2); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn1e2.OrigTimestamp, value2, txn1e2); err != nil { t.Fatal(err) } _, _, err := MVCCGet(ctx, engine, testKey1, hlc.Timestamp{WallTime: 2}, MVCCGetOptions{ @@ -3356,18 +3379,17 @@ func TestMVCCWriteWithSequence(t *testing.T) { {3, ""}, // new sequence } - ts := hlc.Timestamp{Logical: 1} for i, tc := range testCases { key := roachpb.Key(fmt.Sprintf("key-%d", i)) // Start with sequence 2. txn := *txn1 txn.Sequence = 2 - if err := MVCCPut(ctx, engine, nil, key, ts, value1, &txn); err != nil { + if err := MVCCPut(ctx, engine, nil, key, txn.OrigTimestamp, value1, &txn); err != nil { t.Fatal(err) } txn.Sequence = tc.sequence - err := MVCCPut(ctx, engine, nil, key, ts, value1, &txn) + err := MVCCPut(ctx, engine, nil, key, txn.OrigTimestamp, value1, &txn) if tc.expectedErr != "" && err != nil { if !testutils.IsError(err, tc.expectedErr) { t.Fatalf("%d: unexpected error: %s", i, err) @@ -3392,7 +3414,7 @@ func TestMVCCReadWithPushedTimestamp(t *testing.T) { defer engine.Close() // Start with epoch 1. - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{Logical: 1}, value1, txn1); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn1.OrigTimestamp, value1, txn1); err != nil { t.Fatal(err) } // Resolve the intent, pushing its timestamp forward. @@ -3420,10 +3442,10 @@ func TestMVCCResolveWithDiffEpochs(t *testing.T) { engine := createTestEngine() defer engine.Close() - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{Logical: 1}, value1, txn1); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn1.OrigTimestamp, value1, txn1); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, engine, nil, testKey2, hlc.Timestamp{Logical: 1}, value2, txn1e2); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey2, txn1e2.OrigTimestamp, value2, txn1e2); err != nil { t.Fatal(err) } num, _, err := MVCCResolveWriteIntentRange(ctx, engine, nil, roachpb.Intent{ @@ -3463,7 +3485,7 @@ func TestMVCCResolveWithUpdatedTimestamp(t *testing.T) { engine := createTestEngine() defer engine.Close() - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{Logical: 1}, value1, txn1); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn1.OrigTimestamp, value1, txn1); err != nil { t.Fatal(err) } @@ -3514,7 +3536,7 @@ func TestMVCCResolveWithPushedTimestamp(t *testing.T) { engine := createTestEngine() defer engine.Close() - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{Logical: 1}, value1, txn1); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn1.OrigTimestamp, value1, txn1); err != nil { t.Fatal(err) } value, _, err := MVCCGet(ctx, engine, testKey1, hlc.Timestamp{WallTime: 1}, MVCCGetOptions{ @@ -3589,14 +3611,14 @@ func TestMVCCResolveTxnNoOps(t *testing.T) { } // Write intent and resolve with different txn. - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{WallTime: 1}, value2, txn1); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey2, txn1.OrigTimestamp, value2, txn1); err != nil { t.Fatal(err) } txn1CommitWithTS := txn2Commit.Clone() txn1CommitWithTS.Timestamp = hlc.Timestamp{WallTime: 1} if err := MVCCResolveWriteIntent(ctx, engine, nil, roachpb.Intent{ - Span: roachpb.Span{Key: testKey1}, + Span: roachpb.Span{Key: testKey2}, Status: txn1CommitWithTS.Status, Txn: txn1CommitWithTS.TxnMeta, }); err != nil { @@ -3611,16 +3633,16 @@ func TestMVCCResolveTxnRange(t *testing.T) { engine := createTestEngine() defer engine.Close() - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{Logical: 1}, value1, txn1); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn1.OrigTimestamp, value1, txn1); err != nil { t.Fatal(err) } if err := MVCCPut(ctx, engine, nil, testKey2, hlc.Timestamp{Logical: 1}, value2, nil); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, engine, nil, testKey3, hlc.Timestamp{Logical: 1}, value3, txn2); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey3, txn2.OrigTimestamp, value3, txn2); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, engine, nil, testKey4, hlc.Timestamp{Logical: 1}, value4, txn1); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey4, txn1.OrigTimestamp, value4, txn1); err != nil { t.Fatal(err) } @@ -3689,15 +3711,15 @@ func TestMVCCResolveTxnRangeResume(t *testing.T) { defer engine.Close() // Write 10 keys from txn1, 10 from txn2, and 10 with no txn, interleaved. - // TODO(benesch): wrap this loop. for i := 0; i < 30; i += 3 { key0 := roachpb.Key(fmt.Sprintf("%02d", i+0)) key1 := roachpb.Key(fmt.Sprintf("%02d", i+1)) key2 := roachpb.Key(fmt.Sprintf("%02d", i+2)) - if err := MVCCPut(ctx, engine, nil, key0, hlc.Timestamp{Logical: 1}, value1, txn1); err != nil { + if err := MVCCPut(ctx, engine, nil, key0, txn1.OrigTimestamp, value1, txn1); err != nil { t.Fatal(err) } - if err := MVCCPut(ctx, engine, nil, key1, hlc.Timestamp{Logical: 2}, value2, txn2); err != nil { + txn2ts := makeTxn(*txn2, hlc.Timestamp{Logical: 2}) + if err := MVCCPut(ctx, engine, nil, key1, txn2ts.OrigTimestamp, value2, txn2ts); err != nil { t.Fatal(err) } if err := MVCCPut(ctx, engine, nil, key2, hlc.Timestamp{Logical: 3}, value3, nil); err != nil { @@ -4409,8 +4431,11 @@ func TestMVCCGarbageCollectIntent(t *testing.T) { t.Fatal(err) } } - txn := &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts2}} - if err := MVCCDelete(ctx, engine, nil, key, ts2, txn); err != nil { + txn := &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts2}, + OrigTimestamp: ts2, + } + if err := MVCCDelete(ctx, engine, nil, key, txn.OrigTimestamp, txn); err != nil { t.Fatal(err) } keys := []roachpb.GCRequest_GCKey{ @@ -4432,7 +4457,7 @@ func TestResolveIntentWithLowerEpoch(t *testing.T) { defer engine.Close() // Lay down an intent with a high epoch. - if err := MVCCPut(ctx, engine, nil, testKey1, hlc.Timestamp{Logical: 1}, value1, txn1e2); err != nil { + if err := MVCCPut(ctx, engine, nil, testKey1, txn1e2.OrigTimestamp, value1, txn1e2); err != nil { t.Fatal(err) } // Resolve the intent with a low epoch. @@ -4470,21 +4495,24 @@ func TestMVCCIdempotentTransactions(t *testing.T) { key := roachpb.Key("a") value := roachpb.MakeValueFromString("first value") newValue := roachpb.MakeValueFromString("second value") - txn := &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}} - txn.Status = roachpb.PENDING + txn := &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}, + OrigTimestamp: ts1, + Status: roachpb.PENDING, + } // Lay down an intent. - if err := MVCCPut(ctx, engine, nil, key, ts1, value, txn); err != nil { + if err := MVCCPut(ctx, engine, nil, key, txn.OrigTimestamp, value, txn); err != nil { t.Fatal(err) } // Lay down an intent again with no problem because we're idempotent. - if err := MVCCPut(ctx, engine, nil, key, ts1, value, txn); err != nil { + if err := MVCCPut(ctx, engine, nil, key, txn.OrigTimestamp, value, txn); err != nil { t.Fatal(err) } // Lay down an intent without increasing the sequence but with a different value. - if err := MVCCPut(ctx, engine, nil, key, ts1, newValue, txn); err != nil { + if err := MVCCPut(ctx, engine, nil, key, txn.OrigTimestamp, newValue, txn); err != nil { if !testutils.IsError(err, "has a different value") { t.Fatal(err) } @@ -4494,13 +4522,13 @@ func TestMVCCIdempotentTransactions(t *testing.T) { // Lay down a second intent. txn.Sequence++ - if err := MVCCPut(ctx, engine, nil, key, ts1, newValue, txn); err != nil { + if err := MVCCPut(ctx, engine, nil, key, txn.OrigTimestamp, newValue, txn); err != nil { t.Fatal(err) } // Replay first intent without writing anything down. txn.Sequence-- - if err := MVCCPut(ctx, engine, nil, key, ts1, value, txn); err != nil { + if err := MVCCPut(ctx, engine, nil, key, txn.OrigTimestamp, value, txn); err != nil { t.Fatal(err) } @@ -4529,7 +4557,7 @@ func TestMVCCIdempotentTransactions(t *testing.T) { } txn.Sequence-- // Lay down an intent without increasing the sequence but with a different value. - if err := MVCCPut(ctx, engine, nil, key, ts1, newValue, txn); err != nil { + if err := MVCCPut(ctx, engine, nil, key, txn.OrigTimestamp, newValue, txn); err != nil { if !testutils.IsError(err, "has a different value") { t.Fatal(err) } @@ -4539,7 +4567,7 @@ func TestMVCCIdempotentTransactions(t *testing.T) { txn.Sequence-- // Lay down an intent with a lower sequence number to see if it detects missing intents. - if err := MVCCPut(ctx, engine, nil, key, ts1, newValue, txn); err != nil { + if err := MVCCPut(ctx, engine, nil, key, txn.OrigTimestamp, newValue, txn); err != nil { if !testutils.IsError(err, "missing an intent") { t.Fatal(err) } @@ -4549,14 +4577,14 @@ func TestMVCCIdempotentTransactions(t *testing.T) { txn.Sequence += 3 // on a separate key, start an increment. - val, err := MVCCIncrement(ctx, engine, nil, testKey1, ts1, txn, 1) + val, err := MVCCIncrement(ctx, engine, nil, testKey1, txn.OrigTimestamp, txn, 1) if val != 1 || err != nil { t.Fatalf("expected val=1 (got %d): %s", val, err) } // As long as the sequence in unchanged, replaying the increment doesn't // increase the value. for i := 0; i < 10; i++ { - val, err = MVCCIncrement(ctx, engine, nil, testKey1, ts1, txn, 1) + val, err = MVCCIncrement(ctx, engine, nil, testKey1, txn.OrigTimestamp, txn, 1) if val != 1 || err != nil { t.Fatalf("expected val=1 (got %d): %s", val, err) } @@ -4564,14 +4592,14 @@ func TestMVCCIdempotentTransactions(t *testing.T) { // Increment again. txn.Sequence++ - val, err = MVCCIncrement(ctx, engine, nil, testKey1, ts1, txn, 1) + val, err = MVCCIncrement(ctx, engine, nil, testKey1, txn.OrigTimestamp, txn, 1) if val != 2 || err != nil { t.Fatalf("expected val=2 (got %d): %s", val, err) } txn.Sequence-- // Replaying an older increment doesn't increase the value. for i := 0; i < 10; i++ { - val, err = MVCCIncrement(ctx, engine, nil, testKey1, ts1, txn, 1) + val, err = MVCCIncrement(ctx, engine, nil, testKey1, txn.OrigTimestamp, txn, 1) if val != 1 || err != nil { t.Fatalf("expected val=1 (got %d): %s", val, err) } @@ -4593,11 +4621,14 @@ func TestMVCCIntentHistory(t *testing.T) { key := roachpb.Key("a") value := roachpb.MakeValueFromString("first value") newValue := roachpb.MakeValueFromString("second value") - txn := &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}} + txn := &roachpb.Transaction{ + TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4(), Timestamp: ts1}, + OrigTimestamp: ts1, + } txn.Status = roachpb.PENDING // Lay down an intent. - if err := MVCCPut(ctx, engine, nil, key, ts1, value, txn); err != nil { + if err := MVCCPut(ctx, engine, nil, key, txn.OrigTimestamp, value, txn); err != nil { t.Fatal(err) } @@ -4623,7 +4654,8 @@ func TestMVCCIntentHistory(t *testing.T) { // Lay down an overriding intent with a higher sequence. txn.Sequence++ - if err := MVCCPut(ctx, engine, nil, key, ts2, newValue, txn); err != nil { + txn.Timestamp = ts2 + if err := MVCCPut(ctx, engine, nil, key, txn.OrigTimestamp, newValue, txn); err != nil { t.Fatal(err) } @@ -4650,7 +4682,7 @@ func TestMVCCIntentHistory(t *testing.T) { // Lay down a deletion intent with a higher sequence. txn.Sequence++ - if err := MVCCDelete(ctx, engine, nil, key, ts2, txn); err != nil { + if err := MVCCDelete(ctx, engine, nil, key, txn.OrigTimestamp, txn); err != nil { t.Fatal(err) } @@ -4679,7 +4711,7 @@ func TestMVCCIntentHistory(t *testing.T) { // Lay down another intent with a higher sequence to see if history accurately captures deletes. txn.Sequence++ - if err := MVCCPut(ctx, engine, nil, key, ts2, value, txn); err != nil { + if err := MVCCPut(ctx, engine, nil, key, txn.OrigTimestamp, value, txn); err != nil { t.Fatal(err) } diff --git a/pkg/storage/replica_test.go b/pkg/storage/replica_test.go index ecaa13c6b369..9288ef220313 100644 --- a/pkg/storage/replica_test.go +++ b/pkg/storage/replica_test.go @@ -2618,6 +2618,74 @@ func TestReplicaUseTSCache(t *testing.T) { } } +// TestReplicaTSCacheForwardsIntentTS verifies that the timestamp cache affects +// the timestamps at which intents are written. That is, if a transactional +// write is forwarded by the timestamp cache due to a more recent read, the +// written intents must be left at the forwarded timestamp. See the comment on +// the enginepb.TxnMeta.Timestamp field for rationale. +func TestReplicaTSCacheForwardsIntentTS(t *testing.T) { + defer leaktest.AfterTest(t)() + + ctx := context.Background() + tc := testContext{} + stopper := stop.NewStopper() + defer stopper.Stop(ctx) + tc.Start(t, stopper) + + tsOld := tc.Clock().Now() + tsNew := tsOld.Add(time.Millisecond.Nanoseconds(), 0) + + // Read at tNew to populate the read timestamp cache. + // DeleteRange at tNew to populate the write timestamp cache. + txnNew := newTransaction("new", roachpb.Key("txn-anchor"), roachpb.NormalUserPriority, tc.Clock()) + txnNew.OrigTimestamp = tsNew + txnNew.Timestamp = tsNew + keyGet := roachpb.Key("get") + keyDeleteRange := roachpb.Key("delete-range") + gArgs := &roachpb.GetRequest{ + RequestHeader: roachpb.RequestHeader{Key: keyGet}, + } + drArgs := &roachpb.DeleteRangeRequest{ + RequestHeader: roachpb.RequestHeader{Key: keyDeleteRange, EndKey: keyDeleteRange.Next()}, + } + assignSeqNumsForReqs(txnNew, gArgs, drArgs) + var ba roachpb.BatchRequest + ba.Header.Txn = txnNew + ba.Add(gArgs, drArgs) + if _, pErr := tc.Sender().Send(ctx, ba); pErr != nil { + t.Fatal(pErr) + } + + // Write under the timestamp cache within the transaction, and verify that + // the intents are written above the timestamp cache. + txnOld := newTransaction("old", roachpb.Key("txn-anchor"), roachpb.NormalUserPriority, tc.Clock()) + txnOld.OrigTimestamp = tsOld + txnOld.Timestamp = tsOld + for _, key := range []roachpb.Key{keyGet, keyDeleteRange} { + t.Run(string(key), func(t *testing.T) { + pArgs := putArgs(key, []byte("foo")) + assignSeqNumsForReqs(txnOld, &pArgs) + if _, pErr := tc.SendWrappedWith(roachpb.Header{Txn: txnOld}, &pArgs); pErr != nil { + t.Fatal(pErr) + } + iter := tc.engine.NewIterator(engine.IterOptions{Prefix: true}) + defer iter.Close() + mvccKey := engine.MakeMVCCMetadataKey(key) + iter.Seek(mvccKey) + var keyMeta enginepb.MVCCMetadata + if ok, err := iter.Valid(); !ok || !iter.UnsafeKey().Equal(mvccKey) { + t.Fatalf("missing mvcc metadata for %q: %v", mvccKey, err) + } else if err := iter.ValueProto(&keyMeta); err != nil { + t.Fatalf("failed to unmarshal metadata for %q", mvccKey) + } + if tsNext := tsNew.Next(); hlc.Timestamp(keyMeta.Timestamp) != tsNext { + t.Errorf("timestamp not forwarded for %q intent: expected %s but got %s", + key, tsNext, keyMeta.Timestamp) + } + }) + } +} + func TestConditionalPutUpdatesTSCacheOnError(t *testing.T) { defer leaktest.AfterTest(t)() tc := testContext{} @@ -8344,10 +8412,10 @@ func TestNoopRequestsNotProposed(t *testing.T) { var ba roachpb.BatchRequest ba.Header.RangeID = repl.RangeID ba.Add(req) + ba.Txn = txn if err := ba.SetActiveTimestamp(repl.Clock().Now); err != nil { t.Fatal(err) } - ba.Txn = txn _, pErr := repl.Send(ctx, ba) return pErr } @@ -8480,6 +8548,8 @@ func TestNoopRequestsNotProposed(t *testing.T) { ba.RangeID = repl.RangeID if c.useTxn { ba.Txn = txn + ba.Txn.OrigTimestamp = markerTS + ba.Txn.Timestamp = markerTS assignSeqNumsForReqs(txn, c.req) } ba.Add(c.req)