From 45dc49f4abe67f1b047bfe785d13a0648497f055 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Sun, 4 Sep 2016 15:30:29 +0800 Subject: [PATCH 1/5] storage: timestampCache returns transaction IDs timestampCache.GetMax{Read,Write} previously took a transaction ID to act as that transaction. However, this wasn't always correct - when acting as a transaction we look past that transaction's writes to see what's underneath, but the trimming of inserted spans means that this would often return the low water mark, ignoring spans that had been present but were trimmed away. Instead, the timestamp cache now returns the transaction associated with the timestamp it is returning, so the caller can make its own decision about whether to ignore the timestamp or not. --- storage/replica.go | 20 +- storage/replica_test.go | 22 +- storage/timestamp_cache.go | 56 ++-- storage/timestamp_cache_test.go | 494 +++++++++++++------------------- 4 files changed, 244 insertions(+), 348 deletions(-) diff --git a/storage/replica.go b/storage/replica.go index 2f58d7ba22cd..1f51bc7e51e9 100644 --- a/storage/replica.go +++ b/storage/replica.go @@ -1240,7 +1240,7 @@ func (r *Replica) applyTimestampCache(ba *roachpb.BatchRequest) *roachpb.Error { // has already been finalized, in which case this is a replay. if _, ok := args.(*roachpb.BeginTransactionRequest); ok { key := keys.TransactionKey(header.Key, ba.GetTxnID()) - wTS, wOK := r.mu.tsCache.GetMaxWrite(key, nil, nil) + wTS, _, wOK := r.mu.tsCache.GetMaxWrite(key, nil) if wOK { return roachpb.NewError(roachpb.NewTransactionReplayError()) } else if !wTS.Less(ba.Txn.Timestamp) { @@ -1257,10 +1257,12 @@ func (r *Replica) applyTimestampCache(ba *roachpb.BatchRequest) *roachpb.Error { continue } - // Forward the timestamp if there's been a more recent read. - rTS, _ := r.mu.tsCache.GetMaxRead(header.Key, header.EndKey, ba.GetTxnID()) + // Forward the timestamp if there's been a more recent read (by someone else). + rTS, rTxnID, _ := r.mu.tsCache.GetMaxRead(header.Key, header.EndKey) if ba.Txn != nil { - ba.Txn.Timestamp.Forward(rTS.Next()) + if rTxnID == nil || !uuid.Equal(*ba.Txn.ID, *rTxnID) { + ba.Txn.Timestamp.Forward(rTS.Next()) + } } else { ba.Timestamp.Forward(rTS.Next()) } @@ -1269,11 +1271,13 @@ func (r *Replica) applyTimestampCache(ba *roachpb.BatchRequest) *roachpb.Error { // write too old boolean for transactions. Note that currently // only EndTransaction and DeleteRange requests update the // write timestamp cache. - wTS, _ := r.mu.tsCache.GetMaxWrite(header.Key, header.EndKey, ba.GetTxnID()) + wTS, wTxnID, _ := r.mu.tsCache.GetMaxWrite(header.Key, header.EndKey) if ba.Txn != nil { - if !wTS.Less(ba.Txn.Timestamp) { - ba.Txn.Timestamp.Forward(wTS.Next()) - ba.Txn.WriteTooOld = true + if wTxnID == nil || !uuid.Equal(*ba.Txn.ID, *wTxnID) { + if !wTS.Less(ba.Txn.Timestamp) { + ba.Txn.Timestamp.Forward(wTS.Next()) + ba.Txn.WriteTooOld = true + } } } else { ba.Timestamp.Forward(wTS.Next()) diff --git a/storage/replica_test.go b/storage/replica_test.go index fadf27795d5f..2f1c181105d6 100644 --- a/storage/replica_test.go +++ b/storage/replica_test.go @@ -873,7 +873,7 @@ func TestReplicaTSCacheLowWaterOnLease(t *testing.T) { now := hlc.Timestamp{WallTime: tc.manualClock.UnixNano()} tc.rng.mu.Lock() - baseRTS, _ := tc.rng.mu.tsCache.GetMaxRead(roachpb.Key("a"), nil /* end */, nil /* txn */) + baseRTS, _, _ := tc.rng.mu.tsCache.GetMaxRead(roachpb.Key("a"), nil /* end */) tc.rng.mu.Unlock() // TODO(tschottdorf): this value is zero, which seems ripe for producing // test cases that do not test anything. @@ -932,8 +932,8 @@ func TestReplicaTSCacheLowWaterOnLease(t *testing.T) { } // Verify expected low water mark. tc.rng.mu.Lock() - rTS, _ := tc.rng.mu.tsCache.GetMaxRead(roachpb.Key("a"), nil, nil) - wTS, _ := tc.rng.mu.tsCache.GetMaxWrite(roachpb.Key("a"), nil, nil) + rTS, _, _ := tc.rng.mu.tsCache.GetMaxRead(roachpb.Key("a"), nil) + wTS, _, _ := tc.rng.mu.tsCache.GetMaxWrite(roachpb.Key("a"), nil) tc.rng.mu.Unlock() if rTS.WallTime != test.expLowWater || wTS.WallTime != test.expLowWater { t.Errorf("%d: expected low water %d; got maxRead=%d, maxWrite=%d", i, test.expLowWater, rTS.WallTime, wTS.WallTime) @@ -1715,26 +1715,26 @@ func TestReplicaUpdateTSCache(t *testing.T) { // Verify the timestamp cache has rTS=1s and wTS=0s for "a". tc.rng.mu.Lock() defer tc.rng.mu.Unlock() - _, rOK := tc.rng.mu.tsCache.GetMaxRead(roachpb.Key("a"), nil, nil) - _, wOK := tc.rng.mu.tsCache.GetMaxWrite(roachpb.Key("a"), nil, nil) + _, _, rOK := tc.rng.mu.tsCache.GetMaxRead(roachpb.Key("a"), nil) + _, _, wOK := tc.rng.mu.tsCache.GetMaxWrite(roachpb.Key("a"), nil) if rOK || wOK { t.Errorf("expected rOK=false and wOK=false; rOK=%t, wOK=%t", rOK, wOK) } tc.rng.mu.tsCache.ExpandRequests(hlc.ZeroTimestamp) - rTS, rOK := tc.rng.mu.tsCache.GetMaxRead(roachpb.Key("a"), nil, nil) - wTS, wOK := tc.rng.mu.tsCache.GetMaxWrite(roachpb.Key("a"), nil, nil) + rTS, _, rOK := tc.rng.mu.tsCache.GetMaxRead(roachpb.Key("a"), nil) + wTS, _, wOK := tc.rng.mu.tsCache.GetMaxWrite(roachpb.Key("a"), nil) if rTS.WallTime != t0.Nanoseconds() || wTS.WallTime != 0 || !rOK || wOK { t.Errorf("expected rTS=1s and wTS=0s, but got %s, %s; rOK=%t, wOK=%t", rTS, wTS, rOK, wOK) } // Verify the timestamp cache has rTS=0s and wTS=2s for "b". - rTS, rOK = tc.rng.mu.tsCache.GetMaxRead(roachpb.Key("b"), nil, nil) - wTS, wOK = tc.rng.mu.tsCache.GetMaxWrite(roachpb.Key("b"), nil, nil) + rTS, _, rOK = tc.rng.mu.tsCache.GetMaxRead(roachpb.Key("b"), nil) + wTS, _, wOK = tc.rng.mu.tsCache.GetMaxWrite(roachpb.Key("b"), nil) if rTS.WallTime != 0 || wTS.WallTime != t1.Nanoseconds() || rOK || !wOK { t.Errorf("expected rTS=0s and wTS=2s, but got %s, %s; rOK=%t, wOK=%t", rTS, wTS, rOK, wOK) } // Verify another key ("c") has 0sec in timestamp cache. - rTS, rOK = tc.rng.mu.tsCache.GetMaxRead(roachpb.Key("c"), nil, nil) - wTS, wOK = tc.rng.mu.tsCache.GetMaxWrite(roachpb.Key("c"), nil, nil) + rTS, _, rOK = tc.rng.mu.tsCache.GetMaxRead(roachpb.Key("c"), nil) + wTS, _, wOK = tc.rng.mu.tsCache.GetMaxWrite(roachpb.Key("c"), nil) if rTS.WallTime != 0 || wTS.WallTime != 0 || rOK || wOK { t.Errorf("expected rTS=0s and wTS=0s, but got %s %s; rOK=%t, wOK=%t", rTS, wTS, rOK, wOK) } diff --git a/storage/timestamp_cache.go b/storage/timestamp_cache.go index ffe5e94fcda9..a4f41a4bd261 100644 --- a/storage/timestamp_cache.go +++ b/storage/timestamp_cache.go @@ -408,35 +408,28 @@ func (tc *timestampCache) ExpandRequests(timestamp hlc.Timestamp) { } // GetMaxRead returns the maximum read timestamp which overlaps the -// interval spanning from start to end. Cached timestamps matching the -// specified txnID are not considered. If no part of the specified -// range is overlapped by timestamps from different transactions in -// the cache, the low water timestamp is returned for the read -// timestamps. Also returns an "ok" bool, indicating whether an -// explicit match of the interval was found in the cache. -func (tc *timestampCache) GetMaxRead(start, end roachpb.Key, txnID *uuid.UUID) (hlc.Timestamp, bool) { - return tc.getMax(start, end, txnID, true) +// interval spanning from start to end. If that timestamp belongs to a +// single transaction, that transaction's ID is returned. If no part +// of the specified range is overlapped by timestamps from different +// transactions in the cache, the low water timestamp is returned for +// the read timestamps. Also returns an "ok" bool, indicating whether +// an explicit match of the interval was found in the cache. +func (tc *timestampCache) GetMaxRead(start, end roachpb.Key) (hlc.Timestamp, *uuid.UUID, bool) { + return tc.getMax(start, end, true) } // GetMaxWrite returns the maximum write timestamp which overlaps the -// interval spanning from start to end. Cached timestamps matching the -// specified txnID are not considered. If no part of the specified -// range is overlapped by timestamps from different transactions in -// the cache, the low water timestamp is returned for the write -// timestamps. Also returns an "ok" bool, indicating whether an -// explicit match of the interval was found in the cache. -// -// The txn ID prevents restarts with a pattern like: read("a"), -// write("a"). The read adds a timestamp for "a". Then the write (for -// the same transaction) would get that as the max timestamp and be -// forced to increment it. This allows timestamps from the same txn -// to be ignored because the write would instead get the low water -// timestamp. -func (tc *timestampCache) GetMaxWrite(start, end roachpb.Key, txnID *uuid.UUID) (hlc.Timestamp, bool) { - return tc.getMax(start, end, txnID, false) +// interval spanning from start to end. If that timestamp belongs to a +// single transaction, that transaction's ID is returned. If no part +// of the specified range is overlapped by timestamps from different +// transactions in the cache, the low water timestamp is returned for +// the write timestamps. Also returns an "ok" bool, indicating whether +// an explicit match of the interval was found in the cache. +func (tc *timestampCache) GetMaxWrite(start, end roachpb.Key) (hlc.Timestamp, *uuid.UUID, bool) { + return tc.getMax(start, end, false) } -func (tc *timestampCache) getMax(start, end roachpb.Key, txnID *uuid.UUID, readTSCache bool) (hlc.Timestamp, bool) { +func (tc *timestampCache) getMax(start, end roachpb.Key, readTSCache bool) (hlc.Timestamp, *uuid.UUID, bool) { if len(end) == 0 { end = start.Next() } @@ -446,16 +439,19 @@ func (tc *timestampCache) getMax(start, end roachpb.Key, txnID *uuid.UUID, readT if readTSCache { cache = tc.rCache } + var txnID *uuid.UUID for _, o := range cache.GetOverlaps(start, end) { ce := o.Value.(*cacheValue) - if ce.txnID == nil || txnID == nil || !roachpb.TxnIDEqual(txnID, ce.txnID) { - if max.Less(ce.timestamp) { - ok = true - max = ce.timestamp - } + if max.Less(ce.timestamp) { + ok = true + max = ce.timestamp + txnID = ce.txnID + } else if max.Equal(ce.timestamp) && txnID != nil && + (ce.txnID == nil || !uuid.Equal(*txnID, *ce.txnID)) { + txnID = nil } } - return max, ok + return max, txnID, ok } // MergeInto merges all entries from this timestamp cache into the diff --git a/storage/timestamp_cache_test.go b/storage/timestamp_cache_test.go index 03245421a009..a183ff09d653 100644 --- a/storage/timestamp_cache_test.go +++ b/storage/timestamp_cache_test.go @@ -17,6 +17,7 @@ package storage import ( + "fmt" "testing" "time" @@ -45,19 +46,19 @@ func TestTimestampCache(t *testing.T) { t.Errorf("expected cache to be empty, but contains %d elements", tc.rCache.Len()) } // Verify GetMax returns the lowWater mark which is maxClockOffset. - if rTS, ok := tc.GetMaxRead(roachpb.Key("a"), nil, nil); rTS.WallTime != maxClockOffset.Nanoseconds() || ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("a"), nil); rTS.WallTime != maxClockOffset.Nanoseconds() || ok { t.Errorf("expected maxClockOffset for key \"a\"; ok=%t", ok) } - if rTS, ok := tc.GetMaxRead(roachpb.Key("notincache"), nil, nil); rTS.WallTime != maxClockOffset.Nanoseconds() || ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("notincache"), nil); rTS.WallTime != maxClockOffset.Nanoseconds() || ok { t.Errorf("expected maxClockOffset for key \"notincache\"; ok=%t", ok) } // Advance the clock and verify same low water mark. manual.Set(maxClockOffset.Nanoseconds() + 1) - if rTS, ok := tc.GetMaxRead(roachpb.Key("a"), nil, nil); rTS.WallTime != maxClockOffset.Nanoseconds() || ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("a"), nil); rTS.WallTime != maxClockOffset.Nanoseconds() || ok { t.Errorf("expected maxClockOffset for key \"a\"; ok=%t", ok) } - if rTS, ok := tc.GetMaxRead(roachpb.Key("notincache"), nil, nil); rTS.WallTime != maxClockOffset.Nanoseconds() || ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("notincache"), nil); rTS.WallTime != maxClockOffset.Nanoseconds() || ok { t.Errorf("expected maxClockOffset for key \"notincache\"; ok=%t", ok) } @@ -66,37 +67,37 @@ func TestTimestampCache(t *testing.T) { tc.add(roachpb.Key("b"), roachpb.Key("c"), ts, nil, true) // Verify all permutations of direct and range access. - if rTS, ok := tc.GetMaxRead(roachpb.Key("b"), nil, nil); !rTS.Equal(ts) || !ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("b"), nil); !rTS.Equal(ts) || !ok { t.Errorf("expected current time for key \"b\"; got %s; ok=%t", rTS, ok) } - if rTS, ok := tc.GetMaxRead(roachpb.Key("bb"), nil, nil); !rTS.Equal(ts) || !ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("bb"), nil); !rTS.Equal(ts) || !ok { t.Errorf("expected current time for key \"bb\"; ok=%t", ok) } - if rTS, ok := tc.GetMaxRead(roachpb.Key("c"), nil, nil); rTS.WallTime != maxClockOffset.Nanoseconds() || ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("c"), nil); rTS.WallTime != maxClockOffset.Nanoseconds() || ok { t.Errorf("expected maxClockOffset for key \"c\"; ok=%t", ok) } - if rTS, ok := tc.GetMaxRead(roachpb.Key("b"), roachpb.Key("c"), nil); !rTS.Equal(ts) || !ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("b"), roachpb.Key("c")); !rTS.Equal(ts) || !ok { t.Errorf("expected current time for key \"b\"-\"c\"; ok=%t", ok) } - if rTS, ok := tc.GetMaxRead(roachpb.Key("bb"), roachpb.Key("bz"), nil); !rTS.Equal(ts) || !ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("bb"), roachpb.Key("bz")); !rTS.Equal(ts) || !ok { t.Errorf("expected current time for key \"bb\"-\"bz\"; ok=%t", ok) } - if rTS, ok := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("b"), nil); rTS.WallTime != maxClockOffset.Nanoseconds() || ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("b")); rTS.WallTime != maxClockOffset.Nanoseconds() || ok { t.Errorf("expected maxClockOffset for key \"a\"-\"b\"; ok=%t", ok) } - if rTS, ok := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("bb"), nil); !rTS.Equal(ts) || !ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("bb")); !rTS.Equal(ts) || !ok { t.Errorf("expected current time for key \"a\"-\"bb\"; ok=%t", ok) } - if rTS, ok := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("d"), nil); !rTS.Equal(ts) || !ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("d")); !rTS.Equal(ts) || !ok { t.Errorf("expected current time for key \"a\"-\"d\"; ok=%t", ok) } - if rTS, ok := tc.GetMaxRead(roachpb.Key("bz"), roachpb.Key("c"), nil); !rTS.Equal(ts) || !ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("bz"), roachpb.Key("c")); !rTS.Equal(ts) || !ok { t.Errorf("expected current time for key \"bz\"-\"c\"; ok=%t", ok) } - if rTS, ok := tc.GetMaxRead(roachpb.Key("bz"), roachpb.Key("d"), nil); !rTS.Equal(ts) || !ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("bz"), roachpb.Key("d")); !rTS.Equal(ts) || !ok { t.Errorf("expected current time for key \"bz\"-\"d\"; ok=%t", ok) } - if rTS, ok := tc.GetMaxRead(roachpb.Key("c"), roachpb.Key("d"), nil); rTS.WallTime != maxClockOffset.Nanoseconds() || ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("c"), roachpb.Key("d")); rTS.WallTime != maxClockOffset.Nanoseconds() || ok { t.Errorf("expected maxClockOffset for key \"c\"-\"d\"; ok=%t", ok) } } @@ -139,14 +140,14 @@ func TestTimestampCacheSetLowWater(t *testing.T) { {roachpb.Key("c"), cTS, true}, {roachpb.Key("d"), bTS, false}, } { - if rTS, ok := tc.GetMaxRead(test.key, nil, nil); !rTS.Equal(test.expTS) || ok != test.expOK { + if rTS, _, ok := tc.GetMaxRead(test.key, nil); !rTS.Equal(test.expTS) || ok != test.expOK { t.Errorf("%d: expected ts %s, got %s; exp ok=%t; got %t", i, test.expTS, rTS, test.expOK, ok) } } // Try setting a lower low water mark than the previous value. tc.SetLowWater(aTS) - if rTS, ok := tc.GetMaxRead(roachpb.Key("d"), nil, nil); !rTS.Equal(bTS) || ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("d"), nil); !rTS.Equal(bTS) || ok { t.Errorf("setting lower low water mark should not be allowed; expected %s; got %s; ok=%t", bTS, rTS, ok) } } @@ -171,7 +172,7 @@ func TestTimestampCacheEviction(t *testing.T) { tc.add(roachpb.Key("b"), nil, clock.Now(), nil, true) // Verify looking up key "c" returns the new low water mark ("a"'s timestamp). - if rTS, ok := tc.GetMaxRead(roachpb.Key("c"), nil, nil); !rTS.Equal(aTS) || ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("c"), nil); !rTS.Equal(aTS) || ok { t.Errorf("expected low water mark %s, got %s; ok=%t", aTS, rTS, ok) } } @@ -249,21 +250,21 @@ func TestTimestampCacheMergeInto(t *testing.T) { t.Errorf("expected latest to be updated to %s; got %s", tc1.latest, tc2.latest) } - if rTS, ok := tc2.GetMaxRead(roachpb.Key("a"), nil, nil); !rTS.Equal(adTS) || !ok { + if rTS, _, ok := tc2.GetMaxRead(roachpb.Key("a"), nil); !rTS.Equal(adTS) || !ok { t.Errorf("expected \"a\" to have adTS timestamp; ok=%t", ok) } - if rTS, ok := tc2.GetMaxRead(roachpb.Key("b"), nil, nil); !rTS.Equal(beTS) || !ok { + if rTS, _, ok := tc2.GetMaxRead(roachpb.Key("b"), nil); !rTS.Equal(beTS) || !ok { t.Errorf("expected \"b\" to have beTS timestamp; ok=%t", ok) } if test.useClear { - if rTS, ok := tc2.GetMaxRead(roachpb.Key("aa"), nil, nil); !rTS.Equal(adTS) || !ok { + if rTS, _, ok := tc2.GetMaxRead(roachpb.Key("aa"), nil); !rTS.Equal(adTS) || !ok { t.Errorf("expected \"aa\" to have adTS timestamp; ok=%t", ok) } } else { - if rTS, ok := tc2.GetMaxRead(roachpb.Key("aa"), nil, nil); !rTS.Equal(aaTS) || !ok { + if rTS, _, ok := tc2.GetMaxRead(roachpb.Key("aa"), nil); !rTS.Equal(aaTS) || !ok { t.Errorf("expected \"aa\" to have aaTS timestamp; ok=%t", ok) } - if rTS, ok := tc2.GetMaxRead(roachpb.Key("a"), roachpb.Key("c"), nil); !rTS.Equal(aaTS) || !ok { + if rTS, _, ok := tc2.GetMaxRead(roachpb.Key("a"), roachpb.Key("c")); !rTS.Equal(aaTS) || !ok { t.Errorf("expected \"a\"-\"c\" to have aaTS timestamp; ok=%t", ok) } @@ -278,67 +279,73 @@ func TestTimestampCacheMergeInto(t *testing.T) { } type layeredIntervalTestCase struct { - actions []func(tc *timestampCache, ts hlc.Timestamp) - validator func(t *testing.T, tc *timestampCache, tss []hlc.Timestamp) + spans []roachpb.Span + validator func(t *testing.T, tc *timestampCache, tss []hlc.Timestamp, txns []*uuid.UUID) +} + +// assertTS is a helper function for layeredIntervalTestCase +// validators. It queries the timestamp cache for the given keys and +// reports a test error if it doesn't match the given timestamp and +// transaction ID. +func assertTS( + t *testing.T, + tc *timestampCache, + start, end roachpb.Key, + expectedTS hlc.Timestamp, + expectedTxnID *uuid.UUID, +) { + var keys string + if len(end) == 0 { + keys = fmt.Sprintf("%q", start) + } else { + keys = fmt.Sprintf("%q-%q", start, end) + } + ts, txnID, _ := tc.GetMaxRead(start, end) + if !ts.Equal(expectedTS) { + t.Errorf("expected %s to have timestamp %v, found %v", keys, expectedTS, ts) + } + if expectedTxnID == nil { + if txnID != nil { + t.Errorf("expected %s to have no txn id, but found %s", keys, txnID.Short()) + } + } else { + if txnID == nil { + t.Errorf("expected %s to have txn id %s, but found nil", keys, expectedTxnID.Short()) + } else if !uuid.Equal(*txnID, *expectedTxnID) { + t.Errorf("expected %s to have txn id %s, but found %s", + keys, expectedTxnID.Short(), txnID.Short()) + } + } } // layeredIntervalTestCase1 tests the left partial overlap and old containing // new cases for adding intervals to the interval cache when tested in order, // and tests the cases' inverses when tested in reverse. var layeredIntervalTestCase1 = layeredIntervalTestCase{ - actions: []func(tc *timestampCache, ts hlc.Timestamp){ - func(tc *timestampCache, ts hlc.Timestamp) { - // No overlap forwards. - // Right partial overlap backwards. - tc.add(roachpb.Key("a"), roachpb.Key("bb"), ts, nil, true) - }, - func(tc *timestampCache, ts hlc.Timestamp) { - // Left partial overlap forwards. - // New contains old backwards. - tc.add(roachpb.Key("b"), roachpb.Key("e"), ts, nil, true) - }, - func(tc *timestampCache, ts hlc.Timestamp) { - // Old contains new forwards. - // No overlap backwards. - tc.add(roachpb.Key("c"), nil, ts, nil, true) - }, + spans: []roachpb.Span{ + // No overlap forwards. + // Right partial overlap backwards. + {Key: roachpb.Key("a"), EndKey: roachpb.Key("bb")}, + // Left partial overlap forwards. + // New contains old backwards. + {Key: roachpb.Key("b"), EndKey: roachpb.Key("e")}, + // Old contains new forwards. + // No overlap backwards. + {Key: roachpb.Key("c")}, }, - validator: func(t *testing.T, tc *timestampCache, tss []hlc.Timestamp) { - abbTS := tss[0] - beTS := tss[1] - cTS := tss[2] - - // Try different sub ranges. - if rTS, _ := tc.GetMaxRead(roachpb.Key("a"), nil, nil); !rTS.Equal(abbTS) { - t.Errorf("expected \"a\" to have abbTS %v timestamp, found %v", abbTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("b"), nil, nil); !rTS.Equal(beTS) { - t.Errorf("expected \"b\" to have beTS %v timestamp, found %v", beTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("c"), nil, nil); !rTS.Equal(cTS) { - t.Errorf("expected \"c\" to have cTS %v timestamp, found %v", cTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("d"), nil, nil); !rTS.Equal(beTS) { - t.Errorf("expected \"d\" to have beTS %v timestamp, found %v", beTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("b"), nil); !rTS.Equal(abbTS) { - t.Errorf("expected \"a\"-\"b\" to have abbTS %v timestamp, found %v", cTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("c"), nil); !rTS.Equal(beTS) { - t.Errorf("expected \"a\"-\"c\" to have beTS %v timestamp, found %v", beTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("d"), nil); !rTS.Equal(cTS) { - t.Errorf("expected \"a\"-\"d\" to have cTS %v timestamp, found %v", cTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("b"), roachpb.Key("d"), nil); !rTS.Equal(cTS) { - t.Errorf("expected \"b\"-\"d\" to have cTS %v timestamp, found %v", cTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("c"), roachpb.Key("d"), nil); !rTS.Equal(cTS) { - t.Errorf("expected \"c\"-\"d\" to have cTS %v timestamp, found %v", cTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("c0"), roachpb.Key("d"), nil); !rTS.Equal(beTS) { - t.Errorf("expected \"c0\"-\"d\" to have beTS %v timestamp, found %v", beTS, rTS) - } + validator: func(t *testing.T, tc *timestampCache, tss []hlc.Timestamp, txns []*uuid.UUID) { + abbIdx, beIdx, cIdx := 0, 1, 2 + + assertTS(t, tc, roachpb.Key("a"), nil, tss[abbIdx], txns[abbIdx]) + assertTS(t, tc, roachpb.Key("b"), nil, tss[beIdx], txns[beIdx]) + assertTS(t, tc, roachpb.Key("c"), nil, tss[cIdx], txns[cIdx]) + assertTS(t, tc, roachpb.Key("d"), nil, tss[beIdx], txns[beIdx]) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("b"), tss[abbIdx], txns[abbIdx]) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("c"), tss[beIdx], txns[beIdx]) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("d"), tss[cIdx], txns[cIdx]) + assertTS(t, tc, roachpb.Key("b"), roachpb.Key("d"), tss[cIdx], txns[cIdx]) + assertTS(t, tc, roachpb.Key("c"), roachpb.Key("d"), tss[cIdx], txns[cIdx]) + assertTS(t, tc, roachpb.Key("c0"), roachpb.Key("d"), tss[beIdx], txns[beIdx]) }, } @@ -346,52 +353,28 @@ var layeredIntervalTestCase1 = layeredIntervalTestCase{ // old cases for adding intervals to the interval cache when tested in order, // and tests the cases' inverses when tested in reverse. var layeredIntervalTestCase2 = layeredIntervalTestCase{ - actions: []func(tc *timestampCache, ts hlc.Timestamp){ - func(tc *timestampCache, ts hlc.Timestamp) { - // No overlap forwards. - // Old contains new backwards. - tc.add(roachpb.Key("d"), roachpb.Key("f"), ts, nil, true) - }, - func(tc *timestampCache, ts hlc.Timestamp) { - // New contains old forwards. - // Left partial overlap backwards. - tc.add(roachpb.Key("b"), roachpb.Key("f"), ts, nil, true) - }, - func(tc *timestampCache, ts hlc.Timestamp) { - // Right partial overlap forwards. - // No overlap backwards. - tc.add(roachpb.Key("a"), roachpb.Key("c"), ts, nil, true) - }, + spans: []roachpb.Span{ + // No overlap forwards. + // Old contains new backwards. + {Key: roachpb.Key("d"), EndKey: roachpb.Key("f")}, + // New contains old forwards. + // Left partial overlap backwards. + {Key: roachpb.Key("b"), EndKey: roachpb.Key("f")}, + // Right partial overlap forwards. + // No overlap backwards. + {Key: roachpb.Key("a"), EndKey: roachpb.Key("c")}, }, - validator: func(t *testing.T, tc *timestampCache, tss []hlc.Timestamp) { - bfTS := tss[1] - acTS := tss[2] - - // Try different sub ranges. - if rTS, _ := tc.GetMaxRead(roachpb.Key("a"), nil, nil); !rTS.Equal(acTS) { - t.Errorf("expected \"a\" to have acTS %v timestamp, found %v", acTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("b"), nil, nil); !rTS.Equal(acTS) { - t.Errorf("expected \"b\" to have acTS %v timestamp, found %v", acTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("c"), nil, nil); !rTS.Equal(bfTS) { - t.Errorf("expected \"c\" to have bfTS %v timestamp, found %v", bfTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("d"), nil, nil); !rTS.Equal(bfTS) { - t.Errorf("expected \"d\" to have bfTS %v timestamp, found %v", bfTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("c"), nil); !rTS.Equal(acTS) { - t.Errorf("expected \"a\"-\"c\" to have acTS %v timestamp, found %v", acTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("b"), roachpb.Key("d"), nil); !rTS.Equal(acTS) { - t.Errorf("expected \"b\"-\"d\" to have acTS %v timestamp, found %v", acTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("c"), roachpb.Key("d"), nil); !rTS.Equal(bfTS) { - t.Errorf("expected \"c\"-\"d\" to have bfTS %v timestamp, found %v", bfTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("c0"), roachpb.Key("d"), nil); !rTS.Equal(bfTS) { - t.Errorf("expected \"c0\"-\"d\" to have bfTS %v timestamp, found %v", bfTS, rTS) - } + validator: func(t *testing.T, tc *timestampCache, tss []hlc.Timestamp, txns []*uuid.UUID) { + _, bfIdx, acIdx := 0, 1, 2 + + assertTS(t, tc, roachpb.Key("a"), nil, tss[acIdx], txns[acIdx]) + assertTS(t, tc, roachpb.Key("b"), nil, tss[acIdx], txns[acIdx]) + assertTS(t, tc, roachpb.Key("c"), nil, tss[bfIdx], txns[bfIdx]) + assertTS(t, tc, roachpb.Key("d"), nil, tss[bfIdx], txns[bfIdx]) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("c"), tss[acIdx], txns[acIdx]) + assertTS(t, tc, roachpb.Key("b"), roachpb.Key("d"), tss[acIdx], txns[acIdx]) + assertTS(t, tc, roachpb.Key("c"), roachpb.Key("d"), tss[bfIdx], txns[bfIdx]) + assertTS(t, tc, roachpb.Key("c0"), roachpb.Key("d"), tss[bfIdx], txns[bfIdx]) }, } @@ -399,41 +382,23 @@ var layeredIntervalTestCase2 = layeredIntervalTestCase{ // for adding intervals to the interval cache when tested in order, and // tests a left partial overlap with a shared end when tested in reverse. var layeredIntervalTestCase3 = layeredIntervalTestCase{ - actions: []func(tc *timestampCache, ts hlc.Timestamp){ - func(tc *timestampCache, ts hlc.Timestamp) { - // No overlap forwards. - // Right partial overlap backwards. - tc.add(roachpb.Key("a"), roachpb.Key("c"), ts, nil, true) - }, - func(tc *timestampCache, ts hlc.Timestamp) { - // Left partial overlap forwards. - // No overlap backwards. - tc.add(roachpb.Key("b"), roachpb.Key("c"), ts, nil, true) - }, + spans: []roachpb.Span{ + // No overlap forwards. + // Right partial overlap backwards. + {Key: roachpb.Key("a"), EndKey: roachpb.Key("c")}, + // Left partial overlap forwards. + // No overlap backwards. + {Key: roachpb.Key("b"), EndKey: roachpb.Key("c")}, }, - validator: func(t *testing.T, tc *timestampCache, tss []hlc.Timestamp) { - acTS := tss[0] - bcTS := tss[1] - - // Try different sub ranges. - if rTS, _ := tc.GetMaxRead(roachpb.Key("a"), nil, nil); !rTS.Equal(acTS) { - t.Errorf("expected \"a\" to have acTS %v timestamp, found %v", acTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("b"), nil, nil); !rTS.Equal(bcTS) { - t.Errorf("expected \"b\" to have bcTS %v timestamp, found %v", bcTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("c"), nil, nil); !rTS.Equal(tc.lowWater) { - t.Errorf("expected \"c\" to have low water %v timestamp, found %v", tc.lowWater, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("c"), nil); !rTS.Equal(bcTS) { - t.Errorf("expected \"a\"-\"c\" to have bcTS %v timestamp, found %v", bcTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("b"), nil); !rTS.Equal(acTS) { - t.Errorf("expected \"a\"-\"b\" to have acTS %v timestamp, found %v", acTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("b"), roachpb.Key("c"), nil); !rTS.Equal(bcTS) { - t.Errorf("expected \"b\"-\"c\" to have bcTS %v timestamp, found %v", bcTS, rTS) - } + validator: func(t *testing.T, tc *timestampCache, tss []hlc.Timestamp, txns []*uuid.UUID) { + acIdx, bcIdx := 0, 1 + + assertTS(t, tc, roachpb.Key("a"), nil, tss[acIdx], txns[acIdx]) + assertTS(t, tc, roachpb.Key("b"), nil, tss[bcIdx], txns[bcIdx]) + assertTS(t, tc, roachpb.Key("c"), nil, tc.lowWater, nil) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("c"), tss[bcIdx], txns[bcIdx]) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("b"), tss[acIdx], txns[acIdx]) + assertTS(t, tc, roachpb.Key("b"), roachpb.Key("c"), tss[bcIdx], txns[bcIdx]) }, } @@ -441,41 +406,23 @@ var layeredIntervalTestCase3 = layeredIntervalTestCase{ // for adding intervals to the interval cache when tested in order, and // tests a right partial overlap with a shared start when tested in reverse. var layeredIntervalTestCase4 = layeredIntervalTestCase{ - actions: []func(tc *timestampCache, ts hlc.Timestamp){ - func(tc *timestampCache, ts hlc.Timestamp) { - // No overlap forwards. - // Left partial overlap backwards. - tc.add(roachpb.Key("a"), roachpb.Key("c"), ts, nil, true) - }, - func(tc *timestampCache, ts hlc.Timestamp) { - // Right partial overlap forwards. - // No overlap backwards. - tc.add(roachpb.Key("a"), roachpb.Key("b"), ts, nil, true) - }, + spans: []roachpb.Span{ + // No overlap forwards. + // Left partial overlap backwards. + {Key: roachpb.Key("a"), EndKey: roachpb.Key("c")}, + // Right partial overlap forwards. + // No overlap backwards. + {Key: roachpb.Key("a"), EndKey: roachpb.Key("b")}, }, - validator: func(t *testing.T, tc *timestampCache, tss []hlc.Timestamp) { - acTS := tss[0] - abTS := tss[1] - - // Try different sub ranges. - if rTS, _ := tc.GetMaxRead(roachpb.Key("a"), nil, nil); !rTS.Equal(abTS) { - t.Errorf("expected \"a\" to have abTS %v timestamp, found %v", abTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("b"), nil, nil); !rTS.Equal(acTS) { - t.Errorf("expected \"b\" to have acTS %v timestamp, found %v", acTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("c"), nil, nil); !rTS.Equal(tc.lowWater) { - t.Errorf("expected \"c\" to have low water %v timestamp, found %v", tc.lowWater, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("c"), nil); !rTS.Equal(abTS) { - t.Errorf("expected \"a\"-\"c\" to have abTS %v timestamp, found %v", abTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("b"), nil); !rTS.Equal(abTS) { - t.Errorf("expected \"a\"-\"b\" to have abTS %v timestamp, found %v", abTS, rTS) - } - if rTS, _ := tc.GetMaxRead(roachpb.Key("b"), roachpb.Key("c"), nil); !rTS.Equal(acTS) { - t.Errorf("expected \"b\"-\"c\" to have acTS %v timestamp, found %v", acTS, rTS) - } + validator: func(t *testing.T, tc *timestampCache, tss []hlc.Timestamp, txns []*uuid.UUID) { + acIdx, abIdx := 0, 1 + + assertTS(t, tc, roachpb.Key("a"), nil, tss[abIdx], txns[abIdx]) + assertTS(t, tc, roachpb.Key("b"), nil, tss[acIdx], txns[acIdx]) + assertTS(t, tc, roachpb.Key("c"), nil, tc.lowWater, nil) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("c"), tss[abIdx], txns[abIdx]) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("b"), tss[abIdx], txns[abIdx]) + assertTS(t, tc, roachpb.Key("b"), roachpb.Key("c"), tss[acIdx], txns[acIdx]) }, } @@ -494,33 +441,41 @@ func TestTimestampCacheLayeredIntervals(t *testing.T) { clock.SetMaxOffset(0) tc := newTimestampCache(clock) - for _, testCase := range []layeredIntervalTestCase{ + for testCaseIdx, testCase := range []layeredIntervalTestCase{ layeredIntervalTestCase1, layeredIntervalTestCase2, layeredIntervalTestCase3, layeredIntervalTestCase4, } { + t.Logf("test case %d", testCaseIdx+1) + tss := make([]hlc.Timestamp, len(testCase.spans)) + txns := make([]*uuid.UUID, len(testCase.spans)) + for i := range testCase.spans { + txns[i] = uuid.NewV4() + } + // Perform actions in order and validate. + t.Log("in order") tc.Clear(clock) - tss := make([]hlc.Timestamp, len(testCase.actions)) - for i := range testCase.actions { + for i := range testCase.spans { tss[i] = clock.Now() } - for i, action := range testCase.actions { - action(tc, tss[i]) + for i, span := range testCase.spans { + tc.add(span.Key, span.EndKey, tss[i], txns[i], true) } - testCase.validator(t, tc, tss) + testCase.validator(t, tc, tss, txns) // Perform actions out of order and validate. + t.Log("reverse order") tc.Clear(clock) - for i := range testCase.actions { + for i := range testCase.spans { // Recreate timestamps because Clear() sets lowWater to Now(). tss[i] = clock.Now() } - for i := len(testCase.actions) - 1; i >= 0; i-- { - testCase.actions[i](tc, tss[i]) + for i := len(testCase.spans) - 1; i >= 0; i-- { + tc.add(testCase.spans[i].Key, testCase.spans[i].EndKey, tss[i], txns[i], true) } - testCase.validator(t, tc, tss) + testCase.validator(t, tc, tss, txns) } } @@ -543,95 +498,11 @@ func TestTimestampCacheClear(t *testing.T) { // Fetching any keys should give current time + maxClockOffset expTS := clock.Timestamp() expTS.WallTime += maxClockOffset.Nanoseconds() - if rTS, ok := tc.GetMaxRead(roachpb.Key("a"), nil, nil); !rTS.Equal(expTS) || ok { + if rTS, _, ok := tc.GetMaxRead(roachpb.Key("a"), nil); !rTS.Equal(expTS) || ok { t.Errorf("expected \"a\" to have cleared timestamp; exp ok=false; got %t", ok) } } -// TestTimestampCacheReplacements verifies that a newer entry -// in the timestamp cache which completely "covers" an older -// entry will replace it. -func TestTimestampCacheReplacements(t *testing.T) { - defer leaktest.AfterTest(t)() - manual := hlc.NewManualClock(0) - clock := hlc.NewClock(manual.UnixNano) - tc := newTimestampCache(clock) - - txn1ID := uuid.NewV4() - txn2ID := uuid.NewV4() - - ts1 := clock.Now() - tc.add(roachpb.Key("a"), nil, ts1, nil, true) - if ts, ok := tc.GetMaxRead(roachpb.Key("a"), nil, nil); !ts.Equal(ts1) || !ok { - t.Errorf("expected %s; got %s; ok=%t", ts1, ts, ok) - } - // Write overlapping value with txn1 and verify with txn1--we should get - // low water mark, not ts1. - ts2 := clock.Now() - tc.add(roachpb.Key("a"), nil, ts2, txn1ID, true) - if ts, ok := tc.GetMaxRead(roachpb.Key("a"), nil, txn1ID); !ts.Equal(tc.lowWater) || ok { - t.Errorf("expected low water (empty) time; got %s; ok=%t", ts, ok) - } - // Write range which overlaps "a" with txn2 and verify with txn2--we should - // get low water mark, not ts2. - ts3 := clock.Now() - tc.add(roachpb.Key("a"), roachpb.Key("c"), ts3, txn2ID, true) - if ts, ok := tc.GetMaxRead(roachpb.Key("a"), nil, txn2ID); !ts.Equal(tc.lowWater) || ok { - t.Errorf("expected low water (empty) time; got %s; ok=%t", ts, ok) - } - // Also, verify txn1 sees ts3. - if ts, ok := tc.GetMaxRead(roachpb.Key("a"), nil, txn1ID); !ts.Equal(ts3) || !ok { - t.Errorf("expected %s; got %s; ok=%t", ts3, ts, ok) - } - // Now, write to "b" with a higher timestamp and no txn. Should be - // visible to all txns. - ts4 := clock.Now() - tc.add(roachpb.Key("b"), nil, ts4, nil, true) - if ts, ok := tc.GetMaxRead(roachpb.Key("b"), nil, nil); !ts.Equal(ts4) || !ok { - t.Errorf("expected %s; got %s; ok=%t", ts4, ts, ok) - } - if ts, ok := tc.GetMaxRead(roachpb.Key("b"), nil, txn1ID); !ts.Equal(ts4) || !ok { - t.Errorf("expected %s; got %s; ok=%t", ts4, ts, ok) - } - // Finally, write an earlier version of "a"; should simply get - // tossed and we should see ts4 still. - tc.add(roachpb.Key("b"), nil, ts1, nil, true) - if ts, ok := tc.GetMaxRead(roachpb.Key("b"), nil, nil); !ts.Equal(ts4) || !ok { - t.Errorf("expected %s; got %s; ok=%t", ts4, ts, ok) - } -} - -// TestTimestampCacheWithTxnID verifies that timestamps matching -// the specified txn ID are ignored. -func TestTimestampCacheWithTxnID(t *testing.T) { - defer leaktest.AfterTest(t)() - manual := hlc.NewManualClock(0) - clock := hlc.NewClock(manual.UnixNano) - tc := newTimestampCache(clock) - - // Add two successive txn entries. - txn1ID := uuid.NewV4() - txn2ID := uuid.NewV4() - ts1 := clock.Now() - tc.add(roachpb.Key("a"), roachpb.Key("c"), ts1, txn1ID, true) - ts2 := clock.Now() - // This entry will remove "a"-"b" from the cache. - tc.add(roachpb.Key("b"), roachpb.Key("d"), ts2, txn2ID, true) - - // Fetching with no transaction gets latest value. - if ts, ok := tc.GetMaxRead(roachpb.Key("b"), nil, nil); !ts.Equal(ts2) || !ok { - t.Errorf("expected %s; got %s; ok=%t", ts2, ts, ok) - } - // Fetching with txn ID "1" gets most recent. - if ts, ok := tc.GetMaxRead(roachpb.Key("b"), nil, txn1ID); !ts.Equal(ts2) || !ok { - t.Errorf("expected %s; got %s; ok=%t", ts2, ts, ok) - } - // Fetching with txn ID "2" skips most recent. - if ts, ok := tc.GetMaxRead(roachpb.Key("b"), nil, txn2ID); !ts.Equal(tc.lowWater) || ok { - t.Errorf("expected %s; got %s; ok=%t", ts1, ts, ok) - } -} - // TestTimestampCacheReadVsWrite verifies that the timestamp cache // can differentiate between read and write timestamp. func TestTimestampCacheReadVsWrite(t *testing.T) { @@ -652,23 +523,48 @@ func TestTimestampCacheReadVsWrite(t *testing.T) { ts3 := clock.Now() tc.add(roachpb.Key("a"), nil, ts3, txn2ID, false) - // Fetching with no transaction gets latest values. - rTS, rOK := tc.GetMaxRead(roachpb.Key("a"), nil, nil) - wTS, wOK := tc.GetMaxWrite(roachpb.Key("a"), nil, nil) + rTS, _, rOK := tc.GetMaxRead(roachpb.Key("a"), nil) + wTS, _, wOK := tc.GetMaxWrite(roachpb.Key("a"), nil) if !rTS.Equal(ts2) || !wTS.Equal(ts3) || !rOK || !wOK { t.Errorf("expected %s %s; got %s %s; rOK=%t, wOK=%t", ts2, ts3, rTS, wTS, rOK, wOK) } - // Fetching with txn ID "1" gets low water mark for read and most recent for write. - rTS, rOK = tc.GetMaxRead(roachpb.Key("a"), nil, txn1ID) - wTS, wOK = tc.GetMaxWrite(roachpb.Key("a"), nil, txn1ID) - if !rTS.Equal(tc.lowWater) || !wTS.Equal(ts3) || rOK || !wOK { - t.Errorf("expected %s %s; got %s %s; rOK=%t, wOK=%t", ts1, ts3, rTS, wTS, rOK, wOK) - } - // Fetching with txn ID "2" gets ts2 for read and low water mark for write. - rTS, rOK = tc.GetMaxRead(roachpb.Key("a"), nil, txn2ID) - wTS, wOK = tc.GetMaxWrite(roachpb.Key("a"), nil, txn2ID) - if !rTS.Equal(ts2) || !wTS.Equal(tc.lowWater) || !rOK || wOK { - t.Errorf("expected %s %s; got %s %s; rOK=%t, wOK=%t", ts2, tc.lowWater, rTS, wTS, rOK, wOK) +} + +// TestTimestampCacheEqualTimestamp verifies that in the event of two +// non-overlapping transactions with equal timestamps, the returned +// timestamp is not owned by either one. +func TestTimestampCacheEqualTimestamps(t *testing.T) { + defer leaktest.AfterTest(t)() + manual := hlc.NewManualClock(0) + clock := hlc.NewClock(manual.UnixNano) + tc := newTimestampCache(clock) + + txn1 := uuid.NewV4() + txn2 := uuid.NewV4() + + // Add two non-overlapping transactions at the same timestamp. + ts1 := clock.Now() + tc.add(roachpb.Key("a"), roachpb.Key("b"), ts1, txn1, true) + tc.add(roachpb.Key("b"), roachpb.Key("c"), ts1, txn2, true) + + // When querying either side separately, the transaction ID is returned. + if ts, txn, _ := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("b")); !ts.Equal(ts1) { + t.Errorf("expected 'a'-'b' to have timestamp %s, but found %s", ts1, ts) + } else if !uuid.Equal(*txn, *txn1) { + t.Errorf("expected 'a'-'b' to have txn id %s, but found %s", txn1, txn) + } + if ts, txn, _ := tc.GetMaxRead(roachpb.Key("b"), roachpb.Key("c")); !ts.Equal(ts1) { + t.Errorf("expected 'b'-'c' to have timestamp %s, but found %s", ts1, ts) + } else if !uuid.Equal(*txn, *txn2) { + t.Errorf("expected 'b'-'c' to have txn id %s, but found %s", txn2, txn) + } + + // Querying a span that overlaps both returns a nil txn ID; neither + // can proceed here. + if ts, txn, _ := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("c")); !ts.Equal(ts1) { + t.Errorf("expected 'a'-'c' to have timestamp %s, but found %s", ts1, ts) + } else if txn != nil { + t.Errorf("expected 'a'-'c' to have nil txn id, but found %s", txn) } } From c0fd012980b1f9e2394c5e2b357a522e98b9ca8c Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Sun, 4 Sep 2016 18:36:00 +0800 Subject: [PATCH 2/5] storage: Handle timestamp collisions in timestampCache When two intervals in the timestamp cache have the same timestamp, neither of them can be said to own that timestamp. We must adjust intervals and clear transaction IDs whenever this collision occurs. Failure to do so allowed one transaction to write after another transaction had read at the same timestamp, leading to a violation of serializability. Fixes #9083 --- storage/timestamp_cache.go | 152 ++++++++++++++++++++++++- storage/timestamp_cache_test.go | 190 +++++++++++++++++++++----------- 2 files changed, 273 insertions(+), 69 deletions(-) diff --git a/storage/timestamp_cache.go b/storage/timestamp_cache.go index a4f41a4bd261..78497ba54ea2 100644 --- a/storage/timestamp_cache.go +++ b/storage/timestamp_cache.go @@ -220,9 +220,10 @@ func (tc *timestampCache) add( key := entry.Key.(*cache.IntervalKey) sCmp := r.Start.Compare(key.Start) eCmp := r.End.Compare(key.End) - if !timestamp.Less(cv.timestamp) { - // The existing interval has a timestamp less than or equal to the new interval. - // Compare interval ranges to determine how to modify existing interval. + if cv.timestamp.Less(timestamp) { + // The existing interval has a timestamp less than the new + // interval. Compare interval ranges to determine how to + // modify existing interval. switch { case sCmp == 0 && eCmp == 0: // New and old are equal; replace old with new and avoid the need to insert new. @@ -274,7 +275,7 @@ func (tc *timestampCache) add( default: panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r)) } - } else { + } else if timestamp.Less(cv.timestamp) { // The existing interval has a timestamp greater than the new interval. // Compare interval ranges to determine how to modify new interval before // adding it to the timestamp cache. @@ -319,6 +320,149 @@ func (tc *timestampCache) add( default: panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r)) } + } else { + // The existing interval has a timestamp equal to the new + // interval. Compare interval ranges to determine how to + // modify existing interval. + + // If the intervals have two different transactions, we must + // clear the transaction id. + clearTxnIfDifferent := func(a **uuid.UUID, b *uuid.UUID) { + if b == nil || (*a != nil && !uuid.Equal(**a, *b)) { + *a = nil + } + } + switch { + case sCmp == 0 && eCmp == 0: + // New and old are equal; replace old with new and avoid the + // need to insert new. Segment is no longer owned by any + // transaction. + // + // New: ------------ + // Old: ------------ + // + // Nil: ------------ + clearTxnIfDifferent(&cv.txnID, txnID) + tcache.MoveToEnd(entry) + return + case sCmp == 0 && eCmp > 0: + // New contains old, left-aligned. Clear ownership of the + // existing segment and truncate new. + // + // New: ------------ + // Old: ---------- + // + // New: -- + // Nil: ---------- + clearTxnIfDifferent(&cv.txnID, txnID) + r.Start = key.End + case sCmp < 0 && eCmp == 0: + // New contains old, right-aligned. Clear ownership of the + // existing segment and truncate new. + // + // New: ------------ + // Old: ---------- + // + // New: -- + // Nil: ---------- + clearTxnIfDifferent(&cv.txnID, txnID) + r.End = key.Start + addRange(r) + return + case sCmp < 0 && eCmp > 0: + // New contains old; split into three segments with the + // overlap owned by no txn. + // + // New: ------------ + // Old: -------- + // + // New: -- -- + // Nil: -------- + clearTxnIfDifferent(&cv.txnID, txnID) + newKey := tcache.MakeKey(r.Start, key.Start) + newEntry := makeCacheEntry(newKey, cacheValue{timestamp: timestamp, txnID: txnID}) + tcache.AddEntryAfter(newEntry, entry) + r.Start = key.End + case sCmp > 0 && eCmp < 0: + // Old contains new; split up old into two. New segment is + // owned by no txn. + // + // New: ---- + // Old: ------------ + // + // Nil: ---- + // Old: ---- ---- + oldEnd := key.End + key.End = r.Start + clearTxnIfDifferent(&txnID, cv.txnID) + + key := tcache.MakeKey(r.End, oldEnd) + newEntry := makeCacheEntry(key, *cv) + tcache.AddEntryAfter(newEntry, entry) + case eCmp == 0: + // Right-aligned partial overlap; truncate old end and clear ownership of + // new segment. + // + // New: -------- + // Old: ------------ + // + // New: + // Nil: -------- + // Old: ---- + key.End = r.Start + clearTxnIfDifferent(&txnID, cv.txnID) + case eCmp > 0: + // Left partial overlap; truncate old end and split new into + // segments owned by no txn (the overlap) and the new txn. + // + // New: -------- + // Old: -------- + // + // New: ---- + // Nil: ---- + // Old: ---- + key.End, r.Start = r.Start, key.End + key := tcache.MakeKey(key.End, r.Start) + newCV := cacheValue{timestamp: cv.timestamp, txnID: txnID} + clearTxnIfDifferent(&newCV.txnID, cv.txnID) + newEntry := makeCacheEntry(key, newCV) + tcache.AddEntryAfter(newEntry, entry) + case sCmp == 0: + // Left-aligned partial overlap; truncate old start and + // clear ownership of new segment. + // New: -------- + // Old: ------------ + // + // New: + // Nil: -------- + // Old: ---- + key.Start = r.End + clearTxnIfDifferent(&txnID, cv.txnID) + case sCmp < 0: + // Right partial overlap; truncate old start and split new into + // segments owned by no txn (the overlap) and the new txn. + // + // New: -------- + // Old: -------- + // + // New: ---- + // Nil: ---- + // Old: ---- + key.Start, r.End = r.End, key.Start + key := tcache.MakeKey(r.End, key.Start) + newCV := cacheValue{timestamp: cv.timestamp, txnID: txnID} + clearTxnIfDifferent(&newCV.txnID, cv.txnID) + newEntry := makeCacheEntry(key, newCV) + tcache.AddEntryAfter(newEntry, entry) + // We can add the new range now because it is guaranteed to + // be any other overlaps; we ust do so because we've changed + // our boundaries and continuing to iterate may hit the "no + // overlap" panic. + addRange(r) + return + default: + panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r)) + } } } addRange(r) diff --git a/storage/timestamp_cache_test.go b/storage/timestamp_cache_test.go index a183ff09d653..1c14e9bdaa6b 100644 --- a/storage/timestamp_cache_test.go +++ b/storage/timestamp_cache_test.go @@ -278,9 +278,14 @@ func TestTimestampCacheMergeInto(t *testing.T) { } } +type txnState struct { + ts hlc.Timestamp + id *uuid.UUID +} + type layeredIntervalTestCase struct { spans []roachpb.Span - validator func(t *testing.T, tc *timestampCache, tss []hlc.Timestamp, txns []*uuid.UUID) + validator func(t *testing.T, tc *timestampCache, txns []txnState) } // assertTS is a helper function for layeredIntervalTestCase @@ -318,6 +323,18 @@ func assertTS( } } +// nilIfSimul returns nil if this test involves multiple transactions +// with the same timestamp (i.e. the timestamps in txns are identical +// but the transaction ids are not), and the given txnID if they are +// not. This is because timestampCache.GetMaxRead must not return a +// transaction ID when two different transactions have the same timestamp. +func nilIfSimul(txns []txnState, txnID *uuid.UUID) *uuid.UUID { + if txns[0].ts.Equal(txns[1].ts) && !uuid.Equal(*txns[0].id, *txns[1].id) { + return nil + } + return txnID +} + // layeredIntervalTestCase1 tests the left partial overlap and old containing // new cases for adding intervals to the interval cache when tested in order, // and tests the cases' inverses when tested in reverse. @@ -333,19 +350,19 @@ var layeredIntervalTestCase1 = layeredIntervalTestCase{ // No overlap backwards. {Key: roachpb.Key("c")}, }, - validator: func(t *testing.T, tc *timestampCache, tss []hlc.Timestamp, txns []*uuid.UUID) { - abbIdx, beIdx, cIdx := 0, 1, 2 - - assertTS(t, tc, roachpb.Key("a"), nil, tss[abbIdx], txns[abbIdx]) - assertTS(t, tc, roachpb.Key("b"), nil, tss[beIdx], txns[beIdx]) - assertTS(t, tc, roachpb.Key("c"), nil, tss[cIdx], txns[cIdx]) - assertTS(t, tc, roachpb.Key("d"), nil, tss[beIdx], txns[beIdx]) - assertTS(t, tc, roachpb.Key("a"), roachpb.Key("b"), tss[abbIdx], txns[abbIdx]) - assertTS(t, tc, roachpb.Key("a"), roachpb.Key("c"), tss[beIdx], txns[beIdx]) - assertTS(t, tc, roachpb.Key("a"), roachpb.Key("d"), tss[cIdx], txns[cIdx]) - assertTS(t, tc, roachpb.Key("b"), roachpb.Key("d"), tss[cIdx], txns[cIdx]) - assertTS(t, tc, roachpb.Key("c"), roachpb.Key("d"), tss[cIdx], txns[cIdx]) - assertTS(t, tc, roachpb.Key("c0"), roachpb.Key("d"), tss[beIdx], txns[beIdx]) + validator: func(t *testing.T, tc *timestampCache, txns []txnState) { + abbTx, beTx, cTx := txns[0], txns[1], txns[2] + + assertTS(t, tc, roachpb.Key("a"), nil, abbTx.ts, abbTx.id) + assertTS(t, tc, roachpb.Key("b"), nil, beTx.ts, nilIfSimul(txns, beTx.id)) + assertTS(t, tc, roachpb.Key("c"), nil, cTx.ts, nilIfSimul(txns, cTx.id)) + assertTS(t, tc, roachpb.Key("d"), nil, beTx.ts, beTx.id) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("b"), abbTx.ts, abbTx.id) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("c"), beTx.ts, nilIfSimul(txns, beTx.id)) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("d"), cTx.ts, nilIfSimul(txns, cTx.id)) + assertTS(t, tc, roachpb.Key("b"), roachpb.Key("d"), cTx.ts, nilIfSimul(txns, cTx.id)) + assertTS(t, tc, roachpb.Key("c"), roachpb.Key("d"), cTx.ts, nilIfSimul(txns, cTx.id)) + assertTS(t, tc, roachpb.Key("c0"), roachpb.Key("d"), beTx.ts, beTx.id) }, } @@ -364,17 +381,17 @@ var layeredIntervalTestCase2 = layeredIntervalTestCase{ // No overlap backwards. {Key: roachpb.Key("a"), EndKey: roachpb.Key("c")}, }, - validator: func(t *testing.T, tc *timestampCache, tss []hlc.Timestamp, txns []*uuid.UUID) { - _, bfIdx, acIdx := 0, 1, 2 - - assertTS(t, tc, roachpb.Key("a"), nil, tss[acIdx], txns[acIdx]) - assertTS(t, tc, roachpb.Key("b"), nil, tss[acIdx], txns[acIdx]) - assertTS(t, tc, roachpb.Key("c"), nil, tss[bfIdx], txns[bfIdx]) - assertTS(t, tc, roachpb.Key("d"), nil, tss[bfIdx], txns[bfIdx]) - assertTS(t, tc, roachpb.Key("a"), roachpb.Key("c"), tss[acIdx], txns[acIdx]) - assertTS(t, tc, roachpb.Key("b"), roachpb.Key("d"), tss[acIdx], txns[acIdx]) - assertTS(t, tc, roachpb.Key("c"), roachpb.Key("d"), tss[bfIdx], txns[bfIdx]) - assertTS(t, tc, roachpb.Key("c0"), roachpb.Key("d"), tss[bfIdx], txns[bfIdx]) + validator: func(t *testing.T, tc *timestampCache, txns []txnState) { + _, bfTx, acTx := txns[0], txns[1], txns[2] + + assertTS(t, tc, roachpb.Key("a"), nil, acTx.ts, acTx.id) + assertTS(t, tc, roachpb.Key("b"), nil, acTx.ts, nilIfSimul(txns, acTx.id)) + assertTS(t, tc, roachpb.Key("c"), nil, bfTx.ts, bfTx.id) + assertTS(t, tc, roachpb.Key("d"), nil, bfTx.ts, nilIfSimul(txns, bfTx.id)) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("c"), acTx.ts, nilIfSimul(txns, acTx.id)) + assertTS(t, tc, roachpb.Key("b"), roachpb.Key("d"), acTx.ts, nilIfSimul(txns, acTx.id)) + assertTS(t, tc, roachpb.Key("c"), roachpb.Key("d"), bfTx.ts, bfTx.id) + assertTS(t, tc, roachpb.Key("c0"), roachpb.Key("d"), bfTx.ts, bfTx.id) }, } @@ -390,15 +407,15 @@ var layeredIntervalTestCase3 = layeredIntervalTestCase{ // No overlap backwards. {Key: roachpb.Key("b"), EndKey: roachpb.Key("c")}, }, - validator: func(t *testing.T, tc *timestampCache, tss []hlc.Timestamp, txns []*uuid.UUID) { - acIdx, bcIdx := 0, 1 + validator: func(t *testing.T, tc *timestampCache, txns []txnState) { + acTx, bcTx := txns[0], txns[1] - assertTS(t, tc, roachpb.Key("a"), nil, tss[acIdx], txns[acIdx]) - assertTS(t, tc, roachpb.Key("b"), nil, tss[bcIdx], txns[bcIdx]) + assertTS(t, tc, roachpb.Key("a"), nil, acTx.ts, acTx.id) + assertTS(t, tc, roachpb.Key("b"), nil, bcTx.ts, nilIfSimul(txns, bcTx.id)) assertTS(t, tc, roachpb.Key("c"), nil, tc.lowWater, nil) - assertTS(t, tc, roachpb.Key("a"), roachpb.Key("c"), tss[bcIdx], txns[bcIdx]) - assertTS(t, tc, roachpb.Key("a"), roachpb.Key("b"), tss[acIdx], txns[acIdx]) - assertTS(t, tc, roachpb.Key("b"), roachpb.Key("c"), tss[bcIdx], txns[bcIdx]) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("c"), bcTx.ts, nilIfSimul(txns, bcTx.id)) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("b"), acTx.ts, acTx.id) + assertTS(t, tc, roachpb.Key("b"), roachpb.Key("c"), bcTx.ts, nilIfSimul(txns, bcTx.id)) }, } @@ -414,15 +431,26 @@ var layeredIntervalTestCase4 = layeredIntervalTestCase{ // No overlap backwards. {Key: roachpb.Key("a"), EndKey: roachpb.Key("b")}, }, - validator: func(t *testing.T, tc *timestampCache, tss []hlc.Timestamp, txns []*uuid.UUID) { - acIdx, abIdx := 0, 1 + validator: func(t *testing.T, tc *timestampCache, txns []txnState) { + acTx, abTx := txns[0], txns[1] - assertTS(t, tc, roachpb.Key("a"), nil, tss[abIdx], txns[abIdx]) - assertTS(t, tc, roachpb.Key("b"), nil, tss[acIdx], txns[acIdx]) + assertTS(t, tc, roachpb.Key("a"), nil, abTx.ts, nilIfSimul(txns, abTx.id)) + assertTS(t, tc, roachpb.Key("b"), nil, acTx.ts, acTx.id) assertTS(t, tc, roachpb.Key("c"), nil, tc.lowWater, nil) - assertTS(t, tc, roachpb.Key("a"), roachpb.Key("c"), tss[abIdx], txns[abIdx]) - assertTS(t, tc, roachpb.Key("a"), roachpb.Key("b"), tss[abIdx], txns[abIdx]) - assertTS(t, tc, roachpb.Key("b"), roachpb.Key("c"), tss[acIdx], txns[acIdx]) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("c"), abTx.ts, nilIfSimul(txns, abTx.id)) + assertTS(t, tc, roachpb.Key("a"), roachpb.Key("b"), abTx.ts, nilIfSimul(txns, abTx.id)) + assertTS(t, tc, roachpb.Key("b"), roachpb.Key("c"), acTx.ts, acTx.id) + }, +} + +var layeredIntervalTestCase5 = layeredIntervalTestCase{ + spans: []roachpb.Span{ + // Two identical spans + {Key: roachpb.Key("a"), EndKey: roachpb.Key("b")}, + {Key: roachpb.Key("a"), EndKey: roachpb.Key("b")}, + }, + validator: func(t *testing.T, tc *timestampCache, txns []txnState) { + assertTS(t, tc, roachpb.Key("a"), nil, txns[1].ts, nilIfSimul(txns, txns[1].id)) }, } @@ -441,41 +469,73 @@ func TestTimestampCacheLayeredIntervals(t *testing.T) { clock.SetMaxOffset(0) tc := newTimestampCache(clock) + // Run each test case in several configurations. for testCaseIdx, testCase := range []layeredIntervalTestCase{ layeredIntervalTestCase1, layeredIntervalTestCase2, layeredIntervalTestCase3, layeredIntervalTestCase4, + layeredIntervalTestCase5, } { t.Logf("test case %d", testCaseIdx+1) - tss := make([]hlc.Timestamp, len(testCase.spans)) - txns := make([]*uuid.UUID, len(testCase.spans)) - for i := range testCase.spans { - txns[i] = uuid.NewV4() - } - - // Perform actions in order and validate. - t.Log("in order") - tc.Clear(clock) - for i := range testCase.spans { - tss[i] = clock.Now() - } - for i, span := range testCase.spans { - tc.add(span.Key, span.EndKey, tss[i], txns[i], true) - } - testCase.validator(t, tc, tss, txns) - // Perform actions out of order and validate. - t.Log("reverse order") - tc.Clear(clock) - for i := range testCase.spans { - // Recreate timestamps because Clear() sets lowWater to Now(). - tss[i] = clock.Now() - } - for i := len(testCase.spans) - 1; i >= 0; i-- { - tc.add(testCase.spans[i].Key, testCase.spans[i].EndKey, tss[i], txns[i], true) + // In simultaneous runs, each span in the test case is given the + // same time. Otherwise each gets a distinct timestamp (in the + // order of definition). + for _, simultaneous := range []bool{false, true} { + t.Logf("simultaneous: %v", simultaneous) + + // In reverse runs, spans are inserted into the timestamp cache + // out of order (so spans with higher timestamps are inserted + // before those with lower timestamps). In simultaneous+reverse + // runs, timestamps are all the same, but running in both + // directions is still necessary to exercise all branches in the + // code. + for _, reverse := range []bool{false, true} { + t.Logf("reverse: %v", reverse) + + // In sameTxn runs, all spans are inserted as a part of the + // same transaction; otherwise each is a separate transaction. + for _, sameTxn := range []bool{false, true} { + t.Logf("sameTxn: %v", sameTxn) + + txns := make([]txnState, len(testCase.spans)) + if sameTxn { + id := uuid.NewV4() + for i := range testCase.spans { + txns[i].id = id + } + } else { + for i := range testCase.spans { + txns[i].id = uuid.NewV4() + } + } + + tc.Clear(clock) + if simultaneous { + now := clock.Now() + for i := range txns { + txns[i].ts = now + } + } else { + for i := range txns { + txns[i].ts = clock.Now() + } + } + + if reverse { + for i := len(testCase.spans) - 1; i >= 0; i-- { + tc.add(testCase.spans[i].Key, testCase.spans[i].EndKey, txns[i].ts, txns[i].id, true) + } + } else { + for i := range testCase.spans { + tc.add(testCase.spans[i].Key, testCase.spans[i].EndKey, txns[i].ts, txns[i].id, true) + } + } + testCase.validator(t, tc, txns) + } + } } - testCase.validator(t, tc, tss, txns) } } From 17d1ca900a9deb2c6f3de2d05a6eb9575bf82a64 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 5 Sep 2016 13:13:42 +0800 Subject: [PATCH 3/5] storage: timestampCache PR feedback --- storage/timestamp_cache.go | 89 ++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/storage/timestamp_cache.go b/storage/timestamp_cache.go index 78497ba54ea2..c70d55987b10 100644 --- a/storage/timestamp_cache.go +++ b/storage/timestamp_cache.go @@ -232,6 +232,7 @@ func (tc *timestampCache) add( // Old: ------------ // // New: ------------ + // Old: *cv = cacheValue{timestamp: timestamp, txnID: txnID} tcache.MoveToEnd(entry) return @@ -241,6 +242,7 @@ func (tc *timestampCache) add( // New: ------------ ------------ ------------ // Old: -------- or ---------- or ---------- // + // New: ------------ ------------ ------------ // Old: tcache.DelEntry(entry) case sCmp > 0 && eCmp < 0: @@ -249,12 +251,13 @@ func (tc *timestampCache) add( // New: ---- // Old: ------------ // + // New: ---- // Old: ---- ---- oldEnd := key.End key.End = r.Start - key := tcache.MakeKey(r.End, oldEnd) - newEntry := makeCacheEntry(key, *cv) + newKey := tcache.MakeKey(r.End, oldEnd) + newEntry := makeCacheEntry(newKey, *cv) tcache.AddEntryAfter(newEntry, entry) case eCmp >= 0: // Left partial overlap; truncate old end. @@ -262,6 +265,7 @@ func (tc *timestampCache) add( // New: -------- -------- // Old: -------- or ------------ // + // New: -------- -------- // Old: ---- ---- key.End = r.Start case sCmp <= 0: @@ -270,6 +274,7 @@ func (tc *timestampCache) add( // New: -------- -------- // Old: -------- or ------------ // + // New: -------- -------- // Old: ---- ---- key.Start = r.End default: @@ -286,6 +291,7 @@ func (tc *timestampCache) add( // Old: ----------- ----------- ----------- ----------- // New: ----- or ----------- or -------- or -------- // + // Old: ----------- ----------- ----------- ----------- // New: return case sCmp < 0 && eCmp > 0: @@ -296,6 +302,7 @@ func (tc *timestampCache) add( // Old: ------ // New: ------------ // + // Old: ------ // New: --- --- lr := interval.Range{Start: r.Start, End: key.Start} addRange(lr) @@ -307,6 +314,7 @@ func (tc *timestampCache) add( // Old: -------- -------- // New: -------- or ------------ // + // Old: -------- -------- // New: ---- ---- r.Start = key.End case sCmp < 0: @@ -315,6 +323,7 @@ func (tc *timestampCache) add( // Old: -------- -------- // New: -------- or ------------ // + // Old: -------- -------- // New: ---- ---- r.End = key.Start default: @@ -334,16 +343,16 @@ func (tc *timestampCache) add( } switch { case sCmp == 0 && eCmp == 0: - // New and old are equal; replace old with new and avoid the - // need to insert new. Segment is no longer owned by any + // New and old are equal. Segment is no longer owned by any // transaction. // // New: ------------ // Old: ------------ // - // Nil: ------------ + // New: + // Nil: ============ + // Old: clearTxnIfDifferent(&cv.txnID, txnID) - tcache.MoveToEnd(entry) return case sCmp == 0 && eCmp > 0: // New contains old, left-aligned. Clear ownership of the @@ -353,7 +362,8 @@ func (tc *timestampCache) add( // Old: ---------- // // New: -- - // Nil: ---------- + // Nil: ========== + // Old: clearTxnIfDifferent(&cv.txnID, txnID) r.Start = key.End case sCmp < 0 && eCmp == 0: @@ -364,11 +374,10 @@ func (tc *timestampCache) add( // Old: ---------- // // New: -- - // Nil: ---------- + // Nil: ========== + // Old: clearTxnIfDifferent(&cv.txnID, txnID) r.End = key.Start - addRange(r) - return case sCmp < 0 && eCmp > 0: // New contains old; split into three segments with the // overlap owned by no txn. @@ -377,7 +386,8 @@ func (tc *timestampCache) add( // Old: -------- // // New: -- -- - // Nil: -------- + // Nil: ======== + // Old: clearTxnIfDifferent(&cv.txnID, txnID) newKey := tcache.MakeKey(r.Start, key.Start) newEntry := makeCacheEntry(newKey, cacheValue{timestamp: timestamp, txnID: txnID}) @@ -390,24 +400,25 @@ func (tc *timestampCache) add( // New: ---- // Old: ------------ // - // Nil: ---- + // New: + // Nil: ==== // Old: ---- ---- oldEnd := key.End key.End = r.Start clearTxnIfDifferent(&txnID, cv.txnID) - key := tcache.MakeKey(r.End, oldEnd) - newEntry := makeCacheEntry(key, *cv) + newKey := tcache.MakeKey(r.End, oldEnd) + newEntry := makeCacheEntry(newKey, *cv) tcache.AddEntryAfter(newEntry, entry) case eCmp == 0: - // Right-aligned partial overlap; truncate old end and clear ownership of - // new segment. + // Right-aligned partial overlap; truncate old end and clear + // ownership of new segment. // // New: -------- // Old: ------------ // // New: - // Nil: -------- + // Nil: ======== // Old: ---- key.End = r.Start clearTxnIfDifferent(&txnID, cv.txnID) @@ -419,13 +430,13 @@ func (tc *timestampCache) add( // Old: -------- // // New: ---- - // Nil: ---- + // Nil: ==== // Old: ---- key.End, r.Start = r.Start, key.End - key := tcache.MakeKey(key.End, r.Start) + newKey := tcache.MakeKey(key.End, r.Start) newCV := cacheValue{timestamp: cv.timestamp, txnID: txnID} clearTxnIfDifferent(&newCV.txnID, cv.txnID) - newEntry := makeCacheEntry(key, newCV) + newEntry := makeCacheEntry(newKey, newCV) tcache.AddEntryAfter(newEntry, entry) case sCmp == 0: // Left-aligned partial overlap; truncate old start and @@ -434,7 +445,7 @@ func (tc *timestampCache) add( // Old: ------------ // // New: - // Nil: -------- + // Nil: ======== // Old: ---- key.Start = r.End clearTxnIfDifferent(&txnID, cv.txnID) @@ -446,20 +457,14 @@ func (tc *timestampCache) add( // Old: -------- // // New: ---- - // Nil: ---- + // Nil: ==== // Old: ---- key.Start, r.End = r.End, key.Start - key := tcache.MakeKey(r.End, key.Start) + newKey := tcache.MakeKey(r.End, key.Start) newCV := cacheValue{timestamp: cv.timestamp, txnID: txnID} clearTxnIfDifferent(&newCV.txnID, cv.txnID) - newEntry := makeCacheEntry(key, newCV) + newEntry := makeCacheEntry(newKey, newCV) tcache.AddEntryAfter(newEntry, entry) - // We can add the new range now because it is guaranteed to - // be any other overlaps; we ust do so because we've changed - // our boundaries and continuing to iterate may hit the "no - // overlap" panic. - addRange(r) - return default: panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r)) } @@ -557,7 +562,8 @@ func (tc *timestampCache) ExpandRequests(timestamp hlc.Timestamp) { // of the specified range is overlapped by timestamps from different // transactions in the cache, the low water timestamp is returned for // the read timestamps. Also returns an "ok" bool, indicating whether -// an explicit match of the interval was found in the cache. +// an explicit match of the interval was found in the cache (as +// opposed to using the low-water mark). func (tc *timestampCache) GetMaxRead(start, end roachpb.Key) (hlc.Timestamp, *uuid.UUID, bool) { return tc.getMax(start, end, true) } @@ -568,7 +574,8 @@ func (tc *timestampCache) GetMaxRead(start, end roachpb.Key) (hlc.Timestamp, *uu // of the specified range is overlapped by timestamps from different // transactions in the cache, the low water timestamp is returned for // the write timestamps. Also returns an "ok" bool, indicating whether -// an explicit match of the interval was found in the cache. +// an explicit match of the interval was found in the cache (as +// opposed to using the low-water mark). func (tc *timestampCache) GetMaxWrite(start, end roachpb.Key) (hlc.Timestamp, *uuid.UUID, bool) { return tc.getMax(start, end, false) } @@ -578,24 +585,24 @@ func (tc *timestampCache) getMax(start, end roachpb.Key, readTSCache bool) (hlc. end = start.Next() } var ok bool - max := tc.lowWater + maxTS := tc.lowWater + var maxTxnID *uuid.UUID cache := tc.wCache if readTSCache { cache = tc.rCache } - var txnID *uuid.UUID for _, o := range cache.GetOverlaps(start, end) { ce := o.Value.(*cacheValue) - if max.Less(ce.timestamp) { + if maxTS.Less(ce.timestamp) { ok = true - max = ce.timestamp - txnID = ce.txnID - } else if max.Equal(ce.timestamp) && txnID != nil && - (ce.txnID == nil || !uuid.Equal(*txnID, *ce.txnID)) { - txnID = nil + maxTS = ce.timestamp + maxTxnID = ce.txnID + } else if maxTS.Equal(ce.timestamp) && maxTxnID != nil && + (ce.txnID == nil || !uuid.Equal(*maxTxnID, *ce.txnID)) { + maxTxnID = nil } } - return max, txnID, ok + return maxTS, maxTxnID, ok } // MergeInto merges all entries from this timestamp cache into the From 798f7a1fb8a990ea4cbc2a71a201db7f08954ca3 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Mon, 5 Sep 2016 14:11:56 +0800 Subject: [PATCH 4/5] storage: Optimize timestampCache for equal timestamps and txns When intervals have both the same timestamp and transaction ID (which is fairly common since this is what happens whenever a transaction touches the same rows more than once), we can avoid the splitting required when timestamps are equal but transaction IDs differ. Introduce (simple) special cases for this scenario. --- storage/timestamp_cache.go | 96 +++++++++++++++++++++++++------------- 1 file changed, 64 insertions(+), 32 deletions(-) diff --git a/storage/timestamp_cache.go b/storage/timestamp_cache.go index c70d55987b10..6930311ffe11 100644 --- a/storage/timestamp_cache.go +++ b/storage/timestamp_cache.go @@ -329,18 +329,52 @@ func (tc *timestampCache) add( default: panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r)) } - } else { + } else if (cv.txnID == nil && txnID == nil) || uuid.Equal(*cv.txnID, *txnID) { // The existing interval has a timestamp equal to the new - // interval. Compare interval ranges to determine how to - // modify existing interval. - - // If the intervals have two different transactions, we must - // clear the transaction id. - clearTxnIfDifferent := func(a **uuid.UUID, b *uuid.UUID) { - if b == nil || (*a != nil && !uuid.Equal(**a, *b)) { - *a = nil - } + // interval, and the same transaction ID. + switch { + case sCmp >= 0 && eCmp <= 0: + // Old contains or is equal to new; no need to add. + // + // New: ----- or ----------- or -------- or -------- + // Old: ----------- ----------- ----------- ----------- + // + // New: + // Old: ----------- ----------- ----------- ----------- + return + case sCmp <= 0 && eCmp >= 0: + // New contains old; delete old. + // + // New: ------------ ------------ ------------ + // Old: -------- or ---------- or ---------- + // + // New: ------------ ------------ ------------ + // Old: + tcache.DelEntry(entry) + case eCmp >= 0: + // Left partial overlap; truncate old end. + // + // New: -------- -------- + // Old: -------- or ------------ + // + // New: -------- -------- + // Old: ---- ---- + key.End = r.Start + case sCmp <= 0: + // Right partial overlap; truncate old start. + // + // New: -------- -------- + // Old: -------- or ------------ + // + // New: -------- -------- + // Old: ---- ---- + key.Start = r.End + default: + panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r)) } + } else { + // The existing interval has a timestamp equal to the new + // interval and a different transaction ID. switch { case sCmp == 0 && eCmp == 0: // New and old are equal. Segment is no longer owned by any @@ -352,7 +386,7 @@ func (tc *timestampCache) add( // New: // Nil: ============ // Old: - clearTxnIfDifferent(&cv.txnID, txnID) + cv.txnID = nil return case sCmp == 0 && eCmp > 0: // New contains old, left-aligned. Clear ownership of the @@ -364,7 +398,7 @@ func (tc *timestampCache) add( // New: -- // Nil: ========== // Old: - clearTxnIfDifferent(&cv.txnID, txnID) + cv.txnID = nil r.Start = key.End case sCmp < 0 && eCmp == 0: // New contains old, right-aligned. Clear ownership of the @@ -376,7 +410,7 @@ func (tc *timestampCache) add( // New: -- // Nil: ========== // Old: - clearTxnIfDifferent(&cv.txnID, txnID) + cv.txnID = nil r.End = key.Start case sCmp < 0 && eCmp > 0: // New contains old; split into three segments with the @@ -388,7 +422,7 @@ func (tc *timestampCache) add( // New: -- -- // Nil: ======== // Old: - clearTxnIfDifferent(&cv.txnID, txnID) + cv.txnID = nil newKey := tcache.MakeKey(r.Start, key.Start) newEntry := makeCacheEntry(newKey, cacheValue{timestamp: timestamp, txnID: txnID}) tcache.AddEntryAfter(newEntry, entry) @@ -403,15 +437,15 @@ func (tc *timestampCache) add( // New: // Nil: ==== // Old: ---- ---- + txnID = nil oldEnd := key.End key.End = r.Start - clearTxnIfDifferent(&txnID, cv.txnID) newKey := tcache.MakeKey(r.End, oldEnd) newEntry := makeCacheEntry(newKey, *cv) tcache.AddEntryAfter(newEntry, entry) case eCmp == 0: - // Right-aligned partial overlap; truncate old end and clear + // Old contains new, right-aligned; truncate old end and clear // ownership of new segment. // // New: -------- @@ -420,8 +454,19 @@ func (tc *timestampCache) add( // New: // Nil: ======== // Old: ---- + txnID = nil key.End = r.Start - clearTxnIfDifferent(&txnID, cv.txnID) + case sCmp == 0: + // Old contains new, left-aligned; truncate old start and + // clear ownership of new segment. + // New: -------- + // Old: ------------ + // + // New: + // Nil: ======== + // Old: ---- + txnID = nil + key.Start = r.End case eCmp > 0: // Left partial overlap; truncate old end and split new into // segments owned by no txn (the overlap) and the new txn. @@ -434,21 +479,9 @@ func (tc *timestampCache) add( // Old: ---- key.End, r.Start = r.Start, key.End newKey := tcache.MakeKey(key.End, r.Start) - newCV := cacheValue{timestamp: cv.timestamp, txnID: txnID} - clearTxnIfDifferent(&newCV.txnID, cv.txnID) + newCV := cacheValue{timestamp: cv.timestamp, txnID: nil} newEntry := makeCacheEntry(newKey, newCV) tcache.AddEntryAfter(newEntry, entry) - case sCmp == 0: - // Left-aligned partial overlap; truncate old start and - // clear ownership of new segment. - // New: -------- - // Old: ------------ - // - // New: - // Nil: ======== - // Old: ---- - key.Start = r.End - clearTxnIfDifferent(&txnID, cv.txnID) case sCmp < 0: // Right partial overlap; truncate old start and split new into // segments owned by no txn (the overlap) and the new txn. @@ -461,8 +494,7 @@ func (tc *timestampCache) add( // Old: ---- key.Start, r.End = r.End, key.Start newKey := tcache.MakeKey(r.End, key.Start) - newCV := cacheValue{timestamp: cv.timestamp, txnID: txnID} - clearTxnIfDifferent(&newCV.txnID, cv.txnID) + newCV := cacheValue{timestamp: cv.timestamp, txnID: nil} newEntry := makeCacheEntry(newKey, newCV) tcache.AddEntryAfter(newEntry, entry) default: From 581caf048858a991a098b9721e3954acf3e06478 Mon Sep 17 00:00:00 2001 From: Ben Darnell Date: Wed, 7 Sep 2016 11:40:28 +0800 Subject: [PATCH 5/5] storage: Adapt to removal of uuid.Equal --- storage/replica.go | 4 ++-- storage/timestamp_cache.go | 5 +++-- storage/timestamp_cache_test.go | 8 ++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/storage/replica.go b/storage/replica.go index 1f51bc7e51e9..ab229c389d74 100644 --- a/storage/replica.go +++ b/storage/replica.go @@ -1260,7 +1260,7 @@ func (r *Replica) applyTimestampCache(ba *roachpb.BatchRequest) *roachpb.Error { // Forward the timestamp if there's been a more recent read (by someone else). rTS, rTxnID, _ := r.mu.tsCache.GetMaxRead(header.Key, header.EndKey) if ba.Txn != nil { - if rTxnID == nil || !uuid.Equal(*ba.Txn.ID, *rTxnID) { + if rTxnID == nil || *ba.Txn.ID != *rTxnID { ba.Txn.Timestamp.Forward(rTS.Next()) } } else { @@ -1273,7 +1273,7 @@ func (r *Replica) applyTimestampCache(ba *roachpb.BatchRequest) *roachpb.Error { // write timestamp cache. wTS, wTxnID, _ := r.mu.tsCache.GetMaxWrite(header.Key, header.EndKey) if ba.Txn != nil { - if wTxnID == nil || !uuid.Equal(*ba.Txn.ID, *wTxnID) { + if wTxnID == nil || *ba.Txn.ID != *wTxnID { if !wTS.Less(ba.Txn.Timestamp) { ba.Txn.Timestamp.Forward(wTS.Next()) ba.Txn.WriteTooOld = true diff --git a/storage/timestamp_cache.go b/storage/timestamp_cache.go index 6930311ffe11..3347e8e2b080 100644 --- a/storage/timestamp_cache.go +++ b/storage/timestamp_cache.go @@ -329,7 +329,8 @@ func (tc *timestampCache) add( default: panic(fmt.Sprintf("no overlap between %v and %v", key.Range, r)) } - } else if (cv.txnID == nil && txnID == nil) || uuid.Equal(*cv.txnID, *txnID) { + } else if (cv.txnID == nil && txnID == nil) || + (cv.txnID != nil && txnID != nil && *cv.txnID == *txnID) { // The existing interval has a timestamp equal to the new // interval, and the same transaction ID. switch { @@ -630,7 +631,7 @@ func (tc *timestampCache) getMax(start, end roachpb.Key, readTSCache bool) (hlc. maxTS = ce.timestamp maxTxnID = ce.txnID } else if maxTS.Equal(ce.timestamp) && maxTxnID != nil && - (ce.txnID == nil || !uuid.Equal(*maxTxnID, *ce.txnID)) { + (ce.txnID == nil || *maxTxnID != *ce.txnID) { maxTxnID = nil } } diff --git a/storage/timestamp_cache_test.go b/storage/timestamp_cache_test.go index 1c14e9bdaa6b..076ddfb0424b 100644 --- a/storage/timestamp_cache_test.go +++ b/storage/timestamp_cache_test.go @@ -316,7 +316,7 @@ func assertTS( } else { if txnID == nil { t.Errorf("expected %s to have txn id %s, but found nil", keys, expectedTxnID.Short()) - } else if !uuid.Equal(*txnID, *expectedTxnID) { + } else if *txnID != *expectedTxnID { t.Errorf("expected %s to have txn id %s, but found %s", keys, expectedTxnID.Short(), txnID.Short()) } @@ -329,7 +329,7 @@ func assertTS( // not. This is because timestampCache.GetMaxRead must not return a // transaction ID when two different transactions have the same timestamp. func nilIfSimul(txns []txnState, txnID *uuid.UUID) *uuid.UUID { - if txns[0].ts.Equal(txns[1].ts) && !uuid.Equal(*txns[0].id, *txns[1].id) { + if txns[0].ts.Equal(txns[1].ts) && *txns[0].id != *txns[1].id { return nil } return txnID @@ -610,12 +610,12 @@ func TestTimestampCacheEqualTimestamps(t *testing.T) { // When querying either side separately, the transaction ID is returned. if ts, txn, _ := tc.GetMaxRead(roachpb.Key("a"), roachpb.Key("b")); !ts.Equal(ts1) { t.Errorf("expected 'a'-'b' to have timestamp %s, but found %s", ts1, ts) - } else if !uuid.Equal(*txn, *txn1) { + } else if *txn != *txn1 { t.Errorf("expected 'a'-'b' to have txn id %s, but found %s", txn1, txn) } if ts, txn, _ := tc.GetMaxRead(roachpb.Key("b"), roachpb.Key("c")); !ts.Equal(ts1) { t.Errorf("expected 'b'-'c' to have timestamp %s, but found %s", ts1, ts) - } else if !uuid.Equal(*txn, *txn2) { + } else if *txn != *txn2 { t.Errorf("expected 'b'-'c' to have txn id %s, but found %s", txn2, txn) }