diff --git a/pkg/storage/mvcc_history_test.go b/pkg/storage/mvcc_history_test.go index 15e52ec37beb..02229f264783 100644 --- a/pkg/storage/mvcc_history_test.go +++ b/pkg/storage/mvcc_history_test.go @@ -670,6 +670,7 @@ var commands = map[string]cmd{ "iter_new": {typReadOnly, cmdIterNew}, "iter_new_incremental": {typReadOnly, cmdIterNewIncremental}, // MVCCIncrementalIterator + "iter_new_read_as_of": {typReadOnly, cmdIterNewReadAsOf}, // readAsOfIterator "iter_seek_ge": {typReadOnly, cmdIterSeekGE}, "iter_seek_lt": {typReadOnly, cmdIterSeekLT}, "iter_seek_intent_ge": {typReadOnly, cmdIterSeekIntentGE}, @@ -1396,6 +1397,29 @@ func cmdIterNewIncremental(e *evalCtx) error { return nil } +func cmdIterNewReadAsOf(e *evalCtx) error { + if e.iter != nil { + e.iter.Close() + } + var asOf hlc.Timestamp + if e.hasArg("asOfTs") { + asOf = e.getTsWithName("asOfTs") + } + opts := IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + RangeKeyMaskingBelow: asOf} + if e.hasArg("k") { + opts.LowerBound, opts.UpperBound = e.getKeyRange() + } + if len(opts.UpperBound) == 0 { + opts.UpperBound = keys.MaxKey + } + r, closer := metamorphicReader(e, "iter-reader") + iter := &iterWithCloser{r.NewMVCCIterator(MVCCKeyIterKind, opts), closer} + e.iter = NewReadAsOfIterator(iter, asOf) + return nil +} + func cmdIterSeekGE(e *evalCtx) error { key := e.getKey() ts := e.getTs(nil) diff --git a/pkg/storage/read_as_of_iterator.go b/pkg/storage/read_as_of_iterator.go index ce97929a4426..9b51620fc0ba 100644 --- a/pkg/storage/read_as_of_iterator.go +++ b/pkg/storage/read_as_of_iterator.go @@ -16,15 +16,21 @@ import ( ) // ReadAsOfIterator wraps a SimpleMVCCIterator and only surfaces the latest -// valid key of a given MVCC key that is also below the asOf timestamp, if set. -// Further, the iterator does not surface delete tombstones, nor any MVCC keys -// shadowed by delete tombstones below the asOf timestamp, if set. The iterator -// assumes that it will not encounter any write intents. +// valid point key of a given MVCC key that is also below the asOf timestamp, if +// set. Further, the iterator does not surface point or range tombstones, nor +// any MVCC keys shadowed by tombstones below the asOf timestamp, if set. The +// iterator assumes that it will not encounter any write intents. type ReadAsOfIterator struct { iter SimpleMVCCIterator // asOf is the latest timestamp of a key surfaced by the iterator. asOf hlc.Timestamp + + // valid tracks if the current key is valid + valid bool + + // err tracks if iterating to the current key returned an error + err error } var _ SimpleMVCCIterator = &ReadAsOfIterator{} @@ -45,7 +51,7 @@ func (f *ReadAsOfIterator) SeekGE(originalKey MVCCKey) { synthetic := MVCCKey{Key: originalKey.Key, Timestamp: f.asOf} f.iter.SeekGE(synthetic) - if ok := f.advance(); ok && f.UnsafeKey().Less(originalKey) { + if f.advance(); f.valid && f.UnsafeKey().Less(originalKey) { // The following is true: // originalKey.Key == f.UnsafeKey && // f.asOf timestamp (if set) >= current timestamp > originalKey timestamp. @@ -59,7 +65,7 @@ func (f *ReadAsOfIterator) SeekGE(originalKey MVCCKey) { // Valid implements the simpleMVCCIterator. func (f *ReadAsOfIterator) Valid() (bool, error) { - return f.iter.Valid() + return f.valid, f.err } // Next advances the iterator to the next valid MVCC key obeying the iterator's @@ -94,42 +100,87 @@ func (f *ReadAsOfIterator) UnsafeValue() []byte { // HasPointAndRange implements SimpleMVCCIterator. func (f *ReadAsOfIterator) HasPointAndRange() (bool, bool) { - panic("not implemented") + // the ReadAsOfIterator only surfaces point keys; therefore hasPoint is always + // true, unless the iterator is invalid, and hasRange is always false. + return f.valid, false } -// RangeBounds implements SimpleMVCCIterator. +// RangeBounds always returns an empty span, since the iterator never surfaces +// rangekeys. func (f *ReadAsOfIterator) RangeBounds() roachpb.Span { - panic("not implemented") + return roachpb.Span{} } -// RangeKeys implements SimpleMVCCIterator. +// RangeKeys is always empty since this iterator never surfaces rangeKeys. func (f *ReadAsOfIterator) RangeKeys() []MVCCRangeKeyValue { - panic("not implemented") + return []MVCCRangeKeyValue{} +} + +// updateValid updates i.valid and i.err based on the underlying iterator, and +// returns true if valid. +func (f *ReadAsOfIterator) updateValid() bool { + f.valid, f.err = f.iter.Valid() + return f.valid } // advance moves past keys with timestamps later than f.asOf and skips MVCC keys -// whose latest value (subject to f.asOF) has been deleted. Note that advance -// moves past keys above asOF before evaluating tombstones, implying the -// iterator will never call f.iter.NextKey() on a tombstone with a timestamp -// later than f.asOF. -func (f *ReadAsOfIterator) advance() bool { +// whose latest value (subject to f.asOF) has been deleted by a point or range +// tombstone. +func (f *ReadAsOfIterator) advance() { for { - if ok, err := f.Valid(); err != nil || !ok { - // No valid keys found. - return false - } else if !f.asOf.IsEmpty() && f.asOf.Less(f.iter.UnsafeKey().Timestamp) { - // Skip keys above the asOf timestamp. + if ok := f.updateValid(); !ok { + return + } + + if !f.asOf.IsEmpty() && f.asOf.Less(f.iter.UnsafeKey().Timestamp) { + // Skip keys above the asOf timestamp, regardless of type of key (e.g. point or range) + f.iter.Next() + } else if hasPoint, hasRange := f.iter.HasPointAndRange(); !hasPoint && hasRange { + // Bare range keys get surfaced before the point key, even though the + // point key shadows it; thus, because we can infer range key information + // when the point key surfaces, skip over the bare range key, and reason + // about shadowed keys at the surfaced point key. + // + // E.g. Scanning the keys below: + // 2 a2 + // 1 o---o + // a b + // + // would result in two surfaced keys: + // {a-b}@1; + // a2, {a-b}@1 + f.iter.Next() } else if len(f.iter.UnsafeValue()) == 0 { - // Skip to the next MVCC key if we find a tombstone. + // Skip to the next MVCC key if we find a point tombstone. + f.iter.NextKey() + } else if !hasRange { + // On a valid key without a range key + return + } else if f.asOfRangeKeyShadows() { + // The latest range key, as of system time, shadows the latest point key. + // This key is therefore deleted as of system time. f.iter.NextKey() } else { - // On a valid key. - return true + // On a valid key that potentially shadows range key(s) + return } } } +// asOfRangeKeyShadows returns true if there exists a range key at or below the asOf timestamp +// that shadows the latest point key +// +// TODO (msbutler): ensure this function caches range key values (#84379) before +// the 22.2 branch cut, else we face a steep perf cliff for RESTORE with range keys. +func (f *ReadAsOfIterator) asOfRangeKeyShadows() (shadows bool) { + rangeKeys := f.iter.RangeKeys() + if f.asOf.IsEmpty() { + return f.iter.UnsafeKey().Timestamp.LessEq(rangeKeys[0].RangeKey.Timestamp) + } + return HasRangeKeyBetween(rangeKeys, f.asOf, f.iter.UnsafeKey().Timestamp) +} + // NewReadAsOfIterator constructs a ReadAsOfIterator. func NewReadAsOfIterator(iter SimpleMVCCIterator, asOf hlc.Timestamp) *ReadAsOfIterator { return &ReadAsOfIterator{iter: iter, asOf: asOf} diff --git a/pkg/storage/testdata/mvcc_histories/range_key_iter_read_as_of b/pkg/storage/testdata/mvcc_histories/range_key_iter_read_as_of new file mode 100644 index 000000000000..5b49bc68c726 --- /dev/null +++ b/pkg/storage/testdata/mvcc_histories/range_key_iter_read_as_of @@ -0,0 +1,168 @@ +# Tests range key handling in ReadAsOfIterator. Note that the iterator assumes it will not see an +# intent. +# +# Sets up the following dataset, where x is tombstone, o-o is range tombstone +# +# 6 f6 +# 5 o---------------o k5 +# 4 x x d4 f4 g4 x +# 3 o-------o e3 o-------oh3 o---o +# 2 a2 f2 g2 +# 1 o---------------------------------------o +# a b c d e f g h i j k l m n o +# +run ok +put_rangekey k=a end=k ts=1 +put k=a ts=2 v=a2 +del k=a ts=4 +put_rangekey k=b end=d ts=3 +del k=b ts=4 +put k=d ts=4 v=d4 +put k=e ts=3 v=e3 +put k=f ts=2 v=f2 +put k=g ts=2 v=g2 +put_rangekey k=f end=h ts=3 +put k=f ts=4 v=f4 +put_rangekey k=c end=g ts=5 +put k=f ts=6 v=f6 +put k=g ts=4 v=g4 +put k=h ts=3 v=h3 +del k=h ts=4 +put k=k ts=5 v=k5 +put_rangekey k=m end=n ts=3 localTs=2 +---- +>> at end: +rangekey: {a-b}/[1.000000000,0=/] +rangekey: {b-c}/[3.000000000,0=/ 1.000000000,0=/] +rangekey: {c-d}/[5.000000000,0=/ 3.000000000,0=/ 1.000000000,0=/] +rangekey: {d-f}/[5.000000000,0=/ 1.000000000,0=/] +rangekey: {f-g}/[5.000000000,0=/ 3.000000000,0=/ 1.000000000,0=/] +rangekey: {g-h}/[3.000000000,0=/ 1.000000000,0=/] +rangekey: {h-k}/[1.000000000,0=/] +rangekey: {m-n}/[3.000000000,0={localTs=2.000000000,0}/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/4.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "e"/3.000000000,0 -> /BYTES/e3 +data: "f"/6.000000000,0 -> /BYTES/f6 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "f"/2.000000000,0 -> /BYTES/f2 +data: "g"/4.000000000,0 -> /BYTES/g4 +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> / +data: "h"/3.000000000,0 -> /BYTES/h3 +data: "k"/5.000000000,0 -> /BYTES/k5 + +# test range keys are ignored if above asOf, even with multiple range keys +run ok +iter_new_read_as_of asOfTs=2 +iter_seek_ge k=a +iter_scan +---- +iter_seek_ge: "a"/2.000000000,0=/BYTES/a2 +iter_scan: "a"/2.000000000,0=/BYTES/a2 +iter_scan: "f"/2.000000000,0=/BYTES/f2 +iter_scan: "g"/2.000000000,0=/BYTES/g2 +iter_scan: . + +# test range key at or below asOf properly shadows keys +run ok +iter_new_read_as_of asOfTs=3 +iter_seek_ge k=a +iter_scan +---- +iter_seek_ge: "a"/2.000000000,0=/BYTES/a2 +iter_scan: "a"/2.000000000,0=/BYTES/a2 +iter_scan: "e"/3.000000000,0=/BYTES/e3 +iter_scan: "h"/3.000000000,0=/BYTES/h3 +iter_scan: . + +# iterate over a few point tombstones at the asOf time +run ok +iter_new_read_as_of asOfTs=4 +iter_seek_ge k=a +iter_scan +---- +iter_seek_ge: "d"/4.000000000,0=/BYTES/d4 +iter_scan: "d"/4.000000000,0=/BYTES/d4 +iter_scan: "e"/3.000000000,0=/BYTES/e3 +iter_scan: "f"/4.000000000,0=/BYTES/f4 +iter_scan: "g"/4.000000000,0=/BYTES/g4 +iter_scan: . + +# iterate over ts 5-7 because the test is cheap +run ok +iter_new_read_as_of asOfTs=5 +iter_seek_ge k=a +iter_scan +---- +iter_seek_ge: "g"/4.000000000,0=/BYTES/g4 +iter_scan: "g"/4.000000000,0=/BYTES/g4 +iter_scan: "k"/5.000000000,0=/BYTES/k5 +iter_scan: . + +# iterate over ts 5-7 because the test is cheap +run ok +iter_new_read_as_of asOfTs=6 +iter_seek_ge k=a +iter_scan +---- +iter_seek_ge: "f"/6.000000000,0=/BYTES/f6 +iter_scan: "f"/6.000000000,0=/BYTES/f6 +iter_scan: "g"/4.000000000,0=/BYTES/g4 +iter_scan: "k"/5.000000000,0=/BYTES/k5 +iter_scan: . + +# iterate over ts 5-7 for completeness +run ok +iter_new_read_as_of asOfTs=7 +iter_seek_ge k=a +iter_scan +---- +iter_seek_ge: "f"/6.000000000,0=/BYTES/f6 +iter_scan: "f"/6.000000000,0=/BYTES/f6 +iter_scan: "g"/4.000000000,0=/BYTES/g4 +iter_scan: "k"/5.000000000,0=/BYTES/k5 +iter_scan: . + + +# test range key handling when asOf is empty +run ok +iter_new_read_as_of +iter_seek_ge k=a +iter_scan +---- +iter_seek_ge: "f"/6.000000000,0=/BYTES/f6 +iter_scan: "f"/6.000000000,0=/BYTES/f6 +iter_scan: "g"/4.000000000,0=/BYTES/g4 +iter_scan: "k"/5.000000000,0=/BYTES/k5 +iter_scan: . + +# seek to a point key shadowed by a range key +run ok +iter_new_read_as_of asOfTs=5 +iter_seek_ge k=d +---- +iter_seek_ge: "g"/4.000000000,0=/BYTES/g4 + +# seek to the start of a range key +run ok +iter_new_read_as_of asOfTs=5 +iter_seek_ge k=c +---- +iter_seek_ge: "g"/4.000000000,0=/BYTES/g4 + +# seek to the same point key, with AsOf empty +run ok +iter_new_read_as_of +iter_seek_ge k=d +---- +iter_seek_ge: "f"/6.000000000,0=/BYTES/f6 + +# attempt seek to the same point key, but ignore the range key because its above AsOf +run ok +iter_new_read_as_of asOfTs=4 +iter_seek_ge k=d +---- +iter_seek_ge: "d"/4.000000000,0=/BYTES/d4