Skip to content

Commit

Permalink
storage: handle range keys in readAsOfIterator
Browse files Browse the repository at this point in the history
Previously, the readAsOfIterator used in RESTORE could not handle range keys.
This PR implements the new SimpleMVCCIterator methods that handle range keys.
Further, this patch ensures the  readAsOfIterator skips over point keys
shadowed by range keys  at or below the caller's specified asOf timestamp.

Next, Backup needs to be tought about RangeKeys.

Informs cockroachdb#71155

Release note: none
  • Loading branch information
msbutler committed Jul 15, 2022
1 parent 37df70d commit 8370850
Show file tree
Hide file tree
Showing 3 changed files with 275 additions and 23 deletions.
24 changes: 24 additions & 0 deletions pkg/storage/mvcc_history_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,7 @@ var commands = map[string]cmd{

"iter_new": {typReadOnly, cmdIterNew},
"iter_new_incremental": {typReadOnly, cmdIterNewIncremental}, // MVCCIncrementalIterator
"iter_new_read_as_of": {typReadOnly, cmdIterNewReadAsOf}, // readAsOfIterator
"iter_seek_ge": {typReadOnly, cmdIterSeekGE},
"iter_seek_lt": {typReadOnly, cmdIterSeekLT},
"iter_seek_intent_ge": {typReadOnly, cmdIterSeekIntentGE},
Expand Down Expand Up @@ -1396,6 +1397,29 @@ func cmdIterNewIncremental(e *evalCtx) error {
return nil
}

func cmdIterNewReadAsOf(e *evalCtx) error {
if e.iter != nil {
e.iter.Close()
}
var asOf hlc.Timestamp
if e.hasArg("asOfTs") {
asOf = e.getTsWithName("asOfTs")
}
opts := IterOptions{
KeyTypes: IterKeyTypePointsAndRanges,
RangeKeyMaskingBelow: asOf}
if e.hasArg("k") {
opts.LowerBound, opts.UpperBound = e.getKeyRange()
}
if len(opts.UpperBound) == 0 {
opts.UpperBound = keys.MaxKey
}
r, closer := metamorphicReader(e, "iter-reader")
iter := &iterWithCloser{r.NewMVCCIterator(MVCCKeyIterKind, opts), closer}
e.iter = NewReadAsOfIterator(iter, asOf)
return nil
}

func cmdIterSeekGE(e *evalCtx) error {
key := e.getKey()
ts := e.getTs(nil)
Expand Down
142 changes: 119 additions & 23 deletions pkg/storage/read_as_of_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,24 @@ import (
)

// ReadAsOfIterator wraps a SimpleMVCCIterator and only surfaces the latest
// valid key of a given MVCC key that is also below the asOf timestamp, if set.
// Further, the iterator does not surface delete tombstones, nor any MVCC keys
// shadowed by delete tombstones below the asOf timestamp, if set. The iterator
// assumes that it will not encounter any write intents.
// valid point key of a given MVCC key that is also below the asOf timestamp, if
// set. Further, the iterator does not surface point or range tombstones, nor
// any MVCC keys shadowed by tombstones below the asOf timestamp, if set. The
// iterator assumes that it will not encounter any write intents.
type ReadAsOfIterator struct {
iter SimpleMVCCIterator

// asOf is the latest timestamp of a key surfaced by the iterator.
asOf hlc.Timestamp

// hasRange is true if the current key has a range key at or below f.asOf
hasRange bool

// valid tracks if the current key is valid
valid bool

// err tracks if iterating to the current key returned an error
err error
}

var _ SimpleMVCCIterator = &ReadAsOfIterator{}
Expand All @@ -45,7 +54,7 @@ func (f *ReadAsOfIterator) SeekGE(originalKey MVCCKey) {
synthetic := MVCCKey{Key: originalKey.Key, Timestamp: f.asOf}
f.iter.SeekGE(synthetic)

if ok := f.advance(); ok && f.UnsafeKey().Less(originalKey) {
if f.advance(); f.valid && f.UnsafeKey().Less(originalKey) {
// The following is true:
// originalKey.Key == f.UnsafeKey &&
// f.asOf timestamp (if set) >= current timestamp > originalKey timestamp.
Expand All @@ -59,7 +68,7 @@ func (f *ReadAsOfIterator) SeekGE(originalKey MVCCKey) {

// Valid implements the simpleMVCCIterator.
func (f *ReadAsOfIterator) Valid() (bool, error) {
return f.iter.Valid()
return f.valid, f.err
}

// Next advances the iterator to the next valid MVCC key obeying the iterator's
Expand Down Expand Up @@ -94,40 +103,127 @@ func (f *ReadAsOfIterator) UnsafeValue() []byte {

// HasPointAndRange implements SimpleMVCCIterator.
func (f *ReadAsOfIterator) HasPointAndRange() (bool, bool) {
panic("not implemented")
// the ReadAsOfIterator only surfaces point keys; therefore hasPoint is always
// true, unless the iterator is invalid.
return f.valid, f.hasRange && f.valid
}

// RangeBounds implements SimpleMVCCIterator.
func (f *ReadAsOfIterator) RangeBounds() roachpb.Span {
panic("not implemented")
if !f.hasRange || !f.valid {
return roachpb.Span{}
}
return f.iter.RangeBounds()
}

// RangeKeys implements SimpleMVCCIterator.
// RangeKeys surfaces all RangeKeys that overlap the given key that are at or
// below f.AsOf()
func (f *ReadAsOfIterator) RangeKeys() []MVCCRangeKeyValue {
panic("not implemented")
if !f.hasRange || !f.valid {
return []MVCCRangeKeyValue{}
}

if f.asOf.IsEmpty() {
return f.iter.RangeKeys()
}
// TODO(erikgrinaker): It may be worthwhile to clone and memoize this result
// for the same range key. However, callers may avoid calling RangeKeys()
// unnecessarily, and we may optimize parent iterators, so let's measure.
rangeKeys := f.iter.RangeKeys()

// Find the first range key at or below f.AsOf, and truncate rangeKeys. We do
// a linear search rather than a binary search, because we expect endTime to
// be near the current time, so the first element will typically match.
first := len(rangeKeys) - 1
for idx, rkv := range rangeKeys {
if rkv.RangeKey.Timestamp.LessEq(f.asOf) {
first = idx
break
}
}
return rangeKeys[first:]
}

// updateValid updates i.valid and i.err based on the underlying iterator, and
// returns true if valid.
func (f *ReadAsOfIterator) updateValid() bool {
f.valid, f.err = f.iter.Valid()
return f.valid
}

// advance moves past keys with timestamps later than f.asOf and skips MVCC keys
// whose latest value (subject to f.asOF) has been deleted. Note that advance
// moves past keys above asOF before evaluating tombstones, implying the
// iterator will never call f.iter.NextKey() on a tombstone with a timestamp
// later than f.asOF.
func (f *ReadAsOfIterator) advance() bool {
// whose latest value (subject to f.asOF) has been deleted by a point or range
// tombstone.
func (f *ReadAsOfIterator) advance() {
for {
if ok, err := f.Valid(); err != nil || !ok {
// No valid keys found.
return false
} else if !f.asOf.IsEmpty() && f.asOf.Less(f.iter.UnsafeKey().Timestamp) {
// Skip keys above the asOf timestamp.
if ok := f.updateValid(); !ok {
return
}
if !f.asOf.IsEmpty() && f.asOf.Less(f.iter.UnsafeKey().Timestamp) {
// Skip keys above the asOf timestamp, regardless of type of key (e.g. point or range)
f.iter.Next()
continue
}

// Advance or return without iterating over actual range keys, if possible.
hasPoint, hasRange := f.iter.HasPointAndRange()
if !hasPoint && hasRange {
// Bare range keys get surfaced before the point key, even though the
// point key shadows it; thus, because we can infer range key information
// when the point key surfaces, skip over the bare range key, and reason
// about shadowed keys at the surfaced point key.
//
// E.g. Scanning the keys below:
// 2 a2
// 1 o---o
// a b
//
// would result in two surfaced keys:
// {a-b}@1;
// a2, {a-b}@1

f.iter.Next()
continue
} else if len(f.iter.UnsafeValue()) == 0 {
// Skip to the next MVCC key if we find a tombstone.
// Skip to the next MVCC key if we find a point tombstone.
f.iter.NextKey()
continue
} else if !hasRange {
// On a valid key without a range key
f.hasRange = false
return
}

// At this point, there exists a point key below f.AsOf and range key(s) that
// _may_ be below f.AsOf.
if hasAsOfRange, shadows := f.getAsOfRangeKey(); shadows {
// The latest range key, as of system time, shadows the latest point tombstone.
// This key is therefore deleted as of system time.
f.iter.NextKey()
} else {
// On a valid key.
return true
// On a valid key that potentially shadows range key(s)
f.hasRange = hasAsOfRange
return
}
}
}

// asOfRangeKeyShadows returns two bools: the first indicates if there exists a
// range key at or below the asOf timestamp, and the second indicates if the
// asOf range key shadows all point keys
func (f *ReadAsOfIterator) getAsOfRangeKey() (hasRange bool, shadows bool) {
rangeKVs := f.iter.RangeKeys()
for _, rkv := range rangeKVs {
if !f.asOf.IsEmpty() && f.asOf.Less(rkv.RangeKey.Timestamp) {
// Skip evaluating any range key above asOF.
} else if f.iter.UnsafeKey().Timestamp.Less(rkv.RangeKey.Timestamp) {
return true, true
} else {
return true, false
}
}
// all range keys are above the asOf timestamp
return false, false
}

// NewReadAsOfIterator constructs a ReadAsOfIterator.
Expand Down
132 changes: 132 additions & 0 deletions pkg/storage/testdata/mvcc_histories/range_key_iter_read_as_of
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# Tests range key handling in ReadAsOfIterator. Note that the iterator assumes it will not see an
# intent.
#
# Sets up the following dataset, where x is tombstone, o-o is range tombstone
#
# 6 f6
# 5 o---------------o k5
# 4 x x d4 f4 g4 x
# 3 o-------o e3 o-------oh3 o---o
# 2 a2 f2 g2
# 1 o---------------------------------------o
# a b c d e f g h i j k l m n o
#
run ok
put_rangekey k=a end=k ts=1
put k=a ts=2 v=a2
del k=a ts=4
put_rangekey k=b end=d ts=3
del k=b ts=4
put k=d ts=4 v=d4
put k=e ts=3 v=e3
put k=f ts=2 v=f2
put k=g ts=2 v=g2
put_rangekey k=f end=h ts=3
put k=f ts=4 v=f4
put_rangekey k=c end=g ts=5
put k=f ts=6 v=f6
put k=g ts=4 v=g4
put k=h ts=3 v=h3
del k=h ts=4
put k=k ts=5 v=k5
put_rangekey k=m end=n ts=3 localTs=2
----
>> at end:
rangekey: {a-b}/[1.000000000,0=/<empty>]
rangekey: {b-c}/[3.000000000,0=/<empty> 1.000000000,0=/<empty>]
rangekey: {c-d}/[5.000000000,0=/<empty> 3.000000000,0=/<empty> 1.000000000,0=/<empty>]
rangekey: {d-f}/[5.000000000,0=/<empty> 1.000000000,0=/<empty>]
rangekey: {f-g}/[5.000000000,0=/<empty> 3.000000000,0=/<empty> 1.000000000,0=/<empty>]
rangekey: {g-h}/[3.000000000,0=/<empty> 1.000000000,0=/<empty>]
rangekey: {h-k}/[1.000000000,0=/<empty>]
rangekey: {m-n}/[3.000000000,0={localTs=2.000000000,0}/<empty>]
data: "a"/4.000000000,0 -> /<empty>
data: "a"/2.000000000,0 -> /BYTES/a2
data: "b"/4.000000000,0 -> /<empty>
data: "d"/4.000000000,0 -> /BYTES/d4
data: "e"/3.000000000,0 -> /BYTES/e3
data: "f"/6.000000000,0 -> /BYTES/f6
data: "f"/4.000000000,0 -> /BYTES/f4
data: "f"/2.000000000,0 -> /BYTES/f2
data: "g"/4.000000000,0 -> /BYTES/g4
data: "g"/2.000000000,0 -> /BYTES/g2
data: "h"/4.000000000,0 -> /<empty>
data: "h"/3.000000000,0 -> /BYTES/h3
data: "k"/5.000000000,0 -> /BYTES/k5

# test range keys are ignored if above asOf, even with multiple range keys
run ok
iter_new_read_as_of asOfTs=2
iter_seek_ge k=a
iter_scan
----
iter_seek_ge: "a"/2.000000000,0=/BYTES/a2 {a-b}/[1.000000000,0=/<empty>]
iter_scan: "a"/2.000000000,0=/BYTES/a2 {a-b}/[1.000000000,0=/<empty>]
iter_scan: "f"/2.000000000,0=/BYTES/f2 {f-g}/[1.000000000,0=/<empty>]
iter_scan: "g"/2.000000000,0=/BYTES/g2 {g-h}/[1.000000000,0=/<empty>]
iter_scan: .

# test range key at or below asOf properly shadows keys
run ok
iter_new_read_as_of asOfTs=3
iter_seek_ge k=a
iter_scan
----
iter_seek_ge: "a"/2.000000000,0=/BYTES/a2 {a-b}/[1.000000000,0=/<empty>]
iter_scan: "a"/2.000000000,0=/BYTES/a2 {a-b}/[1.000000000,0=/<empty>]
iter_scan: "e"/3.000000000,0=/BYTES/e3 {d-f}/[1.000000000,0=/<empty>]
iter_scan: "h"/3.000000000,0=/BYTES/h3 {h-k}/[1.000000000,0=/<empty>]
iter_scan: .

# iterate over a few point tombstones at the asOf time
run ok
iter_new_read_as_of asOfTs=4
iter_seek_ge k=a
iter_scan
----
iter_seek_ge: "d"/4.000000000,0=/BYTES/d4 {d-f}/[1.000000000,0=/<empty>]
iter_scan: "d"/4.000000000,0=/BYTES/d4 {d-f}/[1.000000000,0=/<empty>]
iter_scan: "e"/3.000000000,0=/BYTES/e3 {d-f}/[1.000000000,0=/<empty>]
iter_scan: "f"/4.000000000,0=/BYTES/f4 {f-g}/[3.000000000,0=/<empty> 1.000000000,0=/<empty>]
iter_scan: "g"/4.000000000,0=/BYTES/g4 {g-h}/[3.000000000,0=/<empty> 1.000000000,0=/<empty>]
iter_scan: .

# test range key handling when asOf is empty
run ok
iter_new_read_as_of
iter_seek_ge k=a
iter_scan
----
iter_seek_ge: "f"/6.000000000,0=/BYTES/f6 {f-g}/[5.000000000,0=/<empty> 3.000000000,0=/<empty> 1.000000000,0=/<empty>]
iter_scan: "f"/6.000000000,0=/BYTES/f6 {f-g}/[5.000000000,0=/<empty> 3.000000000,0=/<empty> 1.000000000,0=/<empty>]
iter_scan: "g"/4.000000000,0=/BYTES/g4 {g-h}/[3.000000000,0=/<empty> 1.000000000,0=/<empty>]
iter_scan: "k"/5.000000000,0=/BYTES/k5
iter_scan: .

# seek to a point key shadowed by a range key
run ok
iter_new_read_as_of asOfTs=5
iter_seek_ge k=d
----
iter_seek_ge: "g"/4.000000000,0=/BYTES/g4 {g-h}/[3.000000000,0=/<empty> 1.000000000,0=/<empty>]

# seek to the start of a range key
run ok
iter_new_read_as_of asOfTs=5
iter_seek_ge k=c
----
iter_seek_ge: "g"/4.000000000,0=/BYTES/g4 {g-h}/[3.000000000,0=/<empty> 1.000000000,0=/<empty>]

# seek to the same point key, with AsOf empty
run ok
iter_new_read_as_of
iter_seek_ge k=d
----
iter_seek_ge: "f"/6.000000000,0=/BYTES/f6 {f-g}/[5.000000000,0=/<empty> 3.000000000,0=/<empty> 1.000000000,0=/<empty>]

# attempt seek to the same point key, but ignore the range key because its above AsOf
run ok
iter_new_read_as_of asOfTs=4
iter_seek_ge k=d
----
iter_seek_ge: "d"/4.000000000,0=/BYTES/d4 {d-f}/[1.000000000,0=/<empty>]

0 comments on commit 8370850

Please sign in to comment.