Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

storage: handle range keys in readAsOfIterator #84214

Merged
merged 1 commit into from
Jul 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions pkg/storage/mvcc_history_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,7 @@ var commands = map[string]cmd{

"iter_new": {typReadOnly, cmdIterNew},
"iter_new_incremental": {typReadOnly, cmdIterNewIncremental}, // MVCCIncrementalIterator
"iter_new_read_as_of": {typReadOnly, cmdIterNewReadAsOf}, // readAsOfIterator
"iter_seek_ge": {typReadOnly, cmdIterSeekGE},
"iter_seek_lt": {typReadOnly, cmdIterSeekLT},
"iter_seek_intent_ge": {typReadOnly, cmdIterSeekIntentGE},
Expand Down Expand Up @@ -1396,6 +1397,29 @@ func cmdIterNewIncremental(e *evalCtx) error {
return nil
}

func cmdIterNewReadAsOf(e *evalCtx) error {
if e.iter != nil {
e.iter.Close()
}
var asOf hlc.Timestamp
if e.hasArg("asOfTs") {
asOf = e.getTsWithName("asOfTs")
}
opts := IterOptions{
KeyTypes: IterKeyTypePointsAndRanges,
RangeKeyMaskingBelow: asOf}
if e.hasArg("k") {
opts.LowerBound, opts.UpperBound = e.getKeyRange()
}
if len(opts.UpperBound) == 0 {
opts.UpperBound = keys.MaxKey
}
r, closer := metamorphicReader(e, "iter-reader")
iter := &iterWithCloser{r.NewMVCCIterator(MVCCKeyIterKind, opts), closer}
e.iter = NewReadAsOfIterator(iter, asOf)
return nil
}

func cmdIterSeekGE(e *evalCtx) error {
key := e.getKey()
ts := e.getTs(nil)
Expand Down
99 changes: 75 additions & 24 deletions pkg/storage/read_as_of_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,21 @@ import (
)

// ReadAsOfIterator wraps a SimpleMVCCIterator and only surfaces the latest
// valid key of a given MVCC key that is also below the asOf timestamp, if set.
// Further, the iterator does not surface delete tombstones, nor any MVCC keys
// shadowed by delete tombstones below the asOf timestamp, if set. The iterator
// assumes that it will not encounter any write intents.
// valid point key of a given MVCC key that is also below the asOf timestamp, if
// set. Further, the iterator does not surface point or range tombstones, nor
// any MVCC keys shadowed by tombstones below the asOf timestamp, if set. The
// iterator assumes that it will not encounter any write intents.
type ReadAsOfIterator struct {
iter SimpleMVCCIterator

// asOf is the latest timestamp of a key surfaced by the iterator.
asOf hlc.Timestamp

// valid tracks if the current key is valid
valid bool

// err tracks if iterating to the current key returned an error
err error
}

var _ SimpleMVCCIterator = &ReadAsOfIterator{}
Expand All @@ -45,7 +51,7 @@ func (f *ReadAsOfIterator) SeekGE(originalKey MVCCKey) {
synthetic := MVCCKey{Key: originalKey.Key, Timestamp: f.asOf}
f.iter.SeekGE(synthetic)

if ok := f.advance(); ok && f.UnsafeKey().Less(originalKey) {
if f.advance(); f.valid && f.UnsafeKey().Less(originalKey) {
// The following is true:
// originalKey.Key == f.UnsafeKey &&
// f.asOf timestamp (if set) >= current timestamp > originalKey timestamp.
Expand All @@ -59,7 +65,7 @@ func (f *ReadAsOfIterator) SeekGE(originalKey MVCCKey) {

// Valid implements the simpleMVCCIterator.
func (f *ReadAsOfIterator) Valid() (bool, error) {
return f.iter.Valid()
return f.valid, f.err
}

// Next advances the iterator to the next valid MVCC key obeying the iterator's
Expand Down Expand Up @@ -94,42 +100,87 @@ func (f *ReadAsOfIterator) UnsafeValue() []byte {

// HasPointAndRange implements SimpleMVCCIterator.
func (f *ReadAsOfIterator) HasPointAndRange() (bool, bool) {
panic("not implemented")
// the ReadAsOfIterator only surfaces point keys; therefore hasPoint is always
// true, unless the iterator is invalid, and hasRange is always false.
return f.valid, false
}

// RangeBounds implements SimpleMVCCIterator.
// RangeBounds always returns an empty span, since the iterator never surfaces
// rangekeys.
func (f *ReadAsOfIterator) RangeBounds() roachpb.Span {
panic("not implemented")
return roachpb.Span{}
}

// RangeKeys implements SimpleMVCCIterator.
// RangeKeys is always empty since this iterator never surfaces rangeKeys.
func (f *ReadAsOfIterator) RangeKeys() []MVCCRangeKeyValue {
panic("not implemented")
return []MVCCRangeKeyValue{}
}

// updateValid updates i.valid and i.err based on the underlying iterator, and
// returns true if valid.
func (f *ReadAsOfIterator) updateValid() bool {
erikgrinaker marked this conversation as resolved.
Show resolved Hide resolved
f.valid, f.err = f.iter.Valid()
return f.valid
}

// advance moves past keys with timestamps later than f.asOf and skips MVCC keys
// whose latest value (subject to f.asOF) has been deleted. Note that advance
// moves past keys above asOF before evaluating tombstones, implying the
// iterator will never call f.iter.NextKey() on a tombstone with a timestamp
// later than f.asOF.
func (f *ReadAsOfIterator) advance() bool {
// whose latest value (subject to f.asOF) has been deleted by a point or range
// tombstone.
func (f *ReadAsOfIterator) advance() {
for {
if ok, err := f.Valid(); err != nil || !ok {
// No valid keys found.
return false
} else if !f.asOf.IsEmpty() && f.asOf.Less(f.iter.UnsafeKey().Timestamp) {
// Skip keys above the asOf timestamp.
if ok := f.updateValid(); !ok {
return
}

if !f.asOf.IsEmpty() && f.asOf.Less(f.iter.UnsafeKey().Timestamp) {
// Skip keys above the asOf timestamp, regardless of type of key (e.g. point or range)
f.iter.Next()
} else if hasPoint, hasRange := f.iter.HasPointAndRange(); !hasPoint && hasRange {
// Bare range keys get surfaced before the point key, even though the
// point key shadows it; thus, because we can infer range key information
// when the point key surfaces, skip over the bare range key, and reason
// about shadowed keys at the surfaced point key.
//
// E.g. Scanning the keys below:
// 2 a2
// 1 o---o
// a b
//
// would result in two surfaced keys:
// {a-b}@1;
// a2, {a-b}@1

f.iter.Next()
} else if len(f.iter.UnsafeValue()) == 0 {
// Skip to the next MVCC key if we find a tombstone.
// Skip to the next MVCC key if we find a point tombstone.
f.iter.NextKey()
} else if !hasRange {
// On a valid key without a range key
return
} else if f.asOfRangeKeyShadows() {
// The latest range key, as of system time, shadows the latest point key.
// This key is therefore deleted as of system time.
f.iter.NextKey()
} else {
// On a valid key.
return true
// On a valid key that potentially shadows range key(s)
return
}
}
}

// asOfRangeKeyShadows returns true if there exists a range key at or below the asOf timestamp
// that shadows the latest point key
//
// TODO (msbutler): ensure this function caches range key values (#84379) before
// the 22.2 branch cut, else we face a steep perf cliff for RESTORE with range keys.
func (f *ReadAsOfIterator) asOfRangeKeyShadows() (shadows bool) {
erikgrinaker marked this conversation as resolved.
Show resolved Hide resolved
rangeKeys := f.iter.RangeKeys()
erikgrinaker marked this conversation as resolved.
Show resolved Hide resolved
if f.asOf.IsEmpty() {
return f.iter.UnsafeKey().Timestamp.LessEq(rangeKeys[0].RangeKey.Timestamp)
}
return HasRangeKeyBetween(rangeKeys, f.asOf, f.iter.UnsafeKey().Timestamp)
}

// NewReadAsOfIterator constructs a ReadAsOfIterator.
func NewReadAsOfIterator(iter SimpleMVCCIterator, asOf hlc.Timestamp) *ReadAsOfIterator {
return &ReadAsOfIterator{iter: iter, asOf: asOf}
Expand Down
168 changes: 168 additions & 0 deletions pkg/storage/testdata/mvcc_histories/range_key_iter_read_as_of
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# Tests range key handling in ReadAsOfIterator. Note that the iterator assumes it will not see an
# intent.
#
# Sets up the following dataset, where x is tombstone, o-o is range tombstone
#
# 6 f6
# 5 o---------------o k5
# 4 x x d4 f4 g4 x
# 3 o-------o e3 o-------oh3 o---o
# 2 a2 f2 g2
# 1 o---------------------------------------o
# a b c d e f g h i j k l m n o
#
run ok
put_rangekey k=a end=k ts=1
put k=a ts=2 v=a2
del k=a ts=4
put_rangekey k=b end=d ts=3
del k=b ts=4
put k=d ts=4 v=d4
put k=e ts=3 v=e3
put k=f ts=2 v=f2
put k=g ts=2 v=g2
put_rangekey k=f end=h ts=3
put k=f ts=4 v=f4
put_rangekey k=c end=g ts=5
put k=f ts=6 v=f6
put k=g ts=4 v=g4
put k=h ts=3 v=h3
del k=h ts=4
put k=k ts=5 v=k5
put_rangekey k=m end=n ts=3 localTs=2
----
>> at end:
rangekey: {a-b}/[1.000000000,0=/<empty>]
rangekey: {b-c}/[3.000000000,0=/<empty> 1.000000000,0=/<empty>]
rangekey: {c-d}/[5.000000000,0=/<empty> 3.000000000,0=/<empty> 1.000000000,0=/<empty>]
rangekey: {d-f}/[5.000000000,0=/<empty> 1.000000000,0=/<empty>]
rangekey: {f-g}/[5.000000000,0=/<empty> 3.000000000,0=/<empty> 1.000000000,0=/<empty>]
rangekey: {g-h}/[3.000000000,0=/<empty> 1.000000000,0=/<empty>]
rangekey: {h-k}/[1.000000000,0=/<empty>]
rangekey: {m-n}/[3.000000000,0={localTs=2.000000000,0}/<empty>]
data: "a"/4.000000000,0 -> /<empty>
data: "a"/2.000000000,0 -> /BYTES/a2
data: "b"/4.000000000,0 -> /<empty>
data: "d"/4.000000000,0 -> /BYTES/d4
data: "e"/3.000000000,0 -> /BYTES/e3
data: "f"/6.000000000,0 -> /BYTES/f6
data: "f"/4.000000000,0 -> /BYTES/f4
data: "f"/2.000000000,0 -> /BYTES/f2
data: "g"/4.000000000,0 -> /BYTES/g4
data: "g"/2.000000000,0 -> /BYTES/g2
data: "h"/4.000000000,0 -> /<empty>
data: "h"/3.000000000,0 -> /BYTES/h3
data: "k"/5.000000000,0 -> /BYTES/k5

# test range keys are ignored if above asOf, even with multiple range keys
run ok
iter_new_read_as_of asOfTs=2
iter_seek_ge k=a
iter_scan
----
iter_seek_ge: "a"/2.000000000,0=/BYTES/a2
iter_scan: "a"/2.000000000,0=/BYTES/a2
iter_scan: "f"/2.000000000,0=/BYTES/f2
iter_scan: "g"/2.000000000,0=/BYTES/g2
iter_scan: .

# test range key at or below asOf properly shadows keys
run ok
iter_new_read_as_of asOfTs=3
iter_seek_ge k=a
iter_scan
----
iter_seek_ge: "a"/2.000000000,0=/BYTES/a2
iter_scan: "a"/2.000000000,0=/BYTES/a2
iter_scan: "e"/3.000000000,0=/BYTES/e3
iter_scan: "h"/3.000000000,0=/BYTES/h3
iter_scan: .

# iterate over a few point tombstones at the asOf time
run ok
iter_new_read_as_of asOfTs=4
msbutler marked this conversation as resolved.
Show resolved Hide resolved
iter_seek_ge k=a
iter_scan
----
iter_seek_ge: "d"/4.000000000,0=/BYTES/d4
iter_scan: "d"/4.000000000,0=/BYTES/d4
iter_scan: "e"/3.000000000,0=/BYTES/e3
iter_scan: "f"/4.000000000,0=/BYTES/f4
iter_scan: "g"/4.000000000,0=/BYTES/g4
iter_scan: .

# iterate over ts 5-7 because the test is cheap
run ok
iter_new_read_as_of asOfTs=5
iter_seek_ge k=a
iter_scan
----
iter_seek_ge: "g"/4.000000000,0=/BYTES/g4
iter_scan: "g"/4.000000000,0=/BYTES/g4
iter_scan: "k"/5.000000000,0=/BYTES/k5
iter_scan: .

# iterate over ts 5-7 because the test is cheap
run ok
iter_new_read_as_of asOfTs=6
iter_seek_ge k=a
iter_scan
----
iter_seek_ge: "f"/6.000000000,0=/BYTES/f6
iter_scan: "f"/6.000000000,0=/BYTES/f6
iter_scan: "g"/4.000000000,0=/BYTES/g4
iter_scan: "k"/5.000000000,0=/BYTES/k5
iter_scan: .

# iterate over ts 5-7 for completeness
run ok
iter_new_read_as_of asOfTs=7
iter_seek_ge k=a
iter_scan
----
iter_seek_ge: "f"/6.000000000,0=/BYTES/f6
iter_scan: "f"/6.000000000,0=/BYTES/f6
iter_scan: "g"/4.000000000,0=/BYTES/g4
iter_scan: "k"/5.000000000,0=/BYTES/k5
iter_scan: .


# test range key handling when asOf is empty
run ok
iter_new_read_as_of
iter_seek_ge k=a
iter_scan
----
iter_seek_ge: "f"/6.000000000,0=/BYTES/f6
iter_scan: "f"/6.000000000,0=/BYTES/f6
iter_scan: "g"/4.000000000,0=/BYTES/g4
iter_scan: "k"/5.000000000,0=/BYTES/k5
iter_scan: .

# seek to a point key shadowed by a range key
run ok
iter_new_read_as_of asOfTs=5
iter_seek_ge k=d
----
iter_seek_ge: "g"/4.000000000,0=/BYTES/g4

# seek to the start of a range key
run ok
iter_new_read_as_of asOfTs=5
iter_seek_ge k=c
----
iter_seek_ge: "g"/4.000000000,0=/BYTES/g4

# seek to the same point key, with AsOf empty
run ok
iter_new_read_as_of
iter_seek_ge k=d
----
iter_seek_ge: "f"/6.000000000,0=/BYTES/f6

# attempt seek to the same point key, but ignore the range key because its above AsOf
run ok
iter_new_read_as_of asOfTs=4
iter_seek_ge k=d
----
iter_seek_ge: "d"/4.000000000,0=/BYTES/d4