From 4d5415247d2e47d16105935466c465aa1e873fce Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Sun, 24 Jul 2022 20:27:47 +0000 Subject: [PATCH 1/6] storage: add `MVCCRangeKeyStack` for range keys This patch adds `MVCCRangeKeyStack` and `MVCCRangeKeyVersion`, a new range key representation that will be returned by `SimpleMVCCIterator`. It is more compact, for efficiency, and comes with a set of convenience methods to simplify common range key processing. Release note: None --- pkg/storage/mvcc_key.go | 233 ++++++++++++++++++++++++++++++++++- pkg/storage/mvcc_key_test.go | 217 +++++++++++++++++++++++++++----- 2 files changed, 419 insertions(+), 31 deletions(-) diff --git a/pkg/storage/mvcc_key.go b/pkg/storage/mvcc_key.go index 4791a6cb550c..54f21a63c35b 100644 --- a/pkg/storage/mvcc_key.go +++ b/pkg/storage/mvcc_key.go @@ -11,13 +11,13 @@ package storage import ( + "bytes" "encoding/binary" "fmt" "sort" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/storage/enginepb" - "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/errors" ) @@ -365,6 +365,11 @@ type MVCCRangeKey struct { Timestamp hlc.Timestamp } +// Bounds returns the range key bounds as a Span. +func (k MVCCRangeKey) Bounds() roachpb.Span { + return roachpb.Span{Key: k.StartKey, EndKey: k.EndKey} +} + // Clone returns a copy of the range key. func (k MVCCRangeKey) Clone() MVCCRangeKey { // k is already a copy, but byte slices must be cloned. @@ -439,6 +444,232 @@ func (k MVCCRangeKey) Validate() (err error) { } } +// MVCCRangeKeyStack represents a stack of range key fragments as returned +// by SimpleMVCCIterator.RangeKeys(). All fragments have the same key bounds, +// and are ordered from newest to oldest. +type MVCCRangeKeyStack struct { + Bounds roachpb.Span + Versions MVCCRangeKeyVersions +} + +// MVCCRangeKeyVersions represents a stack of range key fragment versions. +type MVCCRangeKeyVersions []MVCCRangeKeyVersion + +// MVCCRangeKeyVersion represents a single range key fragment version. +type MVCCRangeKeyVersion struct { + Timestamp hlc.Timestamp + Value []byte +} + +// AsRangeKey returns an MVCCRangeKey for the given version. Byte slices +// are shared with the stack. +func (s MVCCRangeKeyStack) AsRangeKey(v MVCCRangeKeyVersion) MVCCRangeKey { + return MVCCRangeKey{ + StartKey: s.Bounds.Key, + EndKey: s.Bounds.EndKey, + Timestamp: v.Timestamp, + } +} + +// AsRangeKeys converts the stack into a slice of MVCCRangeKey. Byte slices +// are shared with the stack. +func (s MVCCRangeKeyStack) AsRangeKeys() []MVCCRangeKey { + rangeKeys := make([]MVCCRangeKey, 0, len(s.Versions)) + for _, v := range s.Versions { + rangeKeys = append(rangeKeys, s.AsRangeKey(v)) + } + return rangeKeys +} + +// AsRangeKeyValue returns an MVCCRangeKeyValue for the given version. Byte +// slices are shared with the stack. +func (s MVCCRangeKeyStack) AsRangeKeyValue(v MVCCRangeKeyVersion) MVCCRangeKeyValue { + return MVCCRangeKeyValue{ + RangeKey: s.AsRangeKey(v), + Value: v.Value, + } +} + +// AsRangeKeyValues converts the stack into a slice of MVCCRangeKeyValue. Byte +// slices are shared with the stack. +func (s MVCCRangeKeyStack) AsRangeKeyValues() []MVCCRangeKeyValue { + kvs := make([]MVCCRangeKeyValue, 0, len(s.Versions)) + for _, v := range s.Versions { + kvs = append(kvs, s.AsRangeKeyValue(v)) + } + return kvs +} + +// CanMergeRight returns true if the current stack will merge with the given +// right-hand stack. The key bounds must touch exactly, i.e. lhs.EndKey must +// equal rhs.Key. +func (s MVCCRangeKeyStack) CanMergeRight(r MVCCRangeKeyStack) bool { + if s.IsEmpty() || s.Len() != r.Len() || !s.Bounds.EndKey.Equal(r.Bounds.Key) { + return false + } + for i := range s.Versions { + if !s.Versions[i].Equal(r.Versions[i]) { + return false + } + } + return true +} + +// Clone clones the stack. +func (s MVCCRangeKeyStack) Clone() MVCCRangeKeyStack { + // TODO(erikgrinaker): We can optimize this by using a single memory + // allocation for all byte slices in the entire stack. + s.Bounds = s.Bounds.Clone() + s.Versions = s.Versions.Clone() + return s +} + +// Covers returns true if any range key in the stack covers the given point key. +func (s MVCCRangeKeyStack) Covers(k MVCCKey) bool { + return s.Versions.Covers(k.Timestamp) && s.Bounds.ContainsKey(k.Key) +} + +// CoversTimestamp returns true if any range key in the stack covers the given timestamp. +func (s MVCCRangeKeyStack) CoversTimestamp(ts hlc.Timestamp) bool { + return s.Versions.Covers(ts) +} + +// FirstAbove does a binary search for the first range key version at or above +// the given timestamp. Returns false if no matching range key was found. +func (s MVCCRangeKeyStack) FirstAbove(ts hlc.Timestamp) (MVCCRangeKeyVersion, bool) { + return s.Versions.FirstAbove(ts) +} + +// FirstBelow does a binary search for the first range key version at or below +// the given timestamp. Returns false if no matching range key was found. +func (s MVCCRangeKeyStack) FirstBelow(ts hlc.Timestamp) (MVCCRangeKeyVersion, bool) { + return s.Versions.FirstBelow(ts) +} + +// HasBetween checks whether an MVCC range key exists between the two given +// timestamps (both inclusive, in order). +func (s MVCCRangeKeyStack) HasBetween(lower, upper hlc.Timestamp) bool { + return s.Versions.HasBetween(lower, upper) +} + +// IsEmpty returns true if the stack is empty (no versions). +func (s MVCCRangeKeyStack) IsEmpty() bool { + return s.Versions.IsEmpty() +} + +// Len returns the number of versions in the stack. +func (s MVCCRangeKeyStack) Len() int { + return len(s.Versions) +} + +// Timestamps returns the timestamps of all versions. +func (s MVCCRangeKeyStack) Timestamps() []hlc.Timestamp { + return s.Versions.Timestamps() +} + +// Trim trims the versions to the time span [from, to] (both inclusive). +func (s MVCCRangeKeyStack) Trim(from, to hlc.Timestamp) MVCCRangeKeyStack { + s.Versions = s.Versions.Trim(from, to) + return s +} + +// Clone clones the versions. +func (v MVCCRangeKeyVersions) Clone() MVCCRangeKeyVersions { + c := make(MVCCRangeKeyVersions, len(v)) + for i, version := range v { + c[i] = version.Clone() + } + return c +} + +// Covers returns true if any version in the stack is above the given timestamp. +func (v MVCCRangeKeyVersions) Covers(ts hlc.Timestamp) bool { + return !v.IsEmpty() && ts.LessEq(v[0].Timestamp) +} + +// FirstAbove does a binary search for the first range key version at or above +// the given timestamp. Returns false if no matching range key was found. +func (v MVCCRangeKeyVersions) FirstAbove(ts hlc.Timestamp) (MVCCRangeKeyVersion, bool) { + // This is kind of odd due to sort.Search() semantics: we do a binary search + // for the first range key that's below the timestamp, then return the + // previous range key if any. + if i := sort.Search(len(v), func(i int) bool { + return v[i].Timestamp.Less(ts) + }); i > 0 { + return v[i-1], true + } + return MVCCRangeKeyVersion{}, false +} + +// FirstBelow does a binary search for the first range key version at or below +// the given timestamp. Returns false if no matching range key was found. +func (v MVCCRangeKeyVersions) FirstBelow(ts hlc.Timestamp) (MVCCRangeKeyVersion, bool) { + if i := sort.Search(len(v), func(i int) bool { + return v[i].Timestamp.LessEq(ts) + }); i < len(v) { + return v[i], true + } + return MVCCRangeKeyVersion{}, false +} + +// HasBetween checks whether an MVCC range key exists between the two given +// timestamps (both inclusive, in order). +func (v MVCCRangeKeyVersions) HasBetween(lower, upper hlc.Timestamp) bool { + if version, ok := v.FirstAbove(lower); ok { + // Consider equal timestamps to be "between". This shouldn't really happen, + // since MVCC enforces point and range keys can't have the same timestamp. + return version.Timestamp.LessEq(upper) + } + return false +} + +// IsEmpty returns true if the stack is empty (no versions). +func (v MVCCRangeKeyVersions) IsEmpty() bool { + return len(v) == 0 +} + +// Timestamps returns the timestamps of all versions. +func (v MVCCRangeKeyVersions) Timestamps() []hlc.Timestamp { + timestamps := make([]hlc.Timestamp, 0, len(v)) + for _, version := range v { + timestamps = append(timestamps, version.Timestamp) + } + return timestamps +} + +// Trim trims the versions to the time span [from, to] (both inclusive). +func (v MVCCRangeKeyVersions) Trim(from, to hlc.Timestamp) MVCCRangeKeyVersions { + // We assume that to will often be near the current time, and use a linear + // rather than a binary search, which will often match on the first range key. + start := len(v) + for i, version := range v { + if version.Timestamp.LessEq(to) { + start = i + break + } + } + v = v[start:] + + // We then use a binary search to find the lower bound. + end := sort.Search(len(v), func(i int) bool { + return v[i].Timestamp.Less(from) + }) + return v[:end] +} + +// Clone clones the version. +func (v MVCCRangeKeyVersion) Clone() MVCCRangeKeyVersion { + if v.Value != nil { + v.Value = append([]byte(nil), v.Value...) + } + return v +} + +// Equal returns true if the two versions are equal. +func (v MVCCRangeKeyVersion) Equal(o MVCCRangeKeyVersion) bool { + return v.Timestamp.Equal(o.Timestamp) && bytes.Equal(v.Value, o.Value) +} + // FirstRangeKeyAbove does a binary search for the first range key at or above // the given timestamp. It assumes the range keys are ordered in descending // timestamp order, as returned by SimpleMVCCIterator.RangeKeys(). Returns false diff --git a/pkg/storage/mvcc_key_test.go b/pkg/storage/mvcc_key_test.go index e0525d146605..a3e8c33cd3eb 100644 --- a/pkg/storage/mvcc_key_test.go +++ b/pkg/storage/mvcc_key_test.go @@ -22,7 +22,6 @@ import ( "testing/quick" "github.com/cockroachdb/cockroach/pkg/roachpb" - "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -499,19 +498,88 @@ func TestMVCCRangeKeyValidate(t *testing.T) { } } -func TestFirstRangeKeyAbove(t *testing.T) { +func TestMVCCRangeKeyStackCanMergeRight(t *testing.T) { defer leaktest.AfterTest(t)() - rangeKVs := []MVCCRangeKeyValue{ - rangeKV("a", "f", 6, MVCCValue{}), - rangeKV("a", "f", 4, MVCCValue{}), - rangeKV("a", "f", 3, MVCCValue{}), - rangeKV("a", "f", 1, MVCCValue{}), + testcases := map[string]struct { + lhs, rhs MVCCRangeKeyStack + expect bool + }{ + "empty stacks don't merge": { + rangeKeyStack("", "", nil), + rangeKeyStack("", "", nil), + false}, + + "empty lhs doesn't merge": { + rangeKeyStack("a", "b", map[int]MVCCValue{}), + rangeKeyStack("b", "c", map[int]MVCCValue{1: {}}), + false}, + + "empty rhs doesn't merge": { + rangeKeyStack("a", "b", map[int]MVCCValue{1: {}}), + rangeKeyStack("b", "c", map[int]MVCCValue{}), + false}, + + "stacks merge": { + rangeKeyStack("a", "b", map[int]MVCCValue{5: {}, 3: {}, 1: {}}), + rangeKeyStack("b", "c", map[int]MVCCValue{5: {}, 3: {}, 1: {}}), + true}, + + "missing lhs version end": { + rangeKeyStack("a", "b", map[int]MVCCValue{5: {}, 3: {}}), + rangeKeyStack("b", "c", map[int]MVCCValue{5: {}, 3: {}, 1: {}}), + false}, + + "missing rhs version end": { + rangeKeyStack("a", "b", map[int]MVCCValue{5: {}, 3: {}, 1: {}}), + rangeKeyStack("b", "c", map[int]MVCCValue{5: {}, 3: {}}), + false}, + + "different version timestamp": { + rangeKeyStack("a", "b", map[int]MVCCValue{5: {}, 3: {}, 1: {}}), + rangeKeyStack("b", "c", map[int]MVCCValue{5: {}, 2: {}, 1: {}}), + false}, + + "different version value": { + rangeKeyStack("a", "b", map[int]MVCCValue{5: {}, 3: tombstoneLocalTS(9), 1: {}}), + rangeKeyStack("b", "c", map[int]MVCCValue{5: {}, 3: {}, 1: {}}), + false}, + + "bounds not touching": { + rangeKeyStack("a", "b", map[int]MVCCValue{5: {}, 3: {}, 1: {}}), + rangeKeyStack("c", "d", map[int]MVCCValue{5: {}, 3: {}, 1: {}}), + false}, + + "overlapping range keys don't merge": { + rangeKeyStack("a", "c", map[int]MVCCValue{5: {}, 3: {}, 1: {}}), + rangeKeyStack("b", "d", map[int]MVCCValue{5: {}, 3: {}, 1: {}}), + false}, + + "same range keys don't merge": { + rangeKeyStack("a", "c", map[int]MVCCValue{5: {}, 3: {}, 1: {}}), + rangeKeyStack("a", "c", map[int]MVCCValue{5: {}, 3: {}, 1: {}}), + false}, + + "wrong order don't merge": { + rangeKeyStack("b", "c", map[int]MVCCValue{5: {}, 3: {}, 1: {}}), + rangeKeyStack("a", "b", map[int]MVCCValue{5: {}, 3: {}, 1: {}}), + false}, + } + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + require.Equal(t, tc.expect, tc.lhs.CanMergeRight(tc.rhs)) + }) } +} + +func TestMVCCRangeKeyStackFirstAbove(t *testing.T) { + defer leaktest.AfterTest(t)() + + rangeKeys := rangeKeyStack("a", "f", map[int]MVCCValue{6: {}, 4: {}, 3: {}, 1: {}}) testcases := []struct { - ts int64 - expect int64 + ts int + expect int }{ {0, 1}, {1, 1}, @@ -524,51 +592,109 @@ func TestFirstRangeKeyAbove(t *testing.T) { } for _, tc := range testcases { t.Run(fmt.Sprintf("%d", tc.ts), func(t *testing.T) { - rkv, ok := FirstRangeKeyAbove(rangeKVs, hlc.Timestamp{WallTime: tc.ts}) + v, ok := rangeKeys.FirstAbove(wallTS(tc.ts)) if tc.expect == 0 { require.False(t, ok) - require.Empty(t, rkv) + require.Empty(t, v) } else { require.True(t, ok) - require.Equal(t, rangeKV("a", "f", int(tc.expect), MVCCValue{}), rkv) + require.Equal(t, rangeKeyVersion(tc.expect, MVCCValue{}), v) } }) } } -func TestHasRangeKeyBetween(t *testing.T) { +func TestMVCCRangeKeyStackFirstBelow(t *testing.T) { defer leaktest.AfterTest(t)() - rangeKVs := []MVCCRangeKeyValue{ - rangeKV("a", "f", 5, MVCCValue{}), - rangeKV("a", "f", 1, MVCCValue{}), + rangeKeys := rangeKeyStack("a", "f", map[int]MVCCValue{6: {}, 4: {}, 3: {}, 1: {}}) + + testcases := []struct { + ts int + expect int + }{ + {0, 0}, + {1, 1}, + {2, 1}, + {3, 3}, + {4, 4}, + {5, 4}, + {6, 6}, + {7, 6}, } + for _, tc := range testcases { + t.Run(fmt.Sprintf("%d", tc.ts), func(t *testing.T) { + v, ok := rangeKeys.FirstBelow(wallTS(tc.ts)) + if tc.expect == 0 { + require.False(t, ok) + require.Empty(t, v) + } else { + require.True(t, ok) + require.Equal(t, rangeKeyVersion(tc.expect, MVCCValue{}), v) + } + }) + } +} + +func TestMVCCRangeKeyStackHasBetween(t *testing.T) { + defer leaktest.AfterTest(t)() + + rangeKeys := rangeKeyStack("a", "f", map[int]MVCCValue{5: {}, 1: {}}) testcases := []struct { - upper, lower int + lower, upper int expect bool }{ {0, 0, false}, - {0, 1, false}, // wrong order - {1, 0, true}, + {1, 0, false}, // wrong order + {0, 1, true}, {1, 1, true}, - {0, 2, false}, // wrong order - {4, 6, false}, // wrong order - {6, 4, true}, + {2, 0, false}, // wrong order + {6, 4, false}, // wrong order + {4, 6, true}, {5, 5, true}, {4, 4, false}, {6, 6, false}, - {4, 2, false}, - {0, 9, false}, // wrong order - {9, 0, true}, + {2, 4, false}, + {9, 0, false}, // wrong order + {0, 9, true}, } for _, tc := range testcases { - t.Run(fmt.Sprintf("%d,%d", tc.upper, tc.lower), func(t *testing.T) { - if util.RaceEnabled && tc.upper < tc.lower { - require.Panics(t, func() { HasRangeKeyBetween(rangeKVs, wallTS(tc.upper), wallTS(tc.lower)) }) - } else { - require.Equal(t, tc.expect, HasRangeKeyBetween(rangeKVs, wallTS(tc.upper), wallTS(tc.lower))) + t.Run(fmt.Sprintf("%d,%d", tc.lower, tc.upper), func(t *testing.T) { + require.Equal(t, tc.expect, rangeKeys.HasBetween(wallTS(tc.lower), wallTS(tc.upper))) + }) + } +} + +func TestMVCCRangeKeyStackTrim(t *testing.T) { + defer leaktest.AfterTest(t)() + + rangeKeys := rangeKeyStack("a", "f", map[int]MVCCValue{7: {}, 5: {}, 3: {}}) + + testcases := []struct { + from, to int + expect []int + }{ + {0, 10, []int{7, 5, 3}}, + {10, 0, []int{}}, // wrong order + {0, 0, []int{}}, + {0, 2, []int{}}, + {8, 9, []int{}}, + {3, 7, []int{7, 5, 3}}, + {4, 7, []int{7, 5}}, + {4, 6, []int{5}}, + {5, 6, []int{5}}, + {4, 5, []int{5}}, + {5, 5, []int{5}}, + } + for _, tc := range testcases { + t.Run(fmt.Sprintf("%d,%d", tc.from, tc.to), func(t *testing.T) { + expect := rangeKeyStack("a", "f", nil) + for _, ts := range tc.expect { + expect.Versions = append(expect.Versions, rangeKeyVersion(ts, MVCCValue{})) } + + require.Equal(t, expect, rangeKeys.Trim(wallTS(tc.from), wallTS(tc.to))) }) } } @@ -608,6 +734,37 @@ func rangeKV(start, end string, ts int, v MVCCValue) MVCCRangeKeyValue { } } +func rangeKeyStack(start, end string, versions map[int]MVCCValue) MVCCRangeKeyStack { + return MVCCRangeKeyStack{ + Bounds: roachpb.Span{Key: roachpb.Key(start), EndKey: roachpb.Key(end)}, + Versions: rangeKeyVersions(versions), + } +} + +func rangeKeyVersions(v map[int]MVCCValue) MVCCRangeKeyVersions { + versions := make([]MVCCRangeKeyVersion, len(v)) + var timestamps []int + for i := range v { + timestamps = append(timestamps, i) + } + sort.Ints(timestamps) + for i, ts := range timestamps { + versions[len(versions)-1-i] = rangeKeyVersion(ts, v[ts]) + } + return versions +} + +func rangeKeyVersion(ts int, v MVCCValue) MVCCRangeKeyVersion { + valueRaw, err := EncodeMVCCValue(v) + if err != nil { + panic(err) + } + return MVCCRangeKeyVersion{ + Timestamp: wallTS(ts), + Value: valueRaw, + } +} + func wallTS(ts int) hlc.Timestamp { return hlc.Timestamp{WallTime: int64(ts)} } From 5e2930bede5bbaf40801fd4bdf6a740228e207b7 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Thu, 28 Jul 2022 19:54:01 +0000 Subject: [PATCH 2/6] storage: return `MVCCRangeKeyStack` from `SimpleMVCCIterator` This patch changes `SimpleMVCCIterator.RangeKeys()` to return `MVCCRangeKeyStack` instead of `[]MVCCRangeKeyValue`. Callers have not been migrated to properly make use of this -- instead, they call `AsRangeKeyValues()` and construct and use the old data structure. The MVCC range tombstones tech note is also updated to reflect this. Release note: None --- docs/tech-notes/mvcc-range-tombstones.md | 37 +++++++++++++----- pkg/ccl/backupccl/file_sst_sink.go | 2 +- pkg/kv/kvserver/batcheval/cmd_add_sstable.go | 4 +- pkg/kv/kvserver/batcheval/cmd_clear_range.go | 2 +- .../batcheval/cmd_delete_range_test.go | 4 +- .../kvserver/batcheval/cmd_end_transaction.go | 2 +- .../kvserver/batcheval/cmd_refresh_range.go | 2 +- pkg/kv/kvserver/gc/gc.go | 2 +- pkg/kv/kvserver/gc/gc_iterator.go | 2 +- pkg/kv/kvserver/gc/gc_random_test.go | 8 ++-- pkg/kv/kvserver/gc/gc_test.go | 2 +- pkg/kv/kvserver/rangefeed/catchup_scan.go | 4 +- pkg/kv/kvserver/rangefeed/task_test.go | 6 +-- pkg/kv/kvserver/rditer/replica_data_iter.go | 2 +- .../kvserver/rditer/replica_data_iter_test.go | 2 +- pkg/kv/kvserver/spanset/batch.go | 2 +- pkg/storage/engine.go | 12 +++--- pkg/storage/engine_test.go | 8 ++-- pkg/storage/intent_interleaving_iter.go | 4 +- pkg/storage/multi_iterator.go | 2 +- pkg/storage/mvcc.go | 38 +++++++++---------- pkg/storage/mvcc_history_test.go | 24 ++++++------ pkg/storage/mvcc_incremental_iterator.go | 20 +++++----- pkg/storage/mvcc_test.go | 2 +- pkg/storage/pebble_iterator.go | 20 +++++----- pkg/storage/point_synthesizing_iter.go | 6 +-- pkg/storage/read_as_of_iterator.go | 6 +-- pkg/storage/sst.go | 2 +- pkg/storage/sst_iterator.go | 4 +- pkg/testutils/storageutils/scan.go | 2 +- 30 files changed, 126 insertions(+), 107 deletions(-) diff --git a/docs/tech-notes/mvcc-range-tombstones.md b/docs/tech-notes/mvcc-range-tombstones.md index d39104eeb287..e16ad702c31b 100644 --- a/docs/tech-notes/mvcc-range-tombstones.md +++ b/docs/tech-notes/mvcc-range-tombstones.md @@ -75,8 +75,8 @@ type MVCCRangeKey struct { } ``` -A range key stores an encoded `MVCCValue`, similarly to `MVCCKey`. They are -often paired as an `MVCCRangeKeyValue`: +A range key stores an encoded `MVCCValue`, similarly to `MVCCKey`. They can be +paired as an `MVCCRangeKeyValue`: ```go type MVCCRangeKeyValue struct { @@ -122,12 +122,31 @@ exist between the bounds `[a-d)`, and `c` is within those bounds. The same is true for `a@5`, even though it is above both MVCC range tombstones. It is up to the iterator caller to interpret the range keys as appropriate relative to the point key. It follows that all range keys overlapping a key will be pulled into -memory at once, but we assume that overlapping range keys will be few. More on -MVCC iteration later. +memory at once, but we assume that overlapping range keys will be few. -In the KV API, however, this distinction doesn't really matter: `Get(c)` at -timestamp >= 5 would return nothing, while `Get(b)` would return `b5`. Again, -more on this later. +This is represented as a specialized compact data structure, +`MVCCRangeKeyStack`, where all range keys have the same bounds due to +fragmentation (described below): + +```go +type MVCCRangeKeyStack struct { + Bounds roachpb.Span + Versions MVCCRangeKeyVersions +} + +type MVCCRangeKeyVersions []MVCCRangeKeyVersion + +type MVCCRangeKeyVersion struct { + Timestamp hlc.Timestamp + Value []byte // encoded MVCCValue +} +``` + +In the KV API, however, the relationship between point keys and range keys +doesn't really matter: `Get(c)` at timestamp >= 5 would simply return nothing, +while `Get(b)` would return `b5`. More on this later. + +### Fragmentation Range keys do not have a stable, discrete identity, and should be considered a continuum: they may be partially removed or replaced, merged or fragmented by @@ -161,7 +180,7 @@ Pebble: `[a-b)@1`, `[b-c)@2`, `[b-c)@1`, and `[c-d)@2`. Similarly, clearing `[b-d)@2` would merge the remaining keys back into `[a-c)@1`. This implies that all range keys exposed for a specific key position all have -the same key bounds. +the same key bounds, as shown in `MVCCRangeKeyStack`. Fragmentation is beneficial because it makes all range key properties local, which avoids incurring unnecessary access costs across SSTs and CRDB ranges when @@ -268,7 +287,7 @@ The properties of point and range keys are accessed via: * `RangeBounds()`: start and end bounds of range keys overlapping the current position, if any. * `RangeKeys()`: all range keys at the current key position (i.e. at all - timestamps), as `[]MVCCRangeKeyValue`. + timestamps), as `MVCCRangeKeyStack`. During iteration with `IterKeyTypePointsAndRanges`, range keys are emitted at their start key and at every overlapping point key. Consider a modified diff --git a/pkg/ccl/backupccl/file_sst_sink.go b/pkg/ccl/backupccl/file_sst_sink.go index b5c49da6d13e..f1017b6b3bb2 100644 --- a/pkg/ccl/backupccl/file_sst_sink.go +++ b/pkg/ccl/backupccl/file_sst_sink.go @@ -363,7 +363,7 @@ func (s *fileSSTSink) copyRangeKeys(dataSST []byte) error { } else if !ok { break } - for _, rkv := range iter.RangeKeys() { + for _, rkv := range iter.RangeKeys().AsRangeKeyValues() { if err := s.sst.PutRawMVCCRangeKey(rkv.RangeKey, rkv.Value); err != nil { return err } diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go index 32e460ebccc0..6569362dea1f 100644 --- a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go +++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go @@ -417,7 +417,7 @@ func EvalAddSSTable( } else if !ok { break } - for _, rkv := range rangeIter.RangeKeys() { + for _, rkv := range rangeIter.RangeKeys().AsRangeKeyValues() { if err = readWriter.PutRawMVCCRangeKey(rkv.RangeKey, rkv.Value); err != nil { return result.Result{}, err } @@ -517,7 +517,7 @@ func assertSSTContents(sst []byte, sstTimestamp hlc.Timestamp, stats *enginepb.M break } - for _, rkv := range iter.RangeKeys() { + for _, rkv := range iter.RangeKeys().AsRangeKeyValues() { if err := rkv.RangeKey.Validate(); err != nil { return errors.NewAssertionErrorWithWrappedErrf(err, "SST contains invalid range key") } diff --git a/pkg/kv/kvserver/batcheval/cmd_clear_range.go b/pkg/kv/kvserver/batcheval/cmd_clear_range.go index 9ecea57c93af..59cefa36b5cd 100644 --- a/pkg/kv/kvserver/batcheval/cmd_clear_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_clear_range.go @@ -218,7 +218,7 @@ func computeStatsDelta( if ok, err := iter.Valid(); err != nil { return err } else if ok && iter.RangeBounds().Key.Compare(bound) < 0 { - for i, rkv := range iter.RangeKeys() { + for i, rkv := range iter.RangeKeys().AsRangeKeyValues() { keyBytes := int64(storage.EncodedMVCCTimestampSuffixLength(rkv.RangeKey.Timestamp)) valBytes := int64(len(rkv.Value)) if i == 0 { diff --git a/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go b/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go index 4e454a1001ea..99457c294bcb 100644 --- a/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go @@ -306,7 +306,7 @@ func checkPredicateDeleteRange(t *testing.T, engine storage.Reader, rKeyInfo sto // PredicateDeleteRange should not have written any delete tombstones; // therefore, any range key tombstones in the span should have been // written before the request was issued. - for _, rKey := range iter.RangeKeys() { + for _, rKey := range iter.RangeKeys().AsRangeKeyValues() { require.Equal(t, true, rKey.RangeKey.Timestamp.Less(rKeyInfo.Timestamp)) } continue @@ -337,7 +337,7 @@ func checkDeleteRangeTombstone( break } require.True(t, ok) - for _, rkv := range iter.RangeKeys() { + for _, rkv := range iter.RangeKeys().AsRangeKeyValues() { if rkv.RangeKey.Timestamp.Equal(rangeKey.Timestamp) { if len(seen.RangeKey.StartKey) == 0 { seen = rkv.Clone() diff --git a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go index 61e816d2fc30..0fd41891e0cf 100644 --- a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go +++ b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go @@ -1315,7 +1315,7 @@ func computeSplitRangeKeyStatsDelta( // contribution of the range key fragmentation. The naïve calculation would be // rhs.EncodedSize() - (keyLen(rhs.EndKey) - keyLen(lhs.EndKey)) // which simplifies to 2 * keyLen(rhs.StartKey) + tsLen(rhs.Timestamp). - for i, rkv := range iter.RangeKeys() { + for i, rkv := range iter.RangeKeys().AsRangeKeyValues() { keyBytes := int64(storage.EncodedMVCCTimestampSuffixLength(rkv.RangeKey.Timestamp)) valBytes := int64(len(rkv.Value)) if i == 0 { diff --git a/pkg/kv/kvserver/batcheval/cmd_refresh_range.go b/pkg/kv/kvserver/batcheval/cmd_refresh_range.go index f5bf350d07fe..2dadc3a47bc0 100644 --- a/pkg/kv/kvserver/batcheval/cmd_refresh_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_refresh_range.go @@ -93,7 +93,7 @@ func refreshRange( key := iter.UnsafeKey().Clone() if _, hasRange := iter.HasPointAndRange(); hasRange { - rangeKVs := iter.RangeKeys() + rangeKVs := iter.RangeKeys().AsRangeKeyValues() if len(rangeKVs) == 0 { // defensive return errors.Errorf("expected range key at %s not found", key) } diff --git a/pkg/kv/kvserver/gc/gc.go b/pkg/kv/kvserver/gc/gc.go index 21bbff07c058..8e0d70fe7da8 100644 --- a/pkg/kv/kvserver/gc/gc.go +++ b/pkg/kv/kvserver/gc/gc.go @@ -855,7 +855,7 @@ func processReplicatedRangeTombstones( if !ok { break } - rangeKeys := iter.RangeKeys() + rangeKeys := iter.RangeKeys().AsRangeKeyValues() if idx := sort.Search(len(rangeKeys), func(i int) bool { return rangeKeys[i].RangeKey.Timestamp.LessEq(gcThreshold) diff --git a/pkg/kv/kvserver/gc/gc_iterator.go b/pkg/kv/kvserver/gc/gc_iterator.go index c657033dcfed..2320d7315326 100644 --- a/pkg/kv/kvserver/gc/gc_iterator.go +++ b/pkg/kv/kvserver/gc/gc_iterator.go @@ -195,7 +195,7 @@ func (it *gcIterator) currentRangeTS() hlc.Timestamp { } it.cachedRangeTombstoneTS = hlc.Timestamp{} - rangeKeys := it.it.RangeKeys() + rangeKeys := it.it.RangeKeys().AsRangeKeyValues() if idx := sort.Search(len(rangeKeys), func(i int) bool { return rangeKeys[i].RangeKey.Timestamp.LessEq(it.threshold) }); idx < len(rangeKeys) { diff --git a/pkg/kv/kvserver/gc/gc_random_test.go b/pkg/kv/kvserver/gc/gc_random_test.go index 1e6b73216fce..a67f06b38540 100644 --- a/pkg/kv/kvserver/gc/gc_random_test.go +++ b/pkg/kv/kvserver/gc/gc_random_test.go @@ -466,7 +466,7 @@ func getExpectationsGenerator( // so we will add them to history of current key for analysis. // Bare range tombstones are ignored. if r { - for _, r := range it.RangeKeys() { + for _, r := range it.RangeKeys().AsRangeKeyValues() { history = append(history, historyItem{ MVCCKeyValue: storage.MVCCKeyValue{ Key: storage.MVCCKey{ @@ -603,7 +603,7 @@ func getKeyHistory(t *testing.T, r storage.Reader, key roachpb.Key) string { break } if r && len(result) == 0 { - for _, rk := range it.RangeKeys() { + for _, rk := range it.RangeKeys().AsRangeKeyValues() { result = append(result, fmt.Sprintf("R:%s", rk.RangeKey.String())) } } @@ -624,8 +624,8 @@ func rangeFragmentsFromIt(t *testing.T, it storage.MVCCIterator) [][]storage.MVC } _, r := it.HasPointAndRange() if r { - fragments := make([]storage.MVCCRangeKeyValue, len(it.RangeKeys())) - for i, r := range it.RangeKeys() { + fragments := make([]storage.MVCCRangeKeyValue, len(it.RangeKeys().AsRangeKeyValues())) + for i, r := range it.RangeKeys().AsRangeKeyValues() { fragments[i] = r.Clone() } result = append(result, fragments) diff --git a/pkg/kv/kvserver/gc/gc_test.go b/pkg/kv/kvserver/gc/gc_test.go index 29b5145b0def..696af35d5c32 100644 --- a/pkg/kv/kvserver/gc/gc_test.go +++ b/pkg/kv/kvserver/gc/gc_test.go @@ -1084,7 +1084,7 @@ func engineData(t *testing.T, r storage.Reader, desc roachpb.RangeDescriptor) [] _, r := rangeIt.HasPointAndRange() if r { span := rangeIt.RangeBounds() - newKeys := rangeIt.RangeKeys() + newKeys := rangeIt.RangeKeys().AsRangeKeyValues() if lastEnd.Equal(span.Key) { // Try merging keys by timestamp. var newPartial []storage.MVCCRangeKey diff --git a/pkg/kv/kvserver/rangefeed/catchup_scan.go b/pkg/kv/kvserver/rangefeed/catchup_scan.go index f64258f941ab..1cf82b645727 100644 --- a/pkg/kv/kvserver/rangefeed/catchup_scan.go +++ b/pkg/kv/kvserver/rangefeed/catchup_scan.go @@ -154,7 +154,7 @@ func (i *CatchUpIterator) CatchUpScan(outputFn outputEventFn, withDiff bool) err rangeKeysStart = append(rangeKeysStart[:0], rangeBounds.Key...) // Emit events for these MVCC range tombstones, in chronological order. - rangeKeys := i.RangeKeys() + rangeKeys := i.RangeKeys().AsRangeKeyValues() for i := len(rangeKeys) - 1; i >= 0; i-- { var span roachpb.Span a, span.Key = a.Copy(rangeBounds.Key, 0) @@ -272,7 +272,7 @@ func (i *CatchUpIterator) CatchUpScan(outputFn outputEventFn, withDiff bool) err // additional MVCC range tombstones below StartTime that cover this // point. We need to find a more performant way to handle this. if !hasRange || !storage.HasRangeKeyBetween( - i.RangeKeys(), reorderBuf[l].Val.Value.Timestamp, ts) { + i.RangeKeys().AsRangeKeyValues(), reorderBuf[l].Val.Value.Timestamp, ts) { // TODO(sumeer): find out if it is deliberate that we are not populating // PrevValue.Timestamp. reorderBuf[l].Val.PrevValue.RawBytes = val diff --git a/pkg/kv/kvserver/rangefeed/task_test.go b/pkg/kv/kvserver/rangefeed/task_test.go index 6cb45e20a0ef..263ef4fcfb23 100644 --- a/pkg/kv/kvserver/rangefeed/task_test.go +++ b/pkg/kv/kvserver/rangefeed/task_test.go @@ -204,9 +204,9 @@ func (s *testIterator) RangeBounds() roachpb.Span { return roachpb.Span{} } -// RangeTombstones implements SimpleMVCCIterator. -func (s *testIterator) RangeKeys() []storage.MVCCRangeKeyValue { - return []storage.MVCCRangeKeyValue{} +// RangeKeys implements SimpleMVCCIterator. +func (s *testIterator) RangeKeys() storage.MVCCRangeKeyStack { + return storage.MVCCRangeKeyStack{} } func TestInitResolvedTSScan(t *testing.T) { diff --git a/pkg/kv/kvserver/rditer/replica_data_iter.go b/pkg/kv/kvserver/rditer/replica_data_iter.go index 078835b8c467..9435fedf55c2 100644 --- a/pkg/kv/kvserver/rditer/replica_data_iter.go +++ b/pkg/kv/kvserver/rditer/replica_data_iter.go @@ -341,7 +341,7 @@ func (ri *ReplicaMVCCDataIterator) UnsafeValue() []byte { // RangeKeys exposes RangeKeys from underlying iterator. See // storage.SimpleMVCCIterator for details. -func (ri *ReplicaMVCCDataIterator) RangeKeys() []storage.MVCCRangeKeyValue { +func (ri *ReplicaMVCCDataIterator) RangeKeys() storage.MVCCRangeKeyStack { return ri.it.RangeKeys() } diff --git a/pkg/kv/kvserver/rditer/replica_data_iter_test.go b/pkg/kv/kvserver/rditer/replica_data_iter_test.go index ffe1bb4ae70d..f7eeef4e4452 100644 --- a/pkg/kv/kvserver/rditer/replica_data_iter_test.go +++ b/pkg/kv/kvserver/rditer/replica_data_iter_test.go @@ -176,7 +176,7 @@ func verifyRDReplicatedOnlyMVCCIter( } } if r { - rks := iter.RangeKeys() + rks := iter.RangeKeys().AsRangeKeyValues() if !rks[0].RangeKey.StartKey.Equal(rangeStart) { if !reverse { for _, rk := range rks { diff --git a/pkg/kv/kvserver/spanset/batch.go b/pkg/kv/kvserver/spanset/batch.go index f07d5c5c98d9..44c1c91a6ae7 100644 --- a/pkg/kv/kvserver/spanset/batch.go +++ b/pkg/kv/kvserver/spanset/batch.go @@ -186,7 +186,7 @@ func (i *MVCCIterator) RangeBounds() roachpb.Span { } // RangeKeys implements SimpleMVCCIterator. -func (i *MVCCIterator) RangeKeys() []storage.MVCCRangeKeyValue { +func (i *MVCCIterator) RangeKeys() storage.MVCCRangeKeyStack { return i.i.RangeKeys() } diff --git a/pkg/storage/engine.go b/pkg/storage/engine.go index 2e64be956a8e..3d9c5da1cacc 100644 --- a/pkg/storage/engine.go +++ b/pkg/storage/engine.go @@ -168,13 +168,13 @@ type SimpleMVCCIterator interface { // empty span if there are none. The returned keys are only valid until the // next iterator call. RangeBounds() roachpb.Span - // RangeKeys returns all range keys (with different timestamps) at the current - // key position, or an empty list if there are none. When at a point key, it - // will return all range keys overlapping that point key. The keys are only - // valid until the next iterator operation. For details on range keys, see - // comment on SimpleMVCCIterator, or this tech note: + // RangeKeys returns a stack of all range keys (with different timestamps) at + // the current key position. When at a point key, it will return all range + // keys overlapping that point key. The stack is only valid until the next + // iterator operation. For details on range keys, see comment on + // SimpleMVCCIterator, or this tech note: // https://github.com/cockroachdb/cockroach/blob/master/docs/tech-notes/mvcc-range-tombstones.md - RangeKeys() []MVCCRangeKeyValue + RangeKeys() MVCCRangeKeyStack } // IteratorStats is returned from {MVCCIterator,EngineIterator}.Stats. diff --git a/pkg/storage/engine_test.go b/pkg/storage/engine_test.go index db816062077c..4a4a77d4b424 100644 --- a/pkg/storage/engine_test.go +++ b/pkg/storage/engine_test.go @@ -2312,8 +2312,8 @@ func scanRangeKeys(t *testing.T, r Reader) []MVCCRangeKeyValue { if !ok { break } - for _, rangeKey := range iter.RangeKeys() { - rangeKeys = append(rangeKeys, rangeKey.Clone()) + for _, rkv := range iter.RangeKeys().AsRangeKeyValues() { + rangeKeys = append(rangeKeys, rkv.Clone()) } } return rangeKeys @@ -2373,8 +2373,8 @@ func scanIter(t *testing.T, iter SimpleMVCCIterator) []interface{} { hasPoint, hasRange := iter.HasPointAndRange() if hasRange { if bounds := iter.RangeBounds(); !bounds.Key.Equal(prevRangeStart) { - for _, rk := range iter.RangeKeys() { - keys = append(keys, rk.Clone()) + for _, rkv := range iter.RangeKeys().AsRangeKeyValues() { + keys = append(keys, rkv.Clone()) } prevRangeStart = bounds.Key.Clone() } diff --git a/pkg/storage/intent_interleaving_iter.go b/pkg/storage/intent_interleaving_iter.go index 3adfa3458c37..35f4cf723f14 100644 --- a/pkg/storage/intent_interleaving_iter.go +++ b/pkg/storage/intent_interleaving_iter.go @@ -934,9 +934,9 @@ func (i *intentInterleavingIter) RangeBounds() roachpb.Span { } // RangeKeys implements SimpleMVCCIterator. -func (i *intentInterleavingIter) RangeKeys() []MVCCRangeKeyValue { +func (i *intentInterleavingIter) RangeKeys() MVCCRangeKeyStack { if _, hasRange := i.HasPointAndRange(); !hasRange { - return []MVCCRangeKeyValue{} + return MVCCRangeKeyStack{} } return i.iter.RangeKeys() } diff --git a/pkg/storage/multi_iterator.go b/pkg/storage/multi_iterator.go index 0f4c2f00561a..1612a8e7008f 100644 --- a/pkg/storage/multi_iterator.go +++ b/pkg/storage/multi_iterator.go @@ -106,7 +106,7 @@ func (f *multiIterator) RangeBounds() roachpb.Span { } // RangeKeys implements SimpleMVCCIterator. -func (f *multiIterator) RangeKeys() []MVCCRangeKeyValue { +func (f *multiIterator) RangeKeys() MVCCRangeKeyStack { panic("not implemented") } diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index 705ddc85bde9..8c73e5487778 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -974,7 +974,7 @@ func mvccGetMetadata( // If we land on a (bare) range key, step to look for a colocated point key. if hasRange && !hasPoint { - rkTimestamp := iter.RangeKeys()[0].RangeKey.Timestamp + rkTimestamp := iter.RangeKeys().Versions[0].Timestamp iter.Next() if ok, err = iter.Valid(); err != nil { @@ -1010,7 +1010,7 @@ func mvccGetMetadata( // lowest range tombstone above the key (not the highest which is used for // metadata), or the point version's timestamp if it was a tombstone. if hasRange { - rangeKeys := iter.RangeKeys() + rangeKeys := iter.RangeKeys().AsRangeKeyValues() if rkv, ok := FirstRangeKeyAbove(rangeKeys, unsafeKey.Timestamp); ok { meta.Deleted = true meta.Timestamp = rangeKeys[0].RangeKey.Timestamp.ToLegacyTimestamp() @@ -1665,7 +1665,7 @@ func mvccPutInternal( // We must now be on a point key, but it may be covered by an // existing MVCC range tombstone. If it isn't, account for it. _, hasRange := iter.HasPointAndRange() - if !hasRange || iter.RangeKeys()[0].RangeKey.Timestamp.Less(prevUnsafeKey.Timestamp) { + if !hasRange || iter.RangeKeys().Versions[0].Timestamp.Less(prevUnsafeKey.Timestamp) { prevValRaw := iter.UnsafeValue() prevVal, err := DecodeMVCCValue(prevValRaw) if err != nil { @@ -2568,7 +2568,7 @@ func MVCCPredicateDeleteRange( // TODO (msbutler): cache the range keys while the range bounds remain // constant, since iter.RangeKeys() is expensive. Manual caching may not be necessary if // https://github.com/cockroachdb/cockroach/issues/84379 lands. - rangeKeys := iter.RangeKeys() + rangeKeys := iter.RangeKeys().AsRangeKeyValues() if endTime.LessEq(rangeKeys[0].RangeKey.Timestamp) { return false, false, false, roachpb.NewWriteTooOldError(endTime, rangeKeys[0].RangeKey.Timestamp.Next(), k.Key.Clone()) @@ -2872,7 +2872,7 @@ func MVCCDeleteRangeUsingTombstone( if hasRange { // Check if we've encountered a new range key stack. if rangeBounds := iter.RangeBounds(); !rangeBounds.EndKey.Equal(prevRangeEnd) { - newest := iter.RangeKeys()[0].RangeKey + newest := iter.RangeKeys().AsRangeKeyValues()[0].RangeKey // Check for conflict with newer range key. if timestamp.LessEq(newest.Timestamp) { @@ -3003,9 +3003,9 @@ func MVCCDeleteRangeUsingTombstone( } else if ok { switch iter.RangeBounds().EndKey.Compare(startKey) { case 1: // fragment - fragmentRangeKeys(iter.RangeKeys(), startKey) + fragmentRangeKeys(iter.RangeKeys().AsRangeKeyValues(), startKey) case 0: // merge - lhs := iter.RangeKeys() + lhs := iter.RangeKeys().AsRangeKeyValues() for i := range lhs { lhs[i] = lhs[i].Clone() } @@ -3014,7 +3014,7 @@ func MVCCDeleteRangeUsingTombstone( if ok, err := iter.Valid(); err != nil { return err } else if ok { - rhs = append(rhs, iter.RangeKeys()...) + rhs = append(rhs, iter.RangeKeys().AsRangeKeyValues()...) } maybeMergeRangeKeys(lhs, rhs) } @@ -3038,10 +3038,10 @@ func MVCCDeleteRangeUsingTombstone( } else if ok { switch iter.RangeBounds().Key.Compare(endKey) { case -1: // fragment - fragmentRangeKeys(iter.RangeKeys(), endKey) + fragmentRangeKeys(iter.RangeKeys().AsRangeKeyValues(), endKey) case 0: // merge lhs := []MVCCRangeKeyValue{{RangeKey: rangeKey, Value: valueRaw}} - rhs := iter.RangeKeys() + rhs := iter.RangeKeys().AsRangeKeyValues() for i := range rhs { rhs[i] = rhs[i].Clone() } @@ -3049,7 +3049,7 @@ func MVCCDeleteRangeUsingTombstone( if ok, err := iter.Valid(); err != nil { return err } else if ok { - lhs = append(lhs, iter.RangeKeys()...) + lhs = append(lhs, iter.RangeKeys().AsRangeKeyValues()...) } maybeMergeRangeKeys(lhs, rhs) } @@ -3558,7 +3558,7 @@ type iterForKeyVersions interface { UnsafeKey() MVCCKey UnsafeValue() []byte ValueProto(msg protoutil.Message) error - RangeKeys() []MVCCRangeKeyValue + RangeKeys() MVCCRangeKeyStack } // separatedIntentAndVersionIter is an implementation of iterForKeyVersions @@ -3634,7 +3634,7 @@ func (s *separatedIntentAndVersionIter) HasPointAndRange() (bool, bool) { return hasPoint, hasRange } -func (s *separatedIntentAndVersionIter) RangeKeys() []MVCCRangeKeyValue { +func (s *separatedIntentAndVersionIter) RangeKeys() MVCCRangeKeyStack { return s.mvccIter.RangeKeys() } @@ -4038,7 +4038,7 @@ func mvccResolveWriteIntent( } else if valid { if hasPoint, hasRange := iter.HasPointAndRange(); hasPoint { if unsafeKey := iter.UnsafeKey(); unsafeKey.Key.Equal(oldKey.Key) { - if !hasRange || iter.RangeKeys()[0].RangeKey.Timestamp.Less(unsafeKey.Timestamp) { + if !hasRange || iter.RangeKeys().Versions[0].Timestamp.Less(unsafeKey.Timestamp) { unsafeValRaw := iter.UnsafeValue() prevVal, prevValOK, err := tryDecodeSimpleMVCCValue(unsafeValRaw) if !prevValOK && err == nil { @@ -4149,7 +4149,7 @@ func mvccResolveWriteIntent( // synthesize a point tombstone at the lowest range tombstone covering it. // This is where the point key ceases to exist, contributing to GCBytesAge. if len(unsafeNextValueRaw) > 0 { - if rk, found := FirstRangeKeyAbove(iter.RangeKeys(), unsafeNextKey.Timestamp); found { + if rk, found := FirstRangeKeyAbove(iter.RangeKeys().AsRangeKeyValues(), unsafeNextKey.Timestamp); found { unsafeNextKey.Timestamp = rk.RangeKey.Timestamp unsafeNextValueRaw = []byte{} } @@ -4609,7 +4609,7 @@ func MVCCGarbageCollect( // We need to iterate ranges only to compute GCBytesAge if we are updating // stats. if _, hasRange := iter.HasPointAndRange(); hasRange && !lastRangeTombstoneStart.Equal(iter.RangeBounds().Key) { - rangeKeys := iter.RangeKeys() + rangeKeys := iter.RangeKeys().AsRangeKeyValues() newLen := len(rangeKeys) if cap(rangeTombstoneTss) < newLen { rangeTombstoneTss = make([]hlc.Timestamp, newLen) @@ -4826,7 +4826,7 @@ func MVCCGarbageCollectRangeKeys( } bounds := iter.RangeBounds() - unsafeRangeKeys := iter.RangeKeys() + unsafeRangeKeys := iter.RangeKeys().AsRangeKeyValues() // Check if preceding range tombstone is adjacent to GC'd one. If we // started iterating too early, just skip to next key. If boundaries @@ -5173,7 +5173,7 @@ func ComputeStatsForRangeWithVisitors( if hasRange { if rangeStart := iter.RangeBounds().Key; !rangeStart.Equal(prevRangeStart) { prevRangeStart = append(prevRangeStart[:0], rangeStart...) - rangeKeys = iter.RangeKeys() + rangeKeys = iter.RangeKeys().AsRangeKeyValues() for i, rkv := range rangeKeys { // Only the top-most fragment contributes the key and its bounds, but @@ -5552,7 +5552,7 @@ func MVCCExportToSST( rangeBounds := iter.RangeBounds() rangeKeysEnd = append(rangeKeysEnd[:0], rangeBounds.EndKey...) - for _, rkv := range iter.RangeKeys() { + for _, rkv := range iter.RangeKeys().AsRangeKeyValues() { rangeKeys = append(rangeKeys, rkv.Clone()) rangeKeysSize += int64(len(rkv.RangeKey.StartKey) + len(rkv.RangeKey.EndKey) + len(rkv.Value)) if !opts.ExportAllRevisions { diff --git a/pkg/storage/mvcc_history_test.go b/pkg/storage/mvcc_history_test.go index bd753e7a3494..e1c5bff589e9 100644 --- a/pkg/storage/mvcc_history_test.go +++ b/pkg/storage/mvcc_history_test.go @@ -176,16 +176,17 @@ func TestMVCCHistories(t *testing.T) { break } hasData = true - buf.Printf("rangekey: %s/[", iter.RangeBounds()) - for i, rangeKV := range iter.RangeKeys() { - val, err := DecodeMVCCValue(rangeKV.Value) + rangeKeys := iter.RangeKeys() + buf.Printf("rangekey: %s/[", rangeKeys.Bounds) + for i, version := range rangeKeys.Versions { + val, err := DecodeMVCCValue(version.Value) if err != nil { t.Fatal(err) } if i > 0 { buf.Printf(" ") } - buf.Printf("%s=%s", rangeKV.RangeKey.Timestamp, val) + buf.Printf("%s=%s", version.Timestamp, val) } buf.Printf("]\n") iter.Next() @@ -1211,15 +1212,15 @@ func cmdExport(e *evalCtx) error { if rangeBounds := iter.RangeBounds(); !rangeBounds.Key.Equal(rangeStart) { rangeStart = append(rangeStart[:0], rangeBounds.Key...) e.results.buf.Printf("export: %s/[", rangeBounds) - for i, rangeKV := range iter.RangeKeys() { - val, err := DecodeMVCCValue(rangeKV.Value) + for i, version := range iter.RangeKeys().Versions { + val, err := DecodeMVCCValue(version.Value) if err != nil { return err } if i > 0 { e.results.buf.Printf(" ") } - e.results.buf.Printf("%s=%s", rangeKV.RangeKey.Timestamp, val) + e.results.buf.Printf("%s=%s", version.Timestamp, val) } e.results.buf.Printf("]\n") } @@ -1627,16 +1628,17 @@ func printIter(e *evalCtx) { } } if hasRange { - e.results.buf.Printf(" %s/[", e.iter.RangeBounds()) - for i, rangeKV := range e.iter.RangeKeys() { - value, err := DecodeMVCCValue(rangeKV.Value) + rangeKeys := e.iter.RangeKeys() + e.results.buf.Printf(" %s/[", rangeKeys.Bounds) + for i, version := range rangeKeys.Versions { + value, err := DecodeMVCCValue(version.Value) if err != nil { e.Fatalf("%v", err) } if i > 0 { e.results.buf.Printf(" ") } - e.results.buf.Printf("%s=%s", rangeKV.RangeKey.Timestamp, value) + e.results.buf.Printf("%s=%s", version.Timestamp, value) } e.results.buf.Printf("]") } diff --git a/pkg/storage/mvcc_incremental_iterator.go b/pkg/storage/mvcc_incremental_iterator.go index 2b068ad09cc1..6e43d7905872 100644 --- a/pkg/storage/mvcc_incremental_iterator.go +++ b/pkg/storage/mvcc_incremental_iterator.go @@ -448,7 +448,7 @@ func (i *MVCCIncrementalIterator) advance() { // rather than a binary search because we expect EndTime to be near the // current time, so the first range key will typically be sufficient. hasRange = false - for _, rkv := range i.iter.RangeKeys() { + for _, rkv := range i.iter.RangeKeys().AsRangeKeyValues() { if ts := rkv.RangeKey.Timestamp; ts.LessEq(i.endTime) { hasRange = i.startTime.Less(ts) break @@ -547,9 +547,9 @@ func (i *MVCCIncrementalIterator) RangeBounds() roachpb.Span { } // RangeKeys implements SimpleMVCCIterator. -func (i *MVCCIncrementalIterator) RangeKeys() []MVCCRangeKeyValue { +func (i *MVCCIncrementalIterator) RangeKeys() MVCCRangeKeyStack { if !i.hasRange { - return []MVCCRangeKeyValue{} + return MVCCRangeKeyStack{} } // TODO(erikgrinaker): It may be worthwhile to clone and memoize this result @@ -564,21 +564,21 @@ func (i *MVCCIncrementalIterator) RangeKeys() []MVCCRangeKeyValue { // Find the first range key at or below endTime, and truncate rangeKeys. We do // a linear search rather than a binary search, because we expect endTime to // be near the current time, so the first element will typically match. - first := len(rangeKeys) - 1 - for idx, rkv := range rangeKeys { - if rkv.RangeKey.Timestamp.LessEq(i.endTime) { + first := len(rangeKeys.Versions) - 1 + for idx, v := range rangeKeys.Versions { + if v.Timestamp.LessEq(i.endTime) { first = idx break } } - rangeKeys = rangeKeys[first:] + rangeKeys.Versions = rangeKeys.Versions[first:] // Find the first range key at or below startTime, and truncate rangeKeys. if i.startTime.IsSet() { - if idx := sort.Search(len(rangeKeys), func(idx int) bool { - return rangeKeys[idx].RangeKey.Timestamp.LessEq(i.startTime) + if idx := sort.Search(len(rangeKeys.Versions), func(idx int) bool { + return rangeKeys.Versions[idx].Timestamp.LessEq(i.startTime) }); idx >= 0 { - rangeKeys = rangeKeys[:idx] + rangeKeys.Versions = rangeKeys.Versions[:idx] } } diff --git a/pkg/storage/mvcc_test.go b/pkg/storage/mvcc_test.go index 59900718a7f7..d5c21cc06e68 100644 --- a/pkg/storage/mvcc_test.go +++ b/pkg/storage/mvcc_test.go @@ -5763,7 +5763,7 @@ func TestMVCCGarbageCollectRanges(t *testing.T) { if !ok { break } - for _, rkv := range it.RangeKeys() { + for _, rkv := range it.RangeKeys().AsRangeKeyValues() { require.Less(t, expectIndex, len(d.after), "not enough expectations; at unexpected range:", rkv.RangeKey.String()) require.EqualValues(t, d.after[expectIndex], rkv.RangeKey, "range key is not equal") expectIndex++ diff --git a/pkg/storage/pebble_iterator.go b/pkg/storage/pebble_iterator.go index eeeddc7bd769..da919c907474 100644 --- a/pkg/storage/pebble_iterator.go +++ b/pkg/storage/pebble_iterator.go @@ -663,10 +663,12 @@ func (p *pebbleIterator) EngineRangeBounds() (roachpb.Span, error) { } // RangeKeys implements the MVCCIterator interface. -func (p *pebbleIterator) RangeKeys() []MVCCRangeKeyValue { - bounds := p.RangeBounds() +func (p *pebbleIterator) RangeKeys() MVCCRangeKeyStack { rangeKeys := p.iter.RangeKeys() - rangeKVs := make([]MVCCRangeKeyValue, 0, len(rangeKeys)) + stack := MVCCRangeKeyStack{ + Bounds: p.RangeBounds(), + Versions: make(MVCCRangeKeyVersions, 0, len(rangeKeys)), + } for _, rangeKey := range rangeKeys { timestamp, err := DecodeMVCCTimestampSuffix(rangeKey.Suffix) @@ -675,16 +677,12 @@ func (p *pebbleIterator) RangeKeys() []MVCCRangeKeyValue { // we follow UnsafeKey()'s example and silently skip them. continue } - rangeKVs = append(rangeKVs, MVCCRangeKeyValue{ - RangeKey: MVCCRangeKey{ - StartKey: bounds.Key, - EndKey: bounds.EndKey, - Timestamp: timestamp, - }, - Value: rangeKey.Value, + stack.Versions = append(stack.Versions, MVCCRangeKeyVersion{ + Timestamp: timestamp, + Value: rangeKey.Value, }) } - return rangeKVs + return stack } // EngineRangeKeys implements the EngineIterator interface. diff --git a/pkg/storage/point_synthesizing_iter.go b/pkg/storage/point_synthesizing_iter.go index 53947213995c..74b10b59f949 100644 --- a/pkg/storage/point_synthesizing_iter.go +++ b/pkg/storage/point_synthesizing_iter.go @@ -176,7 +176,7 @@ func (i *pointSynthesizingIter) updateRangeKeys() { if rangeStart := i.iter.RangeBounds().Key; !rangeStart.Equal(i.rangeKeysStart) { i.rangeKeysStart = append(i.rangeKeysStart[:0], rangeStart...) i.rangeKeys = i.rangeKeys[:0] - for _, rk := range i.iter.RangeKeys() { + for _, rk := range i.iter.RangeKeys().AsRangeKeyValues() { // TODO(erikgrinaker): We should optimize the clone cost. i.rangeKeys = append(i.rangeKeys, rk.Clone()) } @@ -624,8 +624,8 @@ func (i *pointSynthesizingIter) RangeBounds() roachpb.Span { } // RangeKeys implements MVCCIterator. -func (i *pointSynthesizingIter) RangeKeys() []MVCCRangeKeyValue { - return []MVCCRangeKeyValue{} +func (i *pointSynthesizingIter) RangeKeys() MVCCRangeKeyStack { + return MVCCRangeKeyStack{} } // ComputeStats implements MVCCIterator. diff --git a/pkg/storage/read_as_of_iterator.go b/pkg/storage/read_as_of_iterator.go index 65afef5910b0..39f3536f8dc3 100644 --- a/pkg/storage/read_as_of_iterator.go +++ b/pkg/storage/read_as_of_iterator.go @@ -110,8 +110,8 @@ func (f *ReadAsOfIterator) RangeBounds() roachpb.Span { } // RangeKeys is always empty since this iterator never surfaces rangeKeys. -func (f *ReadAsOfIterator) RangeKeys() []MVCCRangeKeyValue { - return []MVCCRangeKeyValue{} +func (f *ReadAsOfIterator) RangeKeys() MVCCRangeKeyStack { + return MVCCRangeKeyStack{} } // updateValid updates i.valid and i.err based on the underlying iterator, and @@ -172,7 +172,7 @@ func (f *ReadAsOfIterator) advance() { // TODO (msbutler): ensure this function caches range key values (#84379) before // the 22.2 branch cut, else we face a steep perf cliff for RESTORE with range keys. func (f *ReadAsOfIterator) asOfRangeKeyShadows() (shadows bool) { - rangeKeys := f.iter.RangeKeys() + rangeKeys := f.iter.RangeKeys().AsRangeKeyValues() if f.asOf.IsEmpty() { return f.iter.UnsafeKey().Timestamp.LessEq(rangeKeys[0].RangeKey.Timestamp) } diff --git a/pkg/storage/sst.go b/pkg/storage/sst.go index f0ad9c1c601e..557cc6384863 100644 --- a/pkg/storage/sst.go +++ b/pkg/storage/sst.go @@ -409,7 +409,7 @@ func UpdateSSTTimestamps( } else if !ok { break } - for _, rkv := range iter.RangeKeys() { + for _, rkv := range iter.RangeKeys().AsRangeKeyValues() { if rkv.RangeKey.Timestamp != from { return nil, errors.Errorf("unexpected timestamp %s (expected %s) for range key %s", rkv.RangeKey.Timestamp, from, rkv.RangeKey) diff --git a/pkg/storage/sst_iterator.go b/pkg/storage/sst_iterator.go index 2e5269111b3e..72c3e302278e 100644 --- a/pkg/storage/sst_iterator.go +++ b/pkg/storage/sst_iterator.go @@ -232,6 +232,6 @@ func (r *sstIterator) RangeBounds() roachpb.Span { } // RangeKeys implements SimpleMVCCIterator. -func (r *sstIterator) RangeKeys() []MVCCRangeKeyValue { - return []MVCCRangeKeyValue{} +func (r *sstIterator) RangeKeys() MVCCRangeKeyStack { + return MVCCRangeKeyStack{} } diff --git a/pkg/testutils/storageutils/scan.go b/pkg/testutils/storageutils/scan.go index ac22f74613df..af1665af4d78 100644 --- a/pkg/testutils/storageutils/scan.go +++ b/pkg/testutils/storageutils/scan.go @@ -45,7 +45,7 @@ func ScanIter(t *testing.T, iter storage.SimpleMVCCIterator) KVs { hasPoint, hasRange := iter.HasPointAndRange() if hasRange { if bounds := iter.RangeBounds(); !bounds.Key.Equal(prevRangeStart) { - for _, rkv := range iter.RangeKeys() { + for _, rkv := range iter.RangeKeys().AsRangeKeyValues() { if len(rkv.Value) == 0 { rkv.Value = nil } From 5fdec926dfbc35184953040ecc8d861ae62b4a04 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 29 Jul 2022 14:12:59 +0000 Subject: [PATCH 3/6] storage: migrate MVCC code to `MVCCRangeKeyStack` Release note: None --- .../batcheval/cmd_add_sstable_test.go | 2 +- pkg/storage/mvcc.go | 176 +++++++++--------- pkg/storage/mvcc_incremental_iterator.go | 49 +---- pkg/storage/point_synthesizing_iter.go | 27 ++- pkg/storage/read_as_of_iterator.go | 30 ++- pkg/storage/sst.go | 11 +- pkg/testutils/storageutils/scan.go | 11 +- 7 files changed, 131 insertions(+), 175 deletions(-) diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go index 05d9189b947b..fa6797d40cbf 100644 --- a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go @@ -186,7 +186,7 @@ func TestEvalAddSSTable(t *testing.T) { toReqTS: 1, sst: kvs{pointKV("a", 1, "a1"), rangeKV("c", "d", 2, "")}, expectErr: []string{ - `unexpected timestamp 0.000000002,0 (expected 0.000000001,0) for range key {c-d}/0.000000002,0`, + `unexpected timestamp 0.000000002,0 (expected 0.000000001,0) for range key {c-d}`, `key has suffix "\x00\x00\x00\x00\x00\x00\x00\x02\t", expected "\x00\x00\x00\x00\x00\x00\x00\x01\t"`, }, }, diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index 8c73e5487778..a0963e97a1fb 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -1010,11 +1010,11 @@ func mvccGetMetadata( // lowest range tombstone above the key (not the highest which is used for // metadata), or the point version's timestamp if it was a tombstone. if hasRange { - rangeKeys := iter.RangeKeys().AsRangeKeyValues() - if rkv, ok := FirstRangeKeyAbove(rangeKeys, unsafeKey.Timestamp); ok { + rangeKeys := iter.RangeKeys() + if v, ok := rangeKeys.FirstAbove(unsafeKey.Timestamp); ok { meta.Deleted = true - meta.Timestamp = rangeKeys[0].RangeKey.Timestamp.ToLegacyTimestamp() - keyLastSeen := rkv.RangeKey.Timestamp + meta.Timestamp = rangeKeys.Versions[0].Timestamp.ToLegacyTimestamp() + keyLastSeen := v.Timestamp if unsafeVal.IsTombstone() { keyLastSeen = unsafeKey.Timestamp } @@ -2568,16 +2568,16 @@ func MVCCPredicateDeleteRange( // TODO (msbutler): cache the range keys while the range bounds remain // constant, since iter.RangeKeys() is expensive. Manual caching may not be necessary if // https://github.com/cockroachdb/cockroach/issues/84379 lands. - rangeKeys := iter.RangeKeys().AsRangeKeyValues() - if endTime.LessEq(rangeKeys[0].RangeKey.Timestamp) { - return false, false, false, roachpb.NewWriteTooOldError(endTime, - rangeKeys[0].RangeKey.Timestamp.Next(), k.Key.Clone()) + newestRangeKey := iter.RangeKeys().Versions[0].Timestamp + if endTime.LessEq(newestRangeKey) { + return false, false, false, roachpb.NewWriteTooOldError( + endTime, newestRangeKey.Next(), k.Key.Clone()) } if !hasPointKey { // landed on bare range key. return true, false, true, nil } - if k.Timestamp.Less(rangeKeys[0].RangeKey.Timestamp) { + if k.Timestamp.Less(newestRangeKey) { // The latest range tombstone shadows the point key; ok to continue run. return true, false, true, nil } @@ -2872,12 +2872,11 @@ func MVCCDeleteRangeUsingTombstone( if hasRange { // Check if we've encountered a new range key stack. if rangeBounds := iter.RangeBounds(); !rangeBounds.EndKey.Equal(prevRangeEnd) { - newest := iter.RangeKeys().AsRangeKeyValues()[0].RangeKey + newest := iter.RangeKeys().Versions[0].Timestamp // Check for conflict with newer range key. - if timestamp.LessEq(newest.Timestamp) { - return roachpb.NewWriteTooOldError( - timestamp, newest.Timestamp.Next(), newest.StartKey.Clone()) + if timestamp.LessEq(newest) { + return roachpb.NewWriteTooOldError(timestamp, newest.Next(), rangeBounds.Key.Clone()) } if ms != nil { @@ -2899,7 +2898,7 @@ func MVCCDeleteRangeUsingTombstone( msDelta.RangeKeyBytes += int64(EncodedMVCCTimestampSuffixLength(timestamp)) msDelta.RangeValCount++ msDelta.RangeValBytes += int64(len(valueRaw)) - msDelta.GCBytesAge -= (timestamp.WallTime/1e9 - newest.Timestamp.WallTime/1e9) * + msDelta.GCBytesAge -= (timestamp.WallTime/1e9 - newest.WallTime/1e9) * int64(EncodedMVCCKeyPrefixLength(rangeBounds.Key)+ EncodedMVCCKeyPrefixLength(rangeBounds.EndKey)) } @@ -2944,10 +2943,10 @@ func MVCCDeleteRangeUsingTombstone( if ms != nil { // fragmentRangeKeys adjusts ms to fragment an existing range key stack // at the given split point. - fragmentRangeKeys := func(rangeKeys []MVCCRangeKeyValue, splitKey roachpb.Key) { - for i, rkv := range rangeKeys { - keyBytes := int64(EncodedMVCCTimestampSuffixLength(rkv.RangeKey.Timestamp)) - valBytes := int64(len(rkv.Value)) + fragmentRangeKeys := func(rangeKeys MVCCRangeKeyStack, splitKey roachpb.Key) { + for i, v := range rangeKeys.Versions { + keyBytes := int64(EncodedMVCCTimestampSuffixLength(v.Timestamp)) + valBytes := int64(len(v.Value)) if i == 0 { msDelta.RangeKeyCount++ keyBytes += 2 * int64(EncodedMVCCKeyPrefixLength(splitKey)) @@ -2955,37 +2954,29 @@ func MVCCDeleteRangeUsingTombstone( msDelta.RangeKeyBytes += keyBytes msDelta.RangeValCount++ msDelta.RangeValBytes += valBytes - msDelta.GCBytesAge += (keyBytes + valBytes) * (timestamp.WallTime/1e9 - rkv.RangeKey.Timestamp.WallTime/1e9) + msDelta.GCBytesAge += (keyBytes + valBytes) * (timestamp.WallTime/1e9 - v.Timestamp.WallTime/1e9) } } // maybeMergeRangeKeys adjusts ms to merge two abutting range key stacks if // they have the same timestamps and values. It assumes the lhs end key // equals the rhs start key, and that they are in descending order. - maybeMergeRangeKeys := func(lhs, rhs []MVCCRangeKeyValue) { - if len(lhs) != len(rhs) || len(lhs) == 0 { + maybeMergeRangeKeys := func(lhs, rhs MVCCRangeKeyStack) { + if !lhs.CanMergeRight(rhs) { return } - for i, l := range lhs { - if !l.RangeKey.Timestamp.Equal(rhs[i].RangeKey.Timestamp) { - return - } else if !bytes.Equal(l.Value, rhs[i].Value) { - return - } - } - mergeKey := rhs[0].RangeKey.StartKey - for i, rkv := range lhs { - keyBytes := int64(EncodedMVCCTimestampSuffixLength(rkv.RangeKey.Timestamp)) - valBytes := int64(len(rkv.Value)) + for i, v := range lhs.Versions { + keyBytes := int64(EncodedMVCCTimestampSuffixLength(v.Timestamp)) + valBytes := int64(len(v.Value)) if i == 0 { msDelta.RangeKeyCount-- - keyBytes += 2 * int64(EncodedMVCCKeyPrefixLength(mergeKey)) + keyBytes += 2 * int64(EncodedMVCCKeyPrefixLength(rhs.Bounds.Key)) } msDelta.RangeKeyBytes -= keyBytes msDelta.RangeValCount-- msDelta.RangeValBytes -= valBytes msDelta.GCBytesAge -= (keyBytes + valBytes) * - (timestamp.WallTime/1e9 - rkv.RangeKey.Timestamp.WallTime/1e9) + (timestamp.WallTime/1e9 - v.Timestamp.WallTime/1e9) } } @@ -3003,18 +2994,18 @@ func MVCCDeleteRangeUsingTombstone( } else if ok { switch iter.RangeBounds().EndKey.Compare(startKey) { case 1: // fragment - fragmentRangeKeys(iter.RangeKeys().AsRangeKeyValues(), startKey) + fragmentRangeKeys(iter.RangeKeys(), startKey) case 0: // merge - lhs := iter.RangeKeys().AsRangeKeyValues() - for i := range lhs { - lhs[i] = lhs[i].Clone() + lhs := iter.RangeKeys().Clone() + rhs := MVCCRangeKeyStack{ + Bounds: rangeKey.Bounds(), + Versions: MVCCRangeKeyVersions{{Timestamp: rangeKey.Timestamp, Value: valueRaw}}, } - rhs := []MVCCRangeKeyValue{{RangeKey: rangeKey, Value: valueRaw}} iter.SeekGE(MVCCKey{Key: startKey}) if ok, err := iter.Valid(); err != nil { return err } else if ok { - rhs = append(rhs, iter.RangeKeys().AsRangeKeyValues()...) + rhs.Versions = append(rhs.Versions, iter.RangeKeys().Versions...) } maybeMergeRangeKeys(lhs, rhs) } @@ -3038,18 +3029,18 @@ func MVCCDeleteRangeUsingTombstone( } else if ok { switch iter.RangeBounds().Key.Compare(endKey) { case -1: // fragment - fragmentRangeKeys(iter.RangeKeys().AsRangeKeyValues(), endKey) + fragmentRangeKeys(iter.RangeKeys(), endKey) case 0: // merge - lhs := []MVCCRangeKeyValue{{RangeKey: rangeKey, Value: valueRaw}} - rhs := iter.RangeKeys().AsRangeKeyValues() - for i := range rhs { - rhs[i] = rhs[i].Clone() + lhs := MVCCRangeKeyStack{ + Bounds: rangeKey.Bounds(), + Versions: MVCCRangeKeyVersions{{Timestamp: rangeKey.Timestamp, Value: valueRaw}}, } + rhs := iter.RangeKeys().Clone() iter.SeekLT(MVCCKey{Key: endKey}) if ok, err := iter.Valid(); err != nil { return err } else if ok { - lhs = append(lhs, iter.RangeKeys().AsRangeKeyValues()...) + lhs.Versions = append(lhs.Versions, iter.RangeKeys().Versions...) } maybeMergeRangeKeys(lhs, rhs) } @@ -4149,8 +4140,8 @@ func mvccResolveWriteIntent( // synthesize a point tombstone at the lowest range tombstone covering it. // This is where the point key ceases to exist, contributing to GCBytesAge. if len(unsafeNextValueRaw) > 0 { - if rk, found := FirstRangeKeyAbove(iter.RangeKeys().AsRangeKeyValues(), unsafeNextKey.Timestamp); found { - unsafeNextKey.Timestamp = rk.RangeKey.Timestamp + if v, found := iter.RangeKeys().FirstAbove(unsafeNextKey.Timestamp); found { + unsafeNextKey.Timestamp = v.Timestamp unsafeNextValueRaw = []byte{} } } @@ -5158,8 +5149,8 @@ func ComputeStatsForRangeWithVisitors( // reverse chronological order and use this variable to keep track // of the point in time at which the current key begins to age. var accrueGCAgeNanos int64 + var rangeKeys MVCCRangeKeyStack mvccEndKey := MakeMVCCMetadataKey(end) - rangeKeys := []MVCCRangeKeyValue{} for iter.SeekGE(MakeMVCCMetadataKey(start)); ; iter.Next() { if ok, err := iter.Valid(); err != nil { @@ -5173,9 +5164,9 @@ func ComputeStatsForRangeWithVisitors( if hasRange { if rangeStart := iter.RangeBounds().Key; !rangeStart.Equal(prevRangeStart) { prevRangeStart = append(prevRangeStart[:0], rangeStart...) - rangeKeys = iter.RangeKeys().AsRangeKeyValues() + rangeKeys = iter.RangeKeys().Clone() - for i, rkv := range rangeKeys { + for i, v := range rangeKeys.Versions { // Only the top-most fragment contributes the key and its bounds, but // all versions contribute timestamps and values. // @@ -5183,28 +5174,27 @@ func ComputeStatsForRangeWithVisitors( // though it is actually variable-length, likely for historical // reasons. But for range keys we may as well use the actual // variable-length encoded size. - keyBytes := int64(EncodedMVCCTimestampSuffixLength(rkv.RangeKey.Timestamp)) - valBytes := int64(len(rkv.Value)) + keyBytes := int64(EncodedMVCCTimestampSuffixLength(v.Timestamp)) + valBytes := int64(len(v.Value)) if i == 0 { ms.RangeKeyCount++ - keyBytes += int64(EncodedMVCCKeyPrefixLength(rkv.RangeKey.StartKey) + - EncodedMVCCKeyPrefixLength(rkv.RangeKey.EndKey)) + keyBytes += int64(EncodedMVCCKeyPrefixLength(rangeKeys.Bounds.Key) + + EncodedMVCCKeyPrefixLength(rangeKeys.Bounds.EndKey)) } ms.RangeKeyBytes += keyBytes ms.RangeValCount++ ms.RangeValBytes += valBytes - ms.GCBytesAge += (keyBytes + valBytes) * - (nowNanos/1e9 - rkv.RangeKey.Timestamp.WallTime/1e9) + ms.GCBytesAge += (keyBytes + valBytes) * (nowNanos/1e9 - v.Timestamp.WallTime/1e9) if rangeKeyVisitor != nil { - if err := rangeKeyVisitor(rkv); err != nil { + if err := rangeKeyVisitor(rangeKeys.AsRangeKeyValue(v)); err != nil { return enginepb.MVCCStats{}, err } } } } - } else if len(rangeKeys) > 0 { - rangeKeys = rangeKeys[:0] + } else if !rangeKeys.IsEmpty() { + rangeKeys = MVCCRangeKeyStack{} } if !hasPoint { @@ -5247,8 +5237,8 @@ func ComputeStatsForRangeWithVisitors( // only take them into account for versioned values. var nextRangeTombstone hlc.Timestamp if isValue { - if rkv, ok := FirstRangeKeyAbove(rangeKeys, unsafeKey.Timestamp); ok { - nextRangeTombstone = rkv.RangeKey.Timestamp + if v, ok := rangeKeys.FirstAbove(unsafeKey.Timestamp); ok { + nextRangeTombstone = v.Timestamp } } @@ -5435,7 +5425,7 @@ func MVCCExportToSST( var rows RowCounter var curKey roachpb.Key // only used if exportAllRevisions var resumeKey MVCCKey - var rangeKeys []MVCCRangeKeyValue + var rangeKeys MVCCRangeKeyStack var rangeKeysEnd roachpb.Key var rangeKeysSize int64 @@ -5464,7 +5454,7 @@ func MVCCExportToSST( // We could be truncated in the middle of a point key version series, which // would require adding on a \0 byte via Key.Next(), so let's assume that. maxSize := rangeKeysSize - if s := maxSize + int64(len(rangeKeys)*(len(resumeKey)-len(rangeKeysEnd)+1)); s > maxSize { + if s := maxSize + int64(rangeKeys.Len()*(len(resumeKey)-len(rangeKeysEnd)+1)); s > maxSize { maxSize = s } return maxSize @@ -5527,37 +5517,43 @@ func MVCCExportToSST( // comparisons. Pebble should expose an API to cheaply detect range key // changes. if len(rangeKeysEnd) > 0 && bytes.Compare(unsafeKey.Key, rangeKeysEnd) >= 0 { - for _, rkv := range rangeKeys { - mvccValue, ok, err := tryDecodeSimpleMVCCValue(rkv.Value) + for _, v := range rangeKeys.Versions { + mvccValue, ok, err := tryDecodeSimpleMVCCValue(v.Value) if !ok && err == nil { - mvccValue, err = decodeExtendedMVCCValue(rkv.Value) + mvccValue, err = decodeExtendedMVCCValue(v.Value) } if err != nil { return roachpb.BulkOpSummary{}, MVCCKey{}, errors.Wrapf(err, - "decoding mvcc value %s", rkv.Value) + "decoding mvcc value %s", v.Value) } // Export only the inner roachpb.Value, not the MVCCValue header. mvccValue = MVCCValue{Value: mvccValue.Value} - if err := sstWriter.PutMVCCRangeKey(rkv.RangeKey, mvccValue); err != nil { + if err := sstWriter.PutMVCCRangeKey(rangeKeys.AsRangeKey(v), mvccValue); err != nil { return roachpb.BulkOpSummary{}, MVCCKey{}, err } } rows.BulkOpSummary.DataSize += rangeKeysSize - rangeKeys, rangeKeysEnd, rangeKeysSize = rangeKeys[:0], nil, 0 + rangeKeys, rangeKeysEnd, rangeKeysSize = MVCCRangeKeyStack{}, nil, 0 } // If we find any new range keys and we haven't buffered any range keys yet, // buffer them. - if hasRange && !skipTombstones && len(rangeKeys) == 0 { - rangeBounds := iter.RangeBounds() - rangeKeysEnd = append(rangeKeysEnd[:0], rangeBounds.EndKey...) - - for _, rkv := range iter.RangeKeys().AsRangeKeyValues() { - rangeKeys = append(rangeKeys, rkv.Clone()) - rangeKeysSize += int64(len(rkv.RangeKey.StartKey) + len(rkv.RangeKey.EndKey) + len(rkv.Value)) - if !opts.ExportAllRevisions { - break - } + if hasRange && !skipTombstones && rangeKeys.IsEmpty() { + rangeKeys = iter.RangeKeys() + rangeKeysEnd = append(rangeKeysEnd[:0], rangeKeys.Bounds.EndKey...) + if !opts.ExportAllRevisions { + rangeKeys.Versions = rangeKeys.Versions[:1] + } + + // TODO(erikgrinaker): We should consider a CloneInto() method on the + // MVCCRangeKeyStack that allows reusing a byte buffer. See also TODO in + // Clone() about using a single allocation for the entire clone (all byte + // slices). + rangeKeys = rangeKeys.Clone() + + for _, v := range rangeKeys.Versions { + rangeKeysSize += int64( + len(rangeKeys.Bounds.Key) + len(rangeKeys.Bounds.EndKey) + len(v.Value)) } // Check if the range keys exceed a limit, using similar logic as point @@ -5571,10 +5567,10 @@ func MVCCExportToSST( // of some of the options and clean this up. curSize := rows.BulkOpSummary.DataSize reachedTargetSize := opts.TargetSize > 0 && uint64(curSize) >= opts.TargetSize - newSize := curSize + maxRangeKeysSizeIfTruncated(rangeBounds.Key) + newSize := curSize + maxRangeKeysSizeIfTruncated(rangeKeys.Bounds.Key) reachedMaxSize := opts.MaxSize > 0 && newSize > int64(opts.MaxSize) if paginated && (reachedTargetSize || reachedMaxSize) { - rangeKeys, rangeKeysEnd, rangeKeysSize = rangeKeys[:0], nil, 0 + rangeKeys, rangeKeysEnd, rangeKeysSize = MVCCRangeKeyStack{}, nil, 0 resumeKey = unsafeKey.Clone() break } @@ -5686,7 +5682,7 @@ func MVCCExportToSST( // next export's range keys to overlap with this one, e.g.: [a-f) with resume // key c@7 will export range keys [a-c\0) first, and then [c-f) when resuming, // which overlaps at [c-c\0). - if len(rangeKeys) > 0 { + if !rangeKeys.IsEmpty() { // Calculate the new rangeKeysSize due to the new resume bounds. if len(resumeKey.Key) > 0 && rangeKeysEnd.Compare(resumeKey.Key) > 0 { oldEndLen := len(rangeKeysEnd) @@ -5694,21 +5690,21 @@ func MVCCExportToSST( if resumeKey.Timestamp.IsSet() { rangeKeysEnd = rangeKeysEnd.Next() } - rangeKeysSize += int64(len(rangeKeys) * (len(rangeKeysEnd) - oldEndLen)) + rangeKeysSize += int64(rangeKeys.Len() * (len(rangeKeysEnd) - oldEndLen)) } - for _, rkv := range rangeKeys { - rkv.RangeKey.EndKey = rangeKeysEnd - mvccValue, ok, err := tryDecodeSimpleMVCCValue(rkv.Value) + rangeKeys.Bounds.EndKey = rangeKeysEnd + for _, v := range rangeKeys.Versions { + mvccValue, ok, err := tryDecodeSimpleMVCCValue(v.Value) if !ok && err == nil { - mvccValue, err = decodeExtendedMVCCValue(rkv.Value) + mvccValue, err = decodeExtendedMVCCValue(v.Value) } if err != nil { return roachpb.BulkOpSummary{}, MVCCKey{}, errors.Wrapf(err, - "decoding mvcc value %s", rkv.Value) + "decoding mvcc value %s", v.Value) } // Export only the inner roachpb.Value, not the MVCCValue header. mvccValue = MVCCValue{Value: mvccValue.Value} - if err := sstWriter.PutMVCCRangeKey(rkv.RangeKey, mvccValue); err != nil { + if err := sstWriter.PutMVCCRangeKey(rangeKeys.AsRangeKey(v), mvccValue); err != nil { return roachpb.BulkOpSummary{}, MVCCKey{}, err } } diff --git a/pkg/storage/mvcc_incremental_iterator.go b/pkg/storage/mvcc_incremental_iterator.go index 6e43d7905872..ac65b88a214d 100644 --- a/pkg/storage/mvcc_incremental_iterator.go +++ b/pkg/storage/mvcc_incremental_iterator.go @@ -11,8 +11,6 @@ package storage import ( - "sort" - "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/util" @@ -429,7 +427,9 @@ func (i *MVCCIncrementalIterator) advance() { } // NB: Don't update i.hasRange directly -- we only change it when - // i.rangeKeysStart changes, to avoid unnecessary checks. + // i.rangeKeysStart changes, which allows us to retain i.hasRange=false if + // we've already determined that the current range keys are outside of the + // time bounds. hasPoint, hasRange := i.iter.HasPointAndRange() i.hasPoint = hasPoint @@ -443,21 +443,10 @@ func (i *MVCCIncrementalIterator) advance() { if hasRange { if rangeStart := i.iter.RangeBounds().Key; !rangeStart.Equal(i.rangeKeysStart) { i.rangeKeysStart = append(i.rangeKeysStart[:0], rangeStart...) - // Find the first range key at or below EndTime. If that's also above - // StartTime then we have visible range keys. We use a linear search - // rather than a binary search because we expect EndTime to be near the - // current time, so the first range key will typically be sufficient. - hasRange = false - for _, rkv := range i.iter.RangeKeys().AsRangeKeyValues() { - if ts := rkv.RangeKey.Timestamp; ts.LessEq(i.endTime) { - hasRange = i.startTime.Less(ts) - break - } - } - i.hasRange = hasRange - newRangeKey = hasRange + i.hasRange = i.iter.RangeKeys().HasBetween(i.startTime.Next(), i.endTime) + newRangeKey = i.hasRange } - // else keep i.hasRange from last i.rangeKeysStart change. + // Else: keep i.hasRange from last i.rangeKeysStart change. } else { i.hasRange = false } @@ -551,38 +540,14 @@ func (i *MVCCIncrementalIterator) RangeKeys() MVCCRangeKeyStack { if !i.hasRange { return MVCCRangeKeyStack{} } - // TODO(erikgrinaker): It may be worthwhile to clone and memoize this result // for the same range key. However, callers may avoid calling RangeKeys() // unnecessarily, and we may optimize parent iterators, so let's measure. rangeKeys := i.iter.RangeKeys() - if i.ignoringTime { return rangeKeys } - - // Find the first range key at or below endTime, and truncate rangeKeys. We do - // a linear search rather than a binary search, because we expect endTime to - // be near the current time, so the first element will typically match. - first := len(rangeKeys.Versions) - 1 - for idx, v := range rangeKeys.Versions { - if v.Timestamp.LessEq(i.endTime) { - first = idx - break - } - } - rangeKeys.Versions = rangeKeys.Versions[first:] - - // Find the first range key at or below startTime, and truncate rangeKeys. - if i.startTime.IsSet() { - if idx := sort.Search(len(rangeKeys.Versions), func(idx int) bool { - return rangeKeys.Versions[idx].Timestamp.LessEq(i.startTime) - }); idx >= 0 { - rangeKeys.Versions = rangeKeys.Versions[:idx] - } - } - - return rangeKeys + return rangeKeys.Trim(i.startTime.Next(), i.endTime) } // UnsafeValue implements SimpleMVCCIterator. diff --git a/pkg/storage/point_synthesizing_iter.go b/pkg/storage/point_synthesizing_iter.go index 74b10b59f949..2f4ecd26aab3 100644 --- a/pkg/storage/point_synthesizing_iter.go +++ b/pkg/storage/point_synthesizing_iter.go @@ -65,9 +65,9 @@ var pointSynthesizingIterPool = sync.Pool{ type pointSynthesizingIter struct { iter MVCCIterator - // rangeKeys contains any range keys that overlap the current key position, - // for which points will be synthesized. - rangeKeys []MVCCRangeKeyValue + // rangeKeys contains any range key versions that overlap the current key + // position, for which points will be synthesized. + rangeKeys MVCCRangeKeyVersions // rangeKeysPos is the current key (along the rangeKeys span) that points will // be synthesized for. It is only set if rangeKeys is non-empty, and may @@ -172,14 +172,11 @@ func (i *pointSynthesizingIter) updateRangeKeys() { if !i.iterValid { i.clearRangeKeys() } else if _, hasRange := i.iter.HasPointAndRange(); hasRange { + // TODO(erikgrinaker): Optimize this. i.rangeKeysPos = append(i.rangeKeysPos[:0], i.iter.UnsafeKey().Key...) if rangeStart := i.iter.RangeBounds().Key; !rangeStart.Equal(i.rangeKeysStart) { i.rangeKeysStart = append(i.rangeKeysStart[:0], rangeStart...) - i.rangeKeys = i.rangeKeys[:0] - for _, rk := range i.iter.RangeKeys().AsRangeKeyValues() { - // TODO(erikgrinaker): We should optimize the clone cost. - i.rangeKeys = append(i.rangeKeys, rk.Clone()) - } + i.rangeKeys = i.iter.RangeKeys().Versions.Clone() } if !i.reverse { i.rangeKeysIdx = 0 @@ -206,10 +203,10 @@ func (i *pointSynthesizingIter) updateAtPoint() { } else if !i.reverse { i.atPoint = i.rangeKeysIdx >= len(i.rangeKeys) || !point.Timestamp.IsSet() || - i.rangeKeys[i.rangeKeysIdx].RangeKey.Timestamp.LessEq(point.Timestamp) + i.rangeKeys[i.rangeKeysIdx].Timestamp.LessEq(point.Timestamp) } else { i.atPoint = i.rangeKeysIdx < 0 || (point.Timestamp.IsSet() && - point.Timestamp.LessEq(i.rangeKeys[i.rangeKeysIdx].RangeKey.Timestamp)) + point.Timestamp.LessEq(i.rangeKeys[i.rangeKeysIdx].Timestamp)) } } @@ -341,7 +338,7 @@ func (i *pointSynthesizingIter) SeekGE(seekKey MVCCKey) { // If we're seeking to a specific version, skip newer range keys. if len(i.rangeKeys) > 0 && seekKey.Timestamp.IsSet() && seekKey.Key.Equal(i.rangeKeysPos) { i.rangeKeysIdx = sort.Search(len(i.rangeKeys), func(idx int) bool { - return i.rangeKeys[idx].RangeKey.Timestamp.LessEq(seekKey.Timestamp) + return i.rangeKeys[idx].Timestamp.LessEq(seekKey.Timestamp) }) } @@ -492,7 +489,7 @@ func (i *pointSynthesizingIter) SeekLT(seekKey MVCCKey) { // If we're seeking to a specific version, skip over older range keys. if seekKey.Timestamp.IsSet() && seekKey.Key.Equal(i.rangeKeysPos) { i.rangeKeysIdx = sort.Search(len(i.rangeKeys), func(idx int) bool { - return i.rangeKeys[idx].RangeKey.Timestamp.LessEq(seekKey.Timestamp) + return i.rangeKeys[idx].Timestamp.LessEq(seekKey.Timestamp) }) - 1 } @@ -569,7 +566,7 @@ func (i *pointSynthesizingIter) UnsafeKey() MVCCKey { } return MVCCKey{ Key: i.rangeKeysPos, - Timestamp: i.rangeKeys[i.rangeKeysIdx].RangeKey.Timestamp, + Timestamp: i.rangeKeys[i.rangeKeysIdx].Timestamp, } } @@ -752,10 +749,10 @@ func (i *pointSynthesizingIter) assertInvariants() error { maxIdx = i.rangeKeysIdx + 1 } if minIdx >= 0 && minIdx < len(i.rangeKeys) { - minKey = MVCCKey{Key: i.rangeKeysPos, Timestamp: i.rangeKeys[minIdx].RangeKey.Timestamp} + minKey = MVCCKey{Key: i.rangeKeysPos, Timestamp: i.rangeKeys[minIdx].Timestamp} } if maxIdx >= 0 && maxIdx < len(i.rangeKeys) { - maxKey = MVCCKey{Key: i.rangeKeysPos, Timestamp: i.rangeKeys[maxIdx].RangeKey.Timestamp} + maxKey = MVCCKey{Key: i.rangeKeysPos, Timestamp: i.rangeKeys[maxIdx].Timestamp} } iterKey := i.iter.Key() diff --git a/pkg/storage/read_as_of_iterator.go b/pkg/storage/read_as_of_iterator.go index 39f3536f8dc3..a88862dd9e93 100644 --- a/pkg/storage/read_as_of_iterator.go +++ b/pkg/storage/read_as_of_iterator.go @@ -130,7 +130,9 @@ func (f *ReadAsOfIterator) advance() { return } - if !f.asOf.IsEmpty() && f.asOf.Less(f.iter.UnsafeKey().Timestamp) { + key := f.iter.UnsafeKey() + + if f.asOf.Less(key.Timestamp) { // Skip keys above the asOf timestamp, regardless of type of key (e.g. point or range) f.iter.Next() } else if hasPoint, hasRange := f.iter.HasPointAndRange(); !hasPoint && hasRange { @@ -155,31 +157,25 @@ func (f *ReadAsOfIterator) advance() { } else if !hasRange { // On a valid key without a range key return - } else if f.asOfRangeKeyShadows() { + // TODO (msbutler): ensure this caches range key values (#84379) before + // the 22.2 branch cut, else we face a steep perf cliff for RESTORE with + // range keys. + } else if f.iter.RangeKeys().HasBetween(key.Timestamp, f.asOf) { // The latest range key, as of system time, shadows the latest point key. // This key is therefore deleted as of system time. f.iter.NextKey() } else { - // On a valid key that potentially shadows range key(s) + // On a valid key that potentially shadows range key(s). return } } } -// asOfRangeKeyShadows returns true if there exists a range key at or below the asOf timestamp -// that shadows the latest point key -// -// TODO (msbutler): ensure this function caches range key values (#84379) before -// the 22.2 branch cut, else we face a steep perf cliff for RESTORE with range keys. -func (f *ReadAsOfIterator) asOfRangeKeyShadows() (shadows bool) { - rangeKeys := f.iter.RangeKeys().AsRangeKeyValues() - if f.asOf.IsEmpty() { - return f.iter.UnsafeKey().Timestamp.LessEq(rangeKeys[0].RangeKey.Timestamp) - } - return HasRangeKeyBetween(rangeKeys, f.asOf, f.iter.UnsafeKey().Timestamp) -} - -// NewReadAsOfIterator constructs a ReadAsOfIterator. +// NewReadAsOfIterator constructs a ReadAsOfIterator. If asOf is not set, the +// iterator reads the most recent data. func NewReadAsOfIterator(iter SimpleMVCCIterator, asOf hlc.Timestamp) *ReadAsOfIterator { + if asOf.IsEmpty() { + asOf = hlc.MaxTimestamp + } return &ReadAsOfIterator{iter: iter, asOf: asOf} } diff --git a/pkg/storage/sst.go b/pkg/storage/sst.go index 557cc6384863..ef004407f158 100644 --- a/pkg/storage/sst.go +++ b/pkg/storage/sst.go @@ -409,13 +409,14 @@ func UpdateSSTTimestamps( } else if !ok { break } - for _, rkv := range iter.RangeKeys().AsRangeKeyValues() { - if rkv.RangeKey.Timestamp != from { + rangeKeys := iter.RangeKeys() + for _, v := range rangeKeys.Versions { + if v.Timestamp != from { return nil, errors.Errorf("unexpected timestamp %s (expected %s) for range key %s", - rkv.RangeKey.Timestamp, from, rkv.RangeKey) + v.Timestamp, from, rangeKeys.Bounds) } - rkv.RangeKey.Timestamp = to - if err = writer.PutRawMVCCRangeKey(rkv.RangeKey, rkv.Value); err != nil { + v.Timestamp = to + if err = writer.PutRawMVCCRangeKey(rangeKeys.AsRangeKey(v), v.Value); err != nil { return nil, err } } diff --git a/pkg/testutils/storageutils/scan.go b/pkg/testutils/storageutils/scan.go index af1665af4d78..bd21e5377ef6 100644 --- a/pkg/testutils/storageutils/scan.go +++ b/pkg/testutils/storageutils/scan.go @@ -45,13 +45,14 @@ func ScanIter(t *testing.T, iter storage.SimpleMVCCIterator) KVs { hasPoint, hasRange := iter.HasPointAndRange() if hasRange { if bounds := iter.RangeBounds(); !bounds.Key.Equal(prevRangeStart) { - for _, rkv := range iter.RangeKeys().AsRangeKeyValues() { - if len(rkv.Value) == 0 { - rkv.Value = nil + prevRangeStart = bounds.Key.Clone() + rangeKeys := iter.RangeKeys().Clone() + for _, v := range rangeKeys.Versions { + if len(v.Value) == 0 { + v.Value = nil } - kvs = append(kvs, rkv.Clone()) + kvs = append(kvs, rangeKeys.AsRangeKeyValue(v)) } - prevRangeStart = bounds.Key.Clone() } } if hasPoint { From 4800f7c7d37b721c63453afc54c23d335ec141db Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Thu, 28 Jul 2022 23:43:51 +0000 Subject: [PATCH 4/6] *: migrate higher-level code to `MVCCRangeKeyStack` Release note: None --- pkg/ccl/backupccl/file_sst_sink.go | 5 ++-- pkg/kv/kvserver/batcheval/cmd_add_sstable.go | 29 ++++++++++--------- pkg/kv/kvserver/batcheval/cmd_clear_range.go | 8 ++--- .../batcheval/cmd_delete_range_test.go | 15 +++++----- .../kvserver/batcheval/cmd_end_transaction.go | 8 ++--- .../kvserver/batcheval/cmd_refresh_range.go | 6 +--- pkg/kv/kvserver/rangefeed/catchup_scan.go | 9 +++--- .../kvserver/rditer/replica_data_iter_test.go | 15 +++++----- 8 files changed, 48 insertions(+), 47 deletions(-) diff --git a/pkg/ccl/backupccl/file_sst_sink.go b/pkg/ccl/backupccl/file_sst_sink.go index f1017b6b3bb2..7dffa08e944b 100644 --- a/pkg/ccl/backupccl/file_sst_sink.go +++ b/pkg/ccl/backupccl/file_sst_sink.go @@ -363,8 +363,9 @@ func (s *fileSSTSink) copyRangeKeys(dataSST []byte) error { } else if !ok { break } - for _, rkv := range iter.RangeKeys().AsRangeKeyValues() { - if err := s.sst.PutRawMVCCRangeKey(rkv.RangeKey, rkv.Value); err != nil { + rangeKeys := iter.RangeKeys() + for _, v := range rangeKeys.Versions { + if err := s.sst.PutRawMVCCRangeKey(rangeKeys.AsRangeKey(v), v.Value); err != nil { return err } } diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go index 6569362dea1f..efd7382f03b5 100644 --- a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go +++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go @@ -417,15 +417,16 @@ func EvalAddSSTable( } else if !ok { break } - for _, rkv := range rangeIter.RangeKeys().AsRangeKeyValues() { - if err = readWriter.PutRawMVCCRangeKey(rkv.RangeKey, rkv.Value); err != nil { + rangeKeys := rangeIter.RangeKeys() + for _, v := range rangeKeys.Versions { + if err = readWriter.PutRawMVCCRangeKey(rangeKeys.AsRangeKey(v), v.Value); err != nil { return result.Result{}, err } if sstToReqTS.IsSet() { readWriter.LogLogicalOp(storage.MVCCDeleteRangeOpType, storage.MVCCLogicalOpDetails{ - Key: rkv.RangeKey.StartKey, - EndKey: rkv.RangeKey.EndKey, - Timestamp: rkv.RangeKey.Timestamp, + Key: rangeKeys.Bounds.Key, + EndKey: rangeKeys.Bounds.EndKey, + Timestamp: v.Timestamp, }) } } @@ -517,26 +518,28 @@ func assertSSTContents(sst []byte, sstTimestamp hlc.Timestamp, stats *enginepb.M break } - for _, rkv := range iter.RangeKeys().AsRangeKeyValues() { - if err := rkv.RangeKey.Validate(); err != nil { + rangeKeys := iter.RangeKeys() + for _, v := range rangeKeys.Versions { + rangeKey := rangeKeys.AsRangeKey(v) + if err := rangeKey.Validate(); err != nil { return errors.NewAssertionErrorWithWrappedErrf(err, "SST contains invalid range key") } - if sstTimestamp.IsSet() && rkv.RangeKey.Timestamp != sstTimestamp { + if sstTimestamp.IsSet() && v.Timestamp != sstTimestamp { return errors.AssertionFailedf( "SST has unexpected timestamp %s (expected %s) for range key %s", - rkv.RangeKey.Timestamp, sstTimestamp, rkv.RangeKey) + v.Timestamp, sstTimestamp, rangeKeys.Bounds) } - value, err := storage.DecodeMVCCValue(rkv.Value) + value, err := storage.DecodeMVCCValue(v.Value) if err != nil { return errors.NewAssertionErrorWithWrappedErrf(err, - "SST contains invalid range key value for range key %s", rkv.RangeKey) + "SST contains invalid range key value for range key %s", rangeKey) } if !value.IsTombstone() { - return errors.AssertionFailedf("SST contains non-tombstone range key %s", rkv.RangeKey) + return errors.AssertionFailedf("SST contains non-tombstone range key %s", rangeKey) } if value.MVCCValueHeader != (enginepb.MVCCValueHeader{}) { return errors.AssertionFailedf("SST contains non-empty MVCC value header for range key %s", - rkv.RangeKey) + rangeKey) } } } diff --git a/pkg/kv/kvserver/batcheval/cmd_clear_range.go b/pkg/kv/kvserver/batcheval/cmd_clear_range.go index 59cefa36b5cd..0cc476408354 100644 --- a/pkg/kv/kvserver/batcheval/cmd_clear_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_clear_range.go @@ -218,9 +218,9 @@ func computeStatsDelta( if ok, err := iter.Valid(); err != nil { return err } else if ok && iter.RangeBounds().Key.Compare(bound) < 0 { - for i, rkv := range iter.RangeKeys().AsRangeKeyValues() { - keyBytes := int64(storage.EncodedMVCCTimestampSuffixLength(rkv.RangeKey.Timestamp)) - valBytes := int64(len(rkv.Value)) + for i, v := range iter.RangeKeys().Versions { + keyBytes := int64(storage.EncodedMVCCTimestampSuffixLength(v.Timestamp)) + valBytes := int64(len(v.Value)) if i == 0 { delta.RangeKeyCount-- keyBytes += 2 * int64(storage.EncodedMVCCKeyPrefixLength(bound)) @@ -229,7 +229,7 @@ func computeStatsDelta( delta.RangeValCount-- delta.RangeValBytes -= valBytes delta.GCBytesAge -= (keyBytes + valBytes) * - (delta.LastUpdateNanos/1e9 - rkv.RangeKey.Timestamp.WallTime/1e9) + (delta.LastUpdateNanos/1e9 - v.Timestamp.WallTime/1e9) } } return nil diff --git a/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go b/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go index 99457c294bcb..bae99f27b225 100644 --- a/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go @@ -306,8 +306,8 @@ func checkPredicateDeleteRange(t *testing.T, engine storage.Reader, rKeyInfo sto // PredicateDeleteRange should not have written any delete tombstones; // therefore, any range key tombstones in the span should have been // written before the request was issued. - for _, rKey := range iter.RangeKeys().AsRangeKeyValues() { - require.Equal(t, true, rKey.RangeKey.Timestamp.Less(rKeyInfo.Timestamp)) + for _, v := range iter.RangeKeys().Versions { + require.True(t, v.Timestamp.Less(rKeyInfo.Timestamp)) } continue } @@ -337,13 +337,14 @@ func checkDeleteRangeTombstone( break } require.True(t, ok) - for _, rkv := range iter.RangeKeys().AsRangeKeyValues() { - if rkv.RangeKey.Timestamp.Equal(rangeKey.Timestamp) { + rangeKeys := iter.RangeKeys() + for _, v := range rangeKeys.Versions { + if v.Timestamp.Equal(rangeKey.Timestamp) { if len(seen.RangeKey.StartKey) == 0 { - seen = rkv.Clone() + seen = rangeKeys.AsRangeKeyValue(v).Clone() } else { - seen.RangeKey.EndKey = rkv.RangeKey.EndKey.Clone() - require.Equal(t, seen.Value, rkv.Value) + seen.RangeKey.EndKey = rangeKeys.Bounds.EndKey.Clone() + require.Equal(t, seen.Value, v.Value) } break } diff --git a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go index 0fd41891e0cf..9ecd47faa890 100644 --- a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go +++ b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go @@ -1315,9 +1315,9 @@ func computeSplitRangeKeyStatsDelta( // contribution of the range key fragmentation. The naïve calculation would be // rhs.EncodedSize() - (keyLen(rhs.EndKey) - keyLen(lhs.EndKey)) // which simplifies to 2 * keyLen(rhs.StartKey) + tsLen(rhs.Timestamp). - for i, rkv := range iter.RangeKeys().AsRangeKeyValues() { - keyBytes := int64(storage.EncodedMVCCTimestampSuffixLength(rkv.RangeKey.Timestamp)) - valBytes := int64(len(rkv.Value)) + for i, v := range iter.RangeKeys().Versions { + keyBytes := int64(storage.EncodedMVCCTimestampSuffixLength(v.Timestamp)) + valBytes := int64(len(v.Value)) if i == 0 { delta.RangeKeyCount++ keyBytes += 2 * int64(storage.EncodedMVCCKeyPrefixLength(splitKey)) @@ -1325,7 +1325,7 @@ func computeSplitRangeKeyStatsDelta( delta.RangeKeyBytes += keyBytes delta.RangeValCount++ delta.RangeValBytes += valBytes - delta.GCBytesAge += (keyBytes + valBytes) * (nowNanos/1e9 - rkv.RangeKey.Timestamp.WallTime/1e9) + delta.GCBytesAge += (keyBytes + valBytes) * (nowNanos/1e9 - v.Timestamp.WallTime/1e9) } return delta, nil diff --git a/pkg/kv/kvserver/batcheval/cmd_refresh_range.go b/pkg/kv/kvserver/batcheval/cmd_refresh_range.go index 2dadc3a47bc0..213f799553c8 100644 --- a/pkg/kv/kvserver/batcheval/cmd_refresh_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_refresh_range.go @@ -93,12 +93,8 @@ func refreshRange( key := iter.UnsafeKey().Clone() if _, hasRange := iter.HasPointAndRange(); hasRange { - rangeKVs := iter.RangeKeys().AsRangeKeyValues() - if len(rangeKVs) == 0 { // defensive - return errors.Errorf("expected range key at %s not found", key) - } return roachpb.NewRefreshFailedError(roachpb.RefreshFailedError_REASON_COMMITTED_VALUE, - key.Key, rangeKVs[0].RangeKey.Timestamp) + key.Key, iter.RangeKeys().Versions[0].Timestamp) } if !key.IsValue() { diff --git a/pkg/kv/kvserver/rangefeed/catchup_scan.go b/pkg/kv/kvserver/rangefeed/catchup_scan.go index 1cf82b645727..40db0f58796a 100644 --- a/pkg/kv/kvserver/rangefeed/catchup_scan.go +++ b/pkg/kv/kvserver/rangefeed/catchup_scan.go @@ -154,15 +154,15 @@ func (i *CatchUpIterator) CatchUpScan(outputFn outputEventFn, withDiff bool) err rangeKeysStart = append(rangeKeysStart[:0], rangeBounds.Key...) // Emit events for these MVCC range tombstones, in chronological order. - rangeKeys := i.RangeKeys().AsRangeKeyValues() - for i := len(rangeKeys) - 1; i >= 0; i-- { + versions := i.RangeKeys().Versions + for i := len(versions) - 1; i >= 0; i-- { var span roachpb.Span a, span.Key = a.Copy(rangeBounds.Key, 0) a, span.EndKey = a.Copy(rangeBounds.EndKey, 0) err := outputFn(&roachpb.RangeFeedEvent{ DeleteRange: &roachpb.RangeFeedDeleteRange{ Span: span, - Timestamp: rangeKeys[i].RangeKey.Timestamp, + Timestamp: versions[i].Timestamp, }, }) if err != nil { @@ -271,8 +271,7 @@ func (i *CatchUpIterator) CatchUpScan(outputFn outputEventFn, withDiff bool) err // to rangeKeysStart above, because NextIgnoringTime() could reveal // additional MVCC range tombstones below StartTime that cover this // point. We need to find a more performant way to handle this. - if !hasRange || !storage.HasRangeKeyBetween( - i.RangeKeys().AsRangeKeyValues(), reorderBuf[l].Val.Value.Timestamp, ts) { + if !hasRange || !i.RangeKeys().HasBetween(ts, reorderBuf[l].Val.Value.Timestamp) { // TODO(sumeer): find out if it is deliberate that we are not populating // PrevValue.Timestamp. reorderBuf[l].Val.PrevValue.RawBytes = val diff --git a/pkg/kv/kvserver/rditer/replica_data_iter_test.go b/pkg/kv/kvserver/rditer/replica_data_iter_test.go index f7eeef4e4452..cc73567cf99e 100644 --- a/pkg/kv/kvserver/rditer/replica_data_iter_test.go +++ b/pkg/kv/kvserver/rditer/replica_data_iter_test.go @@ -176,19 +176,20 @@ func verifyRDReplicatedOnlyMVCCIter( } } if r { - rks := iter.RangeKeys().AsRangeKeyValues() - if !rks[0].RangeKey.StartKey.Equal(rangeStart) { + rangeKeys := iter.RangeKeys().Clone() + if !rangeKeys.Bounds.Key.Equal(rangeStart) { + rangeStart = rangeKeys.Bounds.Key.Clone() if !reverse { - for _, rk := range rks { - actualRanges = append(actualRanges, rk.RangeKey.Clone()) + for _, v := range rangeKeys.Versions { + actualRanges = append(actualRanges, rangeKeys.AsRangeKey(v)) } } else { - for i := len(rks) - 1; i >= 0; i-- { - actualRanges = append([]storage.MVCCRangeKey{rks[i].RangeKey.Clone()}, + for i := rangeKeys.Len() - 1; i >= 0; i-- { + actualRanges = append([]storage.MVCCRangeKey{ + rangeKeys.AsRangeKey(rangeKeys.Versions[i])}, actualRanges...) } } - rangeStart = rks[0].RangeKey.StartKey.Clone() } } next() From 0d37b5a9a8e0a504bb9a0534f52c917af19ca986 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 29 Jul 2022 15:13:56 +0000 Subject: [PATCH 5/6] kvserver/gc: partially migrate to `MVCCRangeKeyStack` Some parts require invasive changes to MVCC stats helpers. These will shortly be consolidated with other MVCC stats logic elsewhere, so the existing logic is retained for now by using `AsRangeKeyValues()`. Release note: None --- pkg/kv/kvserver/gc/gc.go | 1 + pkg/kv/kvserver/gc/gc_iterator.go | 13 +++++-------- pkg/kv/kvserver/gc/gc_random_test.go | 15 +++++---------- pkg/kv/kvserver/gc/gc_test.go | 12 ++++++------ pkg/storage/mvcc.go | 18 ++++++++---------- pkg/storage/mvcc_test.go | 6 +++--- 6 files changed, 28 insertions(+), 37 deletions(-) diff --git a/pkg/kv/kvserver/gc/gc.go b/pkg/kv/kvserver/gc/gc.go index 8e0d70fe7da8..71d9ab635d17 100644 --- a/pkg/kv/kvserver/gc/gc.go +++ b/pkg/kv/kvserver/gc/gc.go @@ -855,6 +855,7 @@ func processReplicatedRangeTombstones( if !ok { break } + // TODO(erikgrinaker): Rewrite to use MVCCRangeKeyStack. rangeKeys := iter.RangeKeys().AsRangeKeyValues() if idx := sort.Search(len(rangeKeys), func(i int) bool { diff --git a/pkg/kv/kvserver/gc/gc_iterator.go b/pkg/kv/kvserver/gc/gc_iterator.go index 2320d7315326..33c2bb5ec768 100644 --- a/pkg/kv/kvserver/gc/gc_iterator.go +++ b/pkg/kv/kvserver/gc/gc_iterator.go @@ -11,7 +11,6 @@ package gc import ( - "sort" "strings" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rditer" @@ -193,15 +192,13 @@ func (it *gcIterator) currentRangeTS() hlc.Timestamp { if rangeTombstoneStartKey.Equal(it.cachedRangeTombstoneKey) { return it.cachedRangeTombstoneTS } + it.cachedRangeTombstoneKey = append(it.cachedRangeTombstoneKey[:0], rangeTombstoneStartKey...) - it.cachedRangeTombstoneTS = hlc.Timestamp{} - rangeKeys := it.it.RangeKeys().AsRangeKeyValues() - if idx := sort.Search(len(rangeKeys), func(i int) bool { - return rangeKeys[i].RangeKey.Timestamp.LessEq(it.threshold) - }); idx < len(rangeKeys) { - it.cachedRangeTombstoneTS = rangeKeys[idx].RangeKey.Timestamp + if v, ok := it.it.RangeKeys().FirstBelow(it.threshold); ok { + it.cachedRangeTombstoneTS = v.Timestamp + } else { + it.cachedRangeTombstoneTS = hlc.Timestamp{} } - it.cachedRangeTombstoneKey = append(it.cachedRangeTombstoneKey[:0], rangeTombstoneStartKey...) return it.cachedRangeTombstoneTS } diff --git a/pkg/kv/kvserver/gc/gc_random_test.go b/pkg/kv/kvserver/gc/gc_random_test.go index a67f06b38540..d6300731e8c9 100644 --- a/pkg/kv/kvserver/gc/gc_random_test.go +++ b/pkg/kv/kvserver/gc/gc_random_test.go @@ -466,12 +466,12 @@ func getExpectationsGenerator( // so we will add them to history of current key for analysis. // Bare range tombstones are ignored. if r { - for _, r := range it.RangeKeys().AsRangeKeyValues() { + for _, r := range it.RangeKeys().AsRangeKeys() { history = append(history, historyItem{ MVCCKeyValue: storage.MVCCKeyValue{ Key: storage.MVCCKey{ - Key: r.RangeKey.StartKey, - Timestamp: r.RangeKey.Timestamp, + Key: r.StartKey, + Timestamp: r.Timestamp, }, Value: nil, }, @@ -622,13 +622,8 @@ func rangeFragmentsFromIt(t *testing.T, it storage.MVCCIterator) [][]storage.MVC if !ok { break } - _, r := it.HasPointAndRange() - if r { - fragments := make([]storage.MVCCRangeKeyValue, len(it.RangeKeys().AsRangeKeyValues())) - for i, r := range it.RangeKeys().AsRangeKeyValues() { - fragments[i] = r.Clone() - } - result = append(result, fragments) + if _, hasRange := it.HasPointAndRange(); hasRange { + result = append(result, it.RangeKeys().Clone().AsRangeKeyValues()) } it.NextKey() } diff --git a/pkg/kv/kvserver/gc/gc_test.go b/pkg/kv/kvserver/gc/gc_test.go index 696af35d5c32..0cb598f2f0f9 100644 --- a/pkg/kv/kvserver/gc/gc_test.go +++ b/pkg/kv/kvserver/gc/gc_test.go @@ -1084,20 +1084,20 @@ func engineData(t *testing.T, r storage.Reader, desc roachpb.RangeDescriptor) [] _, r := rangeIt.HasPointAndRange() if r { span := rangeIt.RangeBounds() - newKeys := rangeIt.RangeKeys().AsRangeKeyValues() + newKeys := rangeIt.RangeKeys().AsRangeKeys() if lastEnd.Equal(span.Key) { // Try merging keys by timestamp. var newPartial []storage.MVCCRangeKey i, j := 0, 0 for i < len(newKeys) && j < len(partialRangeKeys) { - switch newKeys[i].RangeKey.Timestamp.Compare(partialRangeKeys[j].Timestamp) { + switch newKeys[i].Timestamp.Compare(partialRangeKeys[j].Timestamp) { case 1: - newPartial = append(newPartial, newKeys[i].RangeKey.Clone()) + newPartial = append(newPartial, newKeys[i].Clone()) i++ case 0: newPartial = append(newPartial, storage.MVCCRangeKey{ StartKey: partialRangeKeys[j].StartKey, - EndKey: newKeys[i].RangeKey.EndKey.Clone(), + EndKey: newKeys[i].EndKey.Clone(), Timestamp: partialRangeKeys[j].Timestamp, }) i++ @@ -1108,7 +1108,7 @@ func engineData(t *testing.T, r storage.Reader, desc roachpb.RangeDescriptor) [] } } for ; i < len(newKeys); i++ { - newPartial = append(newPartial, newKeys[i].RangeKey.Clone()) + newPartial = append(newPartial, newKeys[i].Clone()) } for ; j < len(partialRangeKeys); j++ { newPartial = append(newPartial, partialRangeKeys[j].Clone()) @@ -1118,7 +1118,7 @@ func engineData(t *testing.T, r storage.Reader, desc roachpb.RangeDescriptor) [] result = append(result, makeRangeCells(partialRangeKeys)...) partialRangeKeys = make([]storage.MVCCRangeKey, len(newKeys)) for i, rk := range newKeys { - partialRangeKeys[i] = rk.RangeKey.Clone() + partialRangeKeys[i] = rk.Clone() } } lastEnd = span.EndKey.Clone() diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index a0963e97a1fb..043211baa74c 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -4599,18 +4599,15 @@ func MVCCGarbageCollect( if ms != nil { // We need to iterate ranges only to compute GCBytesAge if we are updating // stats. + // + // TODO(erikgrinaker): Rewrite to use MVCCRangeKeyStack. if _, hasRange := iter.HasPointAndRange(); hasRange && !lastRangeTombstoneStart.Equal(iter.RangeBounds().Key) { - rangeKeys := iter.RangeKeys().AsRangeKeyValues() - newLen := len(rangeKeys) - if cap(rangeTombstoneTss) < newLen { - rangeTombstoneTss = make([]hlc.Timestamp, newLen) - } else { - rangeTombstoneTss = rangeTombstoneTss[:newLen] - } - for i, rkv := range rangeKeys { - rangeTombstoneTss[i] = rkv.RangeKey.Timestamp + rangeKeys := iter.RangeKeys() + lastRangeTombstoneStart = append(lastRangeTombstoneStart[:0], rangeKeys.Bounds.Key...) + rangeTombstoneTss = rangeTombstoneTss[:0] + for _, v := range rangeKeys.Versions { + rangeTombstoneTss = append(rangeTombstoneTss, v.Timestamp) } - lastRangeTombstoneStart = append(lastRangeTombstoneStart[:0], rangeKeys[0].RangeKey.StartKey...) } else if !hasRange { lastRangeTombstoneStart = lastRangeTombstoneStart[:0] rangeTombstoneTss = rangeTombstoneTss[:0] @@ -4816,6 +4813,7 @@ func MVCCGarbageCollectRangeKeys( break } + // TODO(erikgrinaker): Rewrite to use MVCCRangeKeyStack. bounds := iter.RangeBounds() unsafeRangeKeys := iter.RangeKeys().AsRangeKeyValues() diff --git a/pkg/storage/mvcc_test.go b/pkg/storage/mvcc_test.go index d5c21cc06e68..3e4d8d0e3d31 100644 --- a/pkg/storage/mvcc_test.go +++ b/pkg/storage/mvcc_test.go @@ -5763,9 +5763,9 @@ func TestMVCCGarbageCollectRanges(t *testing.T) { if !ok { break } - for _, rkv := range it.RangeKeys().AsRangeKeyValues() { - require.Less(t, expectIndex, len(d.after), "not enough expectations; at unexpected range:", rkv.RangeKey.String()) - require.EqualValues(t, d.after[expectIndex], rkv.RangeKey, "range key is not equal") + for _, rk := range it.RangeKeys().AsRangeKeys() { + require.Less(t, expectIndex, len(d.after), "not enough expectations; at unexpected range: %s", rk) + require.EqualValues(t, d.after[expectIndex], rk, "range key is not equal") expectIndex++ } } From 4f78a769a7658d858e5c1d204887ada1ccb5f270 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 29 Jul 2022 15:21:36 +0000 Subject: [PATCH 6/6] storage: remove `FirstRangeKeyAbove()` and `HasRangeKeyBetween()` Release note: None --- pkg/storage/mvcc_key.go | 41 ++--------------------------------------- 1 file changed, 2 insertions(+), 39 deletions(-) diff --git a/pkg/storage/mvcc_key.go b/pkg/storage/mvcc_key.go index 54f21a63c35b..587c9ad2aa6b 100644 --- a/pkg/storage/mvcc_key.go +++ b/pkg/storage/mvcc_key.go @@ -501,8 +501,8 @@ func (s MVCCRangeKeyStack) AsRangeKeyValues() []MVCCRangeKeyValue { } // CanMergeRight returns true if the current stack will merge with the given -// right-hand stack. The key bounds must touch exactly, i.e. lhs.EndKey must -// equal rhs.Key. +// right-hand stack. The key bounds must touch exactly, i.e. the left-hand +// EndKey must equal the right-hand Key. func (s MVCCRangeKeyStack) CanMergeRight(r MVCCRangeKeyStack) bool { if s.IsEmpty() || s.Len() != r.Len() || !s.Bounds.EndKey.Equal(r.Bounds.Key) { return false @@ -669,40 +669,3 @@ func (v MVCCRangeKeyVersion) Clone() MVCCRangeKeyVersion { func (v MVCCRangeKeyVersion) Equal(o MVCCRangeKeyVersion) bool { return v.Timestamp.Equal(o.Timestamp) && bytes.Equal(v.Value, o.Value) } - -// FirstRangeKeyAbove does a binary search for the first range key at or above -// the given timestamp. It assumes the range keys are ordered in descending -// timestamp order, as returned by SimpleMVCCIterator.RangeKeys(). Returns false -// if no matching range key was found. -// -// TODO(erikgrinaker): Consider using a new type for []MVCCRangeKeyValue as -// returned by SimpleMVCCIterator.RangeKeys(), and add this as a method. -func FirstRangeKeyAbove(rangeKeys []MVCCRangeKeyValue, ts hlc.Timestamp) (MVCCRangeKeyValue, bool) { - // This is kind of odd due to sort.Search() semantics: we do a binary search - // for the first range tombstone that's below the timestamp, then return the - // previous range tombstone if any. - if i := sort.Search(len(rangeKeys), func(i int) bool { - return rangeKeys[i].RangeKey.Timestamp.Less(ts) - }); i > 0 { - return rangeKeys[i-1], true - } - return MVCCRangeKeyValue{}, false -} - -// HasRangeKeyBetween checks whether an MVCC range key exists between the two -// given timestamps (in order). It assumes the range keys are ordered in -// descending timestamp order, as returned by SimpleMVCCIterator.RangeKeys(). -func HasRangeKeyBetween(rangeKeys []MVCCRangeKeyValue, upper, lower hlc.Timestamp) bool { - if len(rangeKeys) == 0 { - return false - } - if util.RaceEnabled && upper.Less(lower) { - panic(errors.AssertionFailedf("HasRangeKeyBetween given upper %s <= lower %s", upper, lower)) - } - if rkv, ok := FirstRangeKeyAbove(rangeKeys, lower); ok { - // Consider equal timestamps to be "between". This shouldn't really happen, - // since MVCC enforces point and range keys can't have the same timestamp. - return rkv.RangeKey.Timestamp.LessEq(upper) - } - return false -}