sstable: fix incorrect range key mask in virtualLast()
Previously, when a virtual sstable's iterator was SeekLT'd
past its end key or had Last() called on it, and the virtual
sstable had an inclusive end bound, we'd do a SeekGE on the end
key in `virtualLast()`. This could leave us inconsistent with the
rest of the iterator stack on the positioning of the
BoundLimitedBlockPropertyFilter in range-key combined iteration
with range key masking enabled: the sstable iterator expects the
filter to always be positioned at a range key behind us in the
direction we're iterating, an expectation that saves some key
comparisons when checking whether the current key is masked.

This change implements a SeekLE() that can be used for this corner
case.
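
As an illustration of the new positioning semantics, here is a minimal,
self-contained Go sketch: a hypothetical sliceIter over plain user keys
(not Pebble's iterators), with a seekLE built the same way the fix builds
one, by SeekGE'ing to the first key >= the seek key, stepping forward past
any keys equal to it, and then taking a single Prev().

// Illustrative sketch only: sliceIter and seekLE are hypothetical, not
// Pebble code.
package main

import (
	"bytes"
	"fmt"
)

type sliceIter struct {
	keys [][]byte // sorted ascending
	pos  int
}

// SeekGE positions the iterator at the first key >= key, or returns nil if
// every key is smaller.
func (it *sliceIter) SeekGE(key []byte) []byte {
	for it.pos = 0; it.pos < len(it.keys); it.pos++ {
		if bytes.Compare(it.keys[it.pos], key) >= 0 {
			return it.keys[it.pos]
		}
	}
	return nil
}

// Next moves one key forward, returning nil once exhausted.
func (it *sliceIter) Next() []byte {
	if it.pos < len(it.keys) {
		it.pos++
	}
	if it.pos == len(it.keys) {
		return nil
	}
	return it.keys[it.pos]
}

// Prev moves one key backward, returning nil once exhausted.
func (it *sliceIter) Prev() []byte {
	if it.pos <= 0 {
		it.pos = -1
		return nil
	}
	it.pos--
	return it.keys[it.pos]
}

// seekLE returns the largest key <= key, or nil if there is none.
func seekLE(it *sliceIter, key []byte) []byte {
	k := it.SeekGE(key)
	for k != nil && bytes.Equal(k, key) {
		k = it.Next() // skip entries equal to the seek key
	}
	// Step back once: onto the last entry equal to the seek key if any
	// existed, otherwise onto the largest smaller key.
	return it.Prev()
}

func main() {
	it := &sliceIter{keys: [][]byte{[]byte("a"), []byte("c"), []byte("c"), []byte("d")}}
	fmt.Printf("%s %s\n", seekLE(it, []byte("c")), seekLE(it, []byte("b"))) // "c a"
}

The real change applies this same shape first to the index block and then
to the data block, clamps the seek key to the virtual sstable's inclusive
upper bound, and falls back to skipBackward() when a loaded block turns out
to be irrelevant.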

Fixes #3128.
itsbilal committed Dec 18, 2023
1 parent ab4952c commit 48b54c2
Showing 4 changed files with 244 additions and 45 deletions.
41 changes: 35 additions & 6 deletions ingest_test.go
@@ -587,14 +587,18 @@ func TestExcise(t *testing.T) {
}()

var opts *Options
reset := func() {
reset := func(blockSize int) {
if d != nil {
require.NoError(t, d.Close())
}

mem = vfs.NewMem()
require.NoError(t, mem.MkdirAll("ext", 0755))
opts = &Options{
BlockPropertyCollectors: []func() BlockPropertyCollector{
sstable.NewTestKeysBlockPropertyCollector,
},
Comparer: testkeys.Comparer,
FS: mem,
L0CompactionThreshold: 100,
L0StopWritesThreshold: 100,
@@ -603,7 +607,9 @@ func TestExcise(t *testing.T) {
flushed = true
}},
FormatMajorVersion: FormatVirtualSSTables,
Comparer: testkeys.Comparer,
}
if blockSize != 0 {
opts.Levels = append(opts.Levels, LevelOptions{BlockSize: blockSize, IndexBlockSize: 32 << 10})
}
// Disable automatic compactions because otherwise we'll race with
// delete-only compactions triggered by ingesting range tombstones.
@@ -616,12 +622,21 @@ func TestExcise(t *testing.T) {
d, err = Open("", opts)
require.NoError(t, err)
}
reset()
reset(0 /* blockSize */)

datadriven.RunTest(t, "testdata/excise", func(t *testing.T, td *datadriven.TestData) string {
switch td.Cmd {
case "reset":
reset()
var blockSize int
for i := range td.CmdArgs {
switch td.CmdArgs[i].Key {
case "tiny-blocks":
blockSize = 1
default:
return fmt.Sprintf("unexpected arg: %s", td.CmdArgs[i].Key)
}
}
reset(blockSize)
return ""
case "reopen":
require.NoError(t, d.Close())
@@ -687,9 +702,23 @@ func TestExcise(t *testing.T) {
return runGetCmd(t, td, d)

case "iter":
iter, _ := d.NewIter(&IterOptions{
opts := &IterOptions{
KeyTypes: IterKeyTypePointsAndRanges,
})
}
for i := range td.CmdArgs {
switch td.CmdArgs[i].Key {
case "range-key-masking":
opts.RangeKeyMasking = RangeKeyMasking{
Suffix: []byte(td.CmdArgs[i].Vals[0]),
Filter: func() BlockPropertyFilterMask {
return sstable.NewTestKeysMaskingFilter()
},
}
default:
return fmt.Sprintf("unexpected argument: %s", td.CmdArgs[i].Key)
}
}
iter, _ := d.NewIter(opts)
return runIterCmd(td, iter, true)

case "lsm":
81 changes: 63 additions & 18 deletions sstable/reader_iter_single_lvl.go
@@ -5,6 +5,7 @@
package sstable

import (
"bytes"
"context"
"fmt"
"unsafe"
@@ -440,7 +441,7 @@ func (i *singleLevelIterator) readBlockForVBR(
// that a block is excluded according to its properties but only if its bounds
// fall within the filter's current bounds. This function consults the
// appropriate bound, depending on the iteration direction, and returns either
// `blockIntersects` or `blockMaybeExcluded`.
// `blockIntersects` or `blockExcluded`.
func (i *singleLevelIterator) resolveMaybeExcluded(dir int8) intersectsResult {
// TODO(jackson): We could first try comparing to top-level index block's
// key, and if within bounds avoid per-data block key comparisons.
@@ -852,26 +853,70 @@ func (i *singleLevelIterator) virtualLast() (*InternalKey, base.LazyValue) {
panic("pebble: invalid call to virtualLast")
}

// Seek to the first internal key.
ikey, _ := i.SeekGE(i.upper, base.SeekGEFlagsNone)
if i.endKeyInclusive {
// Let's say the virtual sstable upper bound is c#1, with the keys c#3, c#2,
// c#1, d, e, ... in the sstable. So, the last key in the virtual sstable is
// c#1. We can perform SeekGE(i.upper) and then keep nexting until we find
// the last key with userkey == i.upper.
//
// TODO(bananabrick): Think about how to improve this. If many internal keys
// with the same user key at the upper bound then this could be slow, but
// maybe the odds of having many internal keys with the same user key at the
// upper bound are low.
for ikey != nil && i.cmp(ikey.UserKey, i.upper) == 0 {
ikey, _ = i.Next()
if !i.endKeyInclusive {
// Trivial case.
return i.SeekLT(i.upper, base.SeekLTFlagsNone)
}
return i.virtualLastSeekLE(i.upper)
}

// virtualLastSeekLE is called by virtualLast to do a SeekLE as part of a
// virtualLast. Consider generalizing this into a SeekLE() if there are other
// uses of this method in the future.
func (i *singleLevelIterator) virtualLastSeekLE(key []byte) (*InternalKey, base.LazyValue) {
// Callers of SeekLE don't know about virtual sstable bounds, so we may
// have to internally restrict the bounds.
//
// TODO(bananabrick): We can optimize this check away for the level iter
// if necessary.
if i.cmp(key, i.upper) >= 0 {
if !i.endKeyInclusive {
panic("unexpected virtualLastSeekLE with exclusive upper bounds")
}
return i.Prev()
key = i.upper
}

// We seeked to the first key >= i.upper.
return i.Prev()
i.exhaustedBounds = 0
i.err = nil // clear cached iteration error
// Seek optimization only applies until iterator is first positioned with a
// SeekGE or SeekLT after SetBounds.
i.boundsCmp = 0
i.positionedUsingLatestBounds = true
i.maybeFilteredKeysSingleLevel = false

ikey, _ := i.index.SeekGE(key, base.SeekGEFlagsNone)
// We can have multiple internal keys with the same user key as the seek
// key. In that case, we want the last (greatest) internal key.
//
// NB: We can avoid this Next()ing if we just implement a blockIter.SeekLE().
// This might be challenging to do correctly, so impose regular operations
// for now.
for ikey != nil && bytes.Equal(ikey.UserKey, key) {
ikey, _ = i.index.Next()
}
if ikey == nil {
return i.skipBackward()
}
result := i.loadBlock(-1)
if result == loadBlockFailed {
return nil, base.LazyValue{}
}
if result == loadBlockIrrelevant {
// Want to skip to the previous block.
return i.skipBackward()
}
ikey, _ = i.data.SeekGE(key, base.SeekGEFlagsNone)
var val base.LazyValue
// Go to the last user key that matches key, and then Prev() on the data
// block.
for ikey != nil && bytes.Equal(ikey.UserKey, key) {
ikey, _ = i.data.Next()
}
ikey, val = i.data.Prev()
if ikey != nil {
return ikey, val
}
return i.skipBackward()
}

// SeekLT implements internalIterator.SeekLT, as documented in the pebble
73 changes: 52 additions & 21 deletions sstable/reader_iter_two_lvl.go
@@ -5,6 +5,7 @@
package sstable

import (
"bytes"
"context"
"fmt"

@@ -76,8 +77,7 @@ func (i *twoLevelIterator) loadIndex(dir int8) loadBlockResult {
// that an index block is excluded according to its properties but only if its
// bounds fall within the filter's current bounds. This function consults the
// appropriate bound, depending on the iteration direction, and returns either
// `blockIntersects` or
// `blockMaybeExcluded`.
// `blockIntersects` or `blockExcluded`.
func (i *twoLevelIterator) resolveMaybeExcluded(dir int8) intersectsResult {
// This iterator is configured with a bound-limited block property filter.
// The bpf determined this entire index block could be excluded from
@@ -567,32 +567,63 @@ func (i *twoLevelIterator) SeekPrefixGE(
return i.skipForward()
}

// virtualLast should only be called if i.vReader != nil and i.endKeyInclusive
// is true.
// virtualLast should only be called if i.vReader != nil.
func (i *twoLevelIterator) virtualLast() (*InternalKey, base.LazyValue) {
if i.vState == nil {
panic("pebble: invalid call to virtualLast")
}
if !i.endKeyInclusive {
// Trivial case.
return i.SeekLT(i.upper, base.SeekLTFlagsNone)
}
return i.virtualLastSeekLE(i.upper)
}

// Seek to the first internal key.
ikey, _ := i.SeekGE(i.upper, base.SeekGEFlagsNone)
if i.endKeyInclusive {
// Let's say the virtual sstable upper bound is c#1, with the keys c#3, c#2,
// c#1, d, e, ... in the sstable. So, the last key in the virtual sstable is
// c#1. We can perform SeekGE(i.upper) and then keep nexting until we find
// the last key with userkey == i.upper.
//
// TODO(bananabrick): Think about how to improve this. If many internal keys
// with the same user key at the upper bound then this could be slow, but
// maybe the odds of having many internal keys with the same user key at the
// upper bound are low.
for ikey != nil && i.cmp(ikey.UserKey, i.upper) == 0 {
ikey, _ = i.Next()
// virtualLastSeekLE implements a SeekLE() that can be used as part
// of reverse-iteration calls such as a Last() on a virtual sstable.
func (i *twoLevelIterator) virtualLastSeekLE(key []byte) (*InternalKey, base.LazyValue) {
// Callers of SeekLE don't know about virtual sstable bounds, so we may
// have to internally restrict the bounds.
//
// TODO(bananabrick): We can optimize this check away for the level iter
// if necessary.
if i.cmp(key, i.upper) >= 0 {
if !i.endKeyInclusive {
panic("unexpected virtualLastSeekLE with exclusive upper bounds")
}
return i.Prev()
key = i.upper
}
// Need to position the topLevelIndex.
//
// The previous exhausted state of singleLevelIterator is no longer
// relevant, since we may be moving to a different index block.
i.exhaustedBounds = 0
// Seek optimization only applies until iterator is first positioned with a
// SeekGE or SeekLT after SetBounds.
i.boundsCmp = 0
i.maybeFilteredKeysTwoLevel = false
ikey, _ := i.topLevelIndex.SeekGE(key, base.SeekGEFlagsNone)
// We can have multiple internal keys with the same user key as the seek
// key. In that case, we want the last (greatest) internal key.
for ikey != nil && bytes.Equal(ikey.UserKey, key) {
ikey, _ = i.topLevelIndex.Next()
}
// We seeked to the first key >= i.upper.
return i.Prev()
if ikey == nil {
return i.skipBackward()
}
result := i.loadIndex(-1)
if result == loadBlockFailed {
i.boundsCmp = 0
return nil, base.LazyValue{}
}
if result == loadBlockIrrelevant {
// Load the previous block.
return i.skipBackward()
}
if ikey, val := i.singleLevelIterator.virtualLastSeekLE(key); ikey != nil {
return ikey, val
}
return i.skipBackward()
}

// SeekLT implements internalIterator.SeekLT, as documented in the pebble
94 changes: 94 additions & 0 deletions testdata/excise
@@ -337,3 +337,97 @@ lsm
000007:[a#10,SET-c#12,SET]
6:
000006:[z#15,SET-z#15,SET]

# Regression test for #3128. A key at the upper bound of a virtual sstable
# should not be skipped in reverse iteration with range key masking enabled.
# bbsomethinglong@4 is not covered by the range key [bd-f) (which is truncated
# to [cc-f) by the excise) and so should not be range-key masked in either
# iteration direction in the iter blocks at the bottom.

reset tiny-blocks
----

batch
set a@3 foo
merge b@5 bar
merge bbsomethinglong@4 bazz
merge bg@3 something
merge d@6 baz
merge z something
----

flush
----

batch
set c foobar
----

flush
----

lsm
----
0.1:
000007:[c#16,SET-c#16,SET]
0.0:
000005:[a@3#10,SET-z#15,MERGE]

compact a z
----

build ext4
range-key-set bd f @7 foo
merge f something
----

ingest ext4
----

lsm
----
0.0:
000009:[bd#17,RANGEKEYSET-f#17,MERGE]
6:
000008:[a@3#0,SET-z#0,MERGE]

build ext3
set z updated
----

ingest-and-excise ext3 excise=bd-cc
----

lsm
----
0.0:
000011:[cc#17,RANGEKEYSET-f#17,MERGE]
000010:[z#18,SET-z#18,SET]
6:
000012:[a@3#0,SET-bbsomethinglong@4#0,MERGE]
000013:[d@6#0,MERGE-z#0,MERGE]

iter range-key-masking=@10
first
next
next
next
next
----
a@3: (foo, .)
b@5: (bar, .)
bbsomethinglong@4: (bazz, .)
cc: (., [cc-f) @7=foo UPDATED)
f: (something, . UPDATED)


iter range-key-masking=@10
set-bounds lower=a upper=f
seek-lt f
prev
prev
----
.
cc: (., [cc-f) @7=foo UPDATED)
bbsomethinglong@4: (bazz, . UPDATED)
b@5: (bar, .)
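
For reference, the `iter range-key-masking=@10` blocks above correspond
roughly to the following in-package usage, sketched from the ingest_test.go
harness changes in this commit. The helper name is made up, the datadriven
plumbing and explicit lower/upper bounds are elided, and `d` is the test's
already-open database.

// Sketch only: reverse iteration with range-key masking enabled, the path
// that previously skipped the key at the virtual sstable's upper bound.
package pebble

import (
	"testing"

	"github.com/cockroachdb/pebble/sstable"
)

func iterateReverseWithMasking(t *testing.T, d *DB) {
	opts := &IterOptions{
		KeyTypes: IterKeyTypePointsAndRanges,
		RangeKeyMasking: RangeKeyMasking{
			Suffix: []byte("@10"),
			Filter: func() BlockPropertyFilterMask {
				return sstable.NewTestKeysMaskingFilter()
			},
		},
	}
	iter, err := d.NewIter(opts)
	if err != nil {
		t.Fatal(err)
	}
	defer iter.Close()
	for valid := iter.SeekLT([]byte("f")); valid; valid = iter.Prev() {
		// With the fix, bbsomethinglong@4 (outside the truncated range key
		// [cc-f)) is surfaced here instead of being masked away.
		t.Log(string(iter.Key()))
	}
}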
