Skip to content

Commit

Permalink
db: simplify mergingIter.switchTo{Max,Min}Heap
Browse files Browse the repository at this point in the history
Previously, mergingIter.switchTo{Max,Min}Heap had complicated logic to avoid
violating the levelIter's invariants when higher-level range deletions caused
cascading seek keys to extend beyond iteration bounds. With 6fa3fe9, the
levelIter behaves like other internal iterators and is capable of being
relatively positioned away from an exhausted state without violating its
bounds-checking invariants.

This commit applies the same logic of 6fa3fe9 to the synthetic iterator bound
keys that are interleaved when a levelIter reaches a distant bound, allowing a
relative positioning method to step away from these synthetic iterator bounds.
This allows us to simplify mergingIter.switchTo{Max,Min}Heap.
  • Loading branch information
jbowens committed Apr 24, 2024
1 parent 959b407 commit 16950aa
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 94 deletions.
9 changes: 6 additions & 3 deletions level_iter.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ type levelIter struct {

// exhaustedDir is set to +1 or -1 when the levelIter has been exhausted in
// the forward or backward direction respectively. It is set when the
// underlying data is exhausted, not when iteration has reached the upper or
// underlying data is exhausted or when iteration has reached the upper or
// lower boundary and interleaved a synthetic iterator bound key. When the
// iterator is exhausted and Next or Prev is called, the levelIter uses
// exhaustedDir to determine whether the iterator should step on to the
Expand Down Expand Up @@ -718,6 +718,7 @@ func (l *levelIter) SeekPrefixGE(prefix, key []byte, flags base.SeekGEFlags) *ba
l.boundaryContext.isSyntheticIterBoundsKey = true
l.boundaryContext.isIgnorableBoundaryKey = false
}
l.exhaustedDir = +1
return l.verify(l.largestBoundary)
}
// Return the file's largest bound, ensuring this file stays open until
Expand Down Expand Up @@ -846,6 +847,7 @@ func (l *levelIter) Next() *base.InternalKV {
if l.rangeDelIterPtr != nil {
*l.rangeDelIterPtr = nil
}
l.exhaustedDir = +1
return nil
}
// We're stepping past the boundary key, so now we can load the next file.
Expand Down Expand Up @@ -948,6 +950,7 @@ func (l *levelIter) Prev() *base.InternalKV {
if l.rangeDelIterPtr != nil {
*l.rangeDelIterPtr = nil
}
l.exhaustedDir = -1
return nil
}
// We're stepping past the boundary key, so now we can load the prev file.
Expand Down Expand Up @@ -1004,6 +1007,7 @@ func (l *levelIter) skipEmptyFileForward() *base.InternalKV {
// that matches the exclusive upper bound, and does not represent
// a real key.
if l.tableOpts.UpperBound != nil {
l.exhaustedDir = +1
if *l.rangeDelIterPtr != nil {
l.syntheticBoundary.K = base.InternalKey{
UserKey: l.tableOpts.UpperBound,
Expand All @@ -1019,7 +1023,6 @@ func (l *levelIter) skipEmptyFileForward() *base.InternalKV {
// helps with performance when many levels are populated with
// sstables and most don't have any actual keys within the
// bounds.
l.exhaustedDir = +1
return nil
}
// If the boundary is a range deletion tombstone, or the caller is
Expand Down Expand Up @@ -1090,6 +1093,7 @@ func (l *levelIter) skipEmptyFileBackward() *base.InternalKV {
// that is within the inclusive lower bound, and does not
// represent a real key.
if l.tableOpts.LowerBound != nil {
l.exhaustedDir = -1
if *l.rangeDelIterPtr != nil {
l.syntheticBoundary = base.MakeInternalKV(base.InternalKey{
UserKey: l.tableOpts.LowerBound,
Expand All @@ -1105,7 +1109,6 @@ func (l *levelIter) skipEmptyFileBackward() *base.InternalKV {
// helps with performance when many levels are populated with
// sstables and most don't have any actual keys within the
// bounds.
l.exhaustedDir = -1
return nil
}
// If the boundary could be a range deletion tombstone, return the
Expand Down
96 changes: 5 additions & 91 deletions merging_iter.go
Original file line number Diff line number Diff line change
Expand Up @@ -436,43 +436,7 @@ func (m *mergingIter) switchToMinHeap() error {
if l == cur {
continue
}

// If the iterator is exhausted, it may be out of bounds if range
// deletions modified our search key as we descended. We need to
// reposition it within the search bounds. If the current key is a
// range tombstone, the iterator might still be exhausted but at a
// sstable boundary sentinel. It would be okay to reposition an
// iterator like this only through successive Next calls, except that
// it would violate the levelIter's invariants by causing it to return
// a key before the lower bound.
//
// bounds = [ f, _ )
// L0: [ b ] [ f* z ]
// L1: [ a |----| k y ]
// L2: [ c (d) ] [ e g m ]
// L3: [ x ]
//
// * - current key [] - table bounds () - heap item
//
// In the above diagram, the L2 iterator is positioned at a sstable
// boundary (d) outside the lower bound (f). It arrived here from a
// seek whose seek-key was modified by a range tombstone. If we called
// Next on the L2 iterator, it would return e, violating its lower
// bound. Instead, we seek it to >= f and Next from there.

if l.iterKV == nil || (l.isSyntheticIterBoundsKey && m.heap.cmp(l.iterKV.K.UserKey, m.lower) <= 0) {
if m.lower != nil {
l.iterKV = l.iter.SeekGE(m.lower, base.SeekGEFlagsNone)
} else {
l.iterKV = l.iter.First()
}
if l.iterKV == nil {
if err := l.iter.Error(); err != nil {
return err
}
}
}
for ; l.iterKV != nil; l.iterKV = l.iter.Next() {
for l.iterKV = l.iter.Next(); l.iterKV != nil; l.iterKV = l.iter.Next() {
if base.InternalCompare(m.heap.cmp, key, l.iterKV.K) < 0 {
// key < iter-key
break
Expand All @@ -487,15 +451,8 @@ func (m *mergingIter) switchToMinHeap() error {
}

// Special handling for the current iterator because we were using its key
// above. The iterator cur.iter may still be exhausted at a sstable boundary
// sentinel. Similar to the logic applied to the other levels, in these
// cases we seek the iterator to the first key in order to avoid violating
// levelIter's invariants. See the example in the for loop above.
if cur.isSyntheticIterBoundsKey && m.heap.cmp(cur.iterKV.K.UserKey, m.lower) <= 0 {
cur.iterKV = cur.iter.SeekGE(m.lower, base.SeekGEFlagsNone)
} else {
cur.iterKV = cur.iter.Next()
}
// above.
cur.iterKV = cur.iter.Next()
if cur.iterKV == nil {
if err := cur.iter.Error(); err != nil {
return err
Expand Down Expand Up @@ -532,42 +489,7 @@ func (m *mergingIter) switchToMaxHeap() error {
continue
}

// If the iterator is exhausted, it may be out of bounds if range
// deletions modified our search key as we descended. We need to
// reposition it within the search bounds. If the current key is a
// range tombstone, the iterator might still be exhausted but at a
// sstable boundary sentinel. It would be okay to reposition an
// iterator like this only through successive Prev calls, except that
// it would violate the levelIter's invariants by causing it to return
// a key beyond the upper bound.
//
// bounds = [ _, g )
// L0: [ b ] [ f* z ]
// L1: [ a |-------| k y ]
// L2: [ c d ] h [(i) m ]
// L3: [ e x ]
//
// * - current key [] - table bounds () - heap item
//
// In the above diagram, the L2 iterator is positioned at a sstable
// boundary (i) outside the upper bound (g). It arrived here from a
// seek whose seek-key was modified by a range tombstone. If we called
// Prev on the L2 iterator, it would return h, violating its upper
// bound. Instead, we seek it to < g, and Prev from there.

if l.iterKV == nil || (l.isSyntheticIterBoundsKey && m.heap.cmp(l.iterKV.K.UserKey, m.upper) >= 0) {
if m.upper != nil {
l.iterKV = l.iter.SeekLT(m.upper, base.SeekLTFlagsNone)
} else {
l.iterKV = l.iter.Last()
}
if l.iterKV == nil {
if err := l.iter.Error(); err != nil {
return err
}
}
}
for ; l.iterKV != nil; l.iterKV = l.iter.Prev() {
for l.iterKV = l.iter.Prev(); l.iterKV != nil; l.iterKV = l.iter.Prev() {
if base.InternalCompare(m.heap.cmp, key, l.iterKV.K) > 0 {
// key > iter-key
break
Expand All @@ -582,16 +504,8 @@ func (m *mergingIter) switchToMaxHeap() error {
}

// Special handling for the current iterator because we were using its key
// above. The iterator cur.iter may still be exhausted at a sstable boundary
// sentinel. Similar to the logic applied to the other levels, in these
// cases we seek the iterator in order to avoid violating levelIter's
// invariants by Prev-ing through files. See the example in the for loop
// above.
if cur.isSyntheticIterBoundsKey && m.heap.cmp(cur.iterKV.K.UserKey, m.upper) >= 0 {
cur.iterKV = cur.iter.SeekLT(m.upper, base.SeekLTFlagsNone)
} else {
cur.iterKV = cur.iter.Prev()
}
cur.iterKV = cur.iter.Prev()
if cur.iterKV == nil {
if err := cur.iter.Error(); err != nil {
return err
Expand Down

0 comments on commit 16950aa

Please sign in to comment.