diff --git a/db.go b/db.go index 9db6f5923aa..765ac63a65b 100644 --- a/db.go +++ b/db.go @@ -1468,7 +1468,6 @@ func (i *Iterator) constructPointIter( li.init(ctx, i.opts, &i.comparer, i.newIters, files, level, internalOpts) li.initRangeDel(&mlevels[mlevelsIndex].rangeDelIter) - li.initBoundaryContext(&mlevels[mlevelsIndex].levelIterBoundaryContext) li.initCombinedIterState(&i.lazyCombinedIter.combinedIterState) mlevels[mlevelsIndex].levelIter = li mlevels[mlevelsIndex].iter = invalidating.MaybeWrapIfInvariants(li) diff --git a/level_checker.go b/level_checker.go index 69d55392190..7efcb5010d5 100644 --- a/level_checker.go +++ b/level_checker.go @@ -49,7 +49,6 @@ import ( type simpleMergingIterLevel struct { iter internalIterator rangeDelIter keyspan.FragmentIterator - levelIterBoundaryContext iterKV *base.InternalKV tombstone *keyspan.Span @@ -129,8 +128,7 @@ func (m *simpleMergingIter) step() bool { item := &m.heap.items[0] l := &m.levels[item.index] // Sentinels are not relevant for this point checking. - if !l.isIgnorableBoundaryKey && !item.key.IsExclusiveSentinel() && - item.key.Visible(m.snapshot, base.InternalKeySeqNumMax) { + if !item.key.IsExclusiveSentinel() && item.key.Visible(m.snapshot, base.InternalKeySeqNumMax) { // This is a visible point key. if !m.handleVisiblePoint(item, l) { return false @@ -144,50 +142,50 @@ func (m *simpleMergingIter) step() bool { // Step to the next point. l.iterKV = l.iter.Next() - if !l.isIgnorableBoundaryKey { - if l.iterKV != nil { - // Check point keys in an sstable are ordered. Although not required, we check - // for memtables as well. A subtle check here is that successive sstables of - // L1 and higher levels are ordered. This happens when levelIter moves to the - // next sstable in the level, in which case item.key is previous sstable's - // last point key. - if base.InternalCompare(m.heap.cmp, item.key, l.iterKV.K) >= 0 { - m.err = errors.Errorf("out of order keys %s >= %s in %s", - item.key.Pretty(m.formatKey), l.iterKV.K.Pretty(m.formatKey), l.iter) - return false - } - item.key = base.InternalKey{ - Trailer: l.iterKV.K.Trailer, - UserKey: append(item.key.UserKey[:0], l.iterKV.K.UserKey...), - } - item.value = l.iterKV.V - if m.heap.len() > 1 { - m.heap.fix(0) - } - } else { - m.err = l.iter.Close() - l.iter = nil - m.heap.pop() - } - if m.err != nil { + if l.iterKV == nil { + m.err = errors.CombineErrors(l.iter.Error(), l.iter.Close()) + l.iter = nil + m.heap.pop() + } else if !l.iterKV.K.IsExclusiveSentinel() { + // Check point keys in an sstable are ordered. Although not required, we check + // for memtables as well. A subtle check here is that successive sstables of + // L1 and higher levels are ordered. This happens when levelIter moves to the + // next sstable in the level, in which case item.key is previous sstable's + // last point key. + if base.InternalCompare(m.heap.cmp, item.key, l.iterKV.K) >= 0 { + m.err = errors.Errorf("out of order keys %s >= %s in %s", + item.key.Pretty(m.formatKey), l.iterKV.K.Pretty(m.formatKey), l.iter) return false } - if m.heap.len() == 0 { - // Last record was a MERGE record. - if m.valueMerger != nil { - var closer io.Closer - _, closer, m.err = m.valueMerger.Finish(true /* includesBase */) - if m.err == nil && closer != nil { - m.err = closer.Close() - } - if m.err != nil { - m.err = errors.Wrapf(m.err, "merge processing error on key %s in %s", - item.key.Pretty(m.formatKey), m.lastIterMsg) - } - m.valueMerger = nil + item.key = base.InternalKey{ + Trailer: l.iterKV.K.Trailer, + UserKey: append(item.key.UserKey[:0], l.iterKV.K.UserKey...), + } + item.value = l.iterKV.V + if m.heap.len() > 1 { + m.heap.fix(0) + } + } + if m.err != nil { + return false + } + if m.heap.len() == 0 { + // If m.valueMerger != nil, the last record was a MERGE record. + if m.valueMerger != nil { + var closer io.Closer + var err error + _, closer, err = m.valueMerger.Finish(true /* includesBase */) + if closer != nil { + err = errors.CombineErrors(err, closer.Close()) } - return false + if err != nil { + m.err = errors.CombineErrors(m.err, + errors.Wrapf(err, "merge processing error on key %s in %s", + item.key.Pretty(m.formatKey), m.lastIterMsg)) + } + m.valueMerger = nil } + return false } m.positionRangeDels() return true @@ -639,7 +637,6 @@ func checkLevelsInternal(c *checkConfig) (err error) { li.init(context.Background(), iterOpts, c.comparer, c.newIters, manifestIter, manifest.L0Sublevel(sublevel), internalIterOpts{}) li.initRangeDel(&mlevelAlloc[0].rangeDelIter) - li.initBoundaryContext(&mlevelAlloc[0].levelIterBoundaryContext) mlevelAlloc[0].iter = li mlevelAlloc = mlevelAlloc[1:] } @@ -653,7 +650,6 @@ func checkLevelsInternal(c *checkConfig) (err error) { li.init(context.Background(), iterOpts, c.comparer, c.newIters, current.Levels[level].Iter(), manifest.Level(level), internalIterOpts{}) li.initRangeDel(&mlevelAlloc[0].rangeDelIter) - li.initBoundaryContext(&mlevelAlloc[0].levelIterBoundaryContext) mlevelAlloc[0].iter = li mlevelAlloc = mlevelAlloc[1:] } diff --git a/level_iter.go b/level_iter.go index d8f5a6dc196..33d7723d729 100644 --- a/level_iter.go +++ b/level_iter.go @@ -84,9 +84,12 @@ type levelIter struct { // the levelIter passes over a file containing range keys. See the // lazyCombinedIter for more details. combinedIterState *combinedIterState - // A synthetic boundary key-value pair to return when SeekPrefixGE finds an - // sstable which doesn't contain the search key, but which does contain - // range tombstones. + // A synthetic boundary key-value pair to return when an sstable contains + // range tombstones that might be relevant but no more relevant point keys. + // The synthetic boundary key is always an exclusive range deletion sentinel + // key. When the user-imposed iteration bounds have been reached, the key's + // user key is the exceeded bound. Otherwise, it's the smallest/largest key + // in the file. syntheticBoundary base.InternalKV // The iter for the current file. It is nil under any of the following conditions: // - files.Current() == nil @@ -111,24 +114,6 @@ type levelIter struct { files manifest.LevelIterator err error - // Pointer into this level's mergingIterLevel.levelIterBoundaryContext. - // It's populated when the levelIter is in-use by a mergingIter. It's used - // to signal additional semantic meaning about the most recently returned - // key. It's currently used to pause at two different types of bounds: - // - // - isSyntheticIterBoundsKey is set to true when the iterator has - // user-imposed iteration bounds (l.{lower,upper}), and the levelIter - // reached the user-imposed bound. This signals that the underlying - // iterators are not necessarily exhausted, but iteration has paused to - // avoid unnecessarily loading sstables outside the user-imposed bounds. - // - isIgnorableBoundaryKey is set to true when the levelIter returns a - // fake key at one of the bounds of an sstable within the level. It does - // this only when the current sstable contains range deletions. It ensures - // the merging iterator does not move beyond the table until the table's - // range deletions are no longer necessary, even if the table contains - // no more relevant point keys. - boundaryContext *levelIterBoundaryContext - // internalOpts holds the internal iterator options to pass to the table // cache when constructing new table iterators. internalOpts internalIterOpts @@ -210,14 +195,18 @@ func (l *levelIter) initRangeDel(rangeDelIter *keyspan.FragmentIterator) { l.rangeDelIterPtr = rangeDelIter } -func (l *levelIter) initBoundaryContext(context *levelIterBoundaryContext) { - l.boundaryContext = context -} - func (l *levelIter) initCombinedIterState(state *combinedIterState) { l.combinedIterState = state } +func (l *levelIter) makeSyntheticBound(userKey []byte) *base.InternalKV { + l.syntheticBoundary = base.InternalKV{ + K: base.MakeRangeDeleteSentinelKey(userKey), + V: base.LazyValue{}, + } + return &l.syntheticBoundary +} + func (l *levelIter) maybeTriggerCombinedIteration(file *fileMetadata, dir int) { // If we encounter a file that contains range keys, we may need to // trigger a switch to combined range-key and point-key iteration, @@ -517,10 +506,6 @@ const ( func (l *levelIter) loadFile(file *fileMetadata, dir int) loadFileReturnIndicator { l.smallestBoundary = nil l.largestBoundary = nil - if l.boundaryContext != nil { - l.boundaryContext.isSyntheticIterBoundsKey = false - l.boundaryContext.isIgnorableBoundaryKey = false - } if l.iterFile == file { if l.err != nil { return noFileLoaded @@ -628,17 +613,6 @@ func (l *levelIter) verify(kv *base.InternalKV) *base.InternalKV { if l.upper != nil && kv != l.largestBoundary && l.cmp(kv.K.UserKey, l.upper) > 0 { l.logger.Fatalf("levelIter %s: upper bound violation: %s > %s\n%s", l.level, kv, l.upper, debug.Stack()) } - - if l.boundaryContext != nil && l.boundaryContext.isSyntheticIterBoundsKey { - switch { - case !kv.K.IsExclusiveSentinel(): - l.logger.Fatalf("levelIter %s: returning %s and isSyntheticIterBounds=true", l.level, kv) - case l.lower == nil && l.upper == nil: - // If we knew the direction of iteration, we could verify that - // specifically the corresponding bound is non-nil. - l.logger.Fatalf("levelIter %s: returning %s but has no bounds", l.level, kv) - } - } } return kv } @@ -650,10 +624,6 @@ func (l *levelIter) SeekGE(key []byte, flags base.SeekGEFlags) *base.InternalKV l.err = nil // clear cached iteration error l.exhaustedDir = 0 - if l.boundaryContext != nil { - l.boundaryContext.isSyntheticIterBoundsKey = false - l.boundaryContext.isIgnorableBoundaryKey = false - } // NB: the top-level Iterator has already adjusted key based on // IterOptions.LowerBound. loadFileIndicator := l.loadFile(l.findFileGE(key, flags), +1) @@ -679,10 +649,6 @@ func (l *levelIter) SeekPrefixGE(prefix, key []byte, flags base.SeekGEFlags) *ba l.err = nil // clear cached iteration error l.exhaustedDir = 0 - if l.boundaryContext != nil { - l.boundaryContext.isSyntheticIterBoundsKey = false - l.boundaryContext.isIgnorableBoundaryKey = false - } // NB: the top-level Iterator has already adjusted key based on // IterOptions.LowerBound. @@ -706,31 +672,16 @@ func (l *levelIter) SeekPrefixGE(prefix, key []byte, flags base.SeekGEFlags) *ba // the sstable. All we know is that a key with prefix does not exist in the // current sstable. We do know that the key lies within the bounds of the // table as findFileGE found the table where key <= meta.Largest. We return - // the table's bound with isIgnorableBoundaryKey set. + // the table's bound as a synthetic key. if l.rangeDelIterPtr != nil && *l.rangeDelIterPtr != nil { if l.tableOpts.UpperBound != nil { - l.syntheticBoundary = base.MakeInternalKV(base.InternalKey{ - UserKey: l.tableOpts.UpperBound, - Trailer: InternalKeyRangeDeleteSentinel, - }, nil) - l.largestBoundary = &l.syntheticBoundary - if l.boundaryContext != nil { - l.boundaryContext.isSyntheticIterBoundsKey = true - l.boundaryContext.isIgnorableBoundaryKey = false - } + l.largestBoundary = l.makeSyntheticBound(l.tableOpts.UpperBound) l.exhaustedDir = +1 return l.verify(l.largestBoundary) } // Return the file's largest bound, ensuring this file stays open until - // the mergingIter advances beyond the file's bounds. We set - // isIgnorableBoundaryKey to signal that the actual key returned should - // be ignored, and does not represent a real key in the database. - l.syntheticBoundary = base.MakeInternalKV(l.iterFile.LargestPointKey, nil) - l.largestBoundary = &l.syntheticBoundary - if l.boundaryContext != nil { - l.boundaryContext.isSyntheticIterBoundsKey = false - l.boundaryContext.isIgnorableBoundaryKey = true - } + // the mergingIter advances beyond the file's bounds. + l.largestBoundary = l.makeSyntheticBound(l.iterFile.LargestPointKey.UserKey) return l.verify(l.largestBoundary) } // It is possible that we are here because bloom filter matching failed. In @@ -756,10 +707,6 @@ func (l *levelIter) SeekLT(key []byte, flags base.SeekLTFlags) *base.InternalKV l.err = nil // clear cached iteration error l.exhaustedDir = 0 - if l.boundaryContext != nil { - l.boundaryContext.isSyntheticIterBoundsKey = false - l.boundaryContext.isIgnorableBoundaryKey = false - } // NB: the top-level Iterator has already adjusted key based on // IterOptions.UpperBound. @@ -780,10 +727,6 @@ func (l *levelIter) First() *base.InternalKV { l.err = nil // clear cached iteration error l.exhaustedDir = 0 - if l.boundaryContext != nil { - l.boundaryContext.isSyntheticIterBoundsKey = false - l.boundaryContext.isIgnorableBoundaryKey = false - } // NB: the top-level Iterator will call SeekGE if IterOptions.LowerBound is // set. @@ -804,10 +747,6 @@ func (l *levelIter) Last() *base.InternalKV { l.err = nil // clear cached iteration error l.exhaustedDir = 0 - if l.boundaryContext != nil { - l.boundaryContext.isSyntheticIterBoundsKey = false - l.boundaryContext.isIgnorableBoundaryKey = false - } // NB: the top-level Iterator will call SeekLT if IterOptions.UpperBound is // set. @@ -831,10 +770,6 @@ func (l *levelIter) Next() *base.InternalKV { if l.err != nil || l.iter == nil { return nil } - if l.boundaryContext != nil { - l.boundaryContext.isSyntheticIterBoundsKey = false - l.boundaryContext.isIgnorableBoundaryKey = false - } switch { case l.largestBoundary != nil: @@ -874,10 +809,6 @@ func (l *levelIter) NextPrefix(succKey []byte) *base.InternalKV { if l.err != nil || l.iter == nil { return nil } - if l.boundaryContext != nil { - l.boundaryContext.isSyntheticIterBoundsKey = false - l.boundaryContext.isIgnorableBoundaryKey = false - } switch { case l.largestBoundary != nil: @@ -934,10 +865,6 @@ func (l *levelIter) Prev() *base.InternalKV { if l.err != nil || l.iter == nil { return nil } - if l.boundaryContext != nil { - l.boundaryContext.isSyntheticIterBoundsKey = false - l.boundaryContext.isIgnorableBoundaryKey = false - } switch { case l.smallestBoundary != nil: @@ -996,27 +923,21 @@ func (l *levelIter) skipEmptyFileForward() *base.InternalKV { } if l.rangeDelIterPtr != nil { // We're being used as part of a mergingIter and we've exhausted the - // current sstable. If an upper bound is present and the upper bound lies - // within the current sstable, then we will have reached the upper bound - // rather than the end of the sstable. We need to return a synthetic - // boundary key so that mergingIter can use the range tombstone iterator - // until the other levels have reached this boundary. + // current sstable. If an upper bound is present and the upper bound + // lies within the current sstable, then we will have reached the + // upper bound rather than the end of the sstable. We need to return + // a synthetic boundary key so that mergingIter can use the range + // tombstone iterator until the other levels have reached this + // boundary. // // It is safe to set the boundary key to the UpperBound user key // with the RANGEDEL sentinel since it is the smallest InternalKey - // that matches the exclusive upper bound, and does not represent - // a real key. + // that matches the exclusive upper bound, and does not represent a + // real key. if l.tableOpts.UpperBound != nil { l.exhaustedDir = +1 if *l.rangeDelIterPtr != nil { - l.syntheticBoundary.K = base.InternalKey{ - UserKey: l.tableOpts.UpperBound, - Trailer: InternalKeyRangeDeleteSentinel, - } - l.largestBoundary = &l.syntheticBoundary - if l.boundaryContext != nil { - l.boundaryContext.isSyntheticIterBoundsKey = true - } + l.largestBoundary = l.makeSyntheticBound(l.tableOpts.UpperBound) return l.largestBoundary } // Else there are no range deletions in this sstable. This @@ -1025,28 +946,17 @@ func (l *levelIter) skipEmptyFileForward() *base.InternalKV { // bounds. return nil } - // If the boundary is a range deletion tombstone, or the caller is - // accessing range dels through l.rangeDelIterPtr, pause at an - // ignorable boundary key to avoid advancing to the next file until - // other levels are caught up. + // If the caller is accessing range dels through l.rangeDelIterPtr, + // pause at an ignorable boundary key to avoid advancing to the next + // file until other levels are caught up. // // Note that even if the largest boundary is not a range deletion, - // there may still be range deletions beyong the last point key + // there may still be range deletions beyond the last point key // returned. When block-property filters are in use, the sstable // iterator may have transparently skipped a tail of the point keys - // in the file. If the last point key returned /was/ the largest - // key, then we'll return a key with the same user key and trailer - // twice. Returning it again is a violation of the strict - // monotonicity normally provided. The mergingIter's heap can - // tolerate this repeat key and in this case will keep the level at - // the top of the heap and immediately skip the entry, advancing to - // the next file. + // in the file. if *l.rangeDelIterPtr != nil { - l.syntheticBoundary = base.MakeInternalKV(l.iterFile.LargestPointKey, nil) - l.largestBoundary = &l.syntheticBoundary - if l.boundaryContext != nil { - l.boundaryContext.isIgnorableBoundaryKey = true - } + l.largestBoundary = l.makeSyntheticBound(l.iterFile.LargestPointKey.UserKey) return l.largestBoundary } } @@ -1082,27 +992,21 @@ func (l *levelIter) skipEmptyFileBackward() *base.InternalKV { } if l.rangeDelIterPtr != nil { // We're being used as part of a mergingIter and we've exhausted the - // current sstable. If a lower bound is present and the lower bound lies - // within the current sstable, then we will have reached the lower bound - // rather than the beginning of the sstable. We need to return a - // synthetic boundary key so that mergingIter can use the range tombstone - // iterator until the other levels have reached this boundary. + // current sstable. If a lower bound is present and the lower bound + // lies within the current sstable, then we will have reached the + // lower bound rather than the beginning of the sstable. We need to + // return a synthetic boundary key so that mergingIter can use the + // range tombstone iterator until the other levels have reached this + // boundary. // // It is safe to set the boundary key to the LowerBound user key // with the RANGEDEL sentinel since it is the smallest InternalKey - // that is within the inclusive lower bound, and does not - // represent a real key. + // that is within the inclusive lower bound, and does not represent + // a real key. if l.tableOpts.LowerBound != nil { l.exhaustedDir = -1 if *l.rangeDelIterPtr != nil { - l.syntheticBoundary = base.MakeInternalKV(base.InternalKey{ - UserKey: l.tableOpts.LowerBound, - Trailer: InternalKeyRangeDeleteSentinel, - }, nil) - l.smallestBoundary = &l.syntheticBoundary - if l.boundaryContext != nil { - l.boundaryContext.isSyntheticIterBoundsKey = true - } + l.smallestBoundary = l.makeSyntheticBound(l.tableOpts.LowerBound) return l.smallestBoundary } // Else there are no range deletions in this sstable. This @@ -1111,21 +1015,12 @@ func (l *levelIter) skipEmptyFileBackward() *base.InternalKV { // bounds. return nil } - // If the boundary could be a range deletion tombstone, return the - // smallest point key as a special ignorable key to avoid advancing to the - // next file. - // - // It's possible the SmallestPointKey was already returned. Returning it - // again is a violation of the strict monotonicity normally provided. The - // mergingIter's heap can tolerate this repeat key and in this case will - // keep the level at the top of the heap and immediately skip the entry, - // advancing to the next file. + // If the user is using the range deletion iterator, return a + // synthetic key with the smallest user key in the file. This file + // sorts before all other keys with the same user key, so + // monotonicity is maintained. if *l.rangeDelIterPtr != nil { - l.syntheticBoundary = base.MakeInternalKV(l.iterFile.SmallestPointKey, nil) - l.smallestBoundary = &l.syntheticBoundary - if l.boundaryContext != nil { - l.boundaryContext.isIgnorableBoundaryKey = true - } + l.smallestBoundary = l.makeSyntheticBound(l.iterFile.SmallestPointKey.UserKey) return l.smallestBoundary } } diff --git a/merging_iter.go b/merging_iter.go index 26fb82729fb..2e2792d8cde 100644 --- a/merging_iter.go +++ b/merging_iter.go @@ -32,11 +32,6 @@ type mergingIterLevel struct { // intermediary internalIterator implementations. levelIter *levelIter - // levelIterBoundaryContext's fields are set when using levelIter, in order - // to surface sstable boundary keys and file-level context. See levelIter - // comment and the Range Deletions comment below. - levelIterBoundaryContext - // tombstone caches the tombstone rangeDelIter is currently pointed at. If // tombstone is nil, there are no further tombstones within the // current sstable in the current iterator direction. The cached tombstone is @@ -46,27 +41,6 @@ type mergingIterLevel struct { tombstone *keyspan.Span } -type levelIterBoundaryContext struct { - // isSyntheticIterBoundsKey is set to true iff the key returned by the level - // iterator is a synthetic key derived from the iterator bounds. This is used - // to prevent the mergingIter from being stuck at such a synthetic key if it - // becomes the top element of the heap. When used with a user-facing Iterator, - // the only range deletions exposed by this mergingIter should be those with - // `isSyntheticIterBoundsKey || isIgnorableBoundaryKey`. - // - // When true, the coincident key is an exclusive sentinel, and the current - // direction of iteration has a user-imposed iteration bound. - isSyntheticIterBoundsKey bool - // isIgnorableBoundaryKey is set to true iff the key returned by the level - // iterator is a file boundary key that should be ignored when returning to - // the parent iterator. File boundary keys are used by the level iter to - // keep a levelIter file's range deletion iterator open as long as other - // levels within the merging iterator require it. When used with a user-facing - // Iterator, the only range deletions exposed by this mergingIter should be - // those with `isSyntheticIterBoundsKey || isIgnorableBoundaryKey`. - isIgnorableBoundaryKey bool -} - // mergingIter provides a merged view of multiple iterators from different // levels of the LSM. // @@ -707,16 +681,25 @@ func (m *mergingIter) isNextEntryDeleted(item *mergingIterLevel) (bool, error) { func (m *mergingIter) findNextEntry() *base.InternalKV { for m.heap.len() > 0 && m.err == nil { item := m.heap.items[0] - if m.levels[item.index].isSyntheticIterBoundsKey { - break - } - - m.addItemStats(item) - // Skip ignorable boundary keys. These are not real keys and exist to - // keep sstables open until we've surpassed their end boundaries so that - // their range deletions are visible. - if m.levels[item.index].isIgnorableBoundaryKey { + // The levelIter internal iterator will interleave exclusive sentinel + // keys to keep files open until their range deletions are no longer + // necessary. Sometimes these are interleaved with the user key of a + // file's largest key, in which case they may simply be stepped over to + // move to the next file in the forward direction. Other times they're + // interleaved at the user key of the user-iteration boundary, if that + // falls within the bounds of a file. In the latter case, there are no + // more keys < m.upper, and we can stop iterating. + // + // We perform a key comparison to differentiate between these two cases. + // This key comparison is considered okay because it only happens for + // sentinel keys. It may be eliminated after #2863. + if m.levels[item.index].iterKV.K.IsExclusiveSentinel() { + if m.upper != nil && m.heap.cmp(m.levels[item.index].iterKV.K.UserKey, m.upper) >= 0 { + break + } + // This key is the largest boundary of a file and can be skipped now + // that the file's range deletions are no longer relevant. m.err = m.nextEntry(item, nil /* succKey */) if m.err != nil { return nil @@ -724,6 +707,8 @@ func (m *mergingIter) findNextEntry() *base.InternalKV { continue } + m.addItemStats(item) + // Check if the heap root key is deleted by a range tombstone in a // higher level. If it is, isNextEntryDeleted will advance the iterator // to a later key (through seeking or nexting). @@ -859,13 +844,25 @@ func (m *mergingIter) isPrevEntryDeleted(item *mergingIterLevel) (bool, error) { func (m *mergingIter) findPrevEntry() *base.InternalKV { for m.heap.len() > 0 && m.err == nil { item := m.heap.items[0] - if m.levels[item.index].isSyntheticIterBoundsKey { - break - } - // Skip ignorable boundary keys. These are not real keys and exist to - // keep sstables open until we've surpassed their end boundaries so that - // their range deletions are visible. - if m.levels[item.index].isIgnorableBoundaryKey { + + // The levelIter internal iterator will interleave exclusive sentinel + // keys to keep files open until their range deletions are no longer + // necessary. Sometimes these are interleaved with the user key of a + // file's smallest key, in which case they may simply be stepped over to + // move to the next file in the backward direction. Other times they're + // interleaved at the user key of the user-iteration boundary, if that + // falls within the bounds of a file. In the latter case, there are no + // more keys ≥ m.lower, and we can stop iterating. + // + // We perform a key comparison to differentiate between these two cases. + // This key comparison is considered okay because it only happens for + // sentinel keys. It may be eliminated after #2863. + if m.levels[item.index].iterKV.K.IsExclusiveSentinel() { + if m.lower != nil && m.heap.cmp(m.levels[item.index].iterKV.K.UserKey, m.lower) <= 0 { + break + } + // This key is the smallest boundary of a file and can be skipped + // now that the file's range deletions are no longer relevant. m.err = m.prevEntry(item) if m.err != nil { return nil diff --git a/merging_iter_test.go b/merging_iter_test.go index 2f918713d51..4a141af3cdd 100644 --- a/merging_iter_test.go +++ b/merging_iter_test.go @@ -307,7 +307,6 @@ func TestMergingIterCornerCases(t *testing.T) { i := len(levelIters) levelIters = append(levelIters, mergingIterLevel{iter: li}) li.initRangeDel(&levelIters[i].rangeDelIter) - li.initBoundaryContext(&levelIters[i].levelIterBoundaryContext) } miter := &mergingIter{} miter.init(nil /* opts */, &stats, cmp, func(a []byte) int { return len(a) }, levelIters...) @@ -679,7 +678,6 @@ func buildMergingIter(readers [][]*sstable.Reader, levelSlices []manifest.LevelS context.Background(), IterOptions{}, testkeys.Comparer, newIters, levelSlices[i].Iter(), manifest.Level(level), internalIterOpts{}) l.initRangeDel(&mils[level].rangeDelIter) - l.initBoundaryContext(&mils[level].levelIterBoundaryContext) mils[level].iter = l } var stats base.InternalIteratorStats diff --git a/scan_internal.go b/scan_internal.go index e6a5f75c936..d6ee26618f5 100644 --- a/scan_internal.go +++ b/scan_internal.go @@ -900,7 +900,6 @@ func (i *scanInternalIterator) constructPointIter( li.init( i.ctx, i.opts.IterOptions, i.comparer, i.newIters, files, level, internalIterOpts{}) - li.initBoundaryContext(&mlevels[mlevelsIndex].levelIterBoundaryContext) mlevels[mlevelsIndex].iter = li rli.Init(keyspan.SpanIterOptions{RangeKeyFilters: i.opts.RangeKeyFilters}, i.comparer.Compare, tableNewRangeDelIter(i.ctx, i.newIters), files, level, diff --git a/testdata/level_iter_boundaries b/testdata/level_iter_boundaries index 5a4edc025bf..413090bd37e 100644 --- a/testdata/level_iter_boundaries +++ b/testdata/level_iter_boundaries @@ -12,7 +12,7 @@ prev ---- d#72057594037927935,RANGEDEL: . -a#1,RANGEDEL: +a#72057594037927935,RANGEDEL: . iter @@ -23,7 +23,7 @@ prev ---- d#72057594037927935,RANGEDEL: . -a#1,RANGEDEL: +a#72057594037927935,RANGEDEL: . iter @@ -34,7 +34,7 @@ prev ---- d#72057594037927935,RANGEDEL: . -a#1,RANGEDEL: +a#72057594037927935,RANGEDEL: . iter @@ -90,7 +90,7 @@ prev prev ---- c#3,SET:c -b#2,RANGEDEL: +b#72057594037927935,RANGEDEL: a#1,SET:a . @@ -117,7 +117,7 @@ last prev ---- a#1,SET:b -a#1,SET: +a#72057594037927935,RANGEDEL: clear ---- @@ -134,7 +134,7 @@ next next ---- c#2,SET:c -c#2,SET: +c#72057594037927935,RANGEDEL: . iter @@ -143,7 +143,7 @@ prev prev ---- c#2,SET:c -a#1,RANGEDEL: +a#72057594037927935,RANGEDEL: . # Regression test to check that Seek{GE,LT} work properly in certain @@ -174,9 +174,9 @@ e#72057594037927935,RANGEDEL: d#3,SET:d e#72057594037927935,RANGEDEL: d#3,SET:d -c#2,RANGEDEL: +c#72057594037927935,RANGEDEL: d#3,SET:d -c#2,RANGEDEL: +c#72057594037927935,RANGEDEL: d#3,SET:d # Regression test to check that Seek{GE,LT}, First, and Last do not @@ -359,7 +359,7 @@ prev prev ---- e#5,SET:z -e#5,SET: +e#72057594037927935,RANGEDEL: . file-pos diff --git a/testdata/level_iter_seek b/testdata/level_iter_seek index 01181e4eea2..83a3ae399a8 100644 --- a/testdata/level_iter_seek +++ b/testdata/level_iter_seek @@ -61,7 +61,7 @@ d#72057594037927935,RANGEDEL: iter seek-prefix-ge d ---- -f#5,SET: / d-e:{(#6,RANGEDEL)} +f#72057594037927935,RANGEDEL: / d-e:{(#6,RANGEDEL)} # Tests a sequence of SeekPrefixGE with monotonically increasing keys, some of # which are present and some not (so fail the bloom filter match). The seek to @@ -77,7 +77,7 @@ seek-prefix-ge h ---- . c#7,SET:c / d-e:{(#6,RANGEDEL)} -f#5,SET: / d-e:{(#6,RANGEDEL)} +f#72057594037927935,RANGEDEL: / d-e:{(#6,RANGEDEL)} f#5,SET:f g#4,SET:g . @@ -119,7 +119,7 @@ c#7,SET:c {BlockBytes:56 BlockBytesInCache:0 BlockReadDuration:0s KeyBytes:0 ValueBytes:0 PointCount:0 PointsCoveredByRangeTombstones:0 SeparatedPointValue:{Count:0 ValueBytes:0 ValueBytesFetched:0}} f#5,SET:f {BlockBytes:56 BlockBytesInCache:0 BlockReadDuration:0s KeyBytes:0 ValueBytes:0 PointCount:0 PointsCoveredByRangeTombstones:0 SeparatedPointValue:{Count:0 ValueBytes:0 ValueBytesFetched:0}} -f#5,SET: +f#72057594037927935,RANGEDEL: {BlockBytes:56 BlockBytesInCache:0 BlockReadDuration:0s KeyBytes:0 ValueBytes:0 PointCount:0 PointsCoveredByRangeTombstones:0 SeparatedPointValue:{Count:0 ValueBytes:0 ValueBytesFetched:0}} g#4,SET:g {BlockBytes:112 BlockBytesInCache:0 BlockReadDuration:0s KeyBytes:0 ValueBytes:0 PointCount:0 PointsCoveredByRangeTombstones:0 SeparatedPointValue:{Count:0 ValueBytes:0 ValueBytesFetched:0}} @@ -222,9 +222,9 @@ next prev prev ---- -a#5,RANGEDEL: / a-b:{(#5,RANGEDEL)} +a#72057594037927935,RANGEDEL: / a-b:{(#5,RANGEDEL)} c#7,SET:c -a#5,RANGEDEL: +a#72057594037927935,RANGEDEL: . # Verify that prev when positioned at the largest boundary returns the @@ -268,7 +268,7 @@ seek-lt d next ---- d#2,SET:d -a#1,RANGEDEL: / a-b:{(#1,RANGEDEL)} +a#72057594037927935,RANGEDEL: / a-b:{(#1,RANGEDEL)} d#2,SET:d # Verify SeekPrefixGE correctness with trySeekUsingNext=true @@ -387,7 +387,7 @@ next next ---- f#5,SET:f -f#5,SET: +f#72057594037927935,RANGEDEL: i#4,SET:i # The below count should be 2, as we skip over the rangekey-only file. diff --git a/testdata/merging_iter b/testdata/merging_iter index 643801bca5e..c24465e9759 100644 --- a/testdata/merging_iter +++ b/testdata/merging_iter @@ -40,7 +40,7 @@ c#27,SET:27 e#10,SET:10 g#20,SET:20 . -{BlockBytes:116 BlockBytesInCache:0 BlockReadDuration:0s KeyBytes:6 ValueBytes:8 PointCount:6 PointsCoveredByRangeTombstones:0 SeparatedPointValue:{Count:0 ValueBytes:0 ValueBytesFetched:0}} +{BlockBytes:116 BlockBytesInCache:0 BlockReadDuration:0s KeyBytes:4 ValueBytes:8 PointCount:4 PointsCoveredByRangeTombstones:0 SeparatedPointValue:{Count:0 ValueBytes:0 ValueBytesFetched:0}} {BlockBytes:0 BlockBytesInCache:0 BlockReadDuration:0s KeyBytes:0 ValueBytes:0 PointCount:0 PointsCoveredByRangeTombstones:0 SeparatedPointValue:{Count:0 ValueBytes:0 ValueBytesFetched:0}} # seekGE() should not allow the rangedel to act on points in the lower sstable that are after it. @@ -636,9 +636,9 @@ a#30,SET:30 f#21,SET:21 {BlockBytes:0 BlockBytesInCache:0 BlockReadDuration:0s KeyBytes:5 ValueBytes:10 PointCount:5 PointsCoveredByRangeTombstones:4 SeparatedPointValue:{Count:0 ValueBytes:0 ValueBytesFetched:0}} . -{BlockBytes:0 BlockBytesInCache:0 BlockReadDuration:0s KeyBytes:6 ValueBytes:10 PointCount:6 PointsCoveredByRangeTombstones:4 SeparatedPointValue:{Count:0 ValueBytes:0 ValueBytesFetched:0}} +{BlockBytes:0 BlockBytesInCache:0 BlockReadDuration:0s KeyBytes:5 ValueBytes:10 PointCount:5 PointsCoveredByRangeTombstones:4 SeparatedPointValue:{Count:0 ValueBytes:0 ValueBytesFetched:0}} . -{BlockBytes:0 BlockBytesInCache:0 BlockReadDuration:0s KeyBytes:6 ValueBytes:10 PointCount:6 PointsCoveredByRangeTombstones:4 SeparatedPointValue:{Count:0 ValueBytes:0 ValueBytesFetched:0}} +{BlockBytes:0 BlockBytesInCache:0 BlockReadDuration:0s KeyBytes:5 ValueBytes:10 PointCount:5 PointsCoveredByRangeTombstones:4 SeparatedPointValue:{Count:0 ValueBytes:0 ValueBytesFetched:0}} # Test a dead simple error handling case of a 1-level seek erroring.