Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

storage: add noop behavior for bounds setting in pebbleIterator #61194

Merged
merged 1 commit into from
Mar 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 29 additions & 8 deletions pkg/storage/pebble.go
Original file line number Diff line number Diff line change
Expand Up @@ -1072,8 +1072,17 @@ func (p *Pebble) NewBatch() Batch {

// NewReadOnly implements the Engine interface.
func (p *Pebble) NewReadOnly() ReadWriter {
// TODO(sumeer): a sync.Pool for pebbleReadOnly would save on allocations
// for the underlying pebbleIterators.
return &pebbleReadOnly{
parent: p,
// Defensively set reusable=true. One has to be careful about this since
// an accidental false value would cause these iterators, that are value
// members of pebbleReadOnly, to be put in the pebbleIterPool.
prefixIter: pebbleIterator{reusable: true},
normalIter: pebbleIterator{reusable: true},
prefixEngineIter: pebbleIterator{reusable: true},
normalEngineIter: pebbleIterator{reusable: true},
}
}

Expand Down Expand Up @@ -1368,14 +1377,14 @@ func (p *pebbleReadOnly) NewMVCCIterator(iterKind MVCCIterKind, opts IterOptions
if iter.inuse {
panic("iterator already in use")
}
// Ensures no timestamp hints etc.
checkOptionsForIterReuse(opts)

if iter.iter != nil {
iter.setOptions(opts)
iter.setBounds(opts.LowerBound, opts.UpperBound)
} else {
iter.init(p.parent.db, p.iter, opts)
// The timestamp hints should be empty given the earlier code, but we are
// being defensive.
if p.iter == nil && opts.MaxTimestampHint.IsEmpty() && opts.MinTimestampHint.IsEmpty() {
if p.iter == nil {
// For future cloning.
p.iter = iter.iter
}
Expand Down Expand Up @@ -1403,14 +1412,14 @@ func (p *pebbleReadOnly) NewEngineIterator(opts IterOptions) EngineIterator {
if iter.inuse {
panic("iterator already in use")
}
// Ensures no timestamp hints etc.
checkOptionsForIterReuse(opts)

if iter.iter != nil {
iter.setOptions(opts)
iter.setBounds(opts.LowerBound, opts.UpperBound)
} else {
iter.init(p.parent.db, p.iter, opts)
// The timestamp hints should be empty given this is an EngineIterator,
// but we are being defensive.
if p.iter == nil && opts.MaxTimestampHint.IsEmpty() && opts.MinTimestampHint.IsEmpty() {
if p.iter == nil {
// For future cloning.
p.iter = iter.iter
}
Expand All @@ -1421,6 +1430,18 @@ func (p *pebbleReadOnly) NewEngineIterator(opts IterOptions) EngineIterator {
return iter
}

// checkOptionsForIterReuse checks that the options are appropriate for
// iterators that are reusable, and panics if not. This includes disallowing
// any timestamp hints.
func checkOptionsForIterReuse(opts IterOptions) {
if !opts.MinTimestampHint.IsEmpty() || !opts.MaxTimestampHint.IsEmpty() {
panic("iterator with timestamp hints cannot be reused")
}
if !opts.Prefix && len(opts.UpperBound) == 0 && len(opts.LowerBound) == 0 {
panic("iterator must set prefix or upper bound or lower bound")
}
}

// ConsistentIterators implements the Engine interface.
func (p *pebbleReadOnly) ConsistentIterators() bool {
return true
Expand Down
16 changes: 8 additions & 8 deletions pkg/storage/pebble_batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,18 +229,18 @@ func (p *pebbleBatch) NewMVCCIterator(iterKind MVCCIterKind, opts IterOptions) M
if iter.inuse {
panic("iterator already in use")
}
// Ensures no timestamp hints etc.
checkOptionsForIterReuse(opts)

if iter.iter != nil {
iter.setOptions(opts)
iter.setBounds(opts.LowerBound, opts.UpperBound)
} else {
if p.batch.Indexed() {
iter.init(p.batch, p.iter, opts)
} else {
iter.init(p.db, p.iter, opts)
}
// The timestamp hints should be empty given the earlier code, but we are
// being defensive.
if p.iter == nil && opts.MaxTimestampHint.IsEmpty() && opts.MinTimestampHint.IsEmpty() {
if p.iter == nil {
// For future cloning.
p.iter = iter.iter
}
Expand Down Expand Up @@ -271,18 +271,18 @@ func (p *pebbleBatch) NewEngineIterator(opts IterOptions) EngineIterator {
if iter.inuse {
panic("iterator already in use")
}
// Ensures no timestamp hints etc.
checkOptionsForIterReuse(opts)

if iter.iter != nil {
iter.setOptions(opts)
iter.setBounds(opts.LowerBound, opts.UpperBound)
} else {
if p.batch.Indexed() {
iter.init(p.batch, p.iter, opts)
} else {
iter.init(p.db, p.iter, opts)
}
// The timestamp hints should be empty given this is an EngineIterator,
// but we are being defensive.
if p.iter == nil && opts.MaxTimestampHint.IsEmpty() && opts.MinTimestampHint.IsEmpty() {
if p.iter == nil {
// For future cloning.
p.iter = iter.iter
}
Expand Down
88 changes: 68 additions & 20 deletions pkg/storage/pebble_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,11 @@ type pebbleIterator struct {
// use two slices for each of the bounds since this caller should not change
// the slice holding the current bounds, that the callee (pebble.MVCCIterator)
// is currently using, until after the caller has made the SetBounds call.
lowerBoundBuf [2][]byte
upperBoundBuf [2][]byte
curBuf int
lowerBoundBuf [2][]byte
upperBoundBuf [2][]byte
curBuf int
testingSetBoundsListener testingSetBoundsListener

// Set to true to govern whether to call SeekPrefixGE or SeekGE. Skips
// SSTables based on MVCC/Engine key when true.
prefix bool
Expand Down Expand Up @@ -75,11 +77,16 @@ type cloneableIter interface {
Clone() (*pebble.Iterator, error)
}

type testingSetBoundsListener interface {
postSetBounds(lower, upper []byte)
}

// Instantiates a new Pebble iterator, or gets one from the pool.
func newPebbleIterator(
handle pebble.Reader, iterToClone cloneableIter, opts IterOptions,
) *pebbleIterator {
iter := pebbleIterPool.Get().(*pebbleIterator)
iter.reusable = false // defensive
iter.init(handle, iterToClone, opts)
return iter
}
Expand Down Expand Up @@ -169,38 +176,65 @@ func (p *pebbleIterator) init(handle pebble.Reader, iterToClone cloneableIter, o
p.inuse = true
}

func (p *pebbleIterator) setOptions(opts IterOptions) {
// Overwrite any stale options from last time.
p.options = pebble.IterOptions{}

if !opts.MinTimestampHint.IsEmpty() || !opts.MaxTimestampHint.IsEmpty() {
panic("iterator with timestamp hints cannot be reused")
// setBounds is called to change the bounds on a pebbleIterator. Note that
// this is not the first time that bounds will be passed to the underlying
// pebble.Iterator. The existing bounds are in p.options.
func (p *pebbleIterator) setBounds(lowerBound, upperBound roachpb.Key) {
// If the roachpb.Key bound is nil, the corresponding bound for the
// pebble.Iterator will also be nil. p.options contains the current bounds
// known to the pebble.Iterator.
boundsChanged := ((lowerBound == nil) != (p.options.LowerBound == nil)) ||
((upperBound == nil) != (p.options.UpperBound == nil))
if !boundsChanged {
// The nil-ness is the same but the values may be different.
if lowerBound != nil {
// Both must be non-nil. We know that we've appended 0x00 to
// p.options.LowerBound, which must be ignored for this comparison.
if !bytes.Equal(p.options.LowerBound[:len(p.options.LowerBound)-1], lowerBound) {
boundsChanged = true
}
}
// If the preceding if-block has not already set boundsChanged=true, see
// if the upper bound has changed.
if !boundsChanged && upperBound != nil {
// Both must be non-nil. We know that we've appended 0x00 to
// p.options.UpperBound, which must be ignored for this comparison.
if !bytes.Equal(p.options.UpperBound[:len(p.options.UpperBound)-1], upperBound) {
boundsChanged = true
}
}
}
if !opts.Prefix && len(opts.UpperBound) == 0 && len(opts.LowerBound) == 0 {
panic("iterator must set prefix or upper bound or lower bound")
if !boundsChanged {
// This noop optimization helps the underlying pebble.Iterator to optimize
// seeks.
return
}

p.prefix = opts.Prefix
// Set the bounds to nil, before we selectively change them.
p.options.LowerBound = nil
p.options.UpperBound = nil
p.curBuf = (p.curBuf + 1) % 2
i := p.curBuf
if opts.LowerBound != nil {
if lowerBound != nil {
// This is the same as
// p.options.LowerBound = EncodeKeyToBuf(p.lowerBoundBuf[i][:0], MVCCKey{Key: opts.LowerBound}) .
// or EngineKey{Key: opts.LowerBound}.EncodeToBuf(...).
// p.options.LowerBound = EncodeKeyToBuf(p.lowerBoundBuf[i][:0], MVCCKey{Key: lowerBound}) .
// or EngineKey{Key: lowerBound}.EncodeToBuf(...).
// Since we are encoding keys with an empty version anyway, we can just
// append the NUL byte instead of calling the above encode functions which
// will do the same thing.
p.lowerBoundBuf[i] = append(p.lowerBoundBuf[i][:0], opts.LowerBound...)
p.lowerBoundBuf[i] = append(p.lowerBoundBuf[i][:0], lowerBound...)
p.lowerBoundBuf[i] = append(p.lowerBoundBuf[i], 0x00)
p.options.LowerBound = p.lowerBoundBuf[i]
}
if opts.UpperBound != nil {
if upperBound != nil {
// Same as above.
p.upperBoundBuf[i] = append(p.upperBoundBuf[i][:0], opts.UpperBound...)
p.upperBoundBuf[i] = append(p.upperBoundBuf[i][:0], upperBound...)
p.upperBoundBuf[i] = append(p.upperBoundBuf[i], 0x00)
p.options.UpperBound = p.upperBoundBuf[i]
}
p.iter.SetBounds(p.options.LowerBound, p.options.UpperBound)
if p.testingSetBoundsListener != nil {
p.testingSetBoundsListener.postSetBounds(p.options.LowerBound, p.options.UpperBound)
}
}

// Close implements the MVCCIterator interface.
Expand Down Expand Up @@ -624,11 +658,22 @@ func findSplitKeyUsingIterator(
return bestSplitKey, nil
}

// SetUpperBound implements the MVCCIterator interface.
// SetUpperBound implements the MVCCIterator interface. Note that this is not
// the first time that bounds will be passed to the underlying
// pebble.Iterator. The existing bounds are in p.options.
func (p *pebbleIterator) SetUpperBound(upperBound roachpb.Key) {
if upperBound == nil {
panic("SetUpperBound must not use a nil key")
}
if p.options.UpperBound != nil {
// We know that we've appended 0x00 to p.options.UpperBound, which must be
// ignored for this comparison.
if bytes.Equal(p.options.UpperBound[:len(p.options.UpperBound)-1], upperBound) {
// Nothing to do. This noop optimization helps the underlying
// pebble.Iterator to optimize seeks.
return
}
}
p.curBuf = (p.curBuf + 1) % 2
i := p.curBuf
if p.options.LowerBound != nil {
Expand All @@ -639,6 +684,9 @@ func (p *pebbleIterator) SetUpperBound(upperBound roachpb.Key) {
p.upperBoundBuf[i] = append(p.upperBoundBuf[i], 0x00)
p.options.UpperBound = p.upperBoundBuf[i]
p.iter.SetBounds(p.options.LowerBound, p.options.UpperBound)
if p.testingSetBoundsListener != nil {
p.testingSetBoundsListener.postSetBounds(p.options.LowerBound, p.options.UpperBound)
}
}

// Stats implements the MVCCIterator interface.
Expand Down
Loading