From 8ee03c43ef41bed686d75dc9d51afece8d922dec Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Mon, 6 Feb 2023 14:59:57 -0500 Subject: [PATCH 1/3] storage/pebbleiter: mangle unsafe buffers between positioning methods Randomly clobber the key and value buffers in order to ensure lifetimes are respected. This commit extends the `pebbleiter.assertionIter` type used in `crdb_test` builds that sits between pkg/storage and the pebble.Iterator type. It now will randomly zero the buffers used to return the previous iterator position's key and value, to ensure code isn't improperly relying on the stability of the these keys. Epic: None Close #86657. Release note: None --- pkg/storage/pebbleiter/BUILD.bazel | 5 +- pkg/storage/pebbleiter/crdb_test_on.go | 129 ++++++++++++++++++++++++- 2 files changed, 131 insertions(+), 3 deletions(-) diff --git a/pkg/storage/pebbleiter/BUILD.bazel b/pkg/storage/pebbleiter/BUILD.bazel index fb3bffb2f0bb..c3b54f46f659 100644 --- a/pkg/storage/pebbleiter/BUILD.bazel +++ b/pkg/storage/pebbleiter/BUILD.bazel @@ -13,7 +13,10 @@ go_library( }), importpath = "github.com/cockroachdb/cockroach/pkg/storage/pebbleiter", visibility = ["//visibility:public"], - deps = ["@com_github_cockroachdb_pebble//:pebble"], + deps = [ + "@com_github_cockroachdb_errors//:errors", + "@com_github_cockroachdb_pebble//:pebble", + ], ) REMOVE_GO_BUILD_CONSTRAINTS = "cat $< | grep -v '//go:build' | grep -v '// +build' > $@" diff --git a/pkg/storage/pebbleiter/crdb_test_on.go b/pkg/storage/pebbleiter/crdb_test_on.go index 211d3c8f4d4a..cdb738e06c20 100644 --- a/pkg/storage/pebbleiter/crdb_test_on.go +++ b/pkg/storage/pebbleiter/crdb_test_on.go @@ -13,7 +13,12 @@ package pebbleiter -import "github.com/cockroachdb/pebble" +import ( + "math/rand" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble" +) // Iterator wraps the *pebble.Iterator in crdb_test builds with an assertionIter // that detects when Close is called on the iterator twice. Double closes are @@ -31,6 +36,23 @@ func MaybeWrap(iter *pebble.Iterator) Iterator { type assertionIter struct { *pebble.Iterator closed bool + // unsafeBufs hold buffers used for returning values with short lifetimes to + // the caller. To assert that the client is respecting the lifetimes, + // assertionIter mangles the buffers as soon as the associated lifetime + // expires. This is the same technique applied by the unsafeMVCCIterator in + // pkg/storage, but this time applied at the API boundary between + // pkg/storage and Pebble. + // + // unsafeBufs holds two buffers per-key type and an index indicating which + // are currently in use. This is used to randomly switch to a different + // buffer, ensuring that the buffer(s) returned to the caller for the + // previous iterator position are garbage (as opposed to just state + // corresponding to the current iterator position). + unsafeBufs struct { + idx int + key [2][]byte + val [2][]byte + } } func (i *assertionIter) Clone(cloneOpts pebble.CloneOptions) (Iterator, error) { @@ -43,8 +65,111 @@ func (i *assertionIter) Clone(cloneOpts pebble.CloneOptions) (Iterator, error) { func (i *assertionIter) Close() error { if i.closed { - panic("pebble.Iterator already closed") + panic(errors.AssertionFailedf("pebble.Iterator already closed")) } i.closed = true return i.Iterator.Close() } + +func (i *assertionIter) Key() []byte { + if !i.Valid() { + panic(errors.AssertionFailedf("Key() called on !Valid() pebble.Iterator")) + } + idx := i.unsafeBufs.idx + i.unsafeBufs.key[idx] = append(i.unsafeBufs.key[idx][:0], i.Iterator.Key()...) + return i.unsafeBufs.key[idx] +} + +func (i *assertionIter) Value() []byte { + if !i.Valid() { + panic(errors.AssertionFailedf("Value() called on !Valid() pebble.Iterator")) + } + idx := i.unsafeBufs.idx + i.unsafeBufs.val[idx] = append(i.unsafeBufs.val[idx][:0], i.Iterator.Value()...) + return i.unsafeBufs.val[idx] +} + +func (i *assertionIter) LazyValue() pebble.LazyValue { + if !i.Valid() { + panic(errors.AssertionFailedf("LazyValue() called on !Valid() pebble.Iterator")) + } + return i.Iterator.LazyValue() +} + +func (i *assertionIter) First() bool { + i.maybeMangleBufs() + return i.Iterator.First() +} + +func (i *assertionIter) SeekGE(key []byte) bool { + i.maybeMangleBufs() + return i.Iterator.SeekGE(key) +} + +func (i *assertionIter) SeekGEWithLimit(key []byte, limit []byte) pebble.IterValidityState { + i.maybeMangleBufs() + return i.Iterator.SeekGEWithLimit(key, limit) +} + +func (i *assertionIter) SeekPrefixGE(key []byte) bool { + i.maybeMangleBufs() + return i.Iterator.SeekPrefixGE(key) +} + +func (i *assertionIter) Next() bool { + i.maybeMangleBufs() + return i.Iterator.Next() +} + +func (i *assertionIter) NextWithLimit(limit []byte) pebble.IterValidityState { + i.maybeMangleBufs() + return i.Iterator.NextWithLimit(limit) +} + +func (i *assertionIter) NextPrefix() bool { + i.maybeMangleBufs() + return i.Iterator.NextPrefix() +} + +func (i *assertionIter) Last() bool { + i.maybeMangleBufs() + return i.Iterator.Last() +} + +func (i *assertionIter) SeekLT(key []byte) bool { + i.maybeMangleBufs() + return i.Iterator.SeekLT(key) +} + +func (i *assertionIter) SeekLTWithLimit(key []byte, limit []byte) pebble.IterValidityState { + i.maybeMangleBufs() + return i.Iterator.SeekLTWithLimit(key, limit) +} + +func (i *assertionIter) Prev() bool { + i.maybeMangleBufs() + return i.Iterator.Prev() +} + +func (i *assertionIter) PrevWithLimit(limit []byte) pebble.IterValidityState { + i.maybeMangleBufs() + return i.Iterator.PrevWithLimit(limit) +} + +// maybeMangleBufs trashes the contents of buffers used to return unsafe values +// to the caller. This is used to ensure that the client respects the Pebble +// iterator interface and the lifetimes of buffers it returns. +func (i *assertionIter) maybeMangleBufs() { + if rand.Intn(2) == 0 { + idx := i.unsafeBufs.idx + for _, b := range [...][]byte{i.unsafeBufs.key[idx], i.unsafeBufs.val[idx]} { + for i := range b { + b[i] = 0 + } + } + if rand.Intn(2) == 0 { + // Switch to a new buffer for the next iterator position. + i.unsafeBufs.idx = (i.unsafeBufs.idx + 1) % 2 + } + } +} From 3d266084a38acc48525e14cac18f068d313633e2 Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Wed, 8 Feb 2023 15:35:50 -0500 Subject: [PATCH 2/3] storage: fix UnsafeKey() usage of invalid iterator Previously, CheckSSTConflicts would improperly call UnsafeKey on an exhausted sstIter. This could result in undefined, inconsistent behavior as UnsafeKey may point to arbitrary data once the iterator is exhausted. Informs #94141. Epic: None Release note: None --- pkg/storage/sst.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/storage/sst.go b/pkg/storage/sst.go index 54207dbfdbb5..d2ba10c94eab 100644 --- a/pkg/storage/sst.go +++ b/pkg/storage/sst.go @@ -1077,7 +1077,7 @@ func CheckSSTConflicts( // 2) the ext iterator became invalid // 3) both iterators changed keys and the sst iterator's key is further // ahead. - if extChangedKeys && (!sstChangedKeys || (!extOK && sstOK) || extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0) { + if sstOK && extChangedKeys && (!sstChangedKeys || !extOK || extIter.UnsafeKey().Key.Compare(sstIter.UnsafeKey().Key) < 0) { extIter.SeekGE(MVCCKey{Key: sstIter.UnsafeKey().Key}) extOK, extErr = extIter.Valid() } From a80f29c68c66a9325112ab92bffb574f7bed9980 Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Wed, 8 Feb 2023 15:59:19 -0500 Subject: [PATCH 3/3] kv/kvserver/spanset: fix read of invalid iterator's UnsafeKey Previously, the spanset.MVCCIterator implementation would retrieve the contained iterator's UsafeKey() on an invalid iterator in Next, NextKey and Prev. This retrieval was harmless because it always checked the validity of the iterator before using it, but recent assertions in test builds prevent this usage. Release note: None --- pkg/kv/kvserver/spanset/batch.go | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/pkg/kv/kvserver/spanset/batch.go b/pkg/kv/kvserver/spanset/batch.go index bdbbe338b709..2198e2cf8371 100644 --- a/pkg/kv/kvserver/spanset/batch.go +++ b/pkg/kv/kvserver/spanset/batch.go @@ -102,21 +102,37 @@ func (i *MVCCIterator) SeekLT(key storage.MVCCKey) { // Next is part of the storage.MVCCIterator interface. func (i *MVCCIterator) Next() { i.i.Next() - i.checkAllowed(roachpb.Span{Key: i.UnsafeKey().Key}, false) + i.checkAllowedCurrPosForward(false) } // Prev is part of the storage.MVCCIterator interface. func (i *MVCCIterator) Prev() { i.i.Prev() - i.checkAllowed(roachpb.Span{Key: i.UnsafeKey().Key}, false) + i.checkAllowedCurrPosForward(false) } // NextKey is part of the storage.MVCCIterator interface. func (i *MVCCIterator) NextKey() { i.i.NextKey() - i.checkAllowed(roachpb.Span{Key: i.UnsafeKey().Key}, false) + i.checkAllowedCurrPosForward(false) } +// checkAllowedCurrPosForward checks the span starting at the current iterator +// position, if the current iterator position is valid. +func (i *MVCCIterator) checkAllowedCurrPosForward(errIfDisallowed bool) { + i.invalid = false + i.err = nil + if ok, _ := i.i.Valid(); !ok { + // If the iterator is invalid after the operation, there's nothing to + // check. We allow uses of iterators to exceed the declared span bounds + // as long as the iterator itself is configured with proper boundaries. + return + } + i.checkAllowedValidPos(roachpb.Span{Key: i.UnsafeKey().Key}, errIfDisallowed) +} + +// checkAllowed checks the provided span if the current iterator position is +// valid. func (i *MVCCIterator) checkAllowed(span roachpb.Span, errIfDisallowed bool) { i.invalid = false i.err = nil @@ -126,6 +142,10 @@ func (i *MVCCIterator) checkAllowed(span roachpb.Span, errIfDisallowed bool) { // as long as the iterator itself is configured with proper boundaries. return } + i.checkAllowedValidPos(span, errIfDisallowed) +} + +func (i *MVCCIterator) checkAllowedValidPos(span roachpb.Span, errIfDisallowed bool) { var err error if i.spansOnly { err = i.spans.CheckAllowed(SpanReadOnly, span)