From cfb711f507a891b116813e39604e65b7c08fc3cd Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Thu, 23 Feb 2023 10:59:52 -0500 Subject: [PATCH] storage: introduce test-build assertion iterator Backport #90859, #96685 and #97222 to mangle the buffers returned by Pebble iterators to ensure MVCC and Cockroach code respects the Pebble iterator memory lifetimes. The RangeBounds() and RangeKeys() validity assertions are omitted, since 22.2 allowed these methods to be invoked on an invalid iterator or an iterator positioned at a point key. Release note: None Release justification: Non-production code changes --- build/bazelutil/check.sh | 2 + pkg/BUILD.bazel | 4 + pkg/kv/kvserver/spanset/BUILD.bazel | 1 + pkg/kv/kvserver/spanset/batch.go | 3 +- pkg/storage/BUILD.bazel | 1 + pkg/storage/disk_map.go | 7 +- pkg/storage/engine.go | 13 +- pkg/storage/engine_test.go | 4 - .../mvcc_history_metamorphic_iterator_test.go | 10 +- pkg/storage/mvcc_history_test.go | 11 +- pkg/storage/pebble.go | 5 +- pkg/storage/pebble_batch.go | 7 +- pkg/storage/pebble_iterator.go | 30 +- pkg/storage/pebbleiter/BUILD.bazel | 47 +++ pkg/storage/pebbleiter/crdb_test_off.go | 26 ++ pkg/storage/pebbleiter/crdb_test_on.go | 330 ++++++++++++++++++ pkg/storage/pebbleiter/crdb_test_test.go | 23 ++ pkg/storage/pebbleiter/pebbleiter.go | 15 + pkg/storage/sst.go | 5 + 19 files changed, 503 insertions(+), 41 deletions(-) create mode 100644 pkg/storage/pebbleiter/BUILD.bazel create mode 100644 pkg/storage/pebbleiter/crdb_test_off.go create mode 100644 pkg/storage/pebbleiter/crdb_test_on.go create mode 100644 pkg/storage/pebbleiter/crdb_test_test.go create mode 100644 pkg/storage/pebbleiter/pebbleiter.go diff --git a/build/bazelutil/check.sh b/build/bazelutil/check.sh index 07480d202414..94e90156edba 100755 --- a/build/bazelutil/check.sh +++ b/build/bazelutil/check.sh @@ -61,6 +61,8 @@ pkg/cmd/roachtest/BUILD.bazel EXISTING_CRDB_TEST_BUILD_CONSTRAINTS=" pkg/util/buildutil/crdb_test_off.go://go:build !crdb_test || crdb_test_off pkg/util/buildutil/crdb_test_on.go://go:build crdb_test && !crdb_test_off +pkg/storage/pebbleiter/crdb_test_off.go://go:build !crdb_test || crdb_test_off +pkg/storage/pebbleiter/crdb_test_on.go://go:build crdb_test && !crdb_test_off " if [ -z "${COCKROACH_BAZEL_CHECK_FAST:-}" ]; then diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index 43d95a490d23..3bded14ce9cd 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -505,6 +505,7 @@ ALL_TESTS = [ "//pkg/storage/enginepb:enginepb_test", "//pkg/storage/fs:fs_test", "//pkg/storage/metamorphic:metamorphic_test", + "//pkg/storage/pebbleiter:pebbleiter_test", "//pkg/storage:storage_test", "//pkg/testutils/docker:docker_test", "//pkg/testutils/floatcmp:floatcmp_test", @@ -1805,6 +1806,8 @@ GO_TARGETS = [ "//pkg/storage/fs:fs_test", "//pkg/storage/metamorphic:metamorphic", "//pkg/storage/metamorphic:metamorphic_test", + "//pkg/storage/pebbleiter:pebbleiter", + "//pkg/storage/pebbleiter:pebbleiter_test", "//pkg/storage:storage", "//pkg/storage:storage_test", "//pkg/streaming:streaming", @@ -2861,6 +2864,7 @@ GET_X_DATA_TARGETS = [ "//pkg/storage/enginepb:get_x_data", "//pkg/storage/fs:get_x_data", "//pkg/storage/metamorphic:get_x_data", + "//pkg/storage/pebbleiter:get_x_data", "//pkg/streaming:get_x_data", "//pkg/testutils:get_x_data", "//pkg/testutils/buildutil:get_x_data", diff --git a/pkg/kv/kvserver/spanset/BUILD.bazel b/pkg/kv/kvserver/spanset/BUILD.bazel index 5e7ecff39184..3451ad4a0d1b 100644 --- a/pkg/kv/kvserver/spanset/BUILD.bazel +++ b/pkg/kv/kvserver/spanset/BUILD.bazel @@ -14,6 +14,7 @@ go_library( "//pkg/keys", "//pkg/roachpb", "//pkg/storage", + "//pkg/storage/pebbleiter", "//pkg/util/hlc", "//pkg/util/log", "//pkg/util/protoutil", diff --git a/pkg/kv/kvserver/spanset/batch.go b/pkg/kv/kvserver/spanset/batch.go index a691a8684c55..e88a8750add0 100644 --- a/pkg/kv/kvserver/spanset/batch.go +++ b/pkg/kv/kvserver/spanset/batch.go @@ -18,6 +18,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/storage" + "github.com/cockroachdb/cockroach/pkg/storage/pebbleiter" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/protoutil" @@ -392,7 +393,7 @@ func (i *EngineIterator) UnsafeRawEngineKey() []byte { } // GetRawIter is part of the storage.EngineIterator interface. -func (i *EngineIterator) GetRawIter() *pebble.Iterator { +func (i *EngineIterator) GetRawIter() pebbleiter.Iterator { return i.i.GetRawIter() } diff --git a/pkg/storage/BUILD.bazel b/pkg/storage/BUILD.bazel index 7abeb8c70cfd..1d7ad11180c1 100644 --- a/pkg/storage/BUILD.bazel +++ b/pkg/storage/BUILD.bazel @@ -61,6 +61,7 @@ go_library( "//pkg/settings/cluster", "//pkg/storage/enginepb", "//pkg/storage/fs", + "//pkg/storage/pebbleiter", "//pkg/util", "//pkg/util/admission", "//pkg/util/bufalloc", diff --git a/pkg/storage/disk_map.go b/pkg/storage/disk_map.go index 7898acebc876..77a58eb8fa64 100644 --- a/pkg/storage/disk_map.go +++ b/pkg/storage/disk_map.go @@ -16,6 +16,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver/diskmap" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/storage/pebbleiter" "github.com/cockroachdb/cockroach/pkg/util/encoding" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/errors" @@ -52,7 +53,7 @@ type pebbleMapBatchWriter struct { // pebbleMapIterator iterates over the keys of a pebbleMap in sorted order. type pebbleMapIterator struct { allowDuplicates bool - iter *pebble.Iterator + iter pebbleiter.Iterator // makeKey is a function that transforms a key into a byte slice with a prefix // used to SeekGE() the underlying iterator. makeKey func(k []byte) []byte @@ -114,9 +115,9 @@ func (r *pebbleMap) makeKeyWithSequence(k []byte) []byte { func (r *pebbleMap) NewIterator() diskmap.SortedDiskMapIterator { return &pebbleMapIterator{ allowDuplicates: r.allowDuplicates, - iter: r.store.NewIter(&pebble.IterOptions{ + iter: pebbleiter.MaybeWrap(r.store.NewIter(&pebble.IterOptions{ UpperBound: roachpb.Key(r.prefix).PrefixEnd(), - }), + })), makeKey: r.makeKey, prefix: r.prefix, } diff --git a/pkg/storage/engine.go b/pkg/storage/engine.go index 29363d159e89..32eb00227095 100644 --- a/pkg/storage/engine.go +++ b/pkg/storage/engine.go @@ -21,6 +21,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/storage/fs" + "github.com/cockroachdb/cockroach/pkg/storage/pebbleiter" "github.com/cockroachdb/cockroach/pkg/util/envutil" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/iterutil" @@ -340,7 +341,7 @@ type EngineIterator interface { Value() []byte // GetRawIter is a low-level method only for use in the storage package, // that returns the underlying pebble Iterator. - GetRawIter() *pebble.Iterator + GetRawIter() pebbleiter.Iterator // SeekEngineKeyGEWithLimit is similar to SeekEngineKeyGE, but takes an // additional exclusive upper limit parameter. The limit is semantically // best-effort, and is an optimization to avoid O(n^2) iteration behavior in @@ -1551,17 +1552,7 @@ func assertSimpleMVCCIteratorInvariants(iter SimpleMVCCIterator) error { rangeKey, value.Value.RawBytes) } } - - } else { - // Bounds and range keys must be empty. - if bounds := iter.RangeBounds(); !bounds.Equal(roachpb.Span{}) { - return errors.AssertionFailedf("hasRange=false but RangeBounds=%s", bounds) - } - if r := iter.RangeKeys(); !r.IsEmpty() || !r.Bounds.Equal(roachpb.Span{}) { - return errors.AssertionFailedf("hasRange=false but RangeKeys=%s", r) - } } - return nil } diff --git a/pkg/storage/engine_test.go b/pkg/storage/engine_test.go index a4d13cf9aff8..f033cbb311e2 100644 --- a/pkg/storage/engine_test.go +++ b/pkg/storage/engine_test.go @@ -2171,10 +2171,6 @@ func TestEngineRangeKeysUnsupported(t *testing.T) { hasPoint, hasRange := iter.HasPointAndRange() require.True(t, hasPoint) require.False(t, hasRange) - rangeBounds, err := iter.EngineRangeBounds() - require.NoError(t, err) - require.Empty(t, rangeBounds) - require.Empty(t, iter.EngineRangeKeys()) // Exhaust the iterator. ok, err = iter.NextEngineKey() diff --git a/pkg/storage/mvcc_history_metamorphic_iterator_test.go b/pkg/storage/mvcc_history_metamorphic_iterator_test.go index f5d99eaba741..5cb730ed10d7 100644 --- a/pkg/storage/mvcc_history_metamorphic_iterator_test.go +++ b/pkg/storage/mvcc_history_metamorphic_iterator_test.go @@ -172,7 +172,7 @@ func (m *metamorphicIterator) moveAround() { } hasPoint, _ := m.it.HasPointAndRange() - rangeKeys := m.it.RangeKeys().Clone() + rangeKeys := rangeKeysIfExist(m.it).Clone() var rangeKeysIgnoringTime storage.MVCCRangeKeyStack if iit != nil { rangeKeysIgnoringTime = iit.RangeKeysIgnoringTime() @@ -224,7 +224,7 @@ func (m *metamorphicIterator) moveAround() { if m.it.UnsafeKey().Equal(cur) { break // made it } - printfln("step: %s %s [changed=%t]", m.it.UnsafeKey(), m.it.RangeKeys(), m.it.RangeKeyChanged()) + printfln("step: %s %s [changed=%t]", m.it.UnsafeKey(), rangeKeysIfExist(m.it), m.it.RangeKeyChanged()) if iit != nil { // If we're an incremental iterator with time bounds, and `cur` is not within bounds, // would miss it if we used Next. So call NextIgnoringTime unconditionally. @@ -247,7 +247,7 @@ func (m *metamorphicIterator) moveAround() { valid, err := m.it.Valid() require.Nil(m.t, err) require.True(m.t, valid, "unable to recover original position following SeekLT") - printfln("rev-step: %s %s [changed=%t]", m.it.UnsafeKey(), m.it.RangeKeys(), m.it.RangeKeyChanged()) + printfln("rev-step: %s %s [changed=%t]", m.it.UnsafeKey(), rangeKeysIfExist(m.it), m.it.RangeKeyChanged()) if m.it.UnsafeKey().Equal(cur) { printfln("done") break // made it @@ -268,13 +268,13 @@ func (m *metamorphicIterator) moveAround() { rangeKeysIgnoringTime2 = iit.RangeKeysIgnoringTime() } printfln("recovered position: %s hasPoint=%t, rangeKeys=%s, rangeKeysIgnoringTime=%s", - m.it.UnsafeKey(), hasPoint2, m.it.RangeKeys(), rangeKeysIgnoringTime2) + m.it.UnsafeKey(), hasPoint2, rangeKeysIfExist(m.it), rangeKeysIgnoringTime2) } // Back where we started and hopefully in an indistinguishable state. // When the stack is empty, sometimes it's a nil slice and sometimes zero // slice. A similar problem exists with MVCCRangeKeyVersion.Value. Sidestep // them by comparing strings. - require.Equal(m.t, fmt.Sprint(rangeKeys), fmt.Sprint(m.it.RangeKeys())) + require.Equal(m.t, fmt.Sprint(rangeKeys), fmt.Sprint(rangeKeysIfExist(m.it))) if iit != nil { require.Equal(m.t, fmt.Sprint(rangeKeysIgnoringTime), fmt.Sprint(iit.RangeKeysIgnoringTime())) } diff --git a/pkg/storage/mvcc_history_test.go b/pkg/storage/mvcc_history_test.go index f090fb37f118..4f63fc87fa38 100644 --- a/pkg/storage/mvcc_history_test.go +++ b/pkg/storage/mvcc_history_test.go @@ -1868,9 +1868,18 @@ func printIter(e *evalCtx) { } } +func rangeKeysIfExist(it storage.SimpleMVCCIterator) storage.MVCCRangeKeyStack { + if valid, err := it.Valid(); !valid || err != nil { + return storage.MVCCRangeKeyStack{} + } else if _, hasRange := it.HasPointAndRange(); !hasRange { + return storage.MVCCRangeKeyStack{} + } + return it.RangeKeys() +} + func checkAndUpdateRangeKeyChanged(e *evalCtx) bool { rangeKeyChanged := e.iter.RangeKeyChanged() - rangeKeys := e.iter.RangeKeys() + rangeKeys := rangeKeysIfExist(e.iter) if incrIter := e.tryMVCCIncrementalIter(); incrIter != nil { // For MVCCIncrementalIterator, make sure RangeKeyChangedIgnoringTime() fires diff --git a/pkg/storage/pebble.go b/pkg/storage/pebble.go index 9ecbf23ba47a..fd3c3df52691 100644 --- a/pkg/storage/pebble.go +++ b/pkg/storage/pebble.go @@ -34,6 +34,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/storage/fs" + "github.com/cockroachdb/cockroach/pkg/storage/pebbleiter" "github.com/cockroachdb/cockroach/pkg/util/buildutil" "github.com/cockroachdb/cockroach/pkg/util/envutil" "github.com/cockroachdb/cockroach/pkg/util/hlc" @@ -1912,7 +1913,7 @@ type pebbleReadOnly struct { prefixEngineIter pebbleIterator normalEngineIter pebbleIterator - iter *pebble.Iterator + iter pebbleiter.Iterator iterUsed bool // avoids cloning after PinEngineStateForIterators() durability DurabilityRequirement closed bool @@ -2086,7 +2087,7 @@ func (p *pebbleReadOnly) PinEngineStateForIterators() error { if p.durability == GuaranteedDurability { o = &pebble.IterOptions{OnlyReadGuaranteedDurable: true} } - p.iter = p.parent.db.NewIter(o) + p.iter = pebbleiter.MaybeWrap(p.parent.db.NewIter(o)) // NB: p.iterUsed == false avoids cloning this in NewMVCCIterator(), since // we've just created it. } diff --git a/pkg/storage/pebble_batch.go b/pkg/storage/pebble_batch.go index 8febdc3a5d7f..8f6f7c79b63d 100644 --- a/pkg/storage/pebble_batch.go +++ b/pkg/storage/pebble_batch.go @@ -16,6 +16,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/storage/pebbleiter" "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble" @@ -45,7 +46,7 @@ type pebbleBatch struct { prefixEngineIter pebbleIterator normalEngineIter pebbleIterator - iter *pebble.Iterator + iter pebbleiter.Iterator iterUsed bool // avoids cloning after PinEngineStateForIterators() writeOnly bool closed bool @@ -243,9 +244,9 @@ func (p *pebbleBatch) SupportsRangeKeys() bool { func (p *pebbleBatch) PinEngineStateForIterators() error { if p.iter == nil { if p.batch.Indexed() { - p.iter = p.batch.NewIter(nil) + p.iter = pebbleiter.MaybeWrap(p.batch.NewIter(nil)) } else { - p.iter = p.db.NewIter(nil) + p.iter = pebbleiter.MaybeWrap(p.db.NewIter(nil)) } // NB: p.iterUsed == false avoids cloning this in NewMVCCIterator(). We've // just created it, so cloning it would just be overhead. diff --git a/pkg/storage/pebble_iterator.go b/pkg/storage/pebble_iterator.go index 4d2c3f38c91f..9645a58d0a3a 100644 --- a/pkg/storage/pebble_iterator.go +++ b/pkg/storage/pebble_iterator.go @@ -17,6 +17,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/storage/pebbleiter" "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/protoutil" @@ -31,7 +32,7 @@ import ( // should only be used in one of the two modes. type pebbleIterator struct { // Underlying iterator for the DB. - iter *pebble.Iterator + iter pebbleiter.Iterator options pebble.IterOptions // Reusable buffer for MVCCKey or EngineKey encoding. keyBuf []byte @@ -91,20 +92,22 @@ func newPebbleIterator( p := pebbleIterPool.Get().(*pebbleIterator) p.reusable = false // defensive p.init(nil, opts, durability, supportsRangeKeys) - p.iter = handle.NewIter(&p.options) + p.iter = pebbleiter.MaybeWrap(handle.NewIter(&p.options)) return p } // newPebbleIteratorByCloning creates a new Pebble iterator by cloning the given // iterator and reconfiguring it. func newPebbleIteratorByCloning( - iter *pebble.Iterator, opts IterOptions, durability DurabilityRequirement, supportsRangeKeys bool, + iter pebbleiter.Iterator, + opts IterOptions, + durability DurabilityRequirement, + supportsRangeKeys bool, ) *pebbleIterator { - var err error p := pebbleIterPool.Get().(*pebbleIterator) p.reusable = false // defensive p.init(nil, opts, durability, supportsRangeKeys) - p.iter, err = iter.Clone(pebble.CloneOptions{ + iter, err := iter.Clone(pebble.CloneOptions{ IterOptions: &p.options, RefreshBatchView: true, }) @@ -112,6 +115,7 @@ func newPebbleIteratorByCloning( p.Close() panic(err) } + p.iter = iter return p } @@ -128,11 +132,12 @@ func newPebbleSSTIterator( externalIterOpts = append(externalIterOpts, pebble.ExternalIterForwardOnly{}) } - var err error - if p.iter, err = pebble.NewExternalIter(DefaultPebbleOptions(), &p.options, files, externalIterOpts...); err != nil { + iter, err := pebble.NewExternalIter(DefaultPebbleOptions(), &p.options, files, externalIterOpts...) + if err != nil { p.Close() return nil, err } + p.iter = pebbleiter.MaybeWrap(iter) p.external = true return p, nil } @@ -141,7 +146,10 @@ func newPebbleSSTIterator( // reconfiguring the given iter. It is valid to pass a nil iter and then create // p.iter using p.options, to avoid redundant reconfiguration via SetOptions(). func (p *pebbleIterator) init( - iter *pebble.Iterator, opts IterOptions, durability DurabilityRequirement, supportsRangeKeys bool, + iter pebbleiter.Iterator, + opts IterOptions, + durability DurabilityRequirement, + supportsRangeKeys bool, ) { *p = pebbleIterator{ iter: iter, @@ -164,7 +172,7 @@ func (p *pebbleIterator) init( // 3. iter == nil: create a new iterator from handle. func (p *pebbleIterator) initReuseOrCreate( handle pebble.Reader, - iter *pebble.Iterator, + iter pebbleiter.Iterator, clone bool, opts IterOptions, durability DurabilityRequirement, @@ -177,7 +185,7 @@ func (p *pebbleIterator) initReuseOrCreate( p.init(nil, opts, durability, supportsRangeKeys) if iter == nil { - p.iter = handle.NewIter(&p.options) + p.iter = pebbleiter.MaybeWrap(handle.NewIter(&p.options)) } else if clone { var err error p.iter, err = iter.Clone(pebble.CloneOptions{ @@ -917,7 +925,7 @@ func (p *pebbleIterator) IsPrefix() bool { } // GetRawIter is part of the EngineIterator interface. -func (p *pebbleIterator) GetRawIter() *pebble.Iterator { +func (p *pebbleIterator) GetRawIter() pebbleiter.Iterator { return p.iter } diff --git a/pkg/storage/pebbleiter/BUILD.bazel b/pkg/storage/pebbleiter/BUILD.bazel new file mode 100644 index 000000000000..ab46ad360509 --- /dev/null +++ b/pkg/storage/pebbleiter/BUILD.bazel @@ -0,0 +1,47 @@ +load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +# gazelle:exclude gen-crdb_test_off.go +# gazelle:exclude gen-crdb_test_on.go + +# keep +go_library( + name = "pebbleiter", + srcs = select({ + "//build/toolchains:crdb_test": [":gen-crdb-test-on"], + "//conditions:default": [":gen-crdb-test-off"], + }), + importpath = "github.com/cockroachdb/cockroach/pkg/storage/pebbleiter", + visibility = ["//visibility:public"], + deps = [ + "//pkg/util", + "@com_github_cockroachdb_errors//:errors", + "@com_github_cockroachdb_pebble//:pebble", + ], +) + +REMOVE_GO_BUILD_CONSTRAINTS = "cat $< | grep -v '//go:build' | grep -v '// +build' > $@" + +genrule( + name = "gen-crdb-test-on", + srcs = ["crdb_test_on.go"], + outs = ["gen-crdb_test_on.go"], + cmd = REMOVE_GO_BUILD_CONSTRAINTS, +) + +genrule( + name = "gen-crdb-test-off", + srcs = ["crdb_test_off.go"], + outs = ["gen-crdb_test_off.go"], + cmd = REMOVE_GO_BUILD_CONSTRAINTS, +) + +go_test( + name = "pebbleiter_test", + srcs = ["crdb_test_test.go"], + args = ["-test.timeout=295s"], + embed = [":pebbleiter"], # keep + deps = ["@com_github_stretchr_testify//require"], +) + +get_x_data(name = "get_x_data") diff --git a/pkg/storage/pebbleiter/crdb_test_off.go b/pkg/storage/pebbleiter/crdb_test_off.go new file mode 100644 index 000000000000..295117ac18e0 --- /dev/null +++ b/pkg/storage/pebbleiter/crdb_test_off.go @@ -0,0 +1,26 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +//go:build !crdb_test || crdb_test_off +// +build !crdb_test crdb_test_off + +package pebbleiter + +import "github.com/cockroachdb/pebble" + +// Iterator redefines *pebble.Iterator. +type Iterator = *pebble.Iterator + +// MaybeWrap returns the provided Iterator, unmodified. +// +//gcassert:inline +func MaybeWrap(iter *pebble.Iterator) Iterator { + return iter +} diff --git a/pkg/storage/pebbleiter/crdb_test_on.go b/pkg/storage/pebbleiter/crdb_test_on.go new file mode 100644 index 000000000000..2c1104f30ec6 --- /dev/null +++ b/pkg/storage/pebbleiter/crdb_test_on.go @@ -0,0 +1,330 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +//go:build crdb_test && !crdb_test_off +// +build crdb_test,!crdb_test_off + +package pebbleiter + +import ( + "math/rand" + "time" + + "github.com/cockroachdb/cockroach/pkg/util" + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble" +) + +// Iterator wraps the *pebble.Iterator in crdb_test builds with an assertionIter +// that detects when Close is called on the iterator twice. Double closes are +// problematic because they can result in an iterator being added to a sync pool +// twice, allowing concurrent use of the same iterator struct. +type Iterator = *assertionIter + +// MaybeWrap returns the provided Pebble iterator, wrapped with double close +// detection. +func MaybeWrap(iter *pebble.Iterator) Iterator { + return &assertionIter{Iterator: iter, closedCh: make(chan struct{})} +} + +// assertionIter wraps a *pebble.Iterator with assertion checking. +type assertionIter struct { + *pebble.Iterator + closed bool + closedCh chan struct{} + // unsafeBufs hold buffers used for returning values with short lifetimes to + // the caller. To assert that the client is respecting the lifetimes, + // assertionIter mangles the buffers as soon as the associated lifetime + // expires. This is the same technique applied by the unsafeMVCCIterator in + // pkg/storage, but this time applied at the API boundary between + // pkg/storage and Pebble. + // + // unsafeBufs holds two buffers per-key type and an index indicating which + // are currently in use. This is used to randomly switch to a different + // buffer, ensuring that the buffer(s) returned to the caller for the + // previous iterator position are garbage (as opposed to just state + // corresponding to the current iterator position). + unsafeBufs struct { + idx int + key [2][]byte + val [2][]byte + } + rangeKeyBufs struct { + idx int + bufs [2]rangeKeyBuf + } +} + +type rangeKeyBuf struct { + start []byte + end []byte + keys []pebble.RangeKeyData + + // bgCh is used in race builds to synchronize with a separate goroutine + // performing background buffer mangling. If non-nil, a separate mangling + // goroutine is active and periodically mangling this buffer. When the + // buffer is next used, maybeSaveAndMangleRangeKeyBufs performs a + // synchronous send to the channel to signal that the mangling goroutine + // should exit. + bgCh chan struct{} +} + +func (b *rangeKeyBuf) mangle() { + zero(b.start) + zero(b.end) + for k := range b.keys { + zero(b.keys[k].Suffix) + zero(b.keys[k].Value) + } +} + +func (i *assertionIter) Clone(cloneOpts pebble.CloneOptions) (Iterator, error) { + iter, err := i.Iterator.Clone(cloneOpts) + if err != nil { + return nil, err + } + return MaybeWrap(iter), nil +} + +func (i *assertionIter) Close() error { + if i.closed { + panic(errors.AssertionFailedf("pebble.Iterator already closed")) + } + i.closed = true + close(i.closedCh) + return i.Iterator.Close() +} + +func (i *assertionIter) Key() []byte { + if !i.Valid() { + panic(errors.AssertionFailedf("Key() called on !Valid() pebble.Iterator")) + } + idx := i.unsafeBufs.idx + copyWithMatchingNilness(&i.unsafeBufs.key[idx], i.Iterator.Key()) + return i.unsafeBufs.key[idx] +} + +func (i *assertionIter) Value() []byte { + if !i.Valid() { + panic(errors.AssertionFailedf("Value() called on !Valid() pebble.Iterator")) + } + idx := i.unsafeBufs.idx + copyWithMatchingNilness(&i.unsafeBufs.val[idx], i.Iterator.Value()) + return i.unsafeBufs.val[idx] +} + +func (i *assertionIter) RangeBounds() ([]byte, []byte) { + if !i.Valid() { + panic(errors.AssertionFailedf("RangeBounds() called on !Valid() pebble.Iterator")) + } + if _, hasRange := i.Iterator.HasPointAndRange(); !hasRange { + panic(errors.AssertionFailedf("RangeBounds() called on pebble.Iterator without range keys")) + } + // See maybeSaveAndMangleRangeKeyBufs for where these are saved. + j := i.rangeKeyBufs.idx + return i.rangeKeyBufs.bufs[j].start, i.rangeKeyBufs.bufs[j].end +} + +func (i *assertionIter) RangeKeys() []pebble.RangeKeyData { + if !i.Valid() { + panic(errors.AssertionFailedf("RangeKeys() called on !Valid() pebble.Iterator")) + } + if _, hasRange := i.Iterator.HasPointAndRange(); !hasRange { + panic(errors.AssertionFailedf("RangeKeys() called on pebble.Iterator without range keys")) + } + // See maybeSaveAndMangleRangeKeyBufs for where these are saved. + return i.rangeKeyBufs.bufs[i.rangeKeyBufs.idx].keys +} + +func (i *assertionIter) First() bool { + i.maybeMangleBufs() + defer i.maybeSaveAndMangleRangeKeyBufs() + return i.Iterator.First() +} + +func (i *assertionIter) SeekGE(key []byte) bool { + i.maybeMangleBufs() + defer i.maybeSaveAndMangleRangeKeyBufs() + return i.Iterator.SeekGE(key) +} + +func (i *assertionIter) SeekGEWithLimit(key []byte, limit []byte) pebble.IterValidityState { + i.maybeMangleBufs() + defer i.maybeSaveAndMangleRangeKeyBufs() + return i.Iterator.SeekGEWithLimit(key, limit) +} + +func (i *assertionIter) SeekPrefixGE(key []byte) bool { + i.maybeMangleBufs() + defer i.maybeSaveAndMangleRangeKeyBufs() + return i.Iterator.SeekPrefixGE(key) +} + +func (i *assertionIter) Next() bool { + i.maybeMangleBufs() + defer i.maybeSaveAndMangleRangeKeyBufs() + return i.Iterator.Next() +} + +func (i *assertionIter) NextWithLimit(limit []byte) pebble.IterValidityState { + i.maybeMangleBufs() + defer i.maybeSaveAndMangleRangeKeyBufs() + return i.Iterator.NextWithLimit(limit) +} + +func (i *assertionIter) Last() bool { + i.maybeMangleBufs() + defer i.maybeSaveAndMangleRangeKeyBufs() + return i.Iterator.Last() +} + +func (i *assertionIter) SeekLT(key []byte) bool { + i.maybeMangleBufs() + defer i.maybeSaveAndMangleRangeKeyBufs() + return i.Iterator.SeekLT(key) +} + +func (i *assertionIter) SeekLTWithLimit(key []byte, limit []byte) pebble.IterValidityState { + i.maybeMangleBufs() + defer i.maybeSaveAndMangleRangeKeyBufs() + return i.Iterator.SeekLTWithLimit(key, limit) +} + +func (i *assertionIter) Prev() bool { + i.maybeMangleBufs() + defer i.maybeSaveAndMangleRangeKeyBufs() + return i.Iterator.Prev() +} + +func (i *assertionIter) PrevWithLimit(limit []byte) pebble.IterValidityState { + i.maybeMangleBufs() + defer i.maybeSaveAndMangleRangeKeyBufs() + return i.Iterator.PrevWithLimit(limit) +} + +// maybeMangleBufs trashes the contents of buffers used to return unsafe values +// to the caller. This is used to ensure that the client respects the Pebble +// iterator interface and the lifetimes of buffers it returns. +func (i *assertionIter) maybeMangleBufs() { + if rand.Intn(2) == 0 { + idx := i.unsafeBufs.idx + zero(i.unsafeBufs.key[idx]) + zero(i.unsafeBufs.val[idx]) + if rand.Intn(2) == 0 { + // Switch to a new buffer for the next iterator position. + i.unsafeBufs.idx = (i.unsafeBufs.idx + 1) % 2 + } + } +} + +// maybeSaveAndMangleRangeKeyBufs is invoked at the end of every iterator +// operation. It saves the range keys to buffers owned by `assertionIter` and +// with random probability mangles any buffers previously returned to the user. +func (i *assertionIter) maybeSaveAndMangleRangeKeyBufs() { + // If RangeKeyChanged()=false, the pebble.Iterator contract guarantees that + // any buffers previously returned through RangeBounds() and RangeKeys() are + // still valid. + // + // NB: Only permitted to call RangeKeyChanged() if Valid(). + valid := i.Iterator.Valid() + if valid && !i.Iterator.RangeKeyChanged() { + return + } + // INVARIANT: !Valid() || RangeKeyChanged() + + // The previous range key buffers are no longer guaranteed to be stable. + // Randomly zero them to ensure we catch bugs where they're reused. + idx := i.rangeKeyBufs.idx + mangleBuf := &i.rangeKeyBufs.bufs[idx] + if rand.Intn(2) == 0 { + mangleBuf.mangle() + } + // If the new iterator position has range keys, copy them to our buffers. + if !valid { + return + } + if _, hasRange := i.Iterator.HasPointAndRange(); !hasRange { + return + } + switchBuffers := rand.Intn(2) == 0 + if switchBuffers { + // Switch to a new buffer for the new range key state. + i.rangeKeyBufs.idx = (idx + 1) % 2 + + // In race builds, mangle the old buffer from another goroutine. This is + // nice because the race detector can tell us where the improper read is + // originating. Otherwise, we're relying on the improper read + // manifesting a test assertion failure, which may be far from the + // problematic access of an unsafe buffer. + if util.RaceEnabled { + ch := make(chan struct{}) + mangleBuf.bgCh = ch + ticker := time.NewTicker(5 * time.Millisecond) + go func() { + defer ticker.Stop() + mangleBuf.mangle() + for { + select { + case <-i.closedCh: + return + case <-ch: + return + case <-ticker.C: + mangleBuf.mangle() + } + } + }() + + // If the buffer we're about to use is being mangled in the + // background, synchronize with the goroutine doing the mangling by + // sending to its channel. After the synchronous channel send, we're + // guaranteed the goroutine will not mangle the buffer again and + // we're free to use it. + if prevMangleBuf := &i.rangeKeyBufs.bufs[i.rangeKeyBufs.idx]; prevMangleBuf.bgCh != nil { + prevMangleBuf.bgCh <- struct{}{} + prevMangleBuf.bgCh = nil + } + } + } + start, end := i.Iterator.RangeBounds() + rangeKeys := i.Iterator.RangeKeys() + + buf := &i.rangeKeyBufs.bufs[i.rangeKeyBufs.idx] + buf.start = append(buf.start[:0], start...) + buf.end = append(buf.end[:0], end...) + if len(rangeKeys) > cap(buf.keys) { + buf.keys = make([]pebble.RangeKeyData, len(rangeKeys)) + } else { + buf.keys = buf.keys[:len(rangeKeys)] + } + for k := range rangeKeys { + // Preserve nil-ness returned by Pebble to ensure we're testing exactly + // what Pebble will return in production. + copyWithMatchingNilness(&buf.keys[k].Suffix, rangeKeys[k].Suffix) + copyWithMatchingNilness(&buf.keys[k].Value, rangeKeys[k].Value) + } +} + +func zero(b []byte) { + for i := range b { + b[i] = 0x00 + } +} + +func copyWithMatchingNilness(dst *[]byte, src []byte) { + if src == nil { + *dst = nil + return + } + if *dst == nil { + *dst = make([]byte, 0, len(src)) + } + *dst = append((*dst)[:0], src...) +} diff --git a/pkg/storage/pebbleiter/crdb_test_test.go b/pkg/storage/pebbleiter/crdb_test_test.go new file mode 100644 index 000000000000..374a17a887f1 --- /dev/null +++ b/pkg/storage/pebbleiter/crdb_test_test.go @@ -0,0 +1,23 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package pebbleiter + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestPebbleIterWrapped(t *testing.T) { + // Sanity-check: make sure CrdbTestBuild is set. This should be true for + // any test. + require.NotNil(t, MaybeWrap(nil)) +} diff --git a/pkg/storage/pebbleiter/pebbleiter.go b/pkg/storage/pebbleiter/pebbleiter.go new file mode 100644 index 000000000000..7da71829ec11 --- /dev/null +++ b/pkg/storage/pebbleiter/pebbleiter.go @@ -0,0 +1,15 @@ +// Copyright 2022 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +// Package pebbleiter exposes a type that selectively wraps a Pebble Iterator +// only in crdb_test builds. This wrapper type performs assertions in test-only +// builds. In non-test-only builds, pebbleiter exposes the pebble.Iterator +// directly. +package pebbleiter diff --git a/pkg/storage/sst.go b/pkg/storage/sst.go index a720ab2bb337..6a6d829d23ff 100644 --- a/pkg/storage/sst.go +++ b/pkg/storage/sst.go @@ -609,6 +609,11 @@ func CheckSSTConflicts( statsDiff.Add(updateStatsOnRangeKeyMerge(sstRangeKeys.Bounds.Key, sstRangeKeys.Versions)) } extIter.SeekGE(savedExtKey) + // After seeking, the old buffers have been invalidated. + // Re-retrieve the buffers. + if extHasRange { + extRangeKeys = extIter.RangeKeys() + } } if extRangeKeysChanged && !sstPrevRangeKeys.IsEmpty() && sstPrevRangeKeys.Bounds.Overlaps(extRangeKeys.Bounds) { // Because we always re-seek the extIter after every sstIter step,