From e0c5cfaed3d18884990ee8c6b6682d5506ccf815 Mon Sep 17 00:00:00 2001 From: Peter Mattis Date: Tue, 12 Nov 2019 09:10:39 -0500 Subject: [PATCH] storage/engine: fix Pebble bloom filters The Pebble `Comparer.Split` function was returning a different key prefix than the prefix extractor we are using with RocksDB. The Pebble version was omitting the trailing NUL byte the RocksDB one was including it. This meant that Pebble generated bloom filters were incompatible with RocksDB generated ones. This fixes `TestRemoveDeadReplicas` which opens a store using Pebble, repairs the store using RocksDB, then re-opens the store using Pebble. This final step was failing to find the `storeIdent` key to the bloom filter problem. Add support for using Pebble in `cli.OpenExistingStore`, though note that TODO that this currently causes a test failure. Fixes #42351 Release note: None --- pkg/cli/debug.go | 53 ++++++++++++++++++++++++++---------- pkg/storage/engine/pebble.go | 18 ++++++------ 2 files changed, 49 insertions(+), 22 deletions(-) diff --git a/pkg/cli/debug.go b/pkg/cli/debug.go index de79ee648417..055966c543f2 100644 --- a/pkg/cli/debug.go +++ b/pkg/cli/debug.go @@ -38,6 +38,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" "github.com/cockroachdb/cockroach/pkg/storage" "github.com/cockroachdb/cockroach/pkg/storage/engine" + "github.com/cockroachdb/cockroach/pkg/storage/engine/enginepb" "github.com/cockroachdb/cockroach/pkg/storage/rditer" "github.com/cockroachdb/cockroach/pkg/storage/stateloader" "github.com/cockroachdb/cockroach/pkg/storage/storagepb" @@ -52,6 +53,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/sysutil" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/cockroach/pkg/util/uuid" + "github.com/cockroachdb/pebble" "github.com/cockroachdb/pebble/tool" "github.com/gogo/protobuf/jsonpb" "github.com/kr/pretty" @@ -118,31 +120,54 @@ func OpenExistingStore(dir string, stopper *stop.Stopper, readOnly bool) (engine // OpenEngine opens the engine at 'dir'. Depending on the supplied options, // an empty engine might be initialized. func OpenEngine(dir string, stopper *stop.Stopper, opts OpenEngineOptions) (engine.Engine, error) { - // TODO(hueypark): This seems to support all engines like engine.NewTempEngine. - cache := engine.NewRocksDBCache(server.DefaultCacheSize) - defer cache.Release() maxOpenFiles, err := server.SetOpenFileLimitForOneStore() if err != nil { return nil, err } - cfg := engine.RocksDBConfig{ - StorageConfig: base.StorageConfig{ - Settings: serverCfg.Settings, - Dir: dir, - MustExist: opts.MustExist, - }, - MaxOpenFiles: maxOpenFiles, - ReadOnly: opts.ReadOnly, + storageConfig := base.StorageConfig{ + Settings: serverCfg.Settings, + Dir: dir, + MustExist: opts.MustExist, } - if PopulateRocksDBConfigHook != nil { - if err := PopulateRocksDBConfigHook(&cfg.StorageConfig); err != nil { + if err := PopulateRocksDBConfigHook(&storageConfig); err != nil { return nil, err } } - db, err := engine.NewRocksDB(cfg, cache) + var db engine.Engine + + // TODO(peter): Using Pebble here causes TestRemoveDeadReplicas to fail with: + // + // debug_test.go:292: pebble: internal error: L0 flushed file 000004 overlaps with the largest seqnum of a preceding flushed file: 0-2278 vs 414 + + // engineType := engine.DefaultStorageEngine + engineType := enginepb.EngineTypeRocksDB + switch engineType { + case enginepb.EngineTypePebble: + cfg := engine.PebbleConfig{ + StorageConfig: storageConfig, + Opts: engine.DefaultPebbleOptions(), + } + cfg.Opts.Cache = pebble.NewCache(server.DefaultCacheSize) + cfg.Opts.MaxOpenFiles = int(maxOpenFiles) + + db, err = engine.NewPebble(context.Background(), cfg) + + case enginepb.EngineTypeRocksDB: + cache := engine.NewRocksDBCache(server.DefaultCacheSize) + defer cache.Release() + + cfg := engine.RocksDBConfig{ + StorageConfig: storageConfig, + MaxOpenFiles: maxOpenFiles, + ReadOnly: opts.ReadOnly, + } + + db, err = engine.NewRocksDB(cfg, cache) + } + if err != nil { return nil, err } diff --git a/pkg/storage/engine/pebble.go b/pkg/storage/engine/pebble.go index bd4a8ac27b23..5c6132e84ab0 100644 --- a/pkg/storage/engine/pebble.go +++ b/pkg/storage/engine/pebble.go @@ -64,16 +64,18 @@ var MVCCComparer = &pebble.Comparer{ }, Split: func(k []byte) int { - if len(k) == 0 { - return len(k) - } - // This is similar to what enginepb.SplitMVCCKey does. - tsLen := int(k[len(k)-1]) - keyPartEnd := len(k) - 1 - tsLen - if keyPartEnd < 0 { + key, _, ok := enginepb.SplitMVCCKey(k) + if !ok { return len(k) } - return keyPartEnd + // This matches the behavior of libroach/KeyPrefix. RocksDB requires that + // keys generated via a SliceTransform be comparable with normal encoded + // MVCC keys. Encoded MVCC keys have a suffix indicating the number of + // bytes of timestamp data. MVCC keys without a timestamp have a suffix of + // 0. We're careful in EncodeKey to make sure that the user-key always has + // a trailing 0. If there is no timestamp this falls out naturally. If + // there is a timestamp we prepend a 0 to the encoded timestamp data. + return len(key) + 1 }, Name: "cockroach_comparator",