diff --git a/pkg/base/config.go b/pkg/base/config.go index 77ed729dd767..c907a28b1a7e 100644 --- a/pkg/base/config.go +++ b/pkg/base/config.go @@ -13,6 +13,7 @@ package base import ( "context" "crypto/tls" + "fmt" "net/http" "net/url" "sync" @@ -148,6 +149,34 @@ const ( EngineTypePebble ) +// Type implements the pflag.Value interface. +func (e *EngineType) Type() string { return "string" } + +// String implements the pflag.Value interface. +func (e *EngineType) String() string { + switch *e { + case EngineTypeRocksDB: + return "rocksdb" + case EngineTypePebble: + return "pebble" + } + return "" +} + +// Set implements the pflag.Value interface. +func (e *EngineType) Set(s string) error { + switch s { + case "rocksdb": + *e = EngineTypeRocksDB + case "pebble": + *e = EngineTypePebble + default: + return fmt.Errorf("invalid storage engine: %s "+ + "(possible values: rocksdb, pebble)", s) + } + return nil +} + // Config is embedded by server.Config. A base config is not meant to be used // directly, but embedding configs should call cfg.InitDefaults(). type Config struct { @@ -671,8 +700,6 @@ const ( // pertaining to temp storage flags, specifically --temp-dir and // --max-disk-temp-storage. type TempStorageConfig struct { - // Engine specifies whether to use rocksdb or pebble for temp storage. - Engine EngineType // InMemory specifies whether the temporary storage will remain // in-memory or occupy a temporary subdirectory on-disk. InMemory bool @@ -696,7 +723,6 @@ func TempStorageConfigFromEnv( st *cluster.Settings, firstStore StoreSpec, parentDir string, - engine EngineType, maxSizeBytes int64, specIdx int, ) TempStorageConfig { @@ -727,7 +753,6 @@ func TempStorageConfigFromEnv( } return TempStorageConfig{ - Engine: engine, InMemory: inMem, Mon: &monitor, SpecIdx: specIdx, diff --git a/pkg/cli/cliflags/flags.go b/pkg/cli/cliflags/flags.go index ce81057c78d2..507c054a2998 100644 --- a/pkg/cli/cliflags/flags.go +++ b/pkg/cli/cliflags/flags.go @@ -693,6 +693,14 @@ Also, if you use equal signs in the file path to a store, you must use the "path" field label.`, } + StorageEngine = FlagInfo{ + Name: "storage-engine", + Description: ` +Storage engine to use for all stores on this cockroach node. Options are rocksdb +(default), or pebble. +`, + } + Size = FlagInfo{ Name: "size", Shorthand: "z", @@ -737,14 +745,6 @@ If this flag is unspecified, the temporary subdirectory will be located under the root of the first store.`, } - TempEngine = FlagInfo{ - Name: "temp-engine", - Description: ` -Storage engine to use for temporary storage. Options are rocksdb (default), or -experiemental-pebble. -`, - } - ExternalIODir = FlagInfo{ Name: "external-io-dir", Description: ` diff --git a/pkg/cli/context.go b/pkg/cli/context.go index 69db482c293c..d3803443ea96 100644 --- a/pkg/cli/context.go +++ b/pkg/cli/context.go @@ -277,8 +277,7 @@ var startCtx struct { // temporary directory to use to spill computation results to disk. tempDir string - // storage engine to use for temporary storage (eg. rocksdb, pebble) - tempEngine storageEngine + // directory to use for remotely-initiated operations that can // specify node-local I/O paths, like BACKUP/RESTORE/IMPORT. externalIODir string diff --git a/pkg/cli/flags.go b/pkg/cli/flags.go index aaf72e18cfac..ac735c70836f 100644 --- a/pkg/cli/flags.go +++ b/pkg/cli/flags.go @@ -347,6 +347,7 @@ func init() { VarFlag(f, &serverCfg.Locality, cliflags.Locality) VarFlag(f, &serverCfg.Stores, cliflags.Store) + VarFlag(f, &serverCfg.StorageEngine, cliflags.StorageEngine) VarFlag(f, &serverCfg.MaxOffset, cliflags.MaxOffset) // Usage for the unix socket is odd as we use a real file, whereas @@ -398,9 +399,6 @@ func init() { // refers to becomes known. VarFlag(f, diskTempStorageSizeValue, cliflags.SQLTempStorage) StringFlag(f, &startCtx.tempDir, cliflags.TempDir, startCtx.tempDir) - VarFlag(f, &startCtx.tempEngine, cliflags.TempEngine) - // Hide the tempEngine flag while it's experimental. - _ = f.MarkHidden(cliflags.TempEngine.Name) StringFlag(f, &startCtx.externalIODir, cliflags.ExternalIODir, startCtx.externalIODir) VarFlag(f, serverCfg.SQLAuditLogDirName, cliflags.SQLAuditLogDirName) diff --git a/pkg/cli/flags_util.go b/pkg/cli/flags_util.go index 3c990d34c3ff..c846b8738d5d 100644 --- a/pkg/cli/flags_util.go +++ b/pkg/cli/flags_util.go @@ -379,41 +379,6 @@ func (f *tableDisplayFormat) Set(s string) error { return nil } -type storageEngine int - -const ( - engineRocksDB storageEngine = iota - enginePebble -) - -// Type implements the pflag.Value interface. -func (f *storageEngine) Type() string { return "string" } - -// String implements the pflag.Value interface. -func (f *storageEngine) String() string { - switch *f { - case engineRocksDB: - return "rocksdb" - case enginePebble: - return "experimental-pebble" - } - return "" -} - -// Set implements the pflag.Value interface. -func (f *storageEngine) Set(s string) error { - switch s { - case "rocksdb": - *f = engineRocksDB - case "experimental-pebble": - *f = enginePebble - default: - return fmt.Errorf("invalid storage engine: %s "+ - "(possible values: rocksdb, experimental-pebble)", s) - } - return nil -} - // bytesOrPercentageValue is a flag that accepts an integer value, an integer // plus a unit (e.g. 32GB or 32GiB) or a percentage (e.g. 32%). In all these // cases, it transforms the string flag input into an int64 value. diff --git a/pkg/cli/start.go b/pkg/cli/start.go index 9e510fb0d225..91bedc5283df 100644 --- a/pkg/cli/start.go +++ b/pkg/cli/start.go @@ -368,10 +368,6 @@ func initTempStorageConfig( tempStorageMaxSizeBytes = base.DefaultTempStorageMaxSizeBytes } } - storageEngine := base.EngineTypeRocksDB - if startCtx.tempEngine == enginePebble { - storageEngine = base.EngineTypePebble - } // Initialize a base.TempStorageConfig based on first store's spec and // cli flags. @@ -380,7 +376,6 @@ func initTempStorageConfig( st, firstStore, startCtx.tempDir, - storageEngine, tempStorageMaxSizeBytes, specIdx, ) @@ -777,6 +772,7 @@ If problems persist, please see ` + base.DocsURL("cluster-setup-troubleshooting. for i, spec := range serverCfg.Stores.Specs { fmt.Fprintf(tw, "store[%d]:\t%s\n", i, spec) } + fmt.Fprintf(tw, "storage engine: \t%s\n", serverCfg.StorageEngine.String()) nodeID := s.NodeID() if initialBoot { if nodeID == server.FirstNodeID { diff --git a/pkg/server/config.go b/pkg/server/config.go index 106b55032223..42bce6bd6939 100644 --- a/pkg/server/config.go +++ b/pkg/server/config.go @@ -34,16 +34,17 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/retry" "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/cockroachdb/pebble" "github.com/elastic/gosigar" "github.com/pkg/errors" ) // Context defaults. const ( - // DefaultCacheSize is the default size of the RocksDB cache. We default the - // cache size and SQL memory pool size to 128 MiB. Larger values might - // provide significantly better performance, but we're not sure what type of - // system we're running on (development or production or some shared + // DefaultCacheSize is the default size of the RocksDB and Pebble caches. We + // default the cache size and SQL memory pool size to 128 MiB. Larger values + // might provide significantly better performance, but we're not sure what + // type of system we're running on (development or production or some shared // environment). Production users should almost certainly override these // settings and we'll warn in the logs about doing so. DefaultCacheSize = 128 << 20 // 128 MB @@ -131,6 +132,10 @@ type Config struct { // Stores is specified to enable durable key-value storage. Stores base.StoreSpecList + // StorageEngine specifies the engine type (eg. rocksdb, pebble) to use to + // instantiate stores. + StorageEngine base.EngineType + // TempStorageConfig is used to configure temp storage, which stores // ephemeral data when processing large queries. TempStorageConfig base.TempStorageConfig @@ -354,7 +359,7 @@ func MakeConfig(ctx context.Context, st *cluster.Settings) Config { Specs: []base.StoreSpec{storeSpec}, }, TempStorageConfig: base.TempStorageConfigFromEnv( - ctx, st, storeSpec, "" /* parentDir */, base.EngineTypeRocksDB, base.DefaultTempStorageMaxSizeBytes, 0), + ctx, st, storeSpec, "" /* parentDir */, base.DefaultTempStorageMaxSizeBytes, 0), } cfg.AmbientCtx.Tracer = st.Tracer @@ -427,9 +432,16 @@ func (cfg *Config) CreateEngines(ctx context.Context) (Engines, error) { var details []string - details = append(details, fmt.Sprintf("RocksDB cache size: %s", humanizeutil.IBytes(cfg.CacheSize))) - cache := engine.NewRocksDBCache(cfg.CacheSize) - defer cache.Release() + var cache engine.RocksDBCache + var pebbleCache *pebble.Cache + if cfg.StorageEngine == base.EngineTypePebble { + details = append(details, fmt.Sprintf("Pebble cache size: %s", humanizeutil.IBytes(cfg.CacheSize))) + pebbleCache = pebble.NewCache(cfg.CacheSize) + } else { + details = append(details, fmt.Sprintf("RocksDB cache size: %s", humanizeutil.IBytes(cfg.CacheSize))) + cache = engine.NewRocksDBCache(cfg.CacheSize) + defer cache.Release() + } var physicalStores int for _, spec := range cfg.Stores.Specs { @@ -479,19 +491,41 @@ func (cfg *Config) CreateEngines(ctx context.Context) (Engines, error) { details = append(details, fmt.Sprintf("store %d: RocksDB, max size %s, max open file limit %d", i, humanizeutil.IBytes(sizeInBytes), openFileLimitPerStore)) - rocksDBConfig := engine.RocksDBConfig{ - Attrs: spec.Attributes, - Dir: spec.Path, - MaxSizeBytes: sizeInBytes, - MaxOpenFiles: openFileLimitPerStore, - WarnLargeBatchThreshold: 500 * time.Millisecond, - Settings: cfg.Settings, - UseFileRegistry: spec.UseFileRegistry, - RocksDBOptions: spec.RocksDBOptions, - ExtraOptions: spec.ExtraOptions, - } - eng, err := engine.NewRocksDB(rocksDBConfig, cache) + var eng engine.Engine + var err error + if cfg.StorageEngine == base.EngineTypePebble { + // TODO(itsbilal): Tune these options, and allow them to be overridden + // in the spec (similar to the existing spec.RocksDBOptions and others). + pebbleOpts := &pebble.Options{ + Cache: pebbleCache, + MaxOpenFiles: int(openFileLimitPerStore), + MemTableSize: 64 << 20, + MemTableStopWritesThreshold: 4, + MinFlushRate: 4 << 20, + L0CompactionThreshold: 2, + L0StopWritesThreshold: 400, + LBaseMaxBytes: 64 << 20, // 64 MB + Levels: []pebble.LevelOptions{{ + BlockSize: 32 << 10, + }}, + } + eng, err = engine.NewPebble(spec.Path, pebbleOpts) + } else { + rocksDBConfig := engine.RocksDBConfig{ + Attrs: spec.Attributes, + Dir: spec.Path, + MaxSizeBytes: sizeInBytes, + MaxOpenFiles: openFileLimitPerStore, + WarnLargeBatchThreshold: 500 * time.Millisecond, + Settings: cfg.Settings, + UseFileRegistry: spec.UseFileRegistry, + RocksDBOptions: spec.RocksDBOptions, + ExtraOptions: spec.ExtraOptions, + } + + eng, err = engine.NewRocksDB(rocksDBConfig, cache) + } if err != nil { return Engines{}, err } diff --git a/pkg/server/server.go b/pkg/server/server.go index 93aa7550eb60..4d6774b530e5 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -59,6 +59,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/storage/bulk" "github.com/cockroachdb/cockroach/pkg/storage/closedts/container" "github.com/cockroachdb/cockroach/pkg/storage/cloud" + "github.com/cockroachdb/cockroach/pkg/storage/diskmap" "github.com/cockroachdb/cockroach/pkg/storage/engine" "github.com/cockroachdb/cockroach/pkg/storage/reports" "github.com/cockroachdb/cockroach/pkg/storage/storagebase" @@ -394,7 +395,13 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) { // Set up the DistSQL temp engine. useStoreSpec := cfg.Stores.Specs[s.cfg.TempStorageConfig.SpecIdx] - tempEngine, err := engine.NewTempEngine(s.cfg.TempStorageConfig, useStoreSpec) + var tempEngine diskmap.Factory + var err error + if cfg.StorageEngine == base.EngineTypePebble { + tempEngine, err = engine.NewPebbleTempEngine(s.cfg.TempStorageConfig, useStoreSpec) + } else { + tempEngine, err = engine.NewTempEngine(s.cfg.TempStorageConfig, useStoreSpec) + } if err != nil { return nil, errors.Wrap(err, "could not create temp storage") } diff --git a/pkg/storage/engine/temp_engine.go b/pkg/storage/engine/temp_engine.go index e751b5fd0dc7..eb90cc766c63 100644 --- a/pkg/storage/engine/temp_engine.go +++ b/pkg/storage/engine/temp_engine.go @@ -40,15 +40,11 @@ func (r *rocksDBTempEngine) NewSortedDiskMultiMap() diskmap.SortedDiskMap { return newRocksDBMap(r.db, true /* allowDuplicates */) } -// NewTempEngine creates a new engine for DistSQL processors to use when the -// working set is larger than can be stored in memory. +// NewTempEngine creates a new RocksDB engine for DistSQL processors to use when +// the working set is larger than can be stored in memory. func NewTempEngine( tempStorage base.TempStorageConfig, storeSpec base.StoreSpec, ) (diskmap.Factory, error) { - if tempStorage.Engine == base.EngineTypePebble { - return NewPebbleTempEngine(tempStorage, storeSpec) - } - if tempStorage.InMemory { // TODO(arjun): Limit the size of the store once #16750 is addressed. // Technically we do not pass any attributes to temporary store. @@ -97,8 +93,8 @@ func (r *pebbleTempEngine) NewSortedDiskMultiMap() diskmap.SortedDiskMap { return newPebbleMap(r.db, true /* allowDuplicates */) } -// NewPebbleTempEngine creates a new engine for DistSQL processors to use when the -// working set is larger than can be stored in memory. +// NewPebbleTempEngine creates a new Pebble engine for DistSQL processors to use +// when the working set is larger than can be stored in memory. func NewPebbleTempEngine( tempStorage base.TempStorageConfig, storeSpec base.StoreSpec, ) (diskmap.Factory, error) {