diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index dd5012bf53f8..e2f353f2d20e 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -129,6 +129,19 @@ var rocksdbConcurrency = envutil.EnvOrDefaultInt( return max }()) +// The sub-level threshold at which to allow an increase in compaction +// concurrency. The maximum is still controlled by +// pebble.Options.MaxConcurrentCompactions. The default of 2 will allow an +// additional compaction (so total 1 + 1 = 2 compactions) when the sub-level +// count is 2, and increment concurrency by 1 each time the sub-level count +// increases by 2 (so 1 + 3 = 4 compactions when the sub-level count is 6). +// Admission control starts shaping regular traffic at a sub-level count of 5, +// and elastic traffic at a sub-level count of 1, hence this default of 2. See +// https://github.com/cockroachdb/pebble/issues/2832#issuecomment-1699743392 +// for some discussion on the bad behavior caused by not setting this option. +var l0SubLevelCompactionConcurrency = envutil.EnvOrDefaultInt( + "COCKROACH_L0_SUB_LEVEL_CONCURRENCY", 2) + // MakeValue returns the inline value. func MakeValue(meta enginepb.MVCCMetadata) roachpb.Value { return roachpb.Value{RawBytes: meta.RawBytes} diff --git a/pkg/storage/pebble.go b/pkg/storage/pebble.go index 93498b612e24..6a9d834bef63 100644 --- a/pkg/storage/pebble.go +++ b/pkg/storage/pebble.go @@ -571,8 +571,9 @@ func DefaultPebbleOptions() *pebble.Options { } opts := &pebble.Options{ - Comparer: EngineComparer, - FS: vfs.Default, + Comparer: EngineComparer, + FS: vfs.Default, + // A value of 2 triggers a compaction when there is 1 sub-level. 
L0CompactionThreshold: 2, L0StopWritesThreshold: 1000, LBaseMaxBytes: 64 << 20, // 64 MB @@ -584,6 +585,7 @@ func DefaultPebbleOptions() *pebble.Options { BlockPropertyCollectors: PebbleBlockPropertyCollectors, FormatMajorVersion: MinimumSupportedFormatVersion, } + opts.Experimental.L0CompactionConcurrency = l0SubLevelCompactionConcurrency // Automatically flush 10s after the first range tombstone is added to a // memtable. This ensures that we can reclaim space even when there's no // activity on the database generating flushes.