Skip to content

Commit

Permalink
kvserver: Add cluster setting to use excise in snapshots
Browse files Browse the repository at this point in the history
This change adds a cluster setting,
`kv.snapshot_receiver.excise.enabled`, to use IngestAndExcise
for the replicated/user-key portion of a replica's contents
instead of rangedels. This reduces write-amp: rangedels and
rangekeydels have to be compacted, whereas an excise shrinks
sstables into virtual sstables and clears out the contents
of a replica immediately. At the moment, this is an experimental
feature and should be used with caution.

Epic: none

Release note: None
  • Loading branch information
itsbilal committed Oct 2, 2023
1 parent 2ac6153 commit 65bb6cc
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 6 deletions.
1 change: 1 addition & 0 deletions docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
<tr><td><div id="setting-kv-replica-stats-addsst-request-size-factor" class="anchored"><code>kv.replica_stats.addsst_request_size_factor</code></div></td><td>integer</td><td><code>50000</code></td><td>the divisor that is applied to addsstable request sizes, then recorded in a leaseholders QPS; 0 means all requests are treated as cost 1</td><td>Dedicated/Self-Hosted</td></tr>
<tr><td><div id="setting-kv-replication-reports-interval" class="anchored"><code>kv.replication_reports.interval</code></div></td><td>duration</td><td><code>1m0s</code></td><td>the frequency for generating the replication_constraint_stats, replication_stats_report and replication_critical_localities reports (set to 0 to disable)</td><td>Dedicated/Self-Hosted</td></tr>
<tr><td><div id="setting-kv-snapshot-rebalance-max-rate" class="anchored"><code>kv.snapshot_rebalance.max_rate</code></div></td><td>byte size</td><td><code>32 MiB</code></td><td>the rate limit (bytes/sec) to use for rebalance and upreplication snapshots</td><td>Dedicated/Self-Hosted</td></tr>
<tr><td><div id="setting-kv-snapshot-receiver-excise-enabled" class="anchored"><code>kv.snapshot_receiver.excise.enabled</code></div></td><td>boolean</td><td><code>false</code></td><td>set to true to use excises instead of range deletions for KV snapshots</td><td>Dedicated/Self-Hosted</td></tr>
<tr><td><div id="setting-kv-transaction-max-intents-bytes" class="anchored"><code>kv.transaction.max_intents_bytes</code></div></td><td>integer</td><td><code>4194304</code></td><td>maximum number of bytes used to track locks in transactions</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
<tr><td><div id="setting-kv-transaction-max-refresh-spans-bytes" class="anchored"><code>kv.transaction.max_refresh_spans_bytes</code></div></td><td>integer</td><td><code>4194304</code></td><td>maximum number of bytes used to track refresh spans in serializable transactions</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
<tr><td><div id="setting-kv-transaction-reject-over-max-intents-budget-enabled" class="anchored"><code>kv.transaction.reject_over_max_intents_budget.enabled</code></div></td><td>boolean</td><td><code>false</code></td><td>if set, transactions that exceed their lock tracking budget (kv.transaction.max_intents_bytes) are rejected instead of having their lock spans imprecisely compressed</td><td>Serverless/Dedicated/Self-Hosted</td></tr>
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/mvcc_gc_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,7 @@ func (mgcq *mvccGCQueue) process(
}

var snap storage.Reader
if repl.store.cfg.SharedStorageEnabled || storage.UseEFOS.Get(&repl.ClusterSettings().SV) {
if repl.store.cfg.SharedStorageEnabled || storage.ShouldUseEFOS(&repl.ClusterSettings().SV) {
efos := repl.store.TODOEngine().NewEventuallyFileOnlySnapshot(rditer.MakeReplicatedKeySpans(desc))
if util.RaceEnabled {
ss := rditer.MakeReplicatedKeySpanSet(desc)
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/replica_consistency.go
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ func (r *Replica) computeChecksumPostApply(
// Raft-consistent (i.e. not in the middle of an AddSSTable).
spans := rditer.MakeReplicatedKeySpans(&desc)
var snap storage.Reader
if r.store.cfg.SharedStorageEnabled || storage.UseEFOS.Get(&r.ClusterSettings().SV) {
if r.store.cfg.SharedStorageEnabled || storage.ShouldUseEFOS(&r.ClusterSettings().SV) {
efos := r.store.TODOEngine().NewEventuallyFileOnlySnapshot(spans)
if util.RaceEnabled {
ss := rditer.MakeReplicatedKeySpanSet(&desc)
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/replica_raftstorage.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ func (r *Replica) GetSnapshot(
var snap storage.Reader
var startKey roachpb.RKey
r.raftMu.Lock()
if r.store.cfg.SharedStorageEnabled || storage.UseEFOS.Get(&r.ClusterSettings().SV) {
if r.store.cfg.SharedStorageEnabled || storage.ShouldUseEFOS(&r.ClusterSettings().SV) {
var ss *spanset.SpanSet
r.mu.RLock()
spans := rditer.MakeAllKeySpans(r.mu.state.Desc) // needs unreplicated to access Raft state
Expand Down
6 changes: 4 additions & 2 deletions pkg/kv/kvserver/store_snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"io"
"time"

"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv/kvpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/storepool"
Expand Down Expand Up @@ -499,8 +500,9 @@ func (kvSS *kvBatchSnapshotStrategy) Receive(
// TODO(jeffreyxiao): Re-evaluate as the default range size grows.
keyRanges := rditer.MakeReplicatedKeySpans(header.State.Desc)

doExcise := header.SharedReplicate
if doExcise && !s.cfg.SharedStorageEnabled {
doExcise := header.SharedReplicate || (storage.UseExciseForSnapshots.Get(&s.ClusterSettings().SV) &&
s.cfg.Settings.Version.IsActive(ctx, clusterversion.V23_2_PebbleFormatVirtualSSTables))
if header.SharedReplicate && !s.cfg.SharedStorageEnabled {
return noSnap, sendSnapshotError(ctx, s, stream, errors.New("cannot accept shared sstables"))
}

Expand Down
27 changes: 26 additions & 1 deletion pkg/storage/pebble.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,10 @@ var ValueBlocksEnabled = settings.RegisterBoolSetting(

// UseEFOS controls whether uses of pebble Snapshots should use
// EventuallyFileOnlySnapshots instead. This reduces write-amp with the main
// tradeoff being higher space-amp.
// tradeoff being higher space-amp. Note that UseExciseForSnapshots, if true,
// effectively causes EventuallyFileOnlySnapshots to be used as well.
//
// Note: Do NOT read this setting directly. Use ShouldUseEFOS() instead.
var UseEFOS = settings.RegisterBoolSetting(
settings.SystemOnly,
"storage.experimental.eventually_file_only_snapshots.enabled",
Expand All @@ -110,6 +113,21 @@ var UseEFOS = settings.RegisterBoolSetting(
"storage.experimental.eventually_file_only_snapshots.enabled", false), /* defaultValue */
settings.WithPublic)

// UseExciseForSnapshots controls whether virtual-sstable-based excises should
// be used instead of range deletions for clearing out replica contents as part
// of a rebalance/recovery snapshot application. It is applied on the receiver
// side.
//
// Enabling this setting also makes ShouldUseEFOS return true, so
// EventuallyFileOnlySnapshots are used as if UseEFOS were enabled. This
// interaction is why the setting is defined in the storage package even though
// it mostly affects KV.
//
// The setting is system-only, public, and defaults to false.
// NOTE(review): the default is wrapped in ConstantWithMetamorphicTestBool, so
// test builds presumably may vary it — confirm against the util package.
var UseExciseForSnapshots = settings.RegisterBoolSetting(
settings.SystemOnly,
"kv.snapshot_receiver.excise.enabled",
"set to true to use excises instead of range deletions for KV snapshots",
util.ConstantWithMetamorphicTestBool(
"kv.snapshot_receiver.excise.enabled", false), /* defaultValue */
settings.WithPublic,
)

// IngestAsFlushable controls whether ingested sstables that overlap the
// memtable may be lazily ingested: written to the WAL and enqueued in the list
// of flushables (eg, memtables, large batches and now lazily-ingested
Expand All @@ -129,6 +147,13 @@ var IngestAsFlushable = settings.RegisterBoolSetting(
util.ConstantWithMetamorphicTestBool(
"storage.ingest_as_flushable.enabled", true))

// ShouldUseEFOS reports whether EventuallyFileOnlySnapshots must be used in
// order to guarantee snapshot-like semantics. That is the case when either the
// UseEFOS or the UseExciseForSnapshots cluster setting is enabled in the given
// settings values.
//
// Callers should consult this helper rather than reading UseEFOS directly, so
// that the excise setting is always taken into account.
func ShouldUseEFOS(settings *settings.Values) bool {
	// The explicit EFOS setting is checked first; the excise setting is only
	// read when EFOS has not been enabled on its own.
	if UseEFOS.Get(settings) {
		return true
	}
	return UseExciseForSnapshots.Get(settings)
}

// EngineKeyCompare compares cockroach keys, including the version (which
// could be MVCC timestamps).
func EngineKeyCompare(a, b []byte) int {
Expand Down

0 comments on commit 65bb6cc

Please sign in to comment.