Skip to content

Commit

Permalink
kv: enforce minimum Raft snapshot rate limit
Browse files Browse the repository at this point in the history
Related to cockroachdb#75728.

This commit enforces a minimum Raft snapshot rate limit of 1mb/s. We've seen
recent problems (cockroachdb#75728) related to mis-configured cluster settings, so it's
best to protect ourselves from this kind of misconfiguration.

```
[email protected]:26257/movr> set cluster setting kv.snapshot_rebalance.max_rate = '8';
ERROR: invalid value for kv.snapshot_rebalance.max_rate: snapshot rate cannot be set to a value below 1.0 MiB: 8 B

[email protected]:26257/movr> set cluster setting kv.snapshot_rebalance.max_rate = '8Kib';
ERROR: invalid value for kv.snapshot_rebalance.max_rate: snapshot rate cannot be set to a value below 1.0 MiB: 8.0 KiB

[email protected]:26257/movr> set cluster setting kv.snapshot_rebalance.max_rate = '8Mib';
SET CLUSTER SETTING
```
  • Loading branch information
nvanbenschoten authored and RajivTS committed Mar 6, 2022
1 parent 960b460 commit 33e697f
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 3 deletions.
28 changes: 26 additions & 2 deletions pkg/kv/kvserver/store_snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,18 @@ type SnapshotStorePool interface {
throttle(reason throttleReason, why string, toStoreID roachpb.StoreID)
}

// minSnapshotRate defines the minimum value that the rate limit for rebalance
// and recovery snapshots can be configured to. Any value below this lower bound
// is considered unsafe for use, as it can lead to excessively long-running
// snapshots. The sender of Raft snapshots holds resources (e.g. LSM snapshots,
// LSM iterators until #75824 is addressed) and blocks Raft log truncation, so
// it is not safe to let a single snapshot run for an unlimited period of time.
//
// The value was chosen based on a maximum range size of 512mb and a desire to
// prevent a single snapshot for running for more than 10 minutes. With a rate
// limit of 1mb/s, a 512mb snapshot will take just under 9 minutes to send.
const minSnapshotRate = 1 << 20 // 1mb/s

// rebalanceSnapshotRate is the rate at which snapshots can be sent in the
// context of up-replication or rebalancing (i.e. any snapshot that was not
// requested by raft itself, to which `kv.snapshot_recovery.max_rate` applies).
Expand All @@ -717,7 +729,13 @@ var rebalanceSnapshotRate = settings.RegisterByteSizeSetting(
"kv.snapshot_rebalance.max_rate",
"the rate limit (bytes/sec) to use for rebalance and upreplication snapshots",
32<<20, // 32mb/s
settings.PositiveInt,
func(v int64) error {
if v < minSnapshotRate {
return errors.Errorf("snapshot rate cannot be set to a value below %s: %s",
humanizeutil.IBytes(minSnapshotRate), humanizeutil.IBytes(v))
}
return nil
},
).WithPublic()

// recoverySnapshotRate is the rate at which Raft-initiated snapshot can be
Expand All @@ -734,7 +752,13 @@ var recoverySnapshotRate = settings.RegisterByteSizeSetting(
"kv.snapshot_recovery.max_rate",
"the rate limit (bytes/sec) to use for recovery snapshots",
32<<20, // 32mb/s
settings.PositiveInt,
func(v int64) error {
if v < minSnapshotRate {
return errors.Errorf("snapshot rate cannot be set to a value below %s: %s",
humanizeutil.IBytes(minSnapshotRate), humanizeutil.IBytes(v))
}
return nil
},
).WithPublic()

// snapshotSenderBatchSize is the size that key-value batches are allowed to
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func TestSettingWatcherOnTenant(t *testing.T) {
"cluster.organization": {"foobar", "bazbax"},
// Include a system-only setting to verify that we don't try to change its
// value (which would cause a panic in test builds).
systemOnlySetting: {1024, 2048},
systemOnlySetting: {2 << 20, 4 << 20},
}
fakeTenant := roachpb.MakeTenantID(2)
systemTable := keys.SystemSQLCodec.TablePrefix(keys.SettingsTableID)
Expand Down

0 comments on commit 33e697f

Please sign in to comment.