Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
75889: server: use constant for filtering internal stmt stats r=xinhaoz a=xinhaoz

Previously, the internal app name prefix was hard-coded
into the query used to fetch combined stmt/txn stats.
This commit replaces that usage with the pre-existing
internal app prefix defined in constants.

Release note: None

76209: kv: enforce minimum Raft snapshot rate limit r=nvanbenschoten a=nvanbenschoten

Related to #75728.

This commit enforces a minimum Raft snapshot rate limit of 1mb/s. We've seen
recent problems (#75728) related to mis-configured cluster settings, so it's
best to protect ourselves from this kind of misconfiguration.

```
[email protected]:26257/movr> set cluster setting kv.snapshot_rebalance.max_rate = '8';
ERROR: invalid value for kv.snapshot_rebalance.max_rate: snapshot rate cannot be set to a value below 1.0 MiB: 8 B

[email protected]:26257/movr> set cluster setting kv.snapshot_rebalance.max_rate = '8Kib';
ERROR: invalid value for kv.snapshot_rebalance.max_rate: snapshot rate cannot be set to a value below 1.0 MiB: 8.0 KiB

[email protected]:26257/movr> set cluster setting kv.snapshot_rebalance.max_rate = '8Mib';
SET CLUSTER SETTING
```

76236: tlp: respect test shutdown deadline r=jordanlewis a=jordanlewis

Closes #74675

This commit fixes the TLP test shutdown routine to actually begin
shutting down 5 minutes before the test timeout. Before, the code
that was supposed to check the timeout was not reached reliably.

Release note: None

Co-authored-by: Xin Hao Zhang <[email protected]>
Co-authored-by: Nathan VanBenschoten <[email protected]>
Co-authored-by: Jordan Lewis <[email protected]>
  • Loading branch information
4 people committed Feb 8, 2022
4 parents 844ac13 + 3447e1e + 2331ac1 + ab93dce commit 4d95f56
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 10 deletions.
6 changes: 3 additions & 3 deletions pkg/cmd/roachtest/tests/tlp.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ func runTLP(ctx context.Context, t test.Test, c cluster.Cluster) {
timeout := 10 * time.Minute
// Run 10 minute iterations of TLP in a loop for about the entire test,
// giving 5 minutes at the end to allow the test to shut down cleanly.
until := time.After(t.Spec().(*registry.TestSpec).Timeout - 5*time.Minute)
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, t.Spec().(*registry.TestSpec).Timeout-5*time.Minute)
defer cancel()
done := ctx.Done()

c.Put(ctx, t.Cockroach(), "./cockroach")
Expand All @@ -60,8 +62,6 @@ func runTLP(ctx context.Context, t test.Test, c cluster.Cluster) {
for i := 0; ; i++ {
c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings())
select {
case <-until:
return
case <-done:
return
default:
Expand Down
28 changes: 26 additions & 2 deletions pkg/kv/kvserver/store_snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,18 @@ type SnapshotStorePool interface {
throttle(reason throttleReason, why string, toStoreID roachpb.StoreID)
}

// minSnapshotRate defines the minimum value that the rate limit for rebalance
// and recovery snapshots can be configured to. Any value below this lower bound
// is considered unsafe for use, as it can lead to excessively long-running
// snapshots. The sender of Raft snapshots holds resources (e.g. LSM snapshots,
// LSM iterators until #75824 is addressed) and blocks Raft log truncation, so
// it is not safe to let a single snapshot run for an unlimited period of time.
//
// The value was chosen based on a maximum range size of 512mb and a desire to
// prevent a single snapshot for running for more than 10 minutes. With a rate
// limit of 1mb/s, a 512mb snapshot will take just under 9 minutes to send.
const minSnapshotRate = 1 << 20 // 1mb/s

// rebalanceSnapshotRate is the rate at which snapshots can be sent in the
// context of up-replication or rebalancing (i.e. any snapshot that was not
// requested by raft itself, to which `kv.snapshot_recovery.max_rate` applies).
Expand All @@ -717,7 +729,13 @@ var rebalanceSnapshotRate = settings.RegisterByteSizeSetting(
"kv.snapshot_rebalance.max_rate",
"the rate limit (bytes/sec) to use for rebalance and upreplication snapshots",
32<<20, // 32mb/s
settings.PositiveInt,
func(v int64) error {
if v < minSnapshotRate {
return errors.Errorf("snapshot rate cannot be set to a value below %s: %s",
humanizeutil.IBytes(minSnapshotRate), humanizeutil.IBytes(v))
}
return nil
},
).WithPublic()

// recoverySnapshotRate is the rate at which Raft-initiated snapshot can be
Expand All @@ -734,7 +752,13 @@ var recoverySnapshotRate = settings.RegisterByteSizeSetting(
"kv.snapshot_recovery.max_rate",
"the rate limit (bytes/sec) to use for recovery snapshots",
32<<20, // 32mb/s
settings.PositiveInt,
func(v int64) error {
if v < minSnapshotRate {
return errors.Errorf("snapshot rate cannot be set to a value below %s: %s",
humanizeutil.IBytes(minSnapshotRate), humanizeutil.IBytes(v))
}
return nil
},
).WithPublic()

// snapshotSenderBatchSize is the size that key-value batches are allowed to
Expand Down
2 changes: 1 addition & 1 deletion pkg/server/combined_statement_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func getFilterAndParams(
buffer.WriteString(testingKnobs.GetAOSTClause())

// Filter out internal statements by app name.
buffer.WriteString(" WHERE app_name NOT LIKE '$ internal%'")
buffer.WriteString(fmt.Sprintf(" WHERE app_name NOT LIKE '%s%%'", catconstants.InternalAppNamePrefix))

if start != nil {
buffer.WriteString(" AND aggregated_ts >= $1")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func TestSettingWatcherOnTenant(t *testing.T) {
"cluster.organization": {"foobar", "bazbax"},
// Include a system-only setting to verify that we don't try to change its
// value (which would cause a panic in test builds).
systemOnlySetting: {1024, 2048},
systemOnlySetting: {2 << 20, 4 << 20},
}
fakeTenant := roachpb.MakeTenantID(2)
systemTable := keys.SystemSQLCodec.TablePrefix(keys.SettingsTableID)
Expand Down
5 changes: 2 additions & 3 deletions pkg/server/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1989,9 +1989,8 @@ func TestStatusAPICombinedStatements(t *testing.T) {
continue
}
if strings.HasPrefix(respStatement.Key.KeyData.App, catconstants.InternalAppNamePrefix) {
// We ignore internal queries, these are not relevant for the
// validity of this test.
continue
// CombinedStatementStats should filter out internal queries.
t.Fatalf("unexpected internal query: %s", respStatement.Key.KeyData.Query)
}
if strings.HasPrefix(respStatement.Key.KeyData.Query, "ALTER USER") {
// Ignore the ALTER USER ... VIEWACTIVITY statement.
Expand Down

0 comments on commit 4d95f56

Please sign in to comment.