Skip to content

Commit

Permalink
Merge #68535
Browse files Browse the repository at this point in the history
68535: admission: enable admission control r=sumeerbhola a=sumeerbhola

Also updated existing roachtests that explicitly enabled
admission control to now disable admission control, so
that we can continue to compare performance of the two
settings.

Release note (ops change): The cluster settings affecting
the admission control system enablement are now set to
defaults that enable admission control.

Co-authored-by: sumeerbhola <[email protected]>
  • Loading branch information
craig[bot] and sumeerbhola committed Oct 28, 2021
2 parents 31e7416 + 7e5741b commit f7c4ed0
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 57 deletions.
6 changes: 3 additions & 3 deletions docs/generated/settings/settings-for-tenants.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Setting Type Default Description
admission.kv.enabled boolean false when true, work performed by the KV layer is subject to admission control
admission.sql_kv_response.enabled boolean false when true, work performed by the SQL layer when receiving a KV response is subject to admission control
admission.sql_sql_response.enabled boolean false when true, work performed by the SQL layer when receiving a DistSQL response is subject to admission control
admission.kv.enabled boolean true when true, work performed by the KV layer is subject to admission control
admission.sql_kv_response.enabled boolean true when true, work performed by the SQL layer when receiving a KV response is subject to admission control
admission.sql_sql_response.enabled boolean true when true, work performed by the SQL layer when receiving a DistSQL response is subject to admission control
bulkio.backup.file_size byte size 128 MiB target size for individual data files produced during BACKUP
bulkio.backup.read_timeout duration 5m0s amount of time after which a read attempt is considered timed out, which causes the backup to fail
bulkio.backup.read_with_priority_after duration 1m0s amount of time since the read-as-of time above which a BACKUP should use priority when retrying reads
Expand Down
6 changes: 3 additions & 3 deletions docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
<table>
<thead><tr><th>Setting</th><th>Type</th><th>Default</th><th>Description</th></tr></thead>
<tbody>
<tr><td><code>admission.kv.enabled</code></td><td>boolean</td><td><code>false</code></td><td>when true, work performed by the KV layer is subject to admission control</td></tr>
<tr><td><code>admission.sql_kv_response.enabled</code></td><td>boolean</td><td><code>false</code></td><td>when true, work performed by the SQL layer when receiving a KV response is subject to admission control</td></tr>
<tr><td><code>admission.sql_sql_response.enabled</code></td><td>boolean</td><td><code>false</code></td><td>when true, work performed by the SQL layer when receiving a DistSQL response is subject to admission control</td></tr>
<tr><td><code>admission.kv.enabled</code></td><td>boolean</td><td><code>true</code></td><td>when true, work performed by the KV layer is subject to admission control</td></tr>
<tr><td><code>admission.sql_kv_response.enabled</code></td><td>boolean</td><td><code>true</code></td><td>when true, work performed by the SQL layer when receiving a KV response is subject to admission control</td></tr>
<tr><td><code>admission.sql_sql_response.enabled</code></td><td>boolean</td><td><code>true</code></td><td>when true, work performed by the SQL layer when receiving a DistSQL response is subject to admission control</td></tr>
<tr><td><code>bulkio.backup.file_size</code></td><td>byte size</td><td><code>128 MiB</code></td><td>target size for individual data files produced during BACKUP</td></tr>
<tr><td><code>bulkio.backup.read_timeout</code></td><td>duration</td><td><code>5m0s</code></td><td>amount of time after which a read attempt is considered timed out, which causes the backup to fail</td></tr>
<tr><td><code>bulkio.backup.read_with_priority_after</code></td><td>duration</td><td><code>1m0s</code></td><td>amount of time since the read-as-of time above which a BACKUP should use priority when retrying reads</td></tr>
Expand Down
47 changes: 21 additions & 26 deletions pkg/cmd/roachtest/tests/kv.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@ func registerKV(r registry.Registry) {
blockSize int
splits int // 0 implies default, negative implies 0
// If true, load-based splitting will be disabled.
disableLoadSplits bool
encryption bool
sequential bool
admissionControlEnabled bool
concMultiplier int
duration time.Duration
tracing bool // `trace.debug.enable`
tags []string
owner registry.Owner // defaults to KV
disableLoadSplits bool
encryption bool
sequential bool
admissionControlDisabled bool
concMultiplier int
duration time.Duration
tracing bool // `trace.debug.enable`
tags []string
owner registry.Owner // defaults to KV
}
computeNumSplits := func(opts kvOptions) int {
// TODO(ajwerner): set this default to a more sane value or remove it and
Expand Down Expand Up @@ -89,9 +89,7 @@ func registerKV(r registry.Registry) {
t.Fatalf("failed to enable tracing: %v", err)
}
}
if opts.admissionControlEnabled {
EnableAdmissionControl(ctx, t, c)
}
SetAdmissionControl(ctx, t, c, !opts.admissionControlDisabled)

t.Status("running workload")
m := c.NewMonitor(ctx, c.Range(1, nodes))
Expand Down Expand Up @@ -150,13 +148,8 @@ func registerKV(r registry.Registry) {
// CPU overload test, to stress admission control.
{nodes: 1, cpus: 8, readPercent: 50, concMultiplier: 8192, duration: 20 * time.Minute},
// IO write overload test, to stress admission control.
//
// TODO(sumeerbhola): re-enable when admission control is enabled by default. This
// test crashes the cluster every now and then. See:
// https://github.com/cockroachdb/cockroach/issues/70247
//
// {nodes: 1, cpus: 8, readPercent: 0, concMultiplier: 4096, blockSize: 1 << 16, /* 64 KB */
// duration: 20 * time.Minute},
{nodes: 1, cpus: 8, readPercent: 0, concMultiplier: 4096, blockSize: 1 << 16, /* 64 KB */
duration: 20 * time.Minute},
{nodes: 1, cpus: 8, readPercent: 95},
{nodes: 1, cpus: 32, readPercent: 0},
{nodes: 1, cpus: 32, readPercent: 95},
Expand All @@ -167,8 +160,8 @@ func registerKV(r registry.Registry) {
{nodes: 3, cpus: 8, readPercent: 95, splits: -1 /* no splits */},
{nodes: 3, cpus: 32, readPercent: 0},
{nodes: 3, cpus: 32, readPercent: 95},
{nodes: 3, cpus: 32, readPercent: 0, admissionControlEnabled: true},
{nodes: 3, cpus: 32, readPercent: 95, admissionControlEnabled: true},
{nodes: 3, cpus: 32, readPercent: 0, admissionControlDisabled: true},
{nodes: 3, cpus: 32, readPercent: 95, admissionControlDisabled: true},
{nodes: 3, cpus: 32, readPercent: 0, splits: -1 /* no splits */},
{nodes: 3, cpus: 32, readPercent: 95, splits: -1 /* no splits */},

Expand All @@ -182,9 +175,9 @@ func registerKV(r registry.Registry) {
{nodes: 3, cpus: 32, readPercent: 0, blockSize: 1 << 16 /* 64 KB */},
{nodes: 3, cpus: 32, readPercent: 95, blockSize: 1 << 16 /* 64 KB */},
{nodes: 3, cpus: 32, readPercent: 0, blockSize: 1 << 16, /* 64 KB */
admissionControlEnabled: true},
admissionControlDisabled: true},
{nodes: 3, cpus: 32, readPercent: 95, blockSize: 1 << 16, /* 64 KB */
admissionControlEnabled: true},
admissionControlDisabled: true},

// Configs with large batch sizes.
{nodes: 3, cpus: 8, readPercent: 0, batchSize: 16},
Expand Down Expand Up @@ -241,8 +234,8 @@ func registerKV(r registry.Registry) {
if opts.sequential {
nameParts = append(nameParts, "seq")
}
if opts.admissionControlEnabled {
nameParts = append(nameParts, "admission")
if opts.admissionControlDisabled {
nameParts = append(nameParts, "no-admission")
}
if opts.concMultiplier != 0 { // support legacy test name which didn't include this multiplier
nameParts = append(nameParts, fmt.Sprintf("conc=%d", opts.concMultiplier))
Expand Down Expand Up @@ -873,7 +866,9 @@ func registerKVMultiStoreWithOverload(r registry.Registry) {
t.Fatalf("failed to configure zone for %s: %v", name, err)
}
}
EnableAdmissionControl(ctx, t, c)
// Defensive, since admission control is enabled by default. This test can
// fail if admission control is disabled.
SetAdmissionControl(ctx, t, c, true)
if _, err := db.ExecContext(ctx,
"SET CLUSTER SETTING kv.range_split.by_load_enabled = 'false'"); err != nil {
t.Fatalf("failed to disable load based splitting: %v", err)
Expand Down
30 changes: 13 additions & 17 deletions pkg/cmd/roachtest/tests/tpcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -693,9 +693,9 @@ func registerTPCC(r registry.Registry) {
EstimatedMax: gceOrAws(cloud, 2400, 3000),
})
registerTPCCBenchSpec(r, tpccBenchSpec{
Nodes: 3,
CPUs: 16,
AdmissionControlEnabled: true,
Nodes: 3,
CPUs: 16,
AdmissionControlDisabled: true,

LoadWarehouses: gceOrAws(cloud, 3000, 3500),
EstimatedMax: gceOrAws(cloud, 2400, 3000),
Expand Down Expand Up @@ -801,12 +801,12 @@ func (l tpccBenchLoadConfig) numLoadNodes(d tpccBenchDistribution) int {
}

type tpccBenchSpec struct {
Nodes int
CPUs int
Chaos bool
AdmissionControlEnabled bool
Distribution tpccBenchDistribution
LoadConfig tpccBenchLoadConfig
Nodes int
CPUs int
Chaos bool
AdmissionControlDisabled bool
Distribution tpccBenchDistribution
LoadConfig tpccBenchLoadConfig

// The number of warehouses to load into the cluster before beginning
// benchmarking. Should be larger than EstimatedMax and should be a
Expand Down Expand Up @@ -857,8 +857,8 @@ func registerTPCCBenchSpec(r registry.Registry, b tpccBenchSpec) {
if b.Chaos {
nameParts = append(nameParts, "chaos")
}
if b.AdmissionControlEnabled {
nameParts = append(nameParts, "admission")
if b.AdmissionControlDisabled {
nameParts = append(nameParts, "no-admission")
}

opts := []spec.Option{spec.CPU(b.CPUs)}
Expand Down Expand Up @@ -1034,9 +1034,7 @@ func runTPCCBench(ctx context.Context, t test.Test, c cluster.Cluster, b tpccBen
c.EncryptDefault(false)
c.EncryptAtRandom(false)
c.Start(ctx, append(b.startOpts(), roachNodes)...)
if b.AdmissionControlEnabled {
EnableAdmissionControl(ctx, t, c)
}
SetAdmissionControl(ctx, t, c, !b.AdmissionControlDisabled)
useHAProxy := b.Chaos
const restartWait = 15 * time.Second
{
Expand Down Expand Up @@ -1125,9 +1123,7 @@ func runTPCCBench(ctx context.Context, t test.Test, c cluster.Cluster, b tpccBen
}

c.Start(ctx, append(b.startOpts(), roachNodes)...)
if b.AdmissionControlEnabled {
EnableAdmissionControl(ctx, t, c)
}
SetAdmissionControl(ctx, t, c, !b.AdmissionControlDisabled)
}

s := search.NewLineSearcher(1, b.LoadWarehouses, b.EstimatedMax, initStepSize, precision)
Expand Down
14 changes: 9 additions & 5 deletions pkg/cmd/roachtest/tests/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,20 @@ func WaitForUpdatedReplicationReport(ctx context.Context, t test.Test, db *gosql
}
}

// EnableAdmissionControl enables the admission control cluster settings on
// the given cluster.
func EnableAdmissionControl(ctx context.Context, t test.Test, c cluster.Cluster) {
// SetAdmissionControl sets the admission control cluster settings on the
// given cluster.
func SetAdmissionControl(ctx context.Context, t test.Test, c cluster.Cluster, enabled bool) {
db := c.Conn(ctx, 1)
defer db.Close()
val := "true"
if !enabled {
val = "false"
}
for _, setting := range []string{"admission.kv.enabled", "admission.sql_kv_response.enabled",
"admission.sql_sql_response.enabled"} {
if _, err := db.ExecContext(
ctx, "SET CLUSTER SETTING "+setting+" = 'true'"); err != nil {
t.Fatalf("failed to enable admission control: %v", err)
ctx, "SET CLUSTER SETTING "+setting+" = '"+val+"'"); err != nil {
t.Fatalf("failed to set admission control to %t: %v", enabled, err)
}
}
}
6 changes: 3 additions & 3 deletions pkg/util/admission/work_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,23 @@ import (
var KVAdmissionControlEnabled = settings.RegisterBoolSetting(
"admission.kv.enabled",
"when true, work performed by the KV layer is subject to admission control",
false).WithPublic()
true).WithPublic()

// SQLKVResponseAdmissionControlEnabled controls whether response processing
// in SQL, for KV requests, is enabled.
var SQLKVResponseAdmissionControlEnabled = settings.RegisterBoolSetting(
"admission.sql_kv_response.enabled",
"when true, work performed by the SQL layer when receiving a KV response is subject to "+
"admission control",
false).WithPublic()
true).WithPublic()

// SQLSQLResponseAdmissionControlEnabled controls whether response processing
// in SQL, for DistSQL requests, is enabled.
var SQLSQLResponseAdmissionControlEnabled = settings.RegisterBoolSetting(
"admission.sql_sql_response.enabled",
"when true, work performed by the SQL layer when receiving a DistSQL response is subject "+
"to admission control",
false).WithPublic()
true).WithPublic()

var admissionControlEnabledSettings = [numWorkKinds]*settings.BoolSetting{
KVWork: KVAdmissionControlEnabled,
Expand Down

0 comments on commit f7c4ed0

Please sign in to comment.