diff --git a/pkg/ccl/backupccl/BUILD.bazel b/pkg/ccl/backupccl/BUILD.bazel index 910c6dea9a1a..565dcac0054e 100644 --- a/pkg/ccl/backupccl/BUILD.bazel +++ b/pkg/ccl/backupccl/BUILD.bazel @@ -172,6 +172,7 @@ go_test( "bench_test.go", "create_scheduled_backup_test.go", "datadriven_test.go", + "file_sst_sink_test.go", "full_cluster_backup_restore_test.go", "generative_split_and_scatter_processor_test.go", "key_rewriter_test.go", diff --git a/pkg/ccl/backupccl/backup_test.go b/pkg/ccl/backupccl/backup_test.go index 6c352b25e38e..eebc51960750 100644 --- a/pkg/ccl/backupccl/backup_test.go +++ b/pkg/ccl/backupccl/backup_test.go @@ -4000,7 +4000,7 @@ func TestBackupRestoreChecksum(t *testing.T) { } // Corrupt one of the files in the backup. - f, err := os.OpenFile(filepath.Join(dir, backupManifest.Files[1].Path), os.O_WRONLY, 0) + f, err := os.OpenFile(filepath.Join(dir, backupManifest.Files[0].Path), os.O_WRONLY, 0) if err != nil { t.Fatalf("%+v", err) } diff --git a/pkg/ccl/backupccl/file_sst_sink.go b/pkg/ccl/backupccl/file_sst_sink.go index 67f98c130a48..40a449093cb9 100644 --- a/pkg/ccl/backupccl/file_sst_sink.go +++ b/pkg/ccl/backupccl/file_sst_sink.go @@ -190,7 +190,7 @@ func (s *fileSSTSink) write(ctx context.Context, resp exportedSpan) error { // If this span extended the last span added -- that is, picked up where it // ended and has the same time-bounds -- then we can simply extend that span // and add to its entry counts. Otherwise we need to record it separately. - if l := len(s.flushedFiles) - 1; l > 0 && s.flushedFiles[l].Span.EndKey.Equal(span.Key) && + if l := len(s.flushedFiles) - 1; l >= 0 && s.flushedFiles[l].Span.EndKey.Equal(span.Key) && s.flushedFiles[l].EndTime.EqOrdering(resp.metadata.EndTime) && s.flushedFiles[l].StartTime.EqOrdering(resp.metadata.StartTime) { s.flushedFiles[l].Span.EndKey = span.EndKey diff --git a/pkg/ccl/backupccl/file_sst_sink_test.go b/pkg/ccl/backupccl/file_sst_sink_test.go new file mode 100644 index 000000000000..7ddba58970e2 --- /dev/null +++ b/pkg/ccl/backupccl/file_sst_sink_test.go @@ -0,0 +1,137 @@ +// Copyright 2023 The Cockroach Authors. +// +// Licensed as a CockroachDB Enterprise file under the Cockroach Community +// License (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt + +package backupccl + +import ( + "bytes" + "context" + "fmt" + "testing" + + "github.com/cockroachdb/cockroach/pkg/base" + "github.com/cockroachdb/cockroach/pkg/blobs" + "github.com/cockroachdb/cockroach/pkg/ccl/backupccl/backuppb" + "github.com/cockroachdb/cockroach/pkg/cloud" + "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/security/username" + "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" + "github.com/cockroachdb/cockroach/pkg/sql/isql" + "github.com/cockroachdb/cockroach/pkg/storage" + "github.com/cockroachdb/cockroach/pkg/util/hlc" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/gogo/protobuf/types" + "github.com/stretchr/testify/require" +) + +// TestFileSSTSinkExtendOneFile is a regression test for a bug in fileSSTSink in +// which the sink fails to extend its last span added if there's only one file +// in the sink so far. 
+func TestFileSSTSinkExtendOneFile(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ctx := context.Background() + tc, sqlDB, _, cleanup := backupRestoreTestSetup(t, singleNode, 1, InitManualReplication) + defer cleanup() + + store, err := cloud.ExternalStorageFromURI(ctx, "userfile:///0", + base.ExternalIODirConfig{}, + tc.Servers[0].ClusterSettings(), + blobs.TestEmptyBlobClientFactory, + username.RootUserName(), + tc.Servers[0].InternalDB().(isql.DB), + nil, /* limiters */ + cloud.NilMetrics, + ) + require.NoError(t, err) + sqlDB.Exec(t, `SET CLUSTER SETTING bulkio.backup.file_size = '20B'`) + + // Never block. + progCh := make(chan execinfrapb.RemoteProducerMetadata_BulkProcessorProgress, 10) + + sinkConf := sstSinkConf{ + id: 1, + enc: nil, + progCh: progCh, + settings: &tc.Servers[0].ClusterSettings().SV, + } + + sink := makeFileSSTSink(sinkConf, store) + + getKeys := func(prefix string, n int) []byte { + var b bytes.Buffer + sst := storage.MakeBackupSSTWriter(ctx, nil, &b) + for i := 0; i < n; i++ { + require.NoError(t, sst.PutUnversioned([]byte(fmt.Sprintf("%s%08d", prefix, i)), nil)) + } + sst.Close() + return b.Bytes() + } + + exportResponse1 := exportedSpan{ + metadata: backuppb.BackupManifest_File{ + Span: roachpb.Span{ + Key: []byte("b"), + EndKey: []byte("b"), + }, + EntryCounts: roachpb.RowCount{ + DataSize: 100, + Rows: 1, + }, + StartTime: hlc.Timestamp{}, + EndTime: hlc.Timestamp{}, + LocalityKV: "", + }, + dataSST: getKeys("b", 100), + revStart: hlc.Timestamp{}, + completedSpans: 1, + atKeyBoundary: false, + } + + exportResponse2 := exportedSpan{ + metadata: backuppb.BackupManifest_File{ + Span: roachpb.Span{ + Key: []byte("b"), + EndKey: []byte("z"), + }, + EntryCounts: roachpb.RowCount{ + DataSize: 100, + Rows: 1, + }, + StartTime: hlc.Timestamp{}, + EndTime: hlc.Timestamp{}, + LocalityKV: "", + }, + dataSST: getKeys("c", 100), + revStart: hlc.Timestamp{}, + completedSpans: 1, + atKeyBoundary: true, + } + + require.NoError(t, sink.write(ctx, exportResponse1)) + require.NoError(t, sink.write(ctx, exportResponse2)) + + close(progCh) + + var progs []execinfrapb.RemoteProducerMetadata_BulkProcessorProgress + for p := range progCh { + progs = append(progs, p) + } + + require.Equal(t, 1, len(progs)) + var progDetails backuppb.BackupManifest_Progress + if err := types.UnmarshalAny(&progs[0].ProgressDetails, &progDetails); err != nil { + t.Fatal(err) + } + + // Verify that the file in the sink was properly extended and there is only 1 + // file in the progress details. 
+ require.Equal(t, 1, len(progDetails.Files)) +} diff --git a/pkg/ccl/backupccl/show_test.go b/pkg/ccl/backupccl/show_test.go index efde7c884a37..53b26662fb1f 100644 --- a/pkg/ccl/backupccl/show_test.go +++ b/pkg/ccl/backupccl/show_test.go @@ -54,6 +54,7 @@ func TestShowBackup(t *testing.T) { defer cleanupEmptyCluster() sqlDB.ExecMultiple(t, strings.Split(` SET CLUSTER SETTING sql.cross_db_fks.enabled = TRUE; +SET CLUSTER SETTING bulkio.backup.file_size = '1'; CREATE TYPE data.welcome AS ENUM ('hello', 'hi'); USE data; CREATE SCHEMA sc; CREATE TABLE data.sc.t1 (a INT); diff --git a/pkg/cmd/roachtest/tests/restore.go b/pkg/cmd/roachtest/tests/restore.go index 1ac899e7e5ca..9b318ba4ddd0 100644 --- a/pkg/cmd/roachtest/tests/restore.go +++ b/pkg/cmd/roachtest/tests/restore.go @@ -328,193 +328,11 @@ func registerRestoreNodeShutdown(r registry.Registry) { }) } -type testDataSet interface { - name() string - // runRestore does any setup that's required and restores the dataset into - // the given cluster. Any setup shouldn't take a long amount of time since - // perf artifacts are based on how long this takes. - runRestore(ctx context.Context, c cluster.Cluster) - - // runRestoreDetached is like runRestore but runs the RESTORE WITH detahced, - // and returns the job ID. - runRestoreDetached(ctx context.Context, t test.Test, c cluster.Cluster) (jobspb.JobID, error) -} - -type dataBank2TB struct{} - -func (dataBank2TB) name() string { - return "2TB" -} - -func (dataBank2TB) runRestore(ctx context.Context, c cluster.Cluster) { - c.Run(ctx, c.Node(1), `./cockroach sql --insecure -e "CREATE DATABASE restore2tb"`) - c.Run(ctx, c.Node(1), `./cockroach sql --insecure -e " - RESTORE csv.bank FROM - 'gs://cockroach-fixtures/workload/bank/version=1.0.0,payload-bytes=10240,ranges=0,rows=65104166,seed=1/bank?AUTH=implicit' - WITH into_db = 'restore2tb'"`) -} - -func (dataBank2TB) runRestoreDetached( - ctx context.Context, t test.Test, c cluster.Cluster, -) (jobspb.JobID, error) { - c.Run(ctx, c.Node(1), `./cockroach sql --insecure -e "CREATE DATABASE restore2tb"`) - c.Run(ctx, c.Node(1), `./cockroach sql --insecure -e " - RESTORE csv.bank FROM - 'gs://cockroach-fixtures/workload/bank/version=1.0.0,payload-bytes=10240,ranges=0,rows=65104166,seed=1/bank?AUTH=implicit' - WITH into_db = 'restore2tb', detached"`) - db, err := c.ConnE(ctx, t.L(), c.Node(1)[0]) - if err != nil { - return 0, errors.Wrap(err, "failed to connect to node 1; running restore detached") - } - - var jobID jobspb.JobID - if err := db.QueryRow(`SELECT job_id FROM [SHOW JOBS] WHERE job_type = 'RESTORE'`).Scan(&jobID); err != nil { - return 0, err - } - - return jobID, nil -} - -var _ testDataSet = dataBank2TB{} - -type tpccIncData struct{} - -func (tpccIncData) name() string { - return "TPCCInc" -} - -func (tpccIncData) runRestore(ctx context.Context, c cluster.Cluster) { - // This data set restores a 1.80TB (replicated) backup consisting of 48 - // incremental backup layers taken every 15 minutes. 8000 warehouses were - // imported and then a workload of 1000 warehouses was run against the cluster - // while the incremental backups were being taken. 
- c.Run(ctx, c.Node(1), `./cockroach sql --insecure -e " - RESTORE FROM '2022/09/29-000000.00' IN - 'gs://cockroach-fixtures/backups/tpcc/rev-history=false,inc-count=48,cluster/8000-warehouses/22.2.0-alpha.4?AUTH=implicit' - AS OF SYSTEM TIME '2022-09-28 23:42:00'"`) -} - -func (tpccIncData) runRestoreDetached( - ctx context.Context, t test.Test, c cluster.Cluster, -) (jobspb.JobID, error) { - c.Run(ctx, c.Node(1), `./cockroach sql --insecure -e " - RESTORE FROM '/2022/09/07-000000.00' IN - 'gs://cockroach-fixtures/tpcc-incrementals-22.2?AUTH=implicit' - AS OF SYSTEM TIME '2022-09-07 12:15:00'" - WITH detached"`) - db, err := c.ConnE(ctx, t.L(), c.Node(1)[0]) - if err != nil { - return 0, errors.Wrap(err, "failed to connect to node 1; running restore detached") - } - - var jobID jobspb.JobID - if err := db.QueryRow(`SELECT job_id FROM [SHOW JOBS] WHERE job_type = 'RESTORE'`).Scan(&jobID); err != nil { - return 0, err - } - - return jobID, nil -} - func registerRestore(r registry.Registry) { // TODO(msbutler): delete the tests created by the loop below. Specifically // - restore2TB/nodes=10 // - restore2TB/nodes=32 // - restore2TB/nodes=6/cpus=8/pd-volume=2500GB - largeVolumeSize := 2500 // the size in GB of disks in large volume configs - for _, item := range []struct { - nodes int - cpus int - largeVolumes bool - dataSet testDataSet - - timeout time.Duration - }{ - {dataSet: dataBank2TB{}, nodes: 10, timeout: 6 * time.Hour}, - {dataSet: dataBank2TB{}, nodes: 32, timeout: 3 * time.Hour}, - {dataSet: dataBank2TB{}, nodes: 6, timeout: 4 * time.Hour, cpus: 8, largeVolumes: true}, - {dataSet: tpccIncData{}, nodes: 10, timeout: 6 * time.Hour}, - } { - item := item - clusterOpts := make([]spec.Option, 0) - testName := fmt.Sprintf("restore%s/nodes=%d", item.dataSet.name(), item.nodes) - if item.cpus != 0 { - clusterOpts = append(clusterOpts, spec.CPU(item.cpus)) - testName += fmt.Sprintf("/cpus=%d", item.cpus) - } - if item.largeVolumes { - clusterOpts = append(clusterOpts, spec.VolumeSize(largeVolumeSize)) - testName += fmt.Sprintf("/pd-volume=%dGB", largeVolumeSize) - } - // Has been seen to OOM: https://github.com/cockroachdb/cockroach/issues/71805 - clusterOpts = append(clusterOpts, spec.HighMem(true)) - - r.Add(registry.TestSpec{ - Name: testName, - Owner: registry.OwnerDisasterRecovery, - Cluster: r.MakeClusterSpec(item.nodes, clusterOpts...), - Timeout: item.timeout, - EncryptionSupport: registry.EncryptionMetamorphic, - Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { - c.Put(ctx, t.Cockroach(), "./cockroach") - c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings()) - m := c.NewMonitor(ctx) - - // Run the disk usage logger in the monitor to guarantee its - // having terminated when the test ends. - dul := NewDiskUsageLogger(t, c) - m.Go(dul.Runner) - hc := NewHealthChecker(t, c, c.All()) - m.Go(hc.Runner) - - // TODO(peter): This currently causes the test to fail because we see a - // flurry of valid merges when the restore finishes. - // - // m.Go(func(ctx context.Context) error { - // // Make sure the merge queue doesn't muck with our restore. 
- // return verifyMetrics(ctx, c, map[string]float64{ - // "cr.store.queue.merge.process.success": 10, - // "cr.store.queue.merge.process.failure": 10, - // }) - // }) - - tick, perfBuf := initBulkJobPerfArtifacts(testName, item.timeout) - m.Go(func(ctx context.Context) error { - defer dul.Done() - defer hc.Done() - t.Status(`running restore`) - // Tick once before starting the restore, and once after to - // capture the total elapsed time. This is used by - // roachperf to compute and display the average MB/sec per - // node. - if item.cpus >= 8 { - // If the nodes are large enough (specifically, if they - // have enough memory we can increase the parallelism - // of restore). Machines with 16 vCPUs typically have - // enough memory to support 3 concurrent workers. - c.Run(ctx, c.Node(1), - `./cockroach sql --insecure -e "SET CLUSTER SETTING kv.bulk_io_write.restore_node_concurrency = 5"`) - c.Run(ctx, c.Node(1), - `./cockroach sql --insecure -e "SET CLUSTER SETTING kv.bulk_io_write.concurrent_addsstable_requests = 5"`) - } - tick() - item.dataSet.runRestore(ctx, c) - tick() - - // Upload the perf artifacts to any one of the nodes so that the test - // runner copies it into an appropriate directory path. - dest := filepath.Join(t.PerfArtifactsDir(), "stats.json") - if err := c.RunE(ctx, c.Node(1), "mkdir -p "+filepath.Dir(dest)); err != nil { - log.Errorf(ctx, "failed to create perf dir: %+v", err) - } - if err := c.PutString(ctx, perfBuf.String(), dest, 0755, c.Node(1)); err != nil { - log.Errorf(ctx, "failed to upload perf artifacts to node: %s", err.Error()) - } - return nil - }) - m.Wait() - }, - }) - } durationGauge := r.PromFactory().NewGaugeVec(prometheus.GaugeOpts{Namespace: registry. PrometheusNameSpace, Subsystem: "restore", Name: "duration"}, []string{"test_name"}) @@ -692,6 +510,28 @@ func registerRestore(r registry.Registry) { timeout: 1 * time.Hour, }, { + // Benchmarks if per node throughput remains constant if the number of + // nodes doubles relative to default. + hardware: makeHardwareSpecs(hardwareSpecs{nodes: 8}), + backup: makeBackupSpecs(backupSpecs{}), + timeout: 1 * time.Hour, + }, + { + // Benchmarks if per node throughput doubles if the vcpu count doubles + // relative to default. + hardware: makeHardwareSpecs(hardwareSpecs{cpus: 16}), + backup: makeBackupSpecs(backupSpecs{}), + timeout: 1 * time.Hour, + }, + { + // Ensures we can restore a 48 length incremental chain. + // Also benchmarks per node throughput for a long chain. + hardware: makeHardwareSpecs(hardwareSpecs{}), + backup: makeBackupSpecs(backupSpecs{backupsIncluded: 48}), + timeout: 1 * time.Hour, + }, + { + // The nightly 8TB Restore test. hardware: makeHardwareSpecs(hardwareSpecs{nodes: 10, volumeSize: 2000}), backup: makeBackupSpecs(backupSpecs{ version: "v22.2.1", @@ -699,6 +539,7 @@ func registerRestore(r registry.Registry) { timeout: 5 * time.Hour, }, { + // The weekly 32TB Restore test. 
hardware: makeHardwareSpecs(hardwareSpecs{nodes: 15, cpus: 16, volumeSize: 5000}), backup: makeBackupSpecs(backupSpecs{ version: "v22.2.1", @@ -708,9 +549,7 @@ func registerRestore(r registry.Registry) { }, // TODO(msbutler): add the following tests once roachperf/grafana is hooked up and old tests are // removed: - // - restore/tpce/400GB/nodes=10 // - restore/tpce/400GB/nodes=30 - // - restore/tpce/400GB/cpu=16 // - restore/tpce/400GB/encryption } { sp := sp @@ -909,9 +748,14 @@ func makeBackupSpecs(override backupSpecs) backupSpecs { specs.fullBackupDir = override.fullBackupDir } + if override.backupsIncluded != 0 { + specs.backupsIncluded = override.backupsIncluded + } + if override.workload != nil { specs.workload = override.workload } + return specs } diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go index 089ecd82fe43..ec288057d766 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go +++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go @@ -1912,7 +1912,7 @@ func (a Allocator) RebalanceNonVoter( // machinery to achieve range count convergence. func (a *Allocator) ScorerOptions(ctx context.Context) *RangeCountScorerOptions { return &RangeCountScorerOptions{ - StoreHealthOptions: a.StoreHealthOptions(ctx), + IOOverloadOptions: a.IOOverloadOptions(), deterministic: a.deterministic, rangeRebalanceThreshold: RangeRebalanceThreshold.Get(&a.st.SV), } @@ -1922,7 +1922,7 @@ func (a *Allocator) ScorerOptions(ctx context.Context) *RangeCountScorerOptions func (a *Allocator) ScorerOptionsForScatter(ctx context.Context) *ScatterScorerOptions { return &ScatterScorerOptions{ RangeCountScorerOptions: RangeCountScorerOptions{ - StoreHealthOptions: a.StoreHealthOptions(ctx), + IOOverloadOptions: a.IOOverloadOptions(), deterministic: a.deterministic, rangeRebalanceThreshold: 0, }, @@ -1946,6 +1946,7 @@ func (a *Allocator) ScorerOptionsForScatter(ctx context.Context) *ScatterScorerO // - It excludes replicas that may need snapshots. If replica calling this // method is not the Raft leader (meaning that it doesn't know whether follower // replicas need a snapshot or not), produces no results. +// - It excludes replicas that are on stores which are IO overloaded. func (a *Allocator) ValidLeaseTargets( ctx context.Context, storePool storepool.AllocatorStorePool, @@ -2029,9 +2030,96 @@ func (a *Allocator) ValidLeaseTargets( candidates = preferred } + // Filter the candidate list to only those stores which are not IO + // overloaded. + nonIOOverloadedPreferred := a.nonIOOverloadedLeaseTargets( + ctx, + storePool, + candidates, + leaseRepl.StoreID(), + a.IOOverloadOptions(), + ) + + return nonIOOverloadedPreferred +} + +// nonIOOverloadedLeaseTargets returns a list of non IO overloaded lease +// replica targets and whether the leaseholder replica should be replaced, +// given the existing replicas, IO overload options and IO overload of +// existing replica stores. +func (a *Allocator) nonIOOverloadedLeaseTargets( + ctx context.Context, + storePool storepool.AllocatorStorePool, + existingReplicas []roachpb.ReplicaDescriptor, + leaseStoreID roachpb.StoreID, + ioOverloadOptions IOOverloadOptions, +) (candidates []roachpb.ReplicaDescriptor) { + // We return early to avoid unnecessary work when IO overload is set to be + // ignored anyway. 
+ if ioOverloadOptions.LeaseEnforcementLevel == IOOverloadThresholdIgnore { + return existingReplicas + } + + sl, _, _ := storePool.GetStoreListFromIDs(replDescsToStoreIDs(existingReplicas), storepool.StoreFilterSuspect) + avgIOOverload := sl.CandidateIOOverloadScores.Mean + + for _, replDesc := range existingReplicas { + store, ok := sl.FindStoreByID(replDesc.StoreID) + // If the replica is the current leaseholder, don't include it as a + // candidate and if it is filtered out of the store list due to being + // suspect; or the leaseholder store doesn't pass the leaseholder IO + // overload check. + // + // Note that the leaseholder store IO overload check is less strict than + // the transfer target check below. We don't want to shed leases at the + // same point a candidate becomes ineligible as it could lead to thrashing. + // Instead, we create a buffer between the two to avoid leases moving back + // and forth. + if (replDesc.StoreID == leaseStoreID) && + (!ok || !ioOverloadOptions.existingLeaseCheck(ctx, store, avgIOOverload)) { + continue + } + + // If the replica is not the leaseholder, don't include it as a candidate + // if it is filtered out similar to above, or the replica store doesn't + // pass the lease transfer IO overload check. + if replDesc.StoreID != leaseStoreID && + (!ok || !ioOverloadOptions.transferLeaseToCheck(ctx, store, avgIOOverload)) { + continue + } + + candidates = append(candidates, replDesc) + } + return candidates } +// leaseholderShouldMoveDueToIOOverload returns true if the current leaseholder +// store is IO overloaded and there are other viable leaseholder stores. +func (a *Allocator) leaseholderShouldMoveDueToIOOverload( + ctx context.Context, + storePool storepool.AllocatorStorePool, + existingReplicas []roachpb.ReplicaDescriptor, + leaseStoreID roachpb.StoreID, + ioOverloadOptions IOOverloadOptions, +) bool { + sl, _, _ := storePool.GetStoreListFromIDs(replDescsToStoreIDs(existingReplicas), storepool.StoreFilterSuspect) + avgIOOverload := sl.CandidateIOOverloadScores.Mean + + // Check the existing replicas for the leaseholder, if it doesn't meet the + // check return that the lease should be moved due to IO overload on the + // current leaseholder store. If the leaseholder is suspect or doesn't have a + // store descriptor ready, then we ignore it below and don't consider it IO + // overloaded. + for _, replDesc := range existingReplicas { + if store, ok := sl.FindStoreByID(replDesc.StoreID); ok && replDesc.StoreID == leaseStoreID { + return !ioOverloadOptions.existingLeaseCheck(ctx, store, avgIOOverload) + } + } + + return false +} + // leaseholderShouldMoveDueToPreferences returns true if the current leaseholder // is in violation of lease preferences _that can otherwise be satisfied_ by // some existing replica. @@ -2084,17 +2172,16 @@ func (a *Allocator) leaseholderShouldMoveDueToPreferences( return true } -// StoreHealthOptions returns the store health options, currently only -// considering the threshold for io overload. This threshold is not -// considered in allocation or rebalancing decisions (excluding candidate -// stores as targets) when enforcementLevel is set to storeHealthNoAction or -// storeHealthLogOnly. By default storeHealthBlockRebalanceTo is the action taken. When -// there is a mixed version cluster, storeHealthNoAction is set instead. 
-func (a *Allocator) StoreHealthOptions(_ context.Context) StoreHealthOptions { - enforcementLevel := IOOverloadEnforcementLevel(IOOverloadThresholdEnforcement.Get(&a.st.SV)) - return StoreHealthOptions{ - EnforcementLevel: enforcementLevel, - IOOverloadThreshold: IOOverloadThreshold.Get(&a.st.SV), +// IOOverloadOptions returns the store IO overload options. It is used to +// filter and score candidates based on their level of IO overload and +// enforcement level. +func (a *Allocator) IOOverloadOptions() IOOverloadOptions { + return IOOverloadOptions{ + ReplicaEnforcementLevel: IOOverloadEnforcementLevel(ReplicaIOOverloadThresholdEnforcement.Get(&a.st.SV)), + LeaseEnforcementLevel: IOOverloadEnforcementLevel(LeaseIOOverloadThresholdEnforcement.Get(&a.st.SV)), + ReplicaIOOverloadThreshold: ReplicaIOOverloadThreshold.Get(&a.st.SV), + LeaseIOOverloadThreshold: LeaseIOOverloadThreshold.Get(&a.st.SV), + LeaseIOOverloadShedThreshold: LeaseIOOverloadShedThreshold.Get(&a.st.SV), } } @@ -2129,10 +2216,11 @@ func (a *Allocator) TransferLeaseTarget( opts allocator.TransferLeaseOptions, ) roachpb.ReplicaDescriptor { excludeLeaseRepl := opts.ExcludeLeaseRepl - if a.leaseholderShouldMoveDueToPreferences(ctx, storePool, conf, leaseRepl, existing) { + if a.leaseholderShouldMoveDueToPreferences(ctx, storePool, conf, leaseRepl, existing) || + a.leaseholderShouldMoveDueToIOOverload(ctx, storePool, existing, leaseRepl.StoreID(), a.IOOverloadOptions()) { // Explicitly exclude the current leaseholder from the result set if it is // in violation of lease preferences that can be satisfied by some other - // replica. + // replica or is IO overloaded. excludeLeaseRepl = true } @@ -2148,9 +2236,10 @@ func (a *Allocator) TransferLeaseTarget( return roachpb.ReplicaDescriptor{} } - existing = a.ValidLeaseTargets(ctx, storePool, conf, existing, leaseRepl, opts) + validTargets := a.ValidLeaseTargets(ctx, storePool, conf, existing, leaseRepl, opts) + // Short-circuit if there are no valid targets out there. - if len(existing) == 0 || (len(existing) == 1 && existing[0].StoreID == leaseRepl.StoreID()) { + if len(validTargets) == 0 || (len(validTargets) == 1 && validTargets[0].StoreID == leaseRepl.StoreID()) { log.KvDistribution.VEventf(ctx, 2, "no lease transfer target found for r%d", leaseRepl.GetRangeID()) return roachpb.ReplicaDescriptor{} } @@ -2166,7 +2255,7 @@ func (a *Allocator) TransferLeaseTarget( // falls back to `leaseCountConvergence`. Rationalize this or refactor this // logic to be more clear. 
transferDec, repl := a.shouldTransferLeaseForAccessLocality( - ctx, storePool, source, existing, usageInfo, nil, candidateLeasesMean, + ctx, storePool, source, validTargets, usageInfo, nil, candidateLeasesMean, ) if !excludeLeaseRepl { switch transferDec { @@ -2176,7 +2265,7 @@ func (a *Allocator) TransferLeaseTarget( } fallthrough case decideWithoutStats: - if !a.shouldTransferLeaseForLeaseCountConvergence(ctx, storePool, sl, source, existing) { + if !a.shouldTransferLeaseForLeaseCountConvergence(ctx, storePool, sl, source, validTargets) { return roachpb.ReplicaDescriptor{} } case shouldTransfer: @@ -2202,13 +2291,13 @@ func (a *Allocator) TransferLeaseTarget( if !opts.CheckCandidateFullness { a.randGen.Lock() defer a.randGen.Unlock() - return existing[a.randGen.Intn(len(existing))] + return validTargets[a.randGen.Intn(len(validTargets))] } var bestOption roachpb.ReplicaDescriptor - candidates := make([]roachpb.ReplicaDescriptor, 0, len(existing)) + candidates := make([]roachpb.ReplicaDescriptor, 0, len(validTargets)) bestOptionLeaseCount := int32(math.MaxInt32) - for _, repl := range existing { + for _, repl := range validTargets { if leaseRepl.StoreID() == repl.StoreID { continue } @@ -2225,7 +2314,7 @@ func (a *Allocator) TransferLeaseTarget( } } if len(candidates) == 0 { - // If there were no existing replicas on stores with less-than-mean + // If there were no validTargets replicas on stores with less-than-mean // leases, and we _must_ move the lease away (indicated by // `opts.excludeLeaseRepl`), just return the best possible option. if excludeLeaseRepl { @@ -2239,8 +2328,8 @@ func (a *Allocator) TransferLeaseTarget( case allocator.LoadConvergence: leaseReplLoad := usageInfo.TransferImpact() - candidates := make([]roachpb.StoreID, 0, len(existing)-1) - for _, repl := range existing { + candidates := make([]roachpb.StoreID, 0, len(validTargets)-1) + for _, repl := range validTargets { if repl.StoreID != leaseRepl.StoreID() { candidates = append(candidates, repl.StoreID) } @@ -2262,7 +2351,7 @@ func (a *Allocator) TransferLeaseTarget( candidates, storeDescMap, &LoadScorerOptions{ - StoreHealthOptions: a.StoreHealthOptions(ctx), + IOOverloadOptions: a.IOOverloadOptions(), Deterministic: a.deterministic, LoadDims: opts.LoadDimensions, LoadThreshold: LoadThresholds(&a.st.SV, opts.LoadDimensions...), @@ -2316,7 +2405,7 @@ func (a *Allocator) TransferLeaseTarget( log.KvDistribution.Fatalf(ctx, "unknown declineReason: %v", noRebalanceReason) } - for _, repl := range existing { + for _, repl := range validTargets { if repl.StoreID == bestStore { return repl } @@ -2861,3 +2950,11 @@ func maxReplicaID(replicas []roachpb.ReplicaDescriptor) roachpb.ReplicaID { } return max } + +func replDescsToStoreIDs(descs []roachpb.ReplicaDescriptor) []roachpb.StoreID { + ret := make([]roachpb.StoreID, len(descs)) + for i, desc := range descs { + ret[i] = desc.StoreID + } + return ret +} diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go index f37531dbcdec..090bf2ae3424 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go +++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go @@ -71,13 +71,24 @@ const ( // away from the mean. minRangeRebalanceThreshold = 2 - // DefaultIOOverloadThreshold is used to avoid allocating to stores with an + // DefaultReplicaIOOverloadThreshold is used to avoid allocating to stores with an // IO overload score greater than what's set. 
This is typically used in
 	// conjunction with IOOverloadMeanThreshold below.
-	DefaultIOOverloadThreshold = 0.8
+	DefaultReplicaIOOverloadThreshold = 0.8
+
+	// DefaultLeaseIOOverloadThreshold is used to shed leases from stores with an
+	// IO overload score greater than this threshold. This is typically used in
+	// conjunction with IOOverloadMeanThreshold below.
+	DefaultLeaseIOOverloadThreshold = 0.5
+
+	// DefaultLeaseIOOverloadShedThreshold is used to shed leases from stores
+	// with an IO overload score greater than this threshold. This is
+	// typically used in conjunction with IOOverloadMeanThreshold below.
+	DefaultLeaseIOOverloadShedThreshold = 0.9

 	// IOOverloadMeanThreshold is the percentage above the mean after which a
-	// store could be conisdered unhealthy if also exceeding the threshold.
+	// store could be considered IO overloaded if also exceeding the absolute IO
+	// threshold.
 	IOOverloadMeanThreshold = 1.1

 	// L0SublevelTrackerRetention is the tracking period for statistics on the
@@ -86,29 +97,27 @@
 	L0SublevelTrackerRetention = time.Minute * 10
 )

-// IOOverloadEnforcementLevel represents the level of action that may be taken or
-// excluded when a candidate disk is considered unhealthy.
+// IOOverloadEnforcementLevel represents a level of action that may be taken or
+// excluded when a store's disk is considered IO overloaded.
 type IOOverloadEnforcementLevel int64

 const (
-	// IOOverloadThresholdNoAction wil not exclude stores from being considered
-	// as targets for any action regardless of the store IO overload.
-	IOOverloadThresholdNoAction IOOverloadEnforcementLevel = iota
-	// IOOverloadThresholdLogOnly will not exclude stores from being considered
-	// as targets for any action regarldess of the store IO overload. When a
-	// store exceeds IOOverloadThreshold, an event is logged.
-	IOOverloadThresholdLogOnly
-	// IOOverloadThresholdBlockRebalanceTo excludes stores from being being
-	// considered as targets for rebalance actions if they exceed (a)
-	// kv.allocator.io_overload_threshold and (b) the mean IO overload among
-	// possible candidates. This does not affect upreplication.
-	IOOverloadThresholdBlockRebalanceTo
-	// IOOverloadThresholdBlockAll excludes stores from being considered as a
-	// target for allocation and rebalancing actions if they exceed (a)
-	// kv.allocator.io_overload_threshold and (b) the mean IO overload among
-	// possible candidates. In other words, the store will receive no new
-	// replicas.
+	// IOOverloadThresholdIgnore will not exclude stores for any action regardless
+	// of the store IO overload.
+	IOOverloadThresholdIgnore IOOverloadEnforcementLevel = iota
+	// IOOverloadThresholdBlockTransfers excludes stores for rebalance or lease
+	// transfer actions if they are IO overloaded.
+	IOOverloadThresholdBlockTransfers
+	// IOOverloadThresholdBlockAll excludes stores for allocation, rebalancing
+	// and lease transfer actions if they are IO overloaded. In other words, the
+	// store will receive no new replicas or leases.
 	IOOverloadThresholdBlockAll
+	// IOOverloadThresholdShed has the same behavior as
+	// IOOverloadThresholdBlockAll; in addition, existing leases are shed from
+	// the store. This is currently only used for lease transfers. The
+	// leaseholder store WILL BE excluded as a candidate for its current range
+	// leases, i.e. the lease will always transfer to a valid,
+	// non-IO-overloaded store if one exists.
+	IOOverloadThresholdShed
 )

 // RangeRebalanceThreshold is the minimum ratio of a store's range count to
@@ -127,30 +136,31 @@ var RangeRebalanceThreshold = func() *settings.FloatSetting {
 	return s
 }()

-// IOOverloadThreshold is the maximum IO overload score of a candidate store
-// before being considered unhealthy. Once considered unhealthy, the action
-// taken will be dictated by IOOverloadThresholdEnforcement cluster setting
-// defined below.
-var IOOverloadThreshold = settings.RegisterFloatSetting(
+// ReplicaIOOverloadThreshold is the maximum IO overload score of a candidate
+// store before being excluded as a candidate for rebalancing replicas or
+// allocation. This is only acted upon if ReplicaIOOverloadThresholdEnforcement
+// is set to `block_all` or `block_rebalance_to`.
+var ReplicaIOOverloadThreshold = settings.RegisterFloatSetting(
 	settings.SystemOnly,
-	"kv.allocator.io_overload_threshold",
+	"kv.allocator.replica_io_overload_threshold",
 	"the maximum store io overload before the enforcement defined by "+
 		"`kv.allocator.io_overload_threshold_enforce` is taken on a store "+
 		"for allocation decisions",
-	DefaultIOOverloadThreshold,
+	DefaultReplicaIOOverloadThreshold,
 )

-// IOOverloadThresholdEnforcement defines the level of enforcement when a candidate
+// ReplicaIOOverloadThresholdEnforcement defines the level of enforcement when a candidate
 // stores' IO overload exceeds the threshold defined in IOOverloadThresold. No
 // action is taken when block_none and block_none_log are set. Rebalancing
 // towards the candidate store is blocked when block_rebalance_to is set.
 // Allocating and rebalancing towards the candidate store is blocked when
 // block_all is set.
-// NB: No matter the value of this setting, IOOverload will never cause
-// rebalancing away from a store (shedding), only block the store from being a target.
-var IOOverloadThresholdEnforcement = settings.RegisterEnumSetting(
+// NB: No matter the value of this setting, IO overload will never cause
+// rebalancing away from a store (shedding), only block the store from
+// receiving new replicas.
+var ReplicaIOOverloadThresholdEnforcement = settings.RegisterEnumSetting(
 	settings.SystemOnly,
-	"kv.allocator.io_overload_threshold_enforcement",
+	"kv.allocator.replica_io_overload_threshold_enforcement",
 	"the level of enforcement when a candidate store has an io overload score "+
 		"exceeding `kv.allocator.io_overload_threshold` and above the "+
 		"average of comparable allocation candidates:`block_none` will exclude "+
@@ -160,10 +170,56 @@ var IOOverloadThresholdEnforcement = settings.RegisterEnumSetting(
 		"from being targets of both allocation and rebalancing",
 	"block_rebalance_to",
 	map[int64]string{
-		int64(IOOverloadThresholdNoAction):         "block_none",
-		int64(IOOverloadThresholdLogOnly):          "block_none_log",
-		int64(IOOverloadThresholdBlockRebalanceTo): "block_rebalance_to",
-		int64(IOOverloadThresholdBlockAll):         "block_all",
+		int64(IOOverloadThresholdIgnore):          "ignore",
+		int64(IOOverloadThresholdBlockTransfers):  "block_rebalance_to",
+		int64(IOOverloadThresholdBlockAll):        "block_all",
 	},
 )
+
+// LeaseIOOverloadThreshold is the maximum IO overload score a store may have
+// before being excluded as a candidate for lease transfers. This threshold is
+// only acted upon if LeaseIOOverloadThresholdEnforcement is set to `shed` or
+// `block_transfer_to`.
+var LeaseIOOverloadThreshold = settings.RegisterFloatSetting(
+	settings.SystemOnly,
+	"kv.allocator.lease_io_overload_threshold",
+	"a store will not receive new leases when its IO overload score is above this "+
+		"value and `kv.allocator.io_overload_threshold_enforcement_leases` is "+
+		"`shed` or `block_transfer_to`",
+	DefaultLeaseIOOverloadThreshold,
+)
+
+// LeaseIOOverloadShedThreshold is the maximum IO overload score the current
+// leaseholder store for a range may have before shedding its leases and no
+// longer receiving new leases. This threshold is acted upon only if
+// LeaseIOOverloadThresholdEnforcement is set to 'shed'.
+var LeaseIOOverloadShedThreshold = settings.RegisterFloatSetting(
+	settings.SystemOnly,
+	"kv.allocator.lease_shed_io_overload_threshold",
+	"a store will shed its leases and receive no new leases when its "+
+		"IO overload score is above this value and "+
+		"`kv.allocator.io_overload_threshold_enforcement_leases` is `shed`",
+	DefaultLeaseIOOverloadShedThreshold,
+)
+
+// LeaseIOOverloadThresholdEnforcement defines the level of enforcement for
+// lease transfers when a candidate stores' IO overload exceeds the threshold
+// defined in LeaseIOOverloadThreshold, and additionally
+// LeaseIOOverloadShedThreshold when shed is set.
+var LeaseIOOverloadThresholdEnforcement = settings.RegisterEnumSetting(
+	settings.SystemOnly,
+	"kv.allocator.lease_io_overload_threshold_enforcement",
+	"the level of enforcement on lease transfers when a candidate store has an "+
+		"io overload score exceeding `kv.allocator.io_overload_threshold_lease` and above the "+
+		"average of comparable allocation candidates:`ignore` disables enforcement, "+
+		"`block_transfer_to` a store will receive no new leases but won't lose existing leases,"+
+		"`shed`: a store will receive no new leases and shed existing leases to "+
+		"non io-overloaded stores, this is a superset of block_transfer_to",
+	"block_transfer_to",
+	map[int64]string{
+		int64(IOOverloadThresholdIgnore):          "ignore",
+		int64(IOOverloadThresholdBlockTransfers):  "block_transfer_to",
+		int64(IOOverloadThresholdShed):            "shed",
 	},
 )
@@ -219,9 +275,9 @@ type ScorerOptions interface {
 	// with the same QPS) that would converge the range's existing stores' QPS the
 	// most.
 	removalMaximallyConvergesScore(removalCandStoreList storepool.StoreList, existing roachpb.StoreDescriptor) int
-	// getStoreHealthOptions returns the scorer options for store health. It is
-	// used to inform scoring based on the health of a store.
-	getStoreHealthOptions() StoreHealthOptions
+	// getIOOverloadOptions returns the scorer options for store IO overload. It
+	// is used to inform scoring based on the IO overload of a store.
+	getIOOverloadOptions() IOOverloadOptions
 }

 func jittered(val float64, jitter float64, rand allocatorRand) float64 {
@@ -246,8 +302,8 @@ type ScatterScorerOptions struct {

 var _ ScorerOptions = &ScatterScorerOptions{}

-func (o *ScatterScorerOptions) getStoreHealthOptions() StoreHealthOptions {
-	return o.RangeCountScorerOptions.StoreHealthOptions
+func (o *ScatterScorerOptions) getIOOverloadOptions() IOOverloadOptions {
+	return o.RangeCountScorerOptions.IOOverloadOptions
 }

 func (o *ScatterScorerOptions) maybeJitterStoreStats(
@@ -269,15 +325,15 @@ func (o *ScatterScorerOptions) maybeJitterStoreStats(
 // This means that the resulting rebalancing decisions will further the goal of
 // converging range counts across stores in the cluster.
type RangeCountScorerOptions struct { - StoreHealthOptions + IOOverloadOptions deterministic bool rangeRebalanceThreshold float64 } var _ ScorerOptions = &RangeCountScorerOptions{} -func (o *RangeCountScorerOptions) getStoreHealthOptions() StoreHealthOptions { - return o.StoreHealthOptions +func (o *RangeCountScorerOptions) getIOOverloadOptions() IOOverloadOptions { + return o.IOOverloadOptions } func (o *RangeCountScorerOptions) maybeJitterStoreStats( @@ -390,9 +446,9 @@ func (o *RangeCountScorerOptions) removalMaximallyConvergesScore( // queries-per-second. This means that the resulting rebalancing decisions will // further the goal of converging QPS across stores in the cluster. type LoadScorerOptions struct { - StoreHealthOptions StoreHealthOptions - Deterministic bool - LoadDims []load.Dimension + IOOverloadOptions IOOverloadOptions + Deterministic bool + LoadDims []load.Dimension // LoadThreshold and MinLoadThreshold track the threshold beyond which a // store should be considered under/overfull and the minimum absolute @@ -426,8 +482,8 @@ type LoadScorerOptions struct { RebalanceImpact load.Load } -func (o *LoadScorerOptions) getStoreHealthOptions() StoreHealthOptions { - return o.StoreHealthOptions +func (o *LoadScorerOptions) getIOOverloadOptions() IOOverloadOptions { + return o.IOOverloadOptions } func (o *LoadScorerOptions) maybeJitterStoreStats( @@ -975,7 +1031,7 @@ func rankedCandidateListForAllocation( continue } - if !allocator.MaxCapacityCheck(s) || !options.getStoreHealthOptions().storeIsHealthy( + if !allocator.MaxCapacityCheck(s) || !options.getIOOverloadOptions().allocateReplicaToCheck( ctx, s, candidateStores.CandidateIOOverloadScores.Mean, @@ -1561,7 +1617,7 @@ func rankedCandidateListForRebalancing( candIOOverloadScore, _ := s.Capacity.IOThreshold.Score() cand.fullDisk = !rebalanceToMaxCapacityCheck(s) cand.ioOverloadScore = candIOOverloadScore - cand.ioOverloaded = !options.getStoreHealthOptions().rebalanceToStoreIsHealthy( + cand.ioOverloaded = !options.getIOOverloadOptions().rebalanceReplicaToCheck( ctx, s, // We only wish to compare the IO overload to the @@ -2174,75 +2230,119 @@ func convergesOnMean(oldVal, newVal, mean float64) bool { return math.Abs(newVal-mean) < math.Abs(oldVal-mean) } -// StoreHealthOptions is the scorer options for store health. It is -// used to inform scoring based on the health of a store. -type StoreHealthOptions struct { - EnforcementLevel IOOverloadEnforcementLevel - IOOverloadThreshold float64 +// IOOverloadOptions is the scorer options for store IO overload. It is used to +// inform scoring based on a store's IO overload score. +type IOOverloadOptions struct { + ReplicaEnforcementLevel IOOverloadEnforcementLevel + LeaseEnforcementLevel IOOverloadEnforcementLevel + + ReplicaIOOverloadThreshold float64 + LeaseIOOverloadThreshold float64 + LeaseIOOverloadShedThreshold float64 } -// storeIsHealthy returns true if the store IO overload does not exceed -// the cluster threshold and mean, or the enforcement level does not include -// excluding candidates from being allocation targets. 
-func (o StoreHealthOptions) storeIsHealthy( - ctx context.Context, store roachpb.StoreDescriptor, avg float64, -) bool { - ioOverloadScore, _ := store.Capacity.IOThreshold.Score() - if o.EnforcementLevel == IOOverloadThresholdNoAction || - ioOverloadScore < o.IOOverloadThreshold { - return true +func ioOverloadCheck( + score, avg, absThreshold, meanThreshold float64, + enforcement IOOverloadEnforcementLevel, + disallowed ...IOOverloadEnforcementLevel, +) (ok bool, reason string) { + absCheck := score < absThreshold + meanCheck := score < avg*meanThreshold + + // The score needs to be no less than both the average threshold and the + // absolute threshold in order to be considered IO overloaded. + if absCheck || meanCheck { + return true, "" } - // Still log an event when the IO overload score exceeds the threshold, however - // does not exceed the cluster average. This is enabled to avoid confusion - // where candidate stores are still targets, despite exeeding the - // threshold. - if ioOverloadScore < avg*IOOverloadMeanThreshold { - log.KvDistribution.VEventf(ctx, 5, "s%d, allocate check io overload %.2f exceeds threshold %.2f, but below average: %.2f, action enabled %d", - store.StoreID, ioOverloadScore, - o.IOOverloadThreshold, avg, o.EnforcementLevel) - return true + for _, disallowedEnforcement := range disallowed { + if enforcement == disallowedEnforcement { + return false, fmt.Sprintf( + "io overload %.2f exceeds threshold %.2f, above average: %.2f, enforcement %d", + score, absThreshold, avg, enforcement) + } } - log.KvDistribution.VEventf(ctx, 5, "s%d, allocate check io overload %.2f exceeds threshold %.2f, above average: %.2f, action enabled %d", - store.StoreID, ioOverloadScore, - o.IOOverloadThreshold, avg, o.EnforcementLevel) + return true, "" +} - // The store is only considered unhealthy when the enforcement level is - // storeHealthBlockAll. - return o.EnforcementLevel < IOOverloadThresholdBlockAll +// allocateReplicaToCheck returns true if the store IO overload does not exceed +// the cluster threshold and mean, or the enforcement level does not prevent +// replica allocation to IO overloaded stores. +func (o IOOverloadOptions) allocateReplicaToCheck( + ctx context.Context, store roachpb.StoreDescriptor, avg float64, +) bool { + score, _ := store.Capacity.IOThreshold.Score() + + if ok, reason := ioOverloadCheck(score, avg, + o.ReplicaIOOverloadThreshold, IOOverloadMeanThreshold, + o.ReplicaEnforcementLevel, + IOOverloadThresholdBlockAll, + ); !ok { + log.KvDistribution.VEventf(ctx, 3, "s%d: %s", store.StoreID, reason) + return false + } + + return true } -// rebalanceToStoreIsHealthy returns true if the store IO overload does not +// rebalanceReplicaToCheck returns true if the store IO overload does not // exceed the cluster threshold and mean, or the enforcement level does not -// include excluding candidates from being rebalancing targets. -func (o StoreHealthOptions) rebalanceToStoreIsHealthy( +// prevent replica rebalancing to IO overloaded stores. 
+func (o IOOverloadOptions) rebalanceReplicaToCheck(
 	ctx context.Context, store roachpb.StoreDescriptor, avg float64,
 ) bool {
-	ioOverloadScore, _ := store.Capacity.IOThreshold.Score()
-	if o.EnforcementLevel == IOOverloadThresholdNoAction ||
-		ioOverloadScore < o.IOOverloadThreshold {
-		return true
+	score, _ := store.Capacity.IOThreshold.Score()
+
+	if ok, reason := ioOverloadCheck(score, avg,
+		o.ReplicaIOOverloadThreshold, IOOverloadMeanThreshold,
+		o.ReplicaEnforcementLevel,
+		IOOverloadThresholdBlockTransfers, IOOverloadThresholdBlockAll,
+	); !ok {
+		log.KvDistribution.VEventf(ctx, 3, "s%d: %s", store.StoreID, reason)
+		return false
 	}
+	return true
+}

-	if ioOverloadScore < avg*IOOverloadMeanThreshold {
-		log.KvDistribution.VEventf(ctx, 5,
-			"s%d, allocate check io overload %.2f exceeds threshold %.2f, but "+
-				"below average watermark: %.2f, action enabled %d",
-			store.StoreID, ioOverloadScore, o.IOOverloadThreshold,
-			avg*IOOverloadMeanThreshold, o.EnforcementLevel)
-		return true
+// transferLeaseToCheck returns true if the store IO overload does not exceed
+// the cluster threshold and mean, or the enforcement level does not prevent
+// lease transfers to IO overloaded stores.
+func (o IOOverloadOptions) transferLeaseToCheck(
+	ctx context.Context, store roachpb.StoreDescriptor, avg float64,
+) bool {
+	score, _ := store.Capacity.IOThreshold.Score()
+
+	if ok, reason := ioOverloadCheck(score, avg,
+		o.LeaseIOOverloadThreshold, IOOverloadMeanThreshold,
+		o.LeaseEnforcementLevel,
+		IOOverloadThresholdBlockTransfers, IOOverloadThresholdShed,
+	); !ok {
+		log.KvDistribution.VEventf(ctx, 3, "s%d: %s", store.StoreID, reason)
+		return false
 	}
-	log.KvDistribution.VEventf(ctx, 5,
-		"s%d, allocate check io overload %.2f exceeds threshold %.2f, above average "+
-			"watermark: %.2f, action enabled %d",
-		store.StoreID, ioOverloadScore, o.IOOverloadThreshold,
-		avg*IOOverloadMeanThreshold, o.EnforcementLevel)
+	return true
+}
+
+// existingLeaseCheck returns true if the store IO overload does not exceed
+// the cluster threshold and mean, or the enforcement level does not prevent
+// existing stores from holding leases whilst being IO overloaded.
+func (o IOOverloadOptions) existingLeaseCheck(
+	ctx context.Context, store roachpb.StoreDescriptor, avg float64,
+) bool {
+	score, _ := store.Capacity.IOThreshold.Score()
+
+	if ok, reason := ioOverloadCheck(score, avg,
+		o.LeaseIOOverloadShedThreshold, IOOverloadMeanThreshold,
+		o.LeaseEnforcementLevel,
+		IOOverloadThresholdShed,
+	); !ok {
+		log.KvDistribution.VEventf(ctx, 3, "s%d: %s", store.StoreID, reason)
+		return false
+	}
-
-	// The store is only considered unhealthy when the enforcement level is
-	// storeHealthBlockRebalanceTo or storeHealthBlockAll.
- return o.EnforcementLevel < IOOverloadThresholdBlockRebalanceTo + return true } // rebalanceToMaxCapacityCheck returns true if the store has enough room to diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go index 7f2965c4bec5..726f54ed8bbd 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go +++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go @@ -1096,7 +1096,7 @@ func TestShouldRebalanceDiversity(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) - options := &RangeCountScorerOptions{StoreHealthOptions: StoreHealthOptions{EnforcementLevel: IOOverloadThresholdNoAction}} + options := &RangeCountScorerOptions{} newStore := func(id int, locality roachpb.Locality) roachpb.StoreDescriptor { return roachpb.StoreDescriptor{ StoreID: roachpb.StoreID(id), diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go index f5903e40db7c..a7cd812c3dbd 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go +++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go @@ -348,22 +348,22 @@ var oneStoreHighIOOverload = []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold - 5)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold - 5)}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1800, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold - 5)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1800, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold - 5)}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 5)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 5)}, }, { StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 4}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold - 5)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold - 5)}, }, } @@ -371,17 +371,17 @@ var allStoresHighIOOverload = []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 1)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 1)}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 800, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 1)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 
200, RangeCount: 800, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 1)}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 1)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 1)}, }, } @@ -389,17 +389,17 @@ var allStoresHighIOOverloadSkewed = []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 1)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 1)}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 800, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 50)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 800, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 50)}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 55)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 55)}, }, } @@ -407,27 +407,27 @@ var threeStoresHighIOOverloadAscRangeCount = []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 100, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 10)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 100, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 10)}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 400, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 10)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 400, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 10)}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1600, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 10)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1600, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 10)}, }, { StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 4}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 6400, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold - 10)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 6400, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold - 10)}, }, { StoreID: 5, Node: roachpb.NodeDescriptor{NodeID: 5}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 25000, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold - 
10)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 25000, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold - 10)}, }, } @@ -594,7 +594,7 @@ func TestAllocatorNoAvailableDisks(t *testing.T) { } } -func TestAllocatorIOOverloadCheck(t *testing.T) { +func TestAllocatorAllocateVoterIOOverloadCheck(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -614,7 +614,7 @@ func TestAllocatorIOOverloadCheck(t *testing.T) { } tests := []testCase{ { - name: "ignore io overload on allocation when StoreHealthNoAction enforcement", + name: "ignore io overload on allocation when ignore enforcement", stores: allStoresHighIOOverload, conf: emptySpanConfig(), // NB: All stores have high io overload, this should be ignored and @@ -623,10 +623,10 @@ func TestAllocatorIOOverloadCheck(t *testing.T) { // Recovery of a dead node can pick any valid store, not necessarily the // one with the lowest range count. expectedTargetIfDead: roachpb.StoreID(2), - enforcement: IOOverloadThresholdNoAction, + enforcement: IOOverloadThresholdIgnore, }, { - name: "ignore io overload on allocation when storeHealthLogOnly enforcement", + name: "ignore io overload on allocation when block rebalance to enforcement", // NB: All stores have high io overload, this should be ignored and // allocate to the store with the lowest range count. stores: allStoresHighIOOverload, @@ -635,22 +635,10 @@ func TestAllocatorIOOverloadCheck(t *testing.T) { // Recovery of a dead node can pick any valid store, not necessarily the // one with the lowest range count. expectedTargetIfDead: roachpb.StoreID(2), - enforcement: IOOverloadThresholdLogOnly, + enforcement: IOOverloadThresholdBlockTransfers, }, { - name: "ignore io overload on allocation when StoreHealthBlockRebalanceTo enforcement", - // NB: All stores have high io overload, this should be ignored and - // allocate to the store with the lowest range count. - stores: allStoresHighIOOverload, - conf: emptySpanConfig(), - expectedTargetIfAlive: roachpb.StoreID(3), - // Recovery of a dead node can pick any valid store, not necessarily the - // one with the lowest range count. - expectedTargetIfDead: roachpb.StoreID(2), - enforcement: IOOverloadThresholdBlockRebalanceTo, - }, - { - name: "don't allocate to stores when all have high io overload and StoreHealthBlockAll", + name: "don't allocate to stores when all have high io overload and block all enforcement", // NB: All stores have high io overload (limit + 1), none are above the watermark, select the lowest range count. stores: allStoresHighIOOverload, conf: emptySpanConfig(), @@ -661,7 +649,7 @@ func TestAllocatorIOOverloadCheck(t *testing.T) { enforcement: IOOverloadThresholdBlockAll, }, { - name: "allocate to store below the mean when all have high io overload and StoreHealthBlockAll", + name: "allocate to store below the mean when all have high io overload and block all enforcement", // NB: All stores have high io overload, however store 1 is below the watermark mean io overload. 
 			stores: allStoresHighIOOverloadSkewed,
 			conf: emptySpanConfig(),
@@ -670,7 +658,7 @@ func TestAllocatorIOOverloadCheck(t *testing.T) {
 			enforcement: IOOverloadThresholdBlockAll,
 		},
 		{
-			name: "allocate to lowest range count store without high io overload when StoreHealthBlockAll enforcement",
+			name: "allocate to lowest range count store without high io overload when block all enforcement",
 			// NB: Store 1, 2 and 3 have high io overload and are above the watermark, the lowest range count (4)
 			// should be selected.
 			stores: threeStoresHighIOOverloadAscRangeCount,
@@ -691,9 +679,7 @@ func TestAllocatorIOOverloadCheck(t *testing.T) {
 			defer stopper.Stop(ctx)
 			sg := gossiputil.NewStoreGossiper(g)
 			sg.GossipStores(test.stores, t)
-
-			// Enable read disk health checking in candidate exclusion.
-			IOOverloadThresholdEnforcement.Override(ctx, &a.st.SV, int64(test.enforcement))
+			ReplicaIOOverloadThresholdEnforcement.Override(ctx, &a.st.SV, int64(test.enforcement))
 			// Allocate a voter where all replicas are alive (e.g. up-replicating a valid range).
 			add, _, err := a.AllocateVoter(
@@ -1653,7 +1639,7 @@ func TestAllocatorRebalanceByQPS(t *testing.T) {
 			gossiputil.NewStoreGossiper(g).GossipStores(subtest.testStores, t)
 			var rangeUsageInfo allocator.RangeUsageInfo
 			options := TestingQPSLoadScorerOptions(100, 0.2)
-			options.StoreHealthOptions = StoreHealthOptions{EnforcementLevel: IOOverloadThresholdNoAction}
+			options.IOOverloadOptions = IOOverloadOptions{ReplicaEnforcementLevel: IOOverloadThresholdIgnore}
 			add, remove, _, ok := a.RebalanceVoter(
 				ctx,
 				sp,
@@ -1767,7 +1753,7 @@ func TestAllocatorRemoveBasedOnQPS(t *testing.T) {
 			defer stopper.Stop(ctx)
 			gossiputil.NewStoreGossiper(g).GossipStores(subtest.testStores, t)
 			options := TestingQPSLoadScorerOptions(0, 0.1)
-			options.StoreHealthOptions = StoreHealthOptions{EnforcementLevel: IOOverloadThresholdNoAction}
+			options.IOOverloadOptions = IOOverloadOptions{ReplicaEnforcementLevel: IOOverloadThresholdIgnore}
 			remove, _, err := a.RemoveVoter(
 				ctx,
 				sp,
@@ -1975,6 +1961,133 @@ func TestAllocatorTransferLeaseTarget(t *testing.T) {
 	}
 }
+func TestAllocatorTransferLeaseTargetIOOverloadCheck(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+	ctx := context.Background()
+
+	floats := func(nums ...float64) []float64 {
+		return nums
+	}
+
+	// We want the shed threshold to be 0.9 and the overload threshold to be 0.5
+	// i.e. block transfers at >=0.5 and block transfers + shed leases at >=0.9.
+	const shedThreshold = 0.9
+	const threshold = 0.5
+
+	testCases := []struct {
+		name string
+		leaseCounts, IOScores []float64
+		leaseholder roachpb.StoreID
+		excludeLeaseRepl bool
+		expected roachpb.StoreID
+		enforcement IOOverloadEnforcementLevel
+	}{
+		{
+			name: "don't move off of store with high io overload when block enforcement",
+			leaseCounts: floats(100, 100, 100, 100, 100),
+			IOScores: floats(2.5, 1.5, 0.5, 0, 0),
+			leaseholder: 1,
+			expected: 0,
+			enforcement: IOOverloadThresholdBlockTransfers,
+		},
+		{
+			name: "move off of store with high io overload when shed enforcement",
+			leaseCounts: floats(100, 100, 100, 100, 100),
+			IOScores: floats(2.5, 1.5, 0.5, 0, 0),
+			leaseholder: 1,
+			// Store 3 is above the threshold (1.0 > 0.8), but equal to the avg (1.0), so
+			// it is still considered a non-IO-overloaded candidate.
+			expected: 3,
+			enforcement: IOOverloadThresholdShed,
+		},
+		{
+			name: "don't transfer to io overloaded store when block enforcement",
+			leaseCounts: floats(0, 100, 100, 400, 400),
+			IOScores: floats(2.5, 1.5, 0.5, 0, 0),
+			leaseholder: 5,
+			expected: 3,
+			enforcement: IOOverloadThresholdBlockTransfers,
+		},
+		{
+			name: "don't transfer to io overloaded store when shed enforcement",
+			leaseCounts: floats(0, 100, 100, 400, 400),
+			IOScores: floats(2.5, 1.5, 0.5, 0, 0),
+			leaseholder: 5,
+			expected: 3,
+			enforcement: IOOverloadThresholdShed,
+		},
+		{
+			name: "still transfer to io overloaded store when no action enforcement",
+			leaseCounts: floats(0, 100, 100, 400, 400),
+			IOScores: floats(2.5, 1.5, 0.5, 0, 0),
+			leaseholder: 5,
+			expected: 2,
+			enforcement: IOOverloadThresholdIgnore,
+		},
+		{
+			name: "move off of store with high io overload with skewed lease counts shed enforcement",
+			leaseCounts: floats(0, 0, 10000, 10000, 10000),
+			IOScores: floats(2.5, 1.5, 0.5, 0, 0),
+			leaseholder: 1,
+			expected: 3,
+			enforcement: IOOverloadThresholdShed,
+		},
+		{
+			name: "don't move off of store with high io overload but less than shed threshold with shed enforcement",
+			leaseCounts: floats(0, 0, 0, 0, 0),
+			IOScores: floats(0.89, 0, 0, 0, 0),
+			leaseholder: 1,
+			expected: 0,
+			enforcement: IOOverloadThresholdShed,
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			stopper, g, sp, a, _ := CreateTestAllocator(ctx, 10, true /* deterministic */)
+			defer stopper.Stop(ctx)
+			n := len(tc.leaseCounts)
+			stores := make([]*roachpb.StoreDescriptor, n)
+			existing := make([]roachpb.ReplicaDescriptor, 0, n)
+			for i := range tc.leaseCounts {
+				existing = append(existing, replicas(roachpb.StoreID(i+1))...)
+				stores[i] = &roachpb.StoreDescriptor{
+					StoreID: roachpb.StoreID(i + 1),
+					Node: roachpb.NodeDescriptor{NodeID: roachpb.NodeID(i + 1)},
+					Capacity: roachpb.StoreCapacity{
+						LeaseCount: int32(tc.leaseCounts[i]),
+						IOThreshold: TestingIOThresholdWithScore(tc.IOScores[i]),
+					},
+				}
+			}
+
+			sg := gossiputil.NewStoreGossiper(g)
+			sg.GossipStores(stores, t)
+			LeaseIOOverloadThresholdEnforcement.Override(ctx, &a.st.SV, int64(tc.enforcement))
+			LeaseIOOverloadThreshold.Override(ctx, &a.st.SV, threshold)
+			LeaseIOOverloadShedThreshold.Override(ctx, &a.st.SV, shedThreshold)
+
+			target := a.TransferLeaseTarget(
+				ctx,
+				sp,
+				emptySpanConfig(),
+				existing,
+				&mockRepl{
+					replicationFactor: int32(n),
+					storeID: tc.leaseholder,
+				},
+				allocator.RangeUsageInfo{}, /* stats */
+				false, /* forceDecisionWithoutStats */
+				allocator.TransferLeaseOptions{
+					CheckCandidateFullness: true,
+				},
+			)
+			require.Equal(t, tc.expected, target.StoreID)
+		})
+	}
+
+}
+
 func TestAllocatorTransferLeaseToReplicasNeedingSnapshot(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
@@ -4345,14 +4458,14 @@ func TestAllocatorRebalanceNonVoters(t *testing.T) {
 	}
 }
-// TestAllocatorRebalanceStoreHealthCheck ensures that rebalancing voters:
-// (1) Respects storeHealthEnforcement setting, by ignoring IO overload in
-// rebalancing decisions when disabled or set to log only.
-// (2) Considers IO overload when set to rebalanceOnly or allocate in
+// TestAllocatorRebalanceIOOverloadCheck ensures that rebalancing voters:
+// (1) Respects replica IO overload enforcement setting, by ignoring IO
+// overload in rebalancing decisions when disabled or set to log only.
+// (2) Considers IO overload when set to block_rebalance_to or allocate in
 // conjunction with the mean.
 // (3) Does not attempt to rebalance off of the store when io overload
 // is high, as this setting is only used for filtering candidates.
-func TestAllocatorRebalanceStoreHealthCheck(t *testing.T) {
+func TestAllocatorRebalanceIOOverloadCheck(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	ctx := context.Background()
@@ -4367,7 +4480,7 @@ func TestAllocatorRebalanceStoreHealthCheck(t *testing.T) {
 	}
 	tests := []testCase{
 		{
-			name: "don't move off of nodes with high io overload when StoreHealthBlockRebalanceTo",
+			name: "don't move off of nodes with high io overload when block rebalance to",
 			// NB: Store 1,2, 4 have okay io overload. Store 3 has high io overload.
 			// We expect high io overload to only be considered for
 			// exlcuding targets, not for triggering rebalancing.
@@ -4375,10 +4488,10 @@ func TestAllocatorRebalanceStoreHealthCheck(t *testing.T) {
 			conf: emptySpanConfig(),
 			existingVoters: replicas(3, 1),
 			expectNoAction: true,
-			enforcement: IOOverloadThresholdBlockRebalanceTo,
+			enforcement: IOOverloadThresholdBlockTransfers,
 		},
 		{
-			name: "don't move off of nodes with high io overload when StoreHealthBlockAll",
+			name: "don't move off of nodes with high io overload when block all",
 			// NB: Store 1,2, 4 have okay io overload. Store 3 has high io overload.
 			// We expect high io overload to only be considered for
 			// exlcuding targets, not for triggering rebalancing.
@@ -4389,7 +4502,7 @@ func TestAllocatorRebalanceStoreHealthCheck(t *testing.T) {
 			enforcement: IOOverloadThresholdBlockAll,
 		},
 		{
-			name: "don't take action when enforcement is not StoreHealthNoAction",
+			name: "don't take action when enforcement is ignore",
 			// NB: Store 3 has IOOverload > threshold. Store 2 has 3 x higher
 			// ranges as other stores. Should move to candidate to 4, however
 			// enforcement for rebalancing is not enabled so will pick
@@ -4399,10 +4512,10 @@ func TestAllocatorRebalanceStoreHealthCheck(t *testing.T) {
 			existingVoters: replicas(1, 2),
 			expectedRemoveTargets: []roachpb.StoreID{2},
 			expectedAddTargets: []roachpb.StoreID{3},
-			enforcement: IOOverloadThresholdNoAction,
+			enforcement: IOOverloadThresholdIgnore,
 		},
 		{
-			name: "don't rebalance to nodes with high io overload when StoreHealthBlockRebalanceTo enforcement",
+			name: "don't rebalance to nodes with high io overload when block rebalance to",
 			// NB: Store 3 has IOOverload > threshold. Store 2 has 3 x higher
 			// ranges as other stores. Should move to candidate to 4, which
 			// doesn't have high io overload.
@@ -4411,10 +4524,10 @@ func TestAllocatorRebalanceStoreHealthCheck(t *testing.T) {
 			existingVoters: replicas(1, 2),
 			expectedRemoveTargets: []roachpb.StoreID{2},
 			expectedAddTargets: []roachpb.StoreID{4},
-			enforcement: IOOverloadThresholdBlockRebalanceTo,
+			enforcement: IOOverloadThresholdBlockTransfers,
 		},
 		{
-			name: "don't rebalance to nodes with high io overload when StoreHealthBlockAll enforcement",
+			name: "don't rebalance to nodes with high io overload when block all enforcement",
 			// NB: Store 3 has IOOverload > threshold. Store 2 has 3 x higher
 			// ranges as other stores. Should move to candidate to 4, which
 			// doesn't have high io overload.
@@ -4445,7 +4558,7 @@ func TestAllocatorRebalanceStoreHealthCheck(t *testing.T) {
 			sg.GossipStores(test.stores, t)
 			// Enable read disk health checking in candidate exclusion.
 			options := a.ScorerOptions(ctx)
-			options.StoreHealthOptions = StoreHealthOptions{EnforcementLevel: test.enforcement, IOOverloadThreshold: 1}
+			options.IOOverloadOptions = IOOverloadOptions{ReplicaEnforcementLevel: test.enforcement, ReplicaIOOverloadThreshold: 1}
 			add, remove, _, ok := a.RebalanceVoter(
 				ctx,
 				sp,
@@ -8544,7 +8657,6 @@ func qpsBasedRebalanceFn(
 	jitteredQPS := avgQPS * (1 + alloc.randGen.Float64())
 	opts := TestingQPSLoadScorerOptions(jitteredQPS, 0.2)
-	opts.StoreHealthOptions = StoreHealthOptions{EnforcementLevel: IOOverloadThresholdNoAction}
 	opts.Deterministic = false
 	var rangeUsageInfo allocator.RangeUsageInfo
 	add, remove, details, ok := alloc.RebalanceVoter(
diff --git a/pkg/kv/kvserver/asim/storerebalancer/store_rebalancer.go b/pkg/kv/kvserver/asim/storerebalancer/store_rebalancer.go
index 3c72760c5e24..bbdbe58d4f9a 100644
--- a/pkg/kv/kvserver/asim/storerebalancer/store_rebalancer.go
+++ b/pkg/kv/kvserver/asim/storerebalancer/store_rebalancer.go
@@ -134,7 +134,7 @@ func (s simRebalanceObjectiveProvider) Objective() kvserver.LBRebalancingObjecti
 func (src *storeRebalancerControl) scorerOptions() *allocatorimpl.LoadScorerOptions {
 	return &allocatorimpl.LoadScorerOptions{
-		StoreHealthOptions: allocatorimpl.StoreHealthOptions{},
+		IOOverloadOptions: src.allocator.IOOverloadOptions(),
 		Deterministic: true,
 		LoadDims: []load.Dimension{load.Queries},
 		LoadThreshold: allocatorimpl.MakeQPSOnlyDim(src.settings.LBRebalanceQPSThreshold),
diff --git a/pkg/kv/kvserver/store_rebalancer.go b/pkg/kv/kvserver/store_rebalancer.go
index 14a4fabe11ae..d47feafed82a 100644
--- a/pkg/kv/kvserver/store_rebalancer.go
+++ b/pkg/kv/kvserver/store_rebalancer.go
@@ -284,7 +284,7 @@ func (sr *StoreRebalancer) scorerOptions(
 	ctx context.Context, lbDimension load.Dimension,
 ) *allocatorimpl.LoadScorerOptions {
 	return &allocatorimpl.LoadScorerOptions{
-		StoreHealthOptions: sr.allocator.StoreHealthOptions(ctx),
+		IOOverloadOptions: sr.allocator.IOOverloadOptions(),
 		Deterministic: sr.storePool.IsDeterministic(),
 		LoadDims: []load.Dimension{lbDimension},
 		LoadThreshold: allocatorimpl.LoadThresholds(&sr.st.SV, lbDimension),
diff --git a/pkg/kv/kvserver/store_rebalancer_test.go b/pkg/kv/kvserver/store_rebalancer_test.go
index b8f7ddb07dfe..d3e68d0c14d4 100644
--- a/pkg/kv/kvserver/store_rebalancer_test.go
+++ b/pkg/kv/kvserver/store_rebalancer_test.go
@@ -60,7 +60,7 @@ var (
 			QueriesPerSecond: 3000,
 			CPUPerSecond: 3000 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold - 10),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold - 10),
 		},
 	},
 	{
@@ -80,7 +80,7 @@ var (
 			QueriesPerSecond: 2800,
 			CPUPerSecond: 2800 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold - 5),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold - 5),
 		},
 	},
 	{
@@ -100,7 +100,7 @@ var (
 			QueriesPerSecond: 2600,
 			CPUPerSecond: 2600 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 2),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 2),
 		},
 	},
 	{
@@ -120,7 +120,7 @@ var (
 			QueriesPerSecond: 2400,
 			CPUPerSecond: 2400 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold - 10),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold - 10),
 		},
 	},
 	{
@@ -140,7 +140,7 @@ var (
 			QueriesPerSecond: 2200,
 			CPUPerSecond: 2200 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold - 3),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold - 3),
 		},
 	},
 	{
@@ -160,7 +160,7 @@ var (
 			QueriesPerSecond: 2000,
 			CPUPerSecond: 2000 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 2),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 2),
 		},
 	},
 	{
@@ -180,7 +180,7 @@ var (
 			QueriesPerSecond: 1800,
 			CPUPerSecond: 1800 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold - 10),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold - 10),
 		},
 	},
 	{
@@ -200,7 +200,7 @@ var (
 			QueriesPerSecond: 1600,
 			CPUPerSecond: 1600 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold - 5),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold - 5),
 		},
 	},
 	{
@@ -220,7 +220,7 @@ var (
 			QueriesPerSecond: 1400,
 			CPUPerSecond: 1400 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 3),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 3),
 		},
 	},
 }
@@ -283,7 +283,7 @@ var (
 			QueriesPerSecond: 1500,
 			CPUPerSecond: 1500 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold - 15),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold - 15),
 		},
 	},
 	{
@@ -293,7 +293,7 @@ var (
 			QueriesPerSecond: 1300,
 			CPUPerSecond: 1300 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold - 10),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold - 10),
 		},
 	},
 	{
@@ -303,7 +303,7 @@ var (
 			QueriesPerSecond: 1000,
 			CPUPerSecond: 1000 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold - 5),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold - 5),
 		},
 	},
 	{
@@ -313,7 +313,7 @@ var (
 			QueriesPerSecond: 900,
 			CPUPerSecond: 900 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 20),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 20),
 		},
 	},
 	{
@@ -323,7 +323,7 @@ var (
 			QueriesPerSecond: 500,
 			CPUPerSecond: 500 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 25),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 25),
 		},
 	},
 }
@@ -338,7 +338,7 @@ var (
 			QueriesPerSecond: 1000,
 			CPUPerSecond: 1000 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 100),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 100),
 		},
 	},
 	{
@@ -348,7 +348,7 @@ var (
 			QueriesPerSecond: 1000,
 			CPUPerSecond: 1000 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold - 15),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold - 15),
 		},
 	},
 	{
@@ -358,7 +358,7 @@ var (
 			QueriesPerSecond: 1000,
 			CPUPerSecond: 1000 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 100),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 100),
 		},
 	},
 	{
@@ -368,7 +368,7 @@ var (
 			QueriesPerSecond: 1000,
 			CPUPerSecond: 1000 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold - 15),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold - 15),
 		},
 	},
 	{
@@ -378,7 +378,7 @@ var (
 			QueriesPerSecond: 1000,
 			CPUPerSecond: 1000 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 100),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 100),
 		},
 	},
 }
@@ -393,7 +393,7 @@ var (
 			QueriesPerSecond: 1500,
 			CPUPerSecond: 1500 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 1),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 1),
 		},
 	},
 	{
@@ -403,7 +403,7 @@ var (
 			QueriesPerSecond: 1300,
 			CPUPerSecond: 1300 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 1),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 1),
 		},
 	},
 	{
@@ -413,7 +413,7 @@ var (
 			QueriesPerSecond: 1000,
 			CPUPerSecond: 1000 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 1),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 1),
 		},
 	},
 	{
@@ -423,7 +423,7 @@ var (
 			QueriesPerSecond: 900,
 			CPUPerSecond: 900 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 1),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 1),
 		},
 	},
 	{
@@ -433,7 +433,7 @@ var (
 			QueriesPerSecond: 500,
 			CPUPerSecond: 500 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 1),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 1),
 		},
 	},
 }
@@ -448,7 +448,7 @@ var (
 			QueriesPerSecond: 1500,
 			CPUPerSecond: 1500 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 1),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 1),
 		},
 	},
 	{
@@ -458,7 +458,7 @@ var (
 			QueriesPerSecond: 1300,
 			CPUPerSecond: 1300 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 10),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 10),
 		},
 	},
 	{
@@ -468,7 +468,7 @@ var (
 			QueriesPerSecond: 1000,
 			CPUPerSecond: 1000 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 50),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 50),
 		},
 	},
 	{
@@ -478,7 +478,7 @@ var (
 			QueriesPerSecond: 900,
 			CPUPerSecond: 900 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 100),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 100),
 		},
 	},
 	{
@@ -488,7 +488,7 @@ var (
 			QueriesPerSecond: 500,
 			CPUPerSecond: 500 * float64(time.Millisecond),
 			IOThreshold: allocatorimpl.TestingIOThresholdWithScore(
-				allocatorimpl.DefaultIOOverloadThreshold + 100),
+				allocatorimpl.DefaultReplicaIOOverloadThreshold + 100),
 		},
 	},
 }
@@ -935,7 +935,7 @@ func TestChooseRangeToRebalanceRandom(t *testing.T) {
 		hottestRanges := sr.replicaRankings.TopLoad()
 		options := sr.scorerOptions(ctx, lbRebalanceDimension)
 		rctx := sr.NewRebalanceContext(ctx, options, hottestRanges, sr.RebalanceMode())
-		rctx.options.StoreHealthOptions = allocatorimpl.StoreHealthOptions{EnforcementLevel: allocatorimpl.IOOverloadThresholdNoAction}
+		rctx.options.IOOverloadOptions = allocatorimpl.IOOverloadOptions{ReplicaEnforcementLevel: allocatorimpl.IOOverloadThresholdIgnore}
 		rctx.options.LoadThreshold = allocatorimpl.WithAllDims(rebalanceThreshold)
 		_, voterTargets, nonVoterTargets := sr.chooseRangeToRebalance(ctx, rctx)
@@ -1061,7 +1061,7 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) {
 			name: "rebalance one replica within heavy region",
 			voters: []roachpb.StoreID{1, 6, 9},
 			constraints: oneReplicaPerRegion,
-			expRebalancedVoters: []roachpb.StoreID{9, 6, 2},
+			expRebalancedVoters: []roachpb.StoreID{2, 6, 9},
 		},
 		// A replica is in a heavily loaded region, on a relatively heavily loaded
 		// store. We expect it to be moved to a less busy store within the same
@@ -1083,7 +1083,7 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) {
 			voters: []roachpb.StoreID{1, 2, 9},
 			constraints: twoReplicasInHotRegion,
 			leasePreferences: leasePreferredHotRegion,
-			expRebalancedVoters: []roachpb.StoreID{3, 2, 9},
+			expRebalancedVoters: []roachpb.StoreID{2, 3, 9},
 		},
 		// Two replicas are in the hot region, both on relatively heavily
 		// loaded nodes. We expect one of those replicas to be moved to a
@@ -1094,21 +1094,21 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) {
 			name: "rebalance two replicas out of three within heavy region, prefer lease in heavy region",
 			voters: []roachpb.StoreID{1, 2, 9},
 			constraints: twoReplicasInHotRegion,
-			expRebalancedVoters: []roachpb.StoreID{9, 2, 3},
+			expRebalancedVoters: []roachpb.StoreID{2, 9, 3},
 		},
 		{
 			name: "rebalance two replicas out of five within heavy region",
 			voters: []roachpb.StoreID{1, 2, 6, 8, 9},
 			constraints: twoReplicasInHotRegion,
 			// NB: Because of the diversity heuristic we won't rebalance to node 7.
-			expRebalancedVoters: []roachpb.StoreID{9, 3, 6, 8, 2},
+			expRebalancedVoters: []roachpb.StoreID{8, 3, 6, 9, 2},
 		},
 		{
 			name: "rebalance two replicas out of five within heavy region",
 			voters: []roachpb.StoreID{1, 2, 6, 8, 9},
 			constraints: twoReplicasInHotRegion,
 			// NB: Because of the diversity heuristic we won't rebalance to node 7.
-			expRebalancedVoters: []roachpb.StoreID{9, 3, 6, 8, 2},
+			expRebalancedVoters: []roachpb.StoreID{8, 3, 6, 9, 2},
 		},
 		// In the absence of any constraints, ensure that as long as diversity is
 		// maximized, replicas on hot stores are rebalanced to cooler stores within
@@ -1144,7 +1144,7 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) {
 			voterConstraints: allReplicasInHotRegion,
 			constraints: oneReplicaPerRegion,
-			expRebalancedVoters: []roachpb.StoreID{3, 2, 1},
+			expRebalancedVoters: []roachpb.StoreID{2, 3, 1},
 			// NB: Expect the non-voter on node 4 (hottest node in region B) to
 			// move to node 5 (least hot region in region B), the least hot
 			// node without a high IO overload score.
@@ -1158,8 +1158,10 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) {
 			voterConstraints: twoReplicasInSecondHottestRegion,
 			constraints: oneReplicaPerRegion,
 			// NB: Expect the voter on node 4 (hottest node in region B) to move to
-			// node 6 (least hot region in region B).
-			expRebalancedVoters: []roachpb.StoreID{9, 5, 6, 8, 3},
+			// node 6 (least hot region in region B). Expect the lease to move to the
+			// lowest QPS node among nodes that pass the IO overload transfer check
+			// (n8).
+			expRebalancedVoters: []roachpb.StoreID{8, 5, 6, 9, 3},
 		},
 		{
 			name: "primary region with second highest QPS, region survival, one voter on sub-optimal node, prefer lease hottest region",
@@ -1183,9 +1185,10 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) {
 			constraints: oneReplicaPerRegion,
 			leasePreferences: leasePreferredSecondHotRegion,
 			// NB: Expect the voter on node 4 (hottest node in region B) to move to
-			// node 6 (least hot region in region B). Expect lease to transfer
-			// to least hot store, in the second hottest region (node 6).
-			expRebalancedVoters: []roachpb.StoreID{6, 5, 3, 8, 9},
+			// node 6 (least hot region in region B). Expect lease to transfer to
+			// least hot store, in the second hottest region that passes the lease IO
+			// overload check (node 5).
+			expRebalancedVoters: []roachpb.StoreID{5, 6, 3, 8, 9},
 		},
 		{
 			name: "primary region with highest QPS, region survival, two voters on sub-optimal nodes",
@@ -1199,8 +1202,9 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) {
 			// the least hot region. Additionally, in region B, we've got one replica
 			// on store 4 (which is the hottest store in that region). We expect that
 			// replica to be moved to store 5, which is the least hot node without a
-			// high IO overload score.
-			expRebalancedVoters: []roachpb.StoreID{9, 2, 5, 8, 3},
+			// high IO overload score. Expect the lease to move to s8 as it passes
+			// the IO overload transfer check.
+			expRebalancedVoters: []roachpb.StoreID{8, 2, 5, 9, 3},
 		},
 		{
 			name: "one voter on sub-optimal node in the coldest region",
@@ -1224,6 +1228,7 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) {
 			expRebalancedVoters: []roachpb.StoreID{8, 5, 6},
 		},
 	}
+
 	for _, tc := range testCases {
 		t.Run(tc.name, withQPSCPU(t, objectiveProvider, func(t *testing.T) {
 			// Boilerplate for test setup.
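// ---------------------------------------------------------------------------
// Editor's aside (not part of the patch): the expectations above assume that a
// lease transfer first filters out candidates whose IO overload score is at or
// above the block threshold and then picks the least-loaded survivor. The
// standalone Go sketch below illustrates that two-step check using
// hypothetical names (candidate, pickLeaseTarget); it is not the CockroachDB
// API and only mirrors the idea described in the comments above.
package main

import "fmt"

type candidate struct {
	storeID int
	ioScore float64 // gossiped IO overload score for the store
	qps     float64 // load used to rank the remaining healthy stores
}

// pickLeaseTarget drops candidates whose IO overload score is at or above
// blockThreshold and returns the remaining candidate with the lowest QPS.
// The boolean result is false when every candidate is IO overloaded.
func pickLeaseTarget(cands []candidate, blockThreshold float64) (candidate, bool) {
	var best candidate
	found := false
	for _, c := range cands {
		if c.ioScore >= blockThreshold {
			continue // blocked: never transfer a lease to an overloaded store
		}
		if !found || c.qps < best.qps {
			best, found = c, true
		}
	}
	return best, found
}

func main() {
	// Both s8 and s9 pass the IO overload check, so the lower-QPS store wins;
	// the overloaded store is never chosen.
	cands := []candidate{
		{storeID: 4, ioScore: 2.5, qps: 100},
		{storeID: 8, ioScore: 0.1, qps: 900},
		{storeID: 9, ioScore: 0.1, qps: 1500},
	}
	if target, ok := pickLeaseTarget(cands, 0.5); ok {
		fmt.Println("transfer lease to store", target.storeID) // store 8
	}
}
// ---------------------------------------------------------------------------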
@@ -1271,8 +1276,9 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) {
 			hottestRanges := sr.replicaRankings.TopLoad()
 			options := sr.scorerOptions(ctx, lbRebalanceDimension)
 			rctx := sr.NewRebalanceContext(ctx, options, hottestRanges, LBRebalancingLeasesAndReplicas)
-			rctx.options.StoreHealthOptions = allocatorimpl.StoreHealthOptions{
-				EnforcementLevel: allocatorimpl.IOOverloadThresholdBlockRebalanceTo}
+			rctx.options.IOOverloadOptions = allocatorimpl.IOOverloadOptions{
+				ReplicaEnforcementLevel: allocatorimpl.IOOverloadThresholdBlockTransfers,
+			}
 			rctx.options.LoadThreshold = allocatorimpl.WithAllDims(0.05)
 			_, voterTargets, nonVoterTargets := sr.chooseRangeToRebalance(
@@ -1360,8 +1366,8 @@ func TestChooseRangeToRebalanceIgnoresRangeOnBestStores(t *testing.T) {
 	hottestRanges := sr.replicaRankings.TopLoad()
 	options := sr.scorerOptions(ctx, lbRebalanceDimension)
 	rctx := sr.NewRebalanceContext(ctx, options, hottestRanges, sr.RebalanceMode())
-	rctx.options.StoreHealthOptions = allocatorimpl.StoreHealthOptions{
-		EnforcementLevel: allocatorimpl.IOOverloadThresholdNoAction}
+	rctx.options.IOOverloadOptions = allocatorimpl.IOOverloadOptions{
+		ReplicaEnforcementLevel: allocatorimpl.IOOverloadThresholdIgnore}
 	rctx.options.LoadThreshold = allocatorimpl.WithAllDims(0.05)
 	sr.chooseRangeToRebalance(ctx, rctx)
@@ -1528,8 +1534,8 @@ func TestChooseRangeToRebalanceOffHotNodes(t *testing.T) {
 			hottestRanges := sr.replicaRankings.TopLoad()
 			options := sr.scorerOptions(ctx, lbRebalanceDimension)
 			rctx := sr.NewRebalanceContext(ctx, options, hottestRanges, sr.RebalanceMode())
-			rctx.options.StoreHealthOptions = allocatorimpl.StoreHealthOptions{
-				EnforcementLevel: allocatorimpl.IOOverloadThresholdNoAction}
+			rctx.options.IOOverloadOptions = allocatorimpl.IOOverloadOptions{
+				ReplicaEnforcementLevel: allocatorimpl.IOOverloadThresholdIgnore}
 			rctx.options.LoadThreshold = allocatorimpl.WithAllDims(tc.rebalanceThreshold)
 			_, voterTargets, _ := sr.chooseRangeToRebalance(ctx, rctx)
@@ -1637,8 +1643,8 @@ func TestNoLeaseTransferToBehindReplicas(t *testing.T) {
 	hottestRanges = sr.replicaRankings.TopLoad()
 	options = sr.scorerOptions(ctx, lbRebalanceDimension)
 	rctx = sr.NewRebalanceContext(ctx, options, hottestRanges, sr.RebalanceMode())
-	rctx.options.StoreHealthOptions = allocatorimpl.StoreHealthOptions{
-		EnforcementLevel: allocatorimpl.IOOverloadThresholdNoAction}
+	rctx.options.IOOverloadOptions = allocatorimpl.IOOverloadOptions{
+		ReplicaEnforcementLevel: allocatorimpl.IOOverloadThresholdIgnore}
 	rctx.options.LoadThreshold = allocatorimpl.WithAllDims(0.05)
 	rctx.options.Deterministic = true
@@ -1682,17 +1688,7 @@ func TestStoreRebalancerIOOverloadCheck(t *testing.T) {
 			expectedTargets: []roachpb.ReplicationTarget{
 				{NodeID: 4, StoreID: 4}, {NodeID: 3, StoreID: 3}, {NodeID: 5, StoreID: 5},
 			},
-			enforcement: allocatorimpl.IOOverloadThresholdNoAction,
-		},
-		{
-			name: "ignore io overload on allocation when log only enforcement",
-			// NB: All stores have high io overload, this should be ignored.
-			stores: noLocalityHighReadAmpStores,
-			conf: roachpb.SpanConfig{},
-			expectedTargets: []roachpb.ReplicationTarget{
-				{NodeID: 4, StoreID: 4}, {NodeID: 3, StoreID: 3}, {NodeID: 5, StoreID: 5},
-			},
-			enforcement: allocatorimpl.IOOverloadThresholdLogOnly,
+			enforcement: allocatorimpl.IOOverloadThresholdIgnore,
 		},
 		{
 			name: "don't stop rebalancing when the io overload score uniformly above threshold and block rebalance to enforcement",
@@ -1702,7 +1698,7 @@ func TestStoreRebalancerIOOverloadCheck(t *testing.T) {
 			expectedTargets: []roachpb.ReplicationTarget{
 				{NodeID: 4, StoreID: 4}, {NodeID: 3, StoreID: 3}, {NodeID: 5, StoreID: 5},
 			},
-			enforcement: allocatorimpl.IOOverloadThresholdBlockRebalanceTo,
+			enforcement: allocatorimpl.IOOverloadThresholdBlockTransfers,
 		},
 		{
 			name: "don't stop rebalancing when the io overload score is uniformly above threshold and block rebalance to enforcement",
@@ -1734,7 +1730,7 @@ func TestStoreRebalancerIOOverloadCheck(t *testing.T) {
 			expectedTargets: []roachpb.ReplicationTarget{
 				{NodeID: 2, StoreID: 2}, {NodeID: 3, StoreID: 3}, {NodeID: 5, StoreID: 5},
 			},
-			enforcement: allocatorimpl.IOOverloadThresholdBlockRebalanceTo,
+			enforcement: allocatorimpl.IOOverloadThresholdBlockTransfers,
 		},
 		{
 			name: "rebalance should ignore stores with high IO overload when block rebalance to enforcement",
@@ -1748,7 +1744,7 @@ func TestStoreRebalancerIOOverloadCheck(t *testing.T) {
 			expectedTargets: []roachpb.ReplicationTarget{
 				{NodeID: 2, StoreID: 2}, {NodeID: 3, StoreID: 3}, {NodeID: 5, StoreID: 5},
 			},
-			enforcement: allocatorimpl.IOOverloadThresholdBlockRebalanceTo,
+			enforcement: allocatorimpl.IOOverloadThresholdBlockTransfers,
 		},
 		{
 			name: "rebalance should ignore stores with high IO overload scores when block all enforcement level",
@@ -1778,7 +1774,7 @@ func TestStoreRebalancerIOOverloadCheck(t *testing.T) {
 			stores: noLocalityUniformQPSHighReadAmp,
 			conf: roachpb.SpanConfig{},
 			expectedTargets: nil,
-			enforcement: allocatorimpl.IOOverloadThresholdBlockRebalanceTo,
+			enforcement: allocatorimpl.IOOverloadThresholdBlockTransfers,
 		},
 	}
@@ -1809,8 +1805,8 @@ func TestStoreRebalancerIOOverloadCheck(t *testing.T) {
 		rctx := sr.NewRebalanceContext(ctx, options, hottestRanges, sr.RebalanceMode())
 		require.Greater(t, len(rctx.hottestRanges), 0)
-		rctx.options.StoreHealthOptions = allocatorimpl.StoreHealthOptions{
-			EnforcementLevel: test.enforcement, IOOverloadThreshold: allocatorimpl.DefaultIOOverloadThreshold}
+		rctx.options.IOOverloadOptions = allocatorimpl.IOOverloadOptions{
+			ReplicaEnforcementLevel: test.enforcement, ReplicaIOOverloadThreshold: allocatorimpl.DefaultReplicaIOOverloadThreshold}
 		rctx.options.LoadThreshold = allocatorimpl.WithAllDims(0.05)
 		_, targetVoters, _ := sr.chooseRangeToRebalance(ctx, rctx)