From 3f1263dff53e5ca85b8afc06fd05f2a78e0a005d Mon Sep 17 00:00:00 2001
From: Austen McClernon
Date: Wed, 22 Feb 2023 20:25:40 +0000
Subject: [PATCH] allocator: check io overload on lease transfer

Previously, the allocator would return lease transfer targets without
considering the IO overload of the stores involved. When leases transferred
to IO overloaded stores, service latency tended to degrade.

This commit adds IO overload checks prior to lease transfers. The IO overload
checks are similar to the IO overload checks for allocating replicas in
#97142. The checks work by comparing a candidate store against
`kv.allocator.lease_io_overload_threshold` and the mean of other candidates.
If the candidate store's score is greater than or equal to both of these
values, it is considered IO overloaded. The default value is 0.5.

The current leaseholder has to meet a higher bar to be considered IO
overloaded. It must have an IO overload score greater than or equal to
`kv.allocator.lease_shed_io_overload_threshold`. The default value is 0.9.

`kv.allocator.lease_io_overload_threshold_enforcement` controls the action
taken when a candidate store for a lease transfer is IO overloaded:

- `ignore`: ignore IO overload scores entirely during lease transfers
  (effectively disabling this mechanism);
- `block_transfer_to`: lease transfers only consider stores that aren't IO
  overloaded (existing leases on IO overloaded stores are left as is);
- `shed`: actively shed leases from IO overloaded stores to less IO
  overloaded stores (this is a superset of `block_transfer_to`).

The default is `block_transfer_to`.

This commit also updates the existing replica IO overload checks to be
prefixed with `Replica`, to avoid confusion between lease and replica IO
overload checks.

Resolves: #96508

Release note (ops change): Range leases will no longer be transferred to
stores which are IO overloaded.
---
 .../allocator/allocatorimpl/allocator.go      | 151 +++++++--
 .../allocatorimpl/allocator_scorer.go         | 310 ++++++++++++------
 .../allocatorimpl/allocator_scorer_test.go    |   2 +-
 .../allocator/allocatorimpl/allocator_test.go | 222 +++++++++----
 .../asim/storerebalancer/store_rebalancer.go  |   2 +-
 pkg/kv/kvserver/store_rebalancer.go           |   2 +-
 pkg/kv/kvserver/store_rebalancer_test.go      | 132 ++++----
 7 files changed, 563 insertions(+), 258 deletions(-)

diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go
index 089ecd82fe43..ec288057d766 100644
--- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go
+++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator.go
@@ -1912,7 +1912,7 @@ func (a Allocator) RebalanceNonVoter(
 // machinery to achieve range count convergence.
 func (a *Allocator) ScorerOptions(ctx context.Context) *RangeCountScorerOptions {
 	return &RangeCountScorerOptions{
-		StoreHealthOptions:      a.StoreHealthOptions(ctx),
+		IOOverloadOptions:       a.IOOverloadOptions(),
 		deterministic:           a.deterministic,
 		rangeRebalanceThreshold: RangeRebalanceThreshold.Get(&a.st.SV),
 	}
@@ -1922,7 +1922,7 @@ func (a *Allocator) ScorerOptionsForScatter(ctx context.Context) *ScatterScorerOptions {
 	return &ScatterScorerOptions{
 		RangeCountScorerOptions: RangeCountScorerOptions{
-			StoreHealthOptions:      a.StoreHealthOptions(ctx),
+			IOOverloadOptions:       a.IOOverloadOptions(),
 			deterministic:           a.deterministic,
 			rangeRebalanceThreshold: 0,
 		},
@@ -1946,6 +1946,7 @@ func (a *Allocator) ScorerOptionsForScatter(ctx context.Context) *ScatterScorerO
 // - It excludes replicas that may need snapshots. If replica calling this
 // method is not the Raft leader (meaning that it doesn't know whether follower
 // replicas need a snapshot or not), produces no results.
+// - It excludes replicas that are on stores which are IO overloaded.
 func (a *Allocator) ValidLeaseTargets(
 	ctx context.Context,
 	storePool storepool.AllocatorStorePool,
@@ -2029,9 +2030,96 @@ func (a *Allocator) ValidLeaseTargets(
 		candidates = preferred
 	}
 
+	// Filter the candidate list to only those stores which are not IO
+	// overloaded.
+	nonIOOverloadedPreferred := a.nonIOOverloadedLeaseTargets(
+		ctx,
+		storePool,
+		candidates,
+		leaseRepl.StoreID(),
+		a.IOOverloadOptions(),
+	)
+
+	return nonIOOverloadedPreferred
+}
+
+// nonIOOverloadedLeaseTargets returns a list of non-IO-overloaded lease
+// transfer targets, given the existing replicas, the IO overload options and
+// the IO overload of the existing replica stores.
+func (a *Allocator) nonIOOverloadedLeaseTargets(
+	ctx context.Context,
+	storePool storepool.AllocatorStorePool,
+	existingReplicas []roachpb.ReplicaDescriptor,
+	leaseStoreID roachpb.StoreID,
+	ioOverloadOptions IOOverloadOptions,
+) (candidates []roachpb.ReplicaDescriptor) {
+	// We return early to avoid unnecessary work when IO overload is set to be
+	// ignored anyway.
+	if ioOverloadOptions.LeaseEnforcementLevel == IOOverloadThresholdIgnore {
+		return existingReplicas
+	}
+
+	sl, _, _ := storePool.GetStoreListFromIDs(replDescsToStoreIDs(existingReplicas), storepool.StoreFilterSuspect)
+	avgIOOverload := sl.CandidateIOOverloadScores.Mean
+
+	for _, replDesc := range existingReplicas {
+		store, ok := sl.FindStoreByID(replDesc.StoreID)
+		// If the replica is the current leaseholder, don't include it as a
+		// candidate if it is filtered out of the store list due to being
+		// suspect, or if the leaseholder store doesn't pass the leaseholder IO
+		// overload check.
+		//
+		// Note that the leaseholder store IO overload check is less strict than
+		// the transfer target check below. We don't want to shed leases at the
+		// same point a candidate becomes ineligible as it could lead to thrashing.
+		// Instead, we create a buffer between the two to avoid leases moving back
+		// and forth.
+		if (replDesc.StoreID == leaseStoreID) &&
+			(!ok || !ioOverloadOptions.existingLeaseCheck(ctx, store, avgIOOverload)) {
+			continue
+		}
+
+		// If the replica is not the leaseholder, don't include it as a candidate
+		// if it is similarly filtered out, or if the replica store doesn't pass
+		// the lease transfer IO overload check.
+		if replDesc.StoreID != leaseStoreID &&
+			(!ok || !ioOverloadOptions.transferLeaseToCheck(ctx, store, avgIOOverload)) {
+			continue
+		}
+
+		candidates = append(candidates, replDesc)
+	}
+
 	return candidates
 }
 
+// leaseholderShouldMoveDueToIOOverload returns true if the current leaseholder
+// store is IO overloaded and there are other viable leaseholder stores.
+func (a *Allocator) leaseholderShouldMoveDueToIOOverload(
+	ctx context.Context,
+	storePool storepool.AllocatorStorePool,
+	existingReplicas []roachpb.ReplicaDescriptor,
+	leaseStoreID roachpb.StoreID,
+	ioOverloadOptions IOOverloadOptions,
+) bool {
+	sl, _, _ := storePool.GetStoreListFromIDs(replDescsToStoreIDs(existingReplicas), storepool.StoreFilterSuspect)
+	avgIOOverload := sl.CandidateIOOverloadScores.Mean
+
+	// Check the existing replicas for the leaseholder; if it doesn't meet the
+	// check, return true: the lease should be moved due to IO overload on the
+	// current leaseholder store. If the leaseholder is suspect or doesn't have
+	// a store descriptor ready, then we ignore it below and don't consider it
+	// IO overloaded.
+	for _, replDesc := range existingReplicas {
+		if store, ok := sl.FindStoreByID(replDesc.StoreID); ok && replDesc.StoreID == leaseStoreID {
+			return !ioOverloadOptions.existingLeaseCheck(ctx, store, avgIOOverload)
+		}
+	}
+
+	return false
+}
+
 // leaseholderShouldMoveDueToPreferences returns true if the current leaseholder
 // is in violation of lease preferences _that can otherwise be satisfied_ by
 // some existing replica.
@@ -2084,17 +2172,16 @@ func (a *Allocator) leaseholderShouldMoveDueToPreferences(
 	return true
 }
 
-// StoreHealthOptions returns the store health options, currently only
-// considering the threshold for io overload. This threshold is not
-// considered in allocation or rebalancing decisions (excluding candidate
-// stores as targets) when enforcementLevel is set to storeHealthNoAction or
-// storeHealthLogOnly. By default storeHealthBlockRebalanceTo is the action taken. When
-// there is a mixed version cluster, storeHealthNoAction is set instead.
-func (a *Allocator) StoreHealthOptions(_ context.Context) StoreHealthOptions {
-	enforcementLevel := IOOverloadEnforcementLevel(IOOverloadThresholdEnforcement.Get(&a.st.SV))
-	return StoreHealthOptions{
-		EnforcementLevel:    enforcementLevel,
-		IOOverloadThreshold: IOOverloadThreshold.Get(&a.st.SV),
+// IOOverloadOptions returns the store IO overload options. It is used to
+// filter and score candidates based on their level of IO overload and
+// enforcement level.
+func (a *Allocator) IOOverloadOptions() IOOverloadOptions { + return IOOverloadOptions{ + ReplicaEnforcementLevel: IOOverloadEnforcementLevel(ReplicaIOOverloadThresholdEnforcement.Get(&a.st.SV)), + LeaseEnforcementLevel: IOOverloadEnforcementLevel(LeaseIOOverloadThresholdEnforcement.Get(&a.st.SV)), + ReplicaIOOverloadThreshold: ReplicaIOOverloadThreshold.Get(&a.st.SV), + LeaseIOOverloadThreshold: LeaseIOOverloadThreshold.Get(&a.st.SV), + LeaseIOOverloadShedThreshold: LeaseIOOverloadShedThreshold.Get(&a.st.SV), } } @@ -2129,10 +2216,11 @@ func (a *Allocator) TransferLeaseTarget( opts allocator.TransferLeaseOptions, ) roachpb.ReplicaDescriptor { excludeLeaseRepl := opts.ExcludeLeaseRepl - if a.leaseholderShouldMoveDueToPreferences(ctx, storePool, conf, leaseRepl, existing) { + if a.leaseholderShouldMoveDueToPreferences(ctx, storePool, conf, leaseRepl, existing) || + a.leaseholderShouldMoveDueToIOOverload(ctx, storePool, existing, leaseRepl.StoreID(), a.IOOverloadOptions()) { // Explicitly exclude the current leaseholder from the result set if it is // in violation of lease preferences that can be satisfied by some other - // replica. + // replica or is IO overloaded. excludeLeaseRepl = true } @@ -2148,9 +2236,10 @@ func (a *Allocator) TransferLeaseTarget( return roachpb.ReplicaDescriptor{} } - existing = a.ValidLeaseTargets(ctx, storePool, conf, existing, leaseRepl, opts) + validTargets := a.ValidLeaseTargets(ctx, storePool, conf, existing, leaseRepl, opts) + // Short-circuit if there are no valid targets out there. - if len(existing) == 0 || (len(existing) == 1 && existing[0].StoreID == leaseRepl.StoreID()) { + if len(validTargets) == 0 || (len(validTargets) == 1 && validTargets[0].StoreID == leaseRepl.StoreID()) { log.KvDistribution.VEventf(ctx, 2, "no lease transfer target found for r%d", leaseRepl.GetRangeID()) return roachpb.ReplicaDescriptor{} } @@ -2166,7 +2255,7 @@ func (a *Allocator) TransferLeaseTarget( // falls back to `leaseCountConvergence`. Rationalize this or refactor this // logic to be more clear. 
 	transferDec, repl := a.shouldTransferLeaseForAccessLocality(
-		ctx, storePool, source, existing, usageInfo, nil, candidateLeasesMean,
+		ctx, storePool, source, validTargets, usageInfo, nil, candidateLeasesMean,
 	)
 	if !excludeLeaseRepl {
 		switch transferDec {
@@ -2176,7 +2265,7 @@ func (a *Allocator) TransferLeaseTarget(
 			}
 			fallthrough
 		case decideWithoutStats:
-			if !a.shouldTransferLeaseForLeaseCountConvergence(ctx, storePool, sl, source, existing) {
+			if !a.shouldTransferLeaseForLeaseCountConvergence(ctx, storePool, sl, source, validTargets) {
 				return roachpb.ReplicaDescriptor{}
 			}
 		case shouldTransfer:
@@ -2202,13 +2291,13 @@ func (a *Allocator) TransferLeaseTarget(
 		if !opts.CheckCandidateFullness {
 			a.randGen.Lock()
 			defer a.randGen.Unlock()
-			return existing[a.randGen.Intn(len(existing))]
+			return validTargets[a.randGen.Intn(len(validTargets))]
 		}
 
 		var bestOption roachpb.ReplicaDescriptor
-		candidates := make([]roachpb.ReplicaDescriptor, 0, len(existing))
+		candidates := make([]roachpb.ReplicaDescriptor, 0, len(validTargets))
 		bestOptionLeaseCount := int32(math.MaxInt32)
-		for _, repl := range existing {
+		for _, repl := range validTargets {
 			if leaseRepl.StoreID() == repl.StoreID {
 				continue
 			}
@@ -2225,7 +2314,7 @@ func (a *Allocator) TransferLeaseTarget(
 			}
 		}
 		if len(candidates) == 0 {
-			// If there were no existing replicas on stores with less-than-mean
+			// If there were no valid targets on stores with less-than-mean
 			// leases, and we _must_ move the lease away (indicated by
 			// `opts.excludeLeaseRepl`), just return the best possible option.
 			if excludeLeaseRepl {
@@ -2239,8 +2328,8 @@ func (a *Allocator) TransferLeaseTarget(
 
 	case allocator.LoadConvergence:
 		leaseReplLoad := usageInfo.TransferImpact()
-		candidates := make([]roachpb.StoreID, 0, len(existing)-1)
-		for _, repl := range existing {
+		candidates := make([]roachpb.StoreID, 0, len(validTargets)-1)
+		for _, repl := range validTargets {
 			if repl.StoreID != leaseRepl.StoreID() {
 				candidates = append(candidates, repl.StoreID)
 			}
@@ -2262,7 +2351,7 @@ func (a *Allocator) TransferLeaseTarget(
 			candidates,
 			storeDescMap,
 			&LoadScorerOptions{
-				StoreHealthOptions: a.StoreHealthOptions(ctx),
+				IOOverloadOptions:  a.IOOverloadOptions(),
 				Deterministic:      a.deterministic,
 				LoadDims:           opts.LoadDimensions,
 				LoadThreshold:      LoadThresholds(&a.st.SV, opts.LoadDimensions...),
@@ -2316,7 +2405,7 @@ func (a *Allocator) TransferLeaseTarget(
 			log.KvDistribution.Fatalf(ctx, "unknown declineReason: %v", noRebalanceReason)
 		}
 
-		for _, repl := range existing {
+		for _, repl := range validTargets {
 			if repl.StoreID == bestStore {
 				return repl
 			}
@@ -2861,3 +2950,11 @@ func maxReplicaID(replicas []roachpb.ReplicaDescriptor) roachpb.ReplicaID {
 	}
 	return max
 }
+
+func replDescsToStoreIDs(descs []roachpb.ReplicaDescriptor) []roachpb.StoreID {
+	ret := make([]roachpb.StoreID, len(descs))
+	for i, desc := range descs {
+		ret[i] = desc.StoreID
+	}
+	return ret
+}
diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go
index f37531dbcdec..090bf2ae3424 100644
--- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go
+++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer.go
@@ -71,13 +71,24 @@ const (
 	// away from the mean.
 	minRangeRebalanceThreshold = 2
 
-	// DefaultIOOverloadThreshold is used to avoid allocating to stores with an
+	// DefaultReplicaIOOverloadThreshold is used to avoid allocating to stores with an
 	// IO overload score greater than what's set. This is typically used in
 	// conjunction with IOOverloadMeanThreshold below.
-	DefaultIOOverloadThreshold = 0.8
+	DefaultReplicaIOOverloadThreshold = 0.8
+
+	// DefaultLeaseIOOverloadThreshold is used to shed leases from stores with an
+	// IO overload score greater than this threshold. This is typically used in
+	// conjunction with IOOverloadMeanThreshold below.
+	DefaultLeaseIOOverloadThreshold = 0.5
+
+	// DefaultLeaseIOOverloadShedThreshold is used to shed leases from stores
+	// with an IO overload score greater than this threshold. This is
+	// typically used in conjunction with IOOverloadMeanThreshold below.
+	DefaultLeaseIOOverloadShedThreshold = 0.9
 
 	// IOOverloadMeanThreshold is the percentage above the mean after which a
-	// store could be conisdered unhealthy if also exceeding the threshold.
+	// store could be considered IO overloaded if also exceeding the absolute IO
+	// threshold.
 	IOOverloadMeanThreshold = 1.1
 
 	// L0SublevelTrackerRetention is the tracking period for statistics on the
@@ -86,29 +97,27 @@ const (
 	L0SublevelTrackerRetention = time.Minute * 10
 )
 
-// IOOverloadEnforcementLevel represents the level of action that may be taken or
-// excluded when a candidate disk is considered unhealthy.
+// IOOverloadEnforcementLevel represents a level of action that may be taken or
+// excluded when a store's disk is considered IO overloaded.
 type IOOverloadEnforcementLevel int64
 
 const (
-	// IOOverloadThresholdNoAction wil not exclude stores from being considered
-	// as targets for any action regardless of the store IO overload.
-	IOOverloadThresholdNoAction IOOverloadEnforcementLevel = iota
-	// IOOverloadThresholdLogOnly will not exclude stores from being considered
-	// as targets for any action regarldess of the store IO overload. When a
-	// store exceeds IOOverloadThreshold, an event is logged.
-	IOOverloadThresholdLogOnly
-	// IOOverloadThresholdBlockRebalanceTo excludes stores from being being
-	// considered as targets for rebalance actions if they exceed (a)
-	// kv.allocator.io_overload_threshold and (b) the mean IO overload among
-	// possible candidates. This does not affect upreplication.
-	IOOverloadThresholdBlockRebalanceTo
-	// IOOverloadThresholdBlockAll excludes stores from being considered as a
-	// target for allocation and rebalancing actions if they exceed (a)
-	// kv.allocator.io_overload_threshold and (b) the mean IO overload among
-	// possible candidates. In other words, the store will receive no new
-	// replicas.
+	// IOOverloadThresholdIgnore will not exclude stores for any action regardless
+	// of the store IO overload.
+	IOOverloadThresholdIgnore IOOverloadEnforcementLevel = iota
+	// IOOverloadThresholdBlockTransfers excludes stores for rebalance or lease
+	// transfer actions if they are IO overloaded.
+	IOOverloadThresholdBlockTransfers
+	// IOOverloadThresholdBlockAll excludes stores for allocation, rebalancing
+	// and lease transfer actions if they are IO overloaded. In other words, the
+	// store will receive no new replicas or leases.
 	IOOverloadThresholdBlockAll
+	// IOOverloadThresholdShed has the same behavior as
+	// IOOverloadThresholdBlockAll, however leases will additionally be shed
+	// from IO overloaded stores. This is currently only used for lease
+	// transfers. The leaseholder store WILL BE excluded as a candidate for its
+	// current range leases, i.e., the lease will always transfer to a valid,
+	// non-IO-overloaded store if one exists.
+	IOOverloadThresholdShed
 )
 
 // RangeRebalanceThreshold is the minimum ratio of a store's range count to
@@ -127,30 +136,31 @@ var RangeRebalanceThreshold = func() *settings.FloatSetting {
 	return s
 }()
 
-// IOOverloadThreshold is the maximum IO overload score of a candidate store
-// before being considered unhealthy. Once considered unhealthy, the action
-// taken will be dictated by IOOverloadThresholdEnforcement cluster setting
-// defined below.
-var IOOverloadThreshold = settings.RegisterFloatSetting(
+// ReplicaIOOverloadThreshold is the maximum IO overload score of a candidate
+// store before being excluded as a candidate for rebalancing replicas or
+// allocation. This is only acted upon if ReplicaIOOverloadThresholdEnforcement
+// is set to `block_all` or `block_rebalance_to`.
+var ReplicaIOOverloadThreshold = settings.RegisterFloatSetting(
 	settings.SystemOnly,
-	"kv.allocator.io_overload_threshold",
+	"kv.allocator.replica_io_overload_threshold",
 	"the maximum store io overload before the enforcement defined by "+
 		"`kv.allocator.io_overload_threshold_enforce` is taken on a store "+
 		"for allocation decisions",
-	DefaultIOOverloadThreshold,
+	DefaultReplicaIOOverloadThreshold,
 )
 
-// IOOverloadThresholdEnforcement defines the level of enforcement when a candidate
+// ReplicaIOOverloadThresholdEnforcement defines the level of enforcement when a candidate
 // stores' IO overload exceeds the threshold defined in IOOverloadThresold. No
 // action is taken when block_none and block_none_log are set. Rebalancing
 // towards the candidate store is blocked when block_rebalance_to is set.
 // Allocating and rebalancing towards the candidate store is blocked when
 // block_all is set.
-// NB: No matter the value of this setting, IOOverload will never cause
-// rebalancing away from a store (shedding), only block the store from being a target.
-var IOOverloadThresholdEnforcement = settings.RegisterEnumSetting(
+// NB: No matter the value of this setting, IO overload will never cause
+// rebalancing away from a store (shedding), only block the store from
+// receiving new replicas.
+var ReplicaIOOverloadThresholdEnforcement = settings.RegisterEnumSetting(
 	settings.SystemOnly,
-	"kv.allocator.io_overload_threshold_enforcement",
+	"kv.allocator.replica_io_overload_threshold_enforcement",
 	"the level of enforcement when a candidate store has an io overload score "+
 		"exceeding `kv.allocator.io_overload_threshold` and above the "+
 		"average of comparable allocation candidates:`block_none` will exclude "+
@@ -160,10 +170,56 @@ var IOOverloadThresholdEnforcement = settings.RegisterEnumSetting(
 		"from being targets of both allocation and rebalancing",
 	"block_rebalance_to",
 	map[int64]string{
-		int64(IOOverloadThresholdNoAction):         "block_none",
-		int64(IOOverloadThresholdLogOnly):          "block_none_log",
-		int64(IOOverloadThresholdBlockRebalanceTo): "block_rebalance_to",
-		int64(IOOverloadThresholdBlockAll):         "block_all",
+		int64(IOOverloadThresholdIgnore):         "ignore",
+		int64(IOOverloadThresholdBlockTransfers): "block_rebalance_to",
+		int64(IOOverloadThresholdBlockAll):       "block_all",
 	},
 )
 
+// LeaseIOOverloadThreshold is the maximum IO overload score a store may have
+// before being excluded as a candidate for lease transfers. This threshold is
+// only acted upon if LeaseIOOverloadThresholdEnforcement is set to 'shed' or
+// `block_transfer_to`.
+var LeaseIOOverloadThreshold = settings.RegisterFloatSetting(
+	settings.SystemOnly,
+	"kv.allocator.lease_io_overload_threshold",
+	"a store will not receive new leases when its IO overload score is above this "+
+		"value and `kv.allocator.lease_io_overload_threshold_enforcement` is "+
+		"`shed` or `block_transfer_to`",
+	DefaultLeaseIOOverloadThreshold,
+)
+
+// LeaseIOOverloadShedThreshold is the maximum IO overload score the current
+// leaseholder store for a range may have before shedding its leases and no
+// longer receiving new leases. This threshold is acted upon only if
+// LeaseIOOverloadThresholdEnforcement is set to 'shed'.
+var LeaseIOOverloadShedThreshold = settings.RegisterFloatSetting(
+	settings.SystemOnly,
+	"kv.allocator.lease_shed_io_overload_threshold",
+	"a store will shed its leases and receive no new leases when its "+
+		"IO overload score is above this value and "+
+		"`kv.allocator.lease_io_overload_threshold_enforcement` is `shed`",
+	DefaultLeaseIOOverloadShedThreshold,
+)
+
+// LeaseIOOverloadThresholdEnforcement defines the level of enforcement for
+// lease transfers when a candidate store's IO overload exceeds the threshold
+// defined in LeaseIOOverloadThreshold, and additionally
+// LeaseIOOverloadShedThreshold when shed is set.
+var LeaseIOOverloadThresholdEnforcement = settings.RegisterEnumSetting(
+	settings.SystemOnly,
+	"kv.allocator.lease_io_overload_threshold_enforcement",
+	"the level of enforcement on lease transfers when a candidate store has an "+
+		"io overload score exceeding `kv.allocator.lease_io_overload_threshold` and above the "+
+		"average of comparable allocation candidates: `ignore`: disables enforcement, "+
+		"`block_transfer_to`: a store will receive no new leases but won't lose existing leases, "+
+		"`shed`: a store will receive no new leases and shed existing leases to "+
+		"non io-overloaded stores, this is a superset of block_transfer_to",
+	"block_transfer_to",
+	map[int64]string{
+		int64(IOOverloadThresholdIgnore):         "ignore",
+		int64(IOOverloadThresholdBlockTransfers): "block_transfer_to",
+		int64(IOOverloadThresholdShed):           "shed",
 	},
 )
 
@@ -219,9 +275,9 @@ type ScorerOptions interface {
 	// with the same QPS) that would converge the range's existing stores' QPS the
 	// most.
 	removalMaximallyConvergesScore(removalCandStoreList storepool.StoreList, existing roachpb.StoreDescriptor) int
-	// getStoreHealthOptions returns the scorer options for store health. It is
-	// used to inform scoring based on the health of a store.
-	getStoreHealthOptions() StoreHealthOptions
+	// getIOOverloadOptions returns the scorer options for store IO overload. It
+	// is used to inform scoring based on the IO overload of a store.
+	getIOOverloadOptions() IOOverloadOptions
 }
 
 func jittered(val float64, jitter float64, rand allocatorRand) float64 {
@@ -246,8 +302,8 @@ type ScatterScorerOptions struct {
 
 var _ ScorerOptions = &ScatterScorerOptions{}
 
-func (o *ScatterScorerOptions) getStoreHealthOptions() StoreHealthOptions {
-	return o.RangeCountScorerOptions.StoreHealthOptions
+func (o *ScatterScorerOptions) getIOOverloadOptions() IOOverloadOptions {
+	return o.RangeCountScorerOptions.IOOverloadOptions
 }
 
 func (o *ScatterScorerOptions) maybeJitterStoreStats(
@@ -269,15 +325,15 @@ func (o *ScatterScorerOptions) maybeJitterStoreStats(
 // This means that the resulting rebalancing decisions will further the goal of
 // converging range counts across stores in the cluster.
type RangeCountScorerOptions struct { - StoreHealthOptions + IOOverloadOptions deterministic bool rangeRebalanceThreshold float64 } var _ ScorerOptions = &RangeCountScorerOptions{} -func (o *RangeCountScorerOptions) getStoreHealthOptions() StoreHealthOptions { - return o.StoreHealthOptions +func (o *RangeCountScorerOptions) getIOOverloadOptions() IOOverloadOptions { + return o.IOOverloadOptions } func (o *RangeCountScorerOptions) maybeJitterStoreStats( @@ -390,9 +446,9 @@ func (o *RangeCountScorerOptions) removalMaximallyConvergesScore( // queries-per-second. This means that the resulting rebalancing decisions will // further the goal of converging QPS across stores in the cluster. type LoadScorerOptions struct { - StoreHealthOptions StoreHealthOptions - Deterministic bool - LoadDims []load.Dimension + IOOverloadOptions IOOverloadOptions + Deterministic bool + LoadDims []load.Dimension // LoadThreshold and MinLoadThreshold track the threshold beyond which a // store should be considered under/overfull and the minimum absolute @@ -426,8 +482,8 @@ type LoadScorerOptions struct { RebalanceImpact load.Load } -func (o *LoadScorerOptions) getStoreHealthOptions() StoreHealthOptions { - return o.StoreHealthOptions +func (o *LoadScorerOptions) getIOOverloadOptions() IOOverloadOptions { + return o.IOOverloadOptions } func (o *LoadScorerOptions) maybeJitterStoreStats( @@ -975,7 +1031,7 @@ func rankedCandidateListForAllocation( continue } - if !allocator.MaxCapacityCheck(s) || !options.getStoreHealthOptions().storeIsHealthy( + if !allocator.MaxCapacityCheck(s) || !options.getIOOverloadOptions().allocateReplicaToCheck( ctx, s, candidateStores.CandidateIOOverloadScores.Mean, @@ -1561,7 +1617,7 @@ func rankedCandidateListForRebalancing( candIOOverloadScore, _ := s.Capacity.IOThreshold.Score() cand.fullDisk = !rebalanceToMaxCapacityCheck(s) cand.ioOverloadScore = candIOOverloadScore - cand.ioOverloaded = !options.getStoreHealthOptions().rebalanceToStoreIsHealthy( + cand.ioOverloaded = !options.getIOOverloadOptions().rebalanceReplicaToCheck( ctx, s, // We only wish to compare the IO overload to the @@ -2174,75 +2230,119 @@ func convergesOnMean(oldVal, newVal, mean float64) bool { return math.Abs(newVal-mean) < math.Abs(oldVal-mean) } -// StoreHealthOptions is the scorer options for store health. It is -// used to inform scoring based on the health of a store. -type StoreHealthOptions struct { - EnforcementLevel IOOverloadEnforcementLevel - IOOverloadThreshold float64 +// IOOverloadOptions is the scorer options for store IO overload. It is used to +// inform scoring based on a store's IO overload score. +type IOOverloadOptions struct { + ReplicaEnforcementLevel IOOverloadEnforcementLevel + LeaseEnforcementLevel IOOverloadEnforcementLevel + + ReplicaIOOverloadThreshold float64 + LeaseIOOverloadThreshold float64 + LeaseIOOverloadShedThreshold float64 } -// storeIsHealthy returns true if the store IO overload does not exceed -// the cluster threshold and mean, or the enforcement level does not include -// excluding candidates from being allocation targets. 
-func (o StoreHealthOptions) storeIsHealthy( - ctx context.Context, store roachpb.StoreDescriptor, avg float64, -) bool { - ioOverloadScore, _ := store.Capacity.IOThreshold.Score() - if o.EnforcementLevel == IOOverloadThresholdNoAction || - ioOverloadScore < o.IOOverloadThreshold { - return true +func ioOverloadCheck( + score, avg, absThreshold, meanThreshold float64, + enforcement IOOverloadEnforcementLevel, + disallowed ...IOOverloadEnforcementLevel, +) (ok bool, reason string) { + absCheck := score < absThreshold + meanCheck := score < avg*meanThreshold + + // The score needs to be no less than both the average threshold and the + // absolute threshold in order to be considered IO overloaded. + if absCheck || meanCheck { + return true, "" } - // Still log an event when the IO overload score exceeds the threshold, however - // does not exceed the cluster average. This is enabled to avoid confusion - // where candidate stores are still targets, despite exeeding the - // threshold. - if ioOverloadScore < avg*IOOverloadMeanThreshold { - log.KvDistribution.VEventf(ctx, 5, "s%d, allocate check io overload %.2f exceeds threshold %.2f, but below average: %.2f, action enabled %d", - store.StoreID, ioOverloadScore, - o.IOOverloadThreshold, avg, o.EnforcementLevel) - return true + for _, disallowedEnforcement := range disallowed { + if enforcement == disallowedEnforcement { + return false, fmt.Sprintf( + "io overload %.2f exceeds threshold %.2f, above average: %.2f, enforcement %d", + score, absThreshold, avg, enforcement) + } } - log.KvDistribution.VEventf(ctx, 5, "s%d, allocate check io overload %.2f exceeds threshold %.2f, above average: %.2f, action enabled %d", - store.StoreID, ioOverloadScore, - o.IOOverloadThreshold, avg, o.EnforcementLevel) + return true, "" +} - // The store is only considered unhealthy when the enforcement level is - // storeHealthBlockAll. - return o.EnforcementLevel < IOOverloadThresholdBlockAll +// allocateReplicaToCheck returns true if the store IO overload does not exceed +// the cluster threshold and mean, or the enforcement level does not prevent +// replica allocation to IO overloaded stores. +func (o IOOverloadOptions) allocateReplicaToCheck( + ctx context.Context, store roachpb.StoreDescriptor, avg float64, +) bool { + score, _ := store.Capacity.IOThreshold.Score() + + if ok, reason := ioOverloadCheck(score, avg, + o.ReplicaIOOverloadThreshold, IOOverloadMeanThreshold, + o.ReplicaEnforcementLevel, + IOOverloadThresholdBlockAll, + ); !ok { + log.KvDistribution.VEventf(ctx, 3, "s%d: %s", store.StoreID, reason) + return false + } + + return true } -// rebalanceToStoreIsHealthy returns true if the store IO overload does not +// rebalanceReplicaToCheck returns true if the store IO overload does not // exceed the cluster threshold and mean, or the enforcement level does not -// include excluding candidates from being rebalancing targets. -func (o StoreHealthOptions) rebalanceToStoreIsHealthy( +// prevent replica rebalancing to IO overloaded stores. 
+func (o IOOverloadOptions) rebalanceReplicaToCheck(
 	ctx context.Context, store roachpb.StoreDescriptor, avg float64,
 ) bool {
-	ioOverloadScore, _ := store.Capacity.IOThreshold.Score()
-	if o.EnforcementLevel == IOOverloadThresholdNoAction ||
-		ioOverloadScore < o.IOOverloadThreshold {
-		return true
+	score, _ := store.Capacity.IOThreshold.Score()
+
+	if ok, reason := ioOverloadCheck(score, avg,
+		o.ReplicaIOOverloadThreshold, IOOverloadMeanThreshold,
+		o.ReplicaEnforcementLevel,
+		IOOverloadThresholdBlockTransfers, IOOverloadThresholdBlockAll,
+	); !ok {
+		log.KvDistribution.VEventf(ctx, 3, "s%d: %s", store.StoreID, reason)
+		return false
 	}
+	return true
+}
 
-	if ioOverloadScore < avg*IOOverloadMeanThreshold {
-		log.KvDistribution.VEventf(ctx, 5,
-			"s%d, allocate check io overload %.2f exceeds threshold %.2f, but "+
-				"below average watermark: %.2f, action enabled %d",
-			store.StoreID, ioOverloadScore, o.IOOverloadThreshold,
-			avg*IOOverloadMeanThreshold, o.EnforcementLevel)
-		return true
+// transferLeaseToCheck returns true if the store IO overload does not exceed
+// the cluster threshold and mean, or the enforcement level does not prevent
+// lease transfers to IO overloaded stores.
+func (o IOOverloadOptions) transferLeaseToCheck(
+	ctx context.Context, store roachpb.StoreDescriptor, avg float64,
+) bool {
+	score, _ := store.Capacity.IOThreshold.Score()
+
+	if ok, reason := ioOverloadCheck(score, avg,
+		o.LeaseIOOverloadThreshold, IOOverloadMeanThreshold,
+		o.LeaseEnforcementLevel,
+		IOOverloadThresholdBlockTransfers, IOOverloadThresholdShed,
+	); !ok {
+		log.KvDistribution.VEventf(ctx, 3, "s%d: %s", store.StoreID, reason)
+		return false
 	}
 
-	log.KvDistribution.VEventf(ctx, 5,
-		"s%d, allocate check io overload %.2f exceeds threshold %.2f, above average "+
-			"watermark: %.2f, action enabled %d",
-		store.StoreID, ioOverloadScore, o.IOOverloadThreshold,
-		avg*IOOverloadMeanThreshold, o.EnforcementLevel)
+	return true
+}
+
+// existingLeaseCheck returns true if the store IO overload does not exceed
+// the cluster threshold and mean, or the enforcement level does not prevent
+// existing stores from holding leases whilst being IO overloaded.
+func (o IOOverloadOptions) existingLeaseCheck(
+	ctx context.Context, store roachpb.StoreDescriptor, avg float64,
+) bool {
+	score, _ := store.Capacity.IOThreshold.Score()
+
+	if ok, reason := ioOverloadCheck(score, avg,
+		o.LeaseIOOverloadShedThreshold, IOOverloadMeanThreshold,
+		o.LeaseEnforcementLevel,
+		IOOverloadThresholdShed,
+	); !ok {
+		log.KvDistribution.VEventf(ctx, 3, "s%d: %s", store.StoreID, reason)
+		return false
 	}
 
-	// The store is only considered unhealthy when the enforcement level is
-	// storeHealthBlockRebalanceTo or storeHealthBlockAll.
- return o.EnforcementLevel < IOOverloadThresholdBlockRebalanceTo + return true } // rebalanceToMaxCapacityCheck returns true if the store has enough room to diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go index 7f2965c4bec5..726f54ed8bbd 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go +++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_scorer_test.go @@ -1096,7 +1096,7 @@ func TestShouldRebalanceDiversity(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) - options := &RangeCountScorerOptions{StoreHealthOptions: StoreHealthOptions{EnforcementLevel: IOOverloadThresholdNoAction}} + options := &RangeCountScorerOptions{} newStore := func(id int, locality roachpb.Locality) roachpb.StoreDescriptor { return roachpb.StoreDescriptor{ StoreID: roachpb.StoreID(id), diff --git a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go index adce6f429a6c..2858afaba776 100644 --- a/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go +++ b/pkg/kv/kvserver/allocator/allocatorimpl/allocator_test.go @@ -348,22 +348,22 @@ var oneStoreHighIOOverload = []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold - 5)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold - 5)}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1800, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold - 5)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1800, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold - 5)}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 5)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 5)}, }, { StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 4}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold - 5)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold - 5)}, }, } @@ -371,17 +371,17 @@ var allStoresHighIOOverload = []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 1)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 1)}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 800, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 1)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 
200, RangeCount: 800, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 1)}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 1)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 1)}, }, } @@ -389,17 +389,17 @@ var allStoresHighIOOverloadSkewed = []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 1)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1200, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 1)}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 800, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 50)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 800, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 50)}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 55)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 600, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 55)}, }, } @@ -407,27 +407,27 @@ var threeStoresHighIOOverloadAscRangeCount = []*roachpb.StoreDescriptor{ { StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 100, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 10)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 100, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 10)}, }, { StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 2}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 400, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 10)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 400, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 10)}, }, { StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 3}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1600, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold + 10)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 1600, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold + 10)}, }, { StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 4}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 6400, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold - 10)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 6400, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold - 10)}, }, { StoreID: 5, Node: roachpb.NodeDescriptor{NodeID: 5}, - Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 25000, IOThreshold: TestingIOThresholdWithScore(DefaultIOOverloadThreshold - 
10)}, + Capacity: roachpb.StoreCapacity{Capacity: 200, Available: 200, RangeCount: 25000, IOThreshold: TestingIOThresholdWithScore(DefaultReplicaIOOverloadThreshold - 10)}, }, } @@ -595,7 +595,7 @@ func TestAllocatorNoAvailableDisks(t *testing.T) { } } -func TestAllocatorIOOverloadCheck(t *testing.T) { +func TestAllocatorAllocateVoterIOOverloadCheck(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -615,7 +615,7 @@ func TestAllocatorIOOverloadCheck(t *testing.T) { } tests := []testCase{ { - name: "ignore io overload on allocation when StoreHealthNoAction enforcement", + name: "ignore io overload on allocation when ignore enforcement", stores: allStoresHighIOOverload, conf: emptySpanConfig(), // NB: All stores have high io overload, this should be ignored and @@ -624,10 +624,10 @@ func TestAllocatorIOOverloadCheck(t *testing.T) { // Recovery of a dead node can pick any valid store, not necessarily the // one with the lowest range count. expectedTargetIfDead: roachpb.StoreID(2), - enforcement: IOOverloadThresholdNoAction, + enforcement: IOOverloadThresholdIgnore, }, { - name: "ignore io overload on allocation when storeHealthLogOnly enforcement", + name: "ignore io overload on allocation when block rebalance to enforcement", // NB: All stores have high io overload, this should be ignored and // allocate to the store with the lowest range count. stores: allStoresHighIOOverload, @@ -636,22 +636,10 @@ func TestAllocatorIOOverloadCheck(t *testing.T) { // Recovery of a dead node can pick any valid store, not necessarily the // one with the lowest range count. expectedTargetIfDead: roachpb.StoreID(2), - enforcement: IOOverloadThresholdLogOnly, + enforcement: IOOverloadThresholdBlockTransfers, }, { - name: "ignore io overload on allocation when StoreHealthBlockRebalanceTo enforcement", - // NB: All stores have high io overload, this should be ignored and - // allocate to the store with the lowest range count. - stores: allStoresHighIOOverload, - conf: emptySpanConfig(), - expectedTargetIfAlive: roachpb.StoreID(3), - // Recovery of a dead node can pick any valid store, not necessarily the - // one with the lowest range count. - expectedTargetIfDead: roachpb.StoreID(2), - enforcement: IOOverloadThresholdBlockRebalanceTo, - }, - { - name: "don't allocate to stores when all have high io overload and StoreHealthBlockAll", + name: "don't allocate to stores when all have high io overload and block all enforcement", // NB: All stores have high io overload (limit + 1), none are above the watermark, select the lowest range count. stores: allStoresHighIOOverload, conf: emptySpanConfig(), @@ -662,7 +650,7 @@ func TestAllocatorIOOverloadCheck(t *testing.T) { enforcement: IOOverloadThresholdBlockAll, }, { - name: "allocate to store below the mean when all have high io overload and StoreHealthBlockAll", + name: "allocate to store below the mean when all have high io overload and block all enforcement", // NB: All stores have high io overload, however store 1 is below the watermark mean io overload. 
 			stores:                allStoresHighIOOverloadSkewed,
 			conf:                  emptySpanConfig(),
 			expectedTargetIfAlive: roachpb.StoreID(1),
 			expectedTargetIfDead:  roachpb.StoreID(1),
 			enforcement:           IOOverloadThresholdBlockAll,
 		},
 		{
-			name: "allocate to lowest range count store without high io overload when StoreHealthBlockAll enforcement",
+			name: "allocate to lowest range count store without high io overload when block all enforcement",
 			// NB: Store 1, 2 and 3 have high io overload and are above the watermark, the lowest range count (4)
 			// should be selected.
 			stores: threeStoresHighIOOverloadAscRangeCount,
@@ -692,9 +680,7 @@ func TestAllocatorIOOverloadCheck(t *testing.T) {
 			defer stopper.Stop(ctx)
 			sg := gossiputil.NewStoreGossiper(g)
 			sg.GossipStores(test.stores, t)
-
-			// Enable read disk health checking in candidate exclusion.
-			IOOverloadThresholdEnforcement.Override(ctx, &a.st.SV, int64(test.enforcement))
+			ReplicaIOOverloadThresholdEnforcement.Override(ctx, &a.st.SV, int64(test.enforcement))
 
 			// Allocate a voter where all replicas are alive (e.g. up-replicating a valid range).
 			add, _, err := a.AllocateVoter(
@@ -1654,7 +1640,7 @@ func TestAllocatorRebalanceByQPS(t *testing.T) {
 			gossiputil.NewStoreGossiper(g).GossipStores(subtest.testStores, t)
 			var rangeUsageInfo allocator.RangeUsageInfo
 			options := TestingQPSLoadScorerOptions(100, 0.2)
-			options.StoreHealthOptions = StoreHealthOptions{EnforcementLevel: IOOverloadThresholdNoAction}
+			options.IOOverloadOptions = IOOverloadOptions{ReplicaEnforcementLevel: IOOverloadThresholdIgnore}
 			add, remove, _, ok := a.RebalanceVoter(
 				ctx,
 				sp,
@@ -1768,7 +1754,7 @@ func TestAllocatorRemoveBasedOnQPS(t *testing.T) {
 			defer stopper.Stop(ctx)
 			gossiputil.NewStoreGossiper(g).GossipStores(subtest.testStores, t)
 			options := TestingQPSLoadScorerOptions(0, 0.1)
-			options.StoreHealthOptions = StoreHealthOptions{EnforcementLevel: IOOverloadThresholdNoAction}
+			options.IOOverloadOptions = IOOverloadOptions{ReplicaEnforcementLevel: IOOverloadThresholdIgnore}
 			remove, _, err := a.RemoveVoter(
 				ctx,
 				sp,
@@ -1976,6 +1962,133 @@ func TestAllocatorTransferLeaseTarget(t *testing.T) {
 	}
 }
 
+func TestAllocatorTransferLeaseTargetIOOverloadCheck(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+	ctx := context.Background()
+
+	floats := func(nums ...float64) []float64 {
+		return nums
+	}
+
+	// We want the shed threshold to be 0.9 and the overload threshold to be 0.5
+	// i.e. block transfers at >=0.5 and block transfers + shed leases at >=0.9.
+	const shedThreshold = 0.9
+	const threshold = 0.5
+
+	testCases := []struct {
+		name                  string
+		leaseCounts, IOScores []float64
+		leaseholder           roachpb.StoreID
+		excludeLeaseRepl      bool
+		expected              roachpb.StoreID
+		enforcement           IOOverloadEnforcementLevel
+	}{
+		{
+			name:        "don't move off of store with high io overload when block enforcement",
+			leaseCounts: floats(100, 100, 100, 100, 100),
+			IOScores:    floats(2.5, 1.5, 0.5, 0, 0),
+			leaseholder: 1,
+			expected:    0,
+			enforcement: IOOverloadThresholdBlockTransfers,
+		},
+		{
+			name:        "move off of store with high io overload when shed enforcement",
+			leaseCounts: floats(100, 100, 100, 100, 100),
+			IOScores:    floats(2.5, 1.5, 0.5, 0, 0),
+			leaseholder: 1,
+			// Store 3 is at the transfer threshold (0.5), but below the
+			// mean-based threshold (avg 0.9 * 1.1), so it is still considered a
+			// non-IO-overloaded candidate.
+			expected:    3,
+			enforcement: IOOverloadThresholdShed,
+		},
+		{
+			name:        "don't transfer to io overloaded store when block enforcement",
+			leaseCounts: floats(0, 100, 100, 400, 400),
+			IOScores:    floats(2.5, 1.5, 0.5, 0, 0),
+			leaseholder: 5,
+			expected:    3,
+			enforcement: IOOverloadThresholdBlockTransfers,
+		},
+		{
+			name:        "don't transfer to io overloaded store when shed enforcement",
+			leaseCounts: floats(0, 100, 100, 400, 400),
+			IOScores:    floats(2.5, 1.5, 0.5, 0, 0),
+			leaseholder: 5,
+			expected:    3,
+			enforcement: IOOverloadThresholdShed,
+		},
+		{
+			name:        "still transfer to io overloaded store when no action enforcement",
+			leaseCounts: floats(0, 100, 100, 400, 400),
+			IOScores:    floats(2.5, 1.5, 0.5, 0, 0),
+			leaseholder: 5,
+			expected:    2,
+			enforcement: IOOverloadThresholdIgnore,
+		},
+		{
+			name:        "move off of store with high io overload with skewed lease counts shed enforcement",
+			leaseCounts: floats(0, 0, 10000, 10000, 10000),
+			IOScores:    floats(2.5, 1.5, 0.5, 0, 0),
+			leaseholder: 1,
+			expected:    3,
+			enforcement: IOOverloadThresholdShed,
+		},
+		{
+			name:        "don't move off of store with high io overload but less than shed threshold with shed enforcement",
+			leaseCounts: floats(0, 0, 0, 0, 0),
+			IOScores:    floats(0.89, 0, 0, 0, 0),
+			leaseholder: 1,
+			expected:    0,
+			enforcement: IOOverloadThresholdShed,
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			stopper, g, sp, a, _ := CreateTestAllocator(ctx, 10, true /* deterministic */)
+			defer stopper.Stop(ctx)
+			n := len(tc.leaseCounts)
+			stores := make([]*roachpb.StoreDescriptor, n)
+			existing := make([]roachpb.ReplicaDescriptor, 0, n)
+			for i := range tc.leaseCounts {
+				existing = append(existing, replicas(roachpb.StoreID(i+1))...)
+				stores[i] = &roachpb.StoreDescriptor{
+					StoreID: roachpb.StoreID(i + 1),
+					Node:    roachpb.NodeDescriptor{NodeID: roachpb.NodeID(i + 1)},
+					Capacity: roachpb.StoreCapacity{
+						LeaseCount:  int32(tc.leaseCounts[i]),
+						IOThreshold: TestingIOThresholdWithScore(tc.IOScores[i]),
+					},
+				}
+			}
+
+			sg := gossiputil.NewStoreGossiper(g)
+			sg.GossipStores(stores, t)
+			LeaseIOOverloadThresholdEnforcement.Override(ctx, &a.st.SV, int64(tc.enforcement))
+			LeaseIOOverloadThreshold.Override(ctx, &a.st.SV, threshold)
+			LeaseIOOverloadShedThreshold.Override(ctx, &a.st.SV, shedThreshold)
+
+			target := a.TransferLeaseTarget(
+				ctx,
+				sp,
+				emptySpanConfig(),
+				existing,
+				&mockRepl{
+					replicationFactor: int32(n),
+					storeID:           tc.leaseholder,
+				},
+				allocator.RangeUsageInfo{}, /* stats */
+				false,                      /* forceDecisionWithoutStats */
+				allocator.TransferLeaseOptions{
+					CheckCandidateFullness: true,
+				},
+			)
+			require.Equal(t, tc.expected, target.StoreID)
+		})
+	}
+}
+
 func TestAllocatorTransferLeaseToReplicasNeedingSnapshot(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
@@ -4346,14 +4459,14 @@ func TestAllocatorRebalanceNonVoters(t *testing.T) {
 	}
 }
 
-// TestAllocatorRebalanceStoreHealthCheck ensures that rebalancing voters:
-// (1) Respects storeHealthEnforcement setting, by ignoring IO overload in
-// rebalancing decisions when disabled or set to log only.
-// (2) Considers IO overload when set to rebalanceOnly or allocate in
+// TestAllocatorRebalanceIOOverloadCheck ensures that rebalancing voters:
+// (1) Respects the replica IO overload enforcement setting, by ignoring IO
+// overload in rebalancing decisions when set to ignore.
+// (2) Considers IO overload when set to block_rebalance_to or block_all, in
 // conjunction with the mean.
// (3) Does not attempt to rebalance off of the store when io overload // is high, as this setting is only used for filtering candidates. -func TestAllocatorRebalanceStoreHealthCheck(t *testing.T) { +func TestAllocatorRebalanceIOOverloadCheck(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() @@ -4368,7 +4481,7 @@ func TestAllocatorRebalanceStoreHealthCheck(t *testing.T) { } tests := []testCase{ { - name: "don't move off of nodes with high io overload when StoreHealthBlockRebalanceTo", + name: "don't move off of nodes with high io overload when block rebalance to", // NB: Store 1,2, 4 have okay io overload. Store 3 has high io overload. // We expect high io overload to only be considered for // exlcuding targets, not for triggering rebalancing. @@ -4376,10 +4489,10 @@ func TestAllocatorRebalanceStoreHealthCheck(t *testing.T) { conf: emptySpanConfig(), existingVoters: replicas(3, 1), expectNoAction: true, - enforcement: IOOverloadThresholdBlockRebalanceTo, + enforcement: IOOverloadThresholdBlockTransfers, }, { - name: "don't move off of nodes with high io overload when StoreHealthBlockAll", + name: "don't move off of nodes with high io overload when block all", // NB: Store 1,2, 4 have okay io overload. Store 3 has high io overload. // We expect high io overload to only be considered for // exlcuding targets, not for triggering rebalancing. @@ -4390,7 +4503,7 @@ func TestAllocatorRebalanceStoreHealthCheck(t *testing.T) { enforcement: IOOverloadThresholdBlockAll, }, { - name: "don't take action when enforcement is not StoreHealthNoAction", + name: "don't take action when enforcement is ignore", // NB: Store 3 has IOOverload > threshold. Store 2 has 3 x higher // ranges as other stores. Should move to candidate to 4, however // enforcement for rebalancing is not enabled so will pick @@ -4400,10 +4513,10 @@ func TestAllocatorRebalanceStoreHealthCheck(t *testing.T) { existingVoters: replicas(1, 2), expectedRemoveTargets: []roachpb.StoreID{2}, expectedAddTargets: []roachpb.StoreID{3}, - enforcement: IOOverloadThresholdNoAction, + enforcement: IOOverloadThresholdIgnore, }, { - name: "don't rebalance to nodes with high io overload when StoreHealthBlockRebalanceTo enforcement", + name: "don't rebalance to nodes with high io overload when block rebalance to", // NB: Store 3 has IOOverload > threshold. Store 2 has 3 x higher // ranges as other stores. Should move to candidate to 4, which // doesn't have high io overload. @@ -4412,10 +4525,10 @@ func TestAllocatorRebalanceStoreHealthCheck(t *testing.T) { existingVoters: replicas(1, 2), expectedRemoveTargets: []roachpb.StoreID{2}, expectedAddTargets: []roachpb.StoreID{4}, - enforcement: IOOverloadThresholdBlockRebalanceTo, + enforcement: IOOverloadThresholdBlockTransfers, }, { - name: "don't rebalance to nodes with high io overload when StoreHealthBlockAll enforcement", + name: "don't rebalance to nodes with high io overload when block all enforcement", // NB: Store 3 has IOOverload > threshold. Store 2 has 3 x higher // ranges as other stores. Should move to candidate to 4, which // doesn't have high io overload. @@ -4446,7 +4559,7 @@ func TestAllocatorRebalanceStoreHealthCheck(t *testing.T) { sg.GossipStores(test.stores, t) // Enable read disk health checking in candidate exclusion. 
options := a.ScorerOptions(ctx) - options.StoreHealthOptions = StoreHealthOptions{EnforcementLevel: test.enforcement, IOOverloadThreshold: 1} + options.IOOverloadOptions = IOOverloadOptions{ReplicaEnforcementLevel: test.enforcement, ReplicaIOOverloadThreshold: 1} add, remove, _, ok := a.RebalanceVoter( ctx, sp, @@ -8545,7 +8658,6 @@ func qpsBasedRebalanceFn( jitteredQPS := avgQPS * (1 + alloc.randGen.Float64()) opts := TestingQPSLoadScorerOptions(jitteredQPS, 0.2) - opts.StoreHealthOptions = StoreHealthOptions{EnforcementLevel: IOOverloadThresholdNoAction} opts.Deterministic = false var rangeUsageInfo allocator.RangeUsageInfo add, remove, details, ok := alloc.RebalanceVoter( diff --git a/pkg/kv/kvserver/asim/storerebalancer/store_rebalancer.go b/pkg/kv/kvserver/asim/storerebalancer/store_rebalancer.go index 3c72760c5e24..bbdbe58d4f9a 100644 --- a/pkg/kv/kvserver/asim/storerebalancer/store_rebalancer.go +++ b/pkg/kv/kvserver/asim/storerebalancer/store_rebalancer.go @@ -134,7 +134,7 @@ func (s simRebalanceObjectiveProvider) Objective() kvserver.LBRebalancingObjecti func (src *storeRebalancerControl) scorerOptions() *allocatorimpl.LoadScorerOptions { return &allocatorimpl.LoadScorerOptions{ - StoreHealthOptions: allocatorimpl.StoreHealthOptions{}, + IOOverloadOptions: src.allocator.IOOverloadOptions(), Deterministic: true, LoadDims: []load.Dimension{load.Queries}, LoadThreshold: allocatorimpl.MakeQPSOnlyDim(src.settings.LBRebalanceQPSThreshold), diff --git a/pkg/kv/kvserver/store_rebalancer.go b/pkg/kv/kvserver/store_rebalancer.go index 14a4fabe11ae..d47feafed82a 100644 --- a/pkg/kv/kvserver/store_rebalancer.go +++ b/pkg/kv/kvserver/store_rebalancer.go @@ -284,7 +284,7 @@ func (sr *StoreRebalancer) scorerOptions( ctx context.Context, lbDimension load.Dimension, ) *allocatorimpl.LoadScorerOptions { return &allocatorimpl.LoadScorerOptions{ - StoreHealthOptions: sr.allocator.StoreHealthOptions(ctx), + IOOverloadOptions: sr.allocator.IOOverloadOptions(), Deterministic: sr.storePool.IsDeterministic(), LoadDims: []load.Dimension{lbDimension}, LoadThreshold: allocatorimpl.LoadThresholds(&sr.st.SV, lbDimension), diff --git a/pkg/kv/kvserver/store_rebalancer_test.go b/pkg/kv/kvserver/store_rebalancer_test.go index b8f7ddb07dfe..d3e68d0c14d4 100644 --- a/pkg/kv/kvserver/store_rebalancer_test.go +++ b/pkg/kv/kvserver/store_rebalancer_test.go @@ -60,7 +60,7 @@ var ( QueriesPerSecond: 3000, CPUPerSecond: 3000 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold - 10), + allocatorimpl.DefaultReplicaIOOverloadThreshold - 10), }, }, { @@ -80,7 +80,7 @@ var ( QueriesPerSecond: 2800, CPUPerSecond: 2800 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold - 5), + allocatorimpl.DefaultReplicaIOOverloadThreshold - 5), }, }, { @@ -100,7 +100,7 @@ var ( QueriesPerSecond: 2600, CPUPerSecond: 2600 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 2), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 2), }, }, { @@ -120,7 +120,7 @@ var ( QueriesPerSecond: 2400, CPUPerSecond: 2400 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold - 10), + allocatorimpl.DefaultReplicaIOOverloadThreshold - 10), }, }, { @@ -140,7 +140,7 @@ var ( QueriesPerSecond: 2200, CPUPerSecond: 2200 * float64(time.Millisecond), IOThreshold: 
allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold - 3), + allocatorimpl.DefaultReplicaIOOverloadThreshold - 3), }, }, { @@ -160,7 +160,7 @@ var ( QueriesPerSecond: 2000, CPUPerSecond: 2000 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 2), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 2), }, }, { @@ -180,7 +180,7 @@ var ( QueriesPerSecond: 1800, CPUPerSecond: 1800 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold - 10), + allocatorimpl.DefaultReplicaIOOverloadThreshold - 10), }, }, { @@ -200,7 +200,7 @@ var ( QueriesPerSecond: 1600, CPUPerSecond: 1600 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold - 5), + allocatorimpl.DefaultReplicaIOOverloadThreshold - 5), }, }, { @@ -220,7 +220,7 @@ var ( QueriesPerSecond: 1400, CPUPerSecond: 1400 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 3), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 3), }, }, } @@ -283,7 +283,7 @@ var ( QueriesPerSecond: 1500, CPUPerSecond: 1500 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold - 15), + allocatorimpl.DefaultReplicaIOOverloadThreshold - 15), }, }, { @@ -293,7 +293,7 @@ var ( QueriesPerSecond: 1300, CPUPerSecond: 1300 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold - 10), + allocatorimpl.DefaultReplicaIOOverloadThreshold - 10), }, }, { @@ -303,7 +303,7 @@ var ( QueriesPerSecond: 1000, CPUPerSecond: 1000 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold - 5), + allocatorimpl.DefaultReplicaIOOverloadThreshold - 5), }, }, { @@ -313,7 +313,7 @@ var ( QueriesPerSecond: 900, CPUPerSecond: 900 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 20), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 20), }, }, { @@ -323,7 +323,7 @@ var ( QueriesPerSecond: 500, CPUPerSecond: 500 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 25), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 25), }, }, } @@ -338,7 +338,7 @@ var ( QueriesPerSecond: 1000, CPUPerSecond: 1000 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 100), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 100), }, }, { @@ -348,7 +348,7 @@ var ( QueriesPerSecond: 1000, CPUPerSecond: 1000 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold - 15), + allocatorimpl.DefaultReplicaIOOverloadThreshold - 15), }, }, { @@ -358,7 +358,7 @@ var ( QueriesPerSecond: 1000, CPUPerSecond: 1000 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 100), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 100), }, }, { @@ -368,7 +368,7 @@ var ( QueriesPerSecond: 1000, CPUPerSecond: 1000 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - 
allocatorimpl.DefaultIOOverloadThreshold - 15), + allocatorimpl.DefaultReplicaIOOverloadThreshold - 15), }, }, { @@ -378,7 +378,7 @@ var ( QueriesPerSecond: 1000, CPUPerSecond: 1000 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 100), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 100), }, }, } @@ -393,7 +393,7 @@ var ( QueriesPerSecond: 1500, CPUPerSecond: 1500 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 1), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 1), }, }, { @@ -403,7 +403,7 @@ var ( QueriesPerSecond: 1300, CPUPerSecond: 1300 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 1), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 1), }, }, { @@ -413,7 +413,7 @@ var ( QueriesPerSecond: 1000, CPUPerSecond: 1000 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 1), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 1), }, }, { @@ -423,7 +423,7 @@ var ( QueriesPerSecond: 900, CPUPerSecond: 900 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 1), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 1), }, }, { @@ -433,7 +433,7 @@ var ( QueriesPerSecond: 500, CPUPerSecond: 500 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 1), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 1), }, }, } @@ -448,7 +448,7 @@ var ( QueriesPerSecond: 1500, CPUPerSecond: 1500 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 1), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 1), }, }, { @@ -458,7 +458,7 @@ var ( QueriesPerSecond: 1300, CPUPerSecond: 1300 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 10), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 10), }, }, { @@ -468,7 +468,7 @@ var ( QueriesPerSecond: 1000, CPUPerSecond: 1000 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 50), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 50), }, }, { @@ -478,7 +478,7 @@ var ( QueriesPerSecond: 900, CPUPerSecond: 900 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 100), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 100), }, }, { @@ -488,7 +488,7 @@ var ( QueriesPerSecond: 500, CPUPerSecond: 500 * float64(time.Millisecond), IOThreshold: allocatorimpl.TestingIOThresholdWithScore( - allocatorimpl.DefaultIOOverloadThreshold + 100), + allocatorimpl.DefaultReplicaIOOverloadThreshold + 100), }, }, } @@ -935,7 +935,7 @@ func TestChooseRangeToRebalanceRandom(t *testing.T) { hottestRanges := sr.replicaRankings.TopLoad() options := sr.scorerOptions(ctx, lbRebalanceDimension) rctx := sr.NewRebalanceContext(ctx, options, hottestRanges, sr.RebalanceMode()) - rctx.options.StoreHealthOptions = allocatorimpl.StoreHealthOptions{EnforcementLevel: allocatorimpl.IOOverloadThresholdNoAction} + rctx.options.IOOverloadOptions = allocatorimpl.IOOverloadOptions{ReplicaEnforcementLevel: 
allocatorimpl.IOOverloadThresholdIgnore} rctx.options.LoadThreshold = allocatorimpl.WithAllDims(rebalanceThreshold) _, voterTargets, nonVoterTargets := sr.chooseRangeToRebalance(ctx, rctx) @@ -1061,7 +1061,7 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) { name: "rebalance one replica within heavy region", voters: []roachpb.StoreID{1, 6, 9}, constraints: oneReplicaPerRegion, - expRebalancedVoters: []roachpb.StoreID{9, 6, 2}, + expRebalancedVoters: []roachpb.StoreID{2, 6, 9}, }, // A replica is in a heavily loaded region, on a relatively heavily loaded // store. We expect it to be moved to a less busy store within the same @@ -1083,7 +1083,7 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) { voters: []roachpb.StoreID{1, 2, 9}, constraints: twoReplicasInHotRegion, leasePreferences: leasePreferredHotRegion, - expRebalancedVoters: []roachpb.StoreID{3, 2, 9}, + expRebalancedVoters: []roachpb.StoreID{2, 3, 9}, }, // Two replicas are in the hot region, both on relatively heavily // loaded nodes. We expect one of those replicas to be moved to a @@ -1094,21 +1094,21 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) { name: "rebalance two replicas out of three within heavy region, prefer lease in heavy region", voters: []roachpb.StoreID{1, 2, 9}, constraints: twoReplicasInHotRegion, - expRebalancedVoters: []roachpb.StoreID{9, 2, 3}, + expRebalancedVoters: []roachpb.StoreID{2, 9, 3}, }, { name: "rebalance two replicas out of five within heavy region", voters: []roachpb.StoreID{1, 2, 6, 8, 9}, constraints: twoReplicasInHotRegion, // NB: Because of the diversity heuristic we won't rebalance to node 7. - expRebalancedVoters: []roachpb.StoreID{9, 3, 6, 8, 2}, + expRebalancedVoters: []roachpb.StoreID{8, 3, 6, 9, 2}, }, { name: "rebalance two replicas out of five within heavy region", voters: []roachpb.StoreID{1, 2, 6, 8, 9}, constraints: twoReplicasInHotRegion, // NB: Because of the diversity heuristic we won't rebalance to node 7. - expRebalancedVoters: []roachpb.StoreID{9, 3, 6, 8, 2}, + expRebalancedVoters: []roachpb.StoreID{8, 3, 6, 9, 2}, }, // In the absence of any constraints, ensure that as long as diversity is // maximized, replicas on hot stores are rebalanced to cooler stores within @@ -1144,7 +1144,7 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) { voterConstraints: allReplicasInHotRegion, constraints: oneReplicaPerRegion, - expRebalancedVoters: []roachpb.StoreID{3, 2, 1}, + expRebalancedVoters: []roachpb.StoreID{2, 3, 1}, // NB: Expect the non-voter on node 4 (hottest node in region B) to // move to node 5, the least hot // node in region B without a high IO overload score. @@ -1158,8 +1158,10 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) { voterConstraints: twoReplicasInSecondHottestRegion, constraints: oneReplicaPerRegion, // NB: Expect the voter on node 4 (hottest node in region B) to move to - // node 6 (least hot region in region B). - expRebalancedVoters: []roachpb.StoreID{9, 5, 6, 8, 3}, + // node 6 (least hot node in region B). Expect the lease to move to the + // lowest QPS node among nodes that pass the IO overload transfer check + // (n8).
+ expRebalancedVoters: []roachpb.StoreID{8, 5, 6, 9, 3}, }, { name: "primary region with second highest QPS, region survival, one voter on sub-optimal node, prefer lease hottest region", @@ -1183,9 +1185,10 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) { constraints: oneReplicaPerRegion, leasePreferences: leasePreferredSecondHotRegion, // NB: Expect the voter on node 4 (hottest node in region B) to move to - // node 6 (least hot region in region B). Expect lease to transfer - // to least hot store, in the second hottest region (node 6). - expRebalancedVoters: []roachpb.StoreID{6, 5, 3, 8, 9}, + // node 6 (least hot node in region B). Expect the lease to transfer to + // the least hot store in the second hottest region that passes the lease + // IO overload check (node 5). + expRebalancedVoters: []roachpb.StoreID{5, 6, 3, 8, 9}, }, { name: "primary region with highest QPS, region survival, two voters on sub-optimal nodes", @@ -1199,8 +1202,9 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) { // the least hot region. Additionally, in region B, we've got one replica // on store 4 (which is the hottest store in that region). We expect that // replica to be moved to store 5, which is the least hot node without a - // high IO overload score. - expRebalancedVoters: []roachpb.StoreID{9, 2, 5, 8, 3}, + // high IO overload score. Expect the lease to move to s8 as it passes + // the IO overload transfer check. + expRebalancedVoters: []roachpb.StoreID{8, 2, 5, 9, 3}, }, { name: "one voter on sub-optimal node in the coldest region", @@ -1224,6 +1228,7 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) { expRebalancedVoters: []roachpb.StoreID{8, 5, 6}, }, } + for _, tc := range testCases { t.Run(tc.name, withQPSCPU(t, objectiveProvider, func(t *testing.T) { // Boilerplate for test setup.
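// The lease expectations above all hinge on the transfer-side IO overload
// check. A minimal sketch of its shape, using hypothetical names rather than
// this patch's API: a candidate is considered IO overloaded, and rejected as
// a transfer target, only when its score is at or above both the configured
// threshold (kv.allocator.lease_io_overload_threshold, default 0.5) and the
// mean score across the candidate stores.
func leaseTransferTargetOK(score, threshold, candidateMean float64) bool {
	// Below either bound => not IO overloaded => acceptable transfer target.
	return score < threshold || score < candidateMean
}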
@@ -1271,8 +1276,9 @@ func TestChooseRangeToRebalanceAcrossHeterogeneousZones(t *testing.T) { hottestRanges := sr.replicaRankings.TopLoad() options := sr.scorerOptions(ctx, lbRebalanceDimension) rctx := sr.NewRebalanceContext(ctx, options, hottestRanges, LBRebalancingLeasesAndReplicas) - rctx.options.StoreHealthOptions = allocatorimpl.StoreHealthOptions{ - EnforcementLevel: allocatorimpl.IOOverloadThresholdBlockRebalanceTo} + rctx.options.IOOverloadOptions = allocatorimpl.IOOverloadOptions{ + ReplicaEnforcementLevel: allocatorimpl.IOOverloadThresholdBlockTransfers, + } rctx.options.LoadThreshold = allocatorimpl.WithAllDims(0.05) _, voterTargets, nonVoterTargets := sr.chooseRangeToRebalance( @@ -1360,8 +1366,8 @@ func TestChooseRangeToRebalanceIgnoresRangeOnBestStores(t *testing.T) { hottestRanges := sr.replicaRankings.TopLoad() options := sr.scorerOptions(ctx, lbRebalanceDimension) rctx := sr.NewRebalanceContext(ctx, options, hottestRanges, sr.RebalanceMode()) - rctx.options.StoreHealthOptions = allocatorimpl.StoreHealthOptions{ - EnforcementLevel: allocatorimpl.IOOverloadThresholdNoAction} + rctx.options.IOOverloadOptions = allocatorimpl.IOOverloadOptions{ + ReplicaEnforcementLevel: allocatorimpl.IOOverloadThresholdIgnore} rctx.options.LoadThreshold = allocatorimpl.WithAllDims(0.05) sr.chooseRangeToRebalance(ctx, rctx) @@ -1528,8 +1534,8 @@ func TestChooseRangeToRebalanceOffHotNodes(t *testing.T) { hottestRanges := sr.replicaRankings.TopLoad() options := sr.scorerOptions(ctx, lbRebalanceDimension) rctx := sr.NewRebalanceContext(ctx, options, hottestRanges, sr.RebalanceMode()) - rctx.options.StoreHealthOptions = allocatorimpl.StoreHealthOptions{ - EnforcementLevel: allocatorimpl.IOOverloadThresholdNoAction} + rctx.options.IOOverloadOptions = allocatorimpl.IOOverloadOptions{ + ReplicaEnforcementLevel: allocatorimpl.IOOverloadThresholdIgnore} rctx.options.LoadThreshold = allocatorimpl.WithAllDims(tc.rebalanceThreshold) _, voterTargets, _ := sr.chooseRangeToRebalance(ctx, rctx) @@ -1637,8 +1643,8 @@ func TestNoLeaseTransferToBehindReplicas(t *testing.T) { hottestRanges = sr.replicaRankings.TopLoad() options = sr.scorerOptions(ctx, lbRebalanceDimension) rctx = sr.NewRebalanceContext(ctx, options, hottestRanges, sr.RebalanceMode()) - rctx.options.StoreHealthOptions = allocatorimpl.StoreHealthOptions{ - EnforcementLevel: allocatorimpl.IOOverloadThresholdNoAction} + rctx.options.IOOverloadOptions = allocatorimpl.IOOverloadOptions{ + ReplicaEnforcementLevel: allocatorimpl.IOOverloadThresholdIgnore} rctx.options.LoadThreshold = allocatorimpl.WithAllDims(0.05) rctx.options.Deterministic = true @@ -1682,17 +1688,7 @@ func TestStoreRebalancerIOOverloadCheck(t *testing.T) { expectedTargets: []roachpb.ReplicationTarget{ {NodeID: 4, StoreID: 4}, {NodeID: 3, StoreID: 3}, {NodeID: 5, StoreID: 5}, }, - enforcement: allocatorimpl.IOOverloadThresholdNoAction, - }, - { - name: "ignore io overload on allocation when log only enforcement", - // NB: All stores have high io overload, this should be ignored. 
- stores: noLocalityHighReadAmpStores, - conf: roachpb.SpanConfig{}, - expectedTargets: []roachpb.ReplicationTarget{ - {NodeID: 4, StoreID: 4}, {NodeID: 3, StoreID: 3}, {NodeID: 5, StoreID: 5}, - }, - enforcement: allocatorimpl.IOOverloadThresholdLogOnly, + enforcement: allocatorimpl.IOOverloadThresholdIgnore, }, { name: "don't stop rebalancing when the io overload score is uniformly above threshold and block rebalance to enforcement", // NB: All stores have high io overload, this should be ignored. stores: noLocalityHighReadAmpStores, conf: roachpb.SpanConfig{}, expectedTargets: []roachpb.ReplicationTarget{ {NodeID: 4, StoreID: 4}, {NodeID: 3, StoreID: 3}, {NodeID: 5, StoreID: 5}, }, - enforcement: allocatorimpl.IOOverloadThresholdBlockRebalanceTo, + enforcement: allocatorimpl.IOOverloadThresholdBlockTransfers, }, { name: "don't stop rebalancing when the io overload score is uniformly above threshold and block rebalance to enforcement", @@ -1734,7 +1730,7 @@ func TestStoreRebalancerIOOverloadCheck(t *testing.T) { expectedTargets: []roachpb.ReplicationTarget{ {NodeID: 2, StoreID: 2}, {NodeID: 3, StoreID: 3}, {NodeID: 5, StoreID: 5}, }, - enforcement: allocatorimpl.IOOverloadThresholdBlockRebalanceTo, + enforcement: allocatorimpl.IOOverloadThresholdBlockTransfers, }, { name: "rebalance should ignore stores with high IO overload when block rebalance to enforcement", @@ -1748,7 +1744,7 @@ func TestStoreRebalancerIOOverloadCheck(t *testing.T) { expectedTargets: []roachpb.ReplicationTarget{ {NodeID: 2, StoreID: 2}, {NodeID: 3, StoreID: 3}, {NodeID: 5, StoreID: 5}, }, - enforcement: allocatorimpl.IOOverloadThresholdBlockRebalanceTo, + enforcement: allocatorimpl.IOOverloadThresholdBlockTransfers, }, { name: "rebalance should ignore stores with high IO overload scores when block all enforcement level", @@ -1778,7 +1774,7 @@ func TestStoreRebalancerIOOverloadCheck(t *testing.T) { stores: noLocalityUniformQPSHighReadAmp, conf: roachpb.SpanConfig{}, expectedTargets: nil, - enforcement: allocatorimpl.IOOverloadThresholdBlockRebalanceTo, + enforcement: allocatorimpl.IOOverloadThresholdBlockTransfers, }, } @@ -1809,8 +1805,8 @@ func TestStoreRebalancerIOOverloadCheck(t *testing.T) { rctx := sr.NewRebalanceContext(ctx, options, hottestRanges, sr.RebalanceMode()) require.Greater(t, len(rctx.hottestRanges), 0) - rctx.options.StoreHealthOptions = allocatorimpl.StoreHealthOptions{ - EnforcementLevel: test.enforcement, IOOverloadThreshold: allocatorimpl.DefaultIOOverloadThreshold} + rctx.options.IOOverloadOptions = allocatorimpl.IOOverloadOptions{ + ReplicaEnforcementLevel: test.enforcement, ReplicaIOOverloadThreshold: allocatorimpl.DefaultReplicaIOOverloadThreshold} rctx.options.LoadThreshold = allocatorimpl.WithAllDims(0.05) _, targetVoters, _ := sr.chooseRangeToRebalance(ctx, rctx)
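// One closing note on the enforcement levels exercised by these tests: the
// shed path holds the current leaseholder to a higher bar than transfer
// targets. A minimal sketch, with hypothetical names rather than this
// patch's API: candidates are already excluded by the transfer check
// sketched earlier (threshold default 0.5), while the leaseholder only
// sheds its lease at or above the shed threshold
// (kv.allocator.lease_shed_io_overload_threshold, default 0.9). The gap
// between the two acts as a hysteresis buffer, so a store hovering around a
// single cutoff does not bounce leases back and forth.
func shouldShedLease(leaseholderScore, shedThreshold float64) bool {
	// Only shed once the leaseholder itself is well past the point at which
	// it stopped being a valid transfer target.
	return leaseholderScore >= shedThreshold
}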