diff --git a/pkg/kv/kvserver/allocator/storepool/override_store_pool.go b/pkg/kv/kvserver/allocator/storepool/override_store_pool.go index 4cd59b90a008..83b81b484836 100644 --- a/pkg/kv/kvserver/allocator/storepool/override_store_pool.go +++ b/pkg/kv/kvserver/allocator/storepool/override_store_pool.go @@ -15,6 +15,7 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/util" @@ -39,6 +40,7 @@ type OverrideStorePool struct { sp *StorePool overrideNodeLivenessFn NodeLivenessFunc + overrideNodeCountFn NodeCountFunc } var _ AllocatorStorePool = &OverrideStorePool{} @@ -58,13 +60,27 @@ func OverrideNodeLivenessFunc( } } +// OverrideNodeCountFunc constructs a NodeCountFunc based on a set of predefined +// overrides. If any nodeID does not have an override, the real liveness is used +// for the count for the number of nodes not decommissioning or decommissioned. +func OverrideNodeCountFunc( + overrides map[roachpb.NodeID]livenesspb.NodeLivenessStatus, nodeLiveness *liveness.NodeLiveness, +) NodeCountFunc { + return func() int { + return nodeLiveness.GetNodeCountWithOverrides(overrides) + } +} + // NewOverrideStorePool constructs an OverrideStorePool that can use its own // view of node liveness while falling through to an underlying store pool for // the state of peer stores. -func NewOverrideStorePool(storePool *StorePool, nl NodeLivenessFunc) *OverrideStorePool { +func NewOverrideStorePool( + storePool *StorePool, nl NodeLivenessFunc, nc NodeCountFunc, +) *OverrideStorePool { return &OverrideStorePool{ sp: storePool, overrideNodeLivenessFn: nl, + overrideNodeCountFn: nc, } } @@ -133,7 +149,7 @@ func (o *OverrideStorePool) LiveAndDeadReplicas( // ClusterNodeCount implements the AllocatorStorePool interface. func (o *OverrideStorePool) ClusterNodeCount() int { - return o.sp.ClusterNodeCount() + return o.overrideNodeCountFn() } // IsDeterministic implements the AllocatorStorePool interface. diff --git a/pkg/kv/kvserver/liveness/liveness.go b/pkg/kv/kvserver/liveness/liveness.go index 23aec0aa1ac3..7e6e0984f14e 100644 --- a/pkg/kv/kvserver/liveness/liveness.go +++ b/pkg/kv/kvserver/liveness/liveness.go @@ -1528,6 +1528,30 @@ func (nl *NodeLiveness) GetNodeCount() int { return count } +// GetNodeCountWithOverrides returns a count of the number of nodes in the cluster, +// including dead nodes, but excluding decommissioning or decommissioned nodes, +// using the provided set of liveness overrides. +func (nl *NodeLiveness) GetNodeCountWithOverrides( + overrides map[roachpb.NodeID]livenesspb.NodeLivenessStatus, +) int { + nl.mu.RLock() + defer nl.mu.RUnlock() + var count int + for _, l := range nl.mu.nodes { + if l.Membership.Active() { + if status, ok := overrides[l.NodeID]; ok { + if status != livenesspb.NodeLivenessStatus_DECOMMISSIONING && + status != livenesspb.NodeLivenessStatus_DECOMMISSIONED { + count++ + } + } else { + count++ + } + } + } + return count +} + // TestingSetDrainingInternal is a testing helper to set the internal draining // state for a NodeLiveness instance. func (nl *NodeLiveness) TestingSetDrainingInternal( diff --git a/pkg/kv/kvserver/store_test.go b/pkg/kv/kvserver/store_test.go index 739fe928a024..f2e8fe51c436 100644 --- a/pkg/kv/kvserver/store_test.go +++ b/pkg/kv/kvserver/store_test.go @@ -3550,7 +3550,8 @@ func TestAllocatorCheckRange(t *testing.T) { var storePoolOverride storepool.AllocatorStorePool if len(tc.livenessOverrides) > 0 { livenessOverride := storepool.OverrideNodeLivenessFunc(tc.livenessOverrides, sp.NodeLivenessFn) - storePoolOverride = storepool.NewOverrideStorePool(sp, livenessOverride) + nodeCountOverride := storepool.OverrideNodeCountFunc(tc.livenessOverrides, cfg.NodeLiveness) + storePoolOverride = storepool.NewOverrideStorePool(sp, livenessOverride, nodeCountOverride) } // Execute actual allocator range repair check. diff --git a/pkg/server/decommission.go b/pkg/server/decommission.go index e6347ebdbd78..ad3973d6fb2a 100644 --- a/pkg/server/decommission.go +++ b/pkg/server/decommission.go @@ -215,7 +215,12 @@ func (s *Server) DecommissionPreCheck( overrideNodeLivenessFn := storepool.OverrideNodeLivenessFunc( decommissionCheckNodeIDs, existingStorePool.NodeLivenessFn, ) - overrideStorePool := storepool.NewOverrideStorePool(existingStorePool, overrideNodeLivenessFn) + overrideNodeCount := storepool.OverrideNodeCountFunc( + decommissionCheckNodeIDs, evalStore.GetStoreConfig().NodeLiveness, + ) + overrideStorePool := storepool.NewOverrideStorePool( + existingStorePool, overrideNodeLivenessFn, overrideNodeCount, + ) // Define our replica filter to only look at the replicas on the checked nodes. predHasDecommissioningReplica := func(rDesc roachpb.ReplicaDescriptor) bool {