Skip to content

Commit

Permalink
allocator: correct node count calculations with overrides
Browse files Browse the repository at this point in the history
While we are correctly using the overrides set in the
`OverrideStorePool` for the purposes of the node liveness function, the
node count function did not properly incorporate the overrides
previously. This change rectifies that, using the preset overrides
specified at creation of the override store pool to calculate the number
of non-decommissioning, non-decommissioned nodes (alive or dead), as
viewed by the override store pool. This allows for correct calculation
of the number of needed voters, allowing us to correctly determine which
allocation action is needed for a range.

Depends on cockroachdb#93758.

Epic: CRDB-20924

Release note: None
  • Loading branch information
AlexTalks committed Jan 10, 2023
1 parent 1e57036 commit e6a12bf
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 4 deletions.
20 changes: 18 additions & 2 deletions pkg/kv/kvserver/allocator/storepool/override_store_pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"time"

"github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/util"
Expand All @@ -39,6 +40,7 @@ type OverrideStorePool struct {
sp *StorePool

overrideNodeLivenessFn NodeLivenessFunc
overrideNodeCountFn NodeCountFunc
}

var _ AllocatorStorePool = &OverrideStorePool{}
Expand All @@ -58,13 +60,27 @@ func OverrideNodeLivenessFunc(
}
}

// OverrideNodeCountFunc constructs a NodeCountFunc based on a set of predefined
// overrides. If any nodeID does not have an override, the real liveness is used
// for the count for the number of nodes not decommissioning or decommissioned.
func OverrideNodeCountFunc(
overrides map[roachpb.NodeID]livenesspb.NodeLivenessStatus, nodeLiveness *liveness.NodeLiveness,
) NodeCountFunc {
return func() int {
return nodeLiveness.GetNodeCountWithOverrides(overrides)
}
}

// NewOverrideStorePool constructs an OverrideStorePool that can use its own
// view of node liveness while falling through to an underlying store pool for
// the state of peer stores.
func NewOverrideStorePool(storePool *StorePool, nl NodeLivenessFunc) *OverrideStorePool {
func NewOverrideStorePool(
storePool *StorePool, nl NodeLivenessFunc, nc NodeCountFunc,
) *OverrideStorePool {
return &OverrideStorePool{
sp: storePool,
overrideNodeLivenessFn: nl,
overrideNodeCountFn: nc,
}
}

Expand Down Expand Up @@ -133,7 +149,7 @@ func (o *OverrideStorePool) LiveAndDeadReplicas(

// ClusterNodeCount implements the AllocatorStorePool interface.
func (o *OverrideStorePool) ClusterNodeCount() int {
return o.sp.ClusterNodeCount()
return o.overrideNodeCountFn()
}

// IsDeterministic implements the AllocatorStorePool interface.
Expand Down
24 changes: 24 additions & 0 deletions pkg/kv/kvserver/liveness/liveness.go
Original file line number Diff line number Diff line change
Expand Up @@ -1528,6 +1528,30 @@ func (nl *NodeLiveness) GetNodeCount() int {
return count
}

// GetNodeCountWithOverrides returns a count of the number of nodes in the cluster,
// including dead nodes, but excluding decommissioning or decommissioned nodes,
// using the provided set of liveness overrides.
func (nl *NodeLiveness) GetNodeCountWithOverrides(
overrides map[roachpb.NodeID]livenesspb.NodeLivenessStatus,
) int {
nl.mu.RLock()
defer nl.mu.RUnlock()
var count int
for _, l := range nl.mu.nodes {
if l.Membership.Active() {
if status, ok := overrides[l.NodeID]; ok {
if status != livenesspb.NodeLivenessStatus_DECOMMISSIONING &&
status != livenesspb.NodeLivenessStatus_DECOMMISSIONED {
count++
}
} else {
count++
}
}
}
return count
}

// TestingSetDrainingInternal is a testing helper to set the internal draining
// state for a NodeLiveness instance.
func (nl *NodeLiveness) TestingSetDrainingInternal(
Expand Down
3 changes: 2 additions & 1 deletion pkg/kv/kvserver/store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3550,7 +3550,8 @@ func TestAllocatorCheckRange(t *testing.T) {
var storePoolOverride storepool.AllocatorStorePool
if len(tc.livenessOverrides) > 0 {
livenessOverride := storepool.OverrideNodeLivenessFunc(tc.livenessOverrides, sp.NodeLivenessFn)
storePoolOverride = storepool.NewOverrideStorePool(sp, livenessOverride)
nodeCountOverride := storepool.OverrideNodeCountFunc(tc.livenessOverrides, cfg.NodeLiveness)
storePoolOverride = storepool.NewOverrideStorePool(sp, livenessOverride, nodeCountOverride)
}

// Execute actual allocator range repair check.
Expand Down
7 changes: 6 additions & 1 deletion pkg/server/decommission.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,12 @@ func (s *Server) DecommissionPreCheck(
overrideNodeLivenessFn := storepool.OverrideNodeLivenessFunc(
decommissionCheckNodeIDs, existingStorePool.NodeLivenessFn,
)
overrideStorePool := storepool.NewOverrideStorePool(existingStorePool, overrideNodeLivenessFn)
overrideNodeCount := storepool.OverrideNodeCountFunc(
decommissionCheckNodeIDs, evalStore.GetStoreConfig().NodeLiveness,
)
overrideStorePool := storepool.NewOverrideStorePool(
existingStorePool, overrideNodeLivenessFn, overrideNodeCount,
)

// Define our replica filter to only look at the replicas on the checked nodes.
predHasDecommissioningReplica := func(rDesc roachpb.ReplicaDescriptor) bool {
Expand Down

0 comments on commit e6a12bf

Please sign in to comment.