kvserver: Allow rebalances between stores on the same nodes.
Closes cockroachdb#6782

This change modifies the replicate_queue to allow rebalances between multiple stores within a single node. To make this correct, we add a number of guard rails to the Allocator to prevent a rebalance from placing multiple replicas of a range on the same node. That placement is undesirable because, in a simple 3x replication scenario, a single node crash could make a whole range unavailable. These guard rails alone are not enough, however: in the naive implementation the allocator ends up in an endless rebalance loop between the two stores on the same node.

This change therefore leverages the existing allocator heuristics, specifically balance_score and diversity_score, to accomplish the goal. The balance_score is used to compute the right balance of replicas per node, so whenever we compare stores we factor the range count by the number of stores on the node. This allows the balance_score to be used across a heterogeneous CockroachDB topology, where each node may have a different number of stores. To prevent replicas from ending up on the same node, we extend the failure domain definition to include the node, leveraging the locality feature to add the node as the last locality tier.

Release note (performance improvement): This change removes the last roadblock to running CockroachDB with multiple stores (i.e. disks) per node. The allocation algorithm now supports intra-node rebalances, which means CRDB can fully utilize the additional stores on the same node.
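A rough, self-contained Go sketch of the failure-domain change described above. The names (Tier, withNodeTier, diversity) are hypothetical stand-ins, not the actual CockroachDB types or scoring code; the point is that appending the node ID as the last locality tier makes two stores on the same node indistinguishable to the diversity heuristic, which steers replicas of a range onto distinct nodes.

package main

import "fmt"

// Tier mirrors one locality tier, e.g. region=us-east1.
type Tier struct{ Key, Value string }

// withNodeTier (hypothetical) appends the node ID as the innermost locality
// tier, extending the failure-domain definition down to the node.
func withNodeTier(tiers []Tier, nodeID int) []Tier {
	out := append([]Tier{}, tiers...)
	return append(out, Tier{Key: "node", Value: fmt.Sprint(nodeID)})
}

// diversity (simplified) returns the fraction of tiers at which two
// localities diverge; 0 means the same failure domain at every level
// (e.g. two stores on one node), the worst possible placement pair.
func diversity(a, b []Tier) float64 {
	for i := range a {
		if i >= len(b) || a[i] != b[i] {
			return 1.0 - float64(i)/float64(len(a))
		}
	}
	return 0
}

func main() {
	base := []Tier{{"region", "us-east1"}, {"zone", "b"}}
	s1 := withNodeTier(base, 1) // store 1 on node 1
	s2 := withNodeTier(base, 1) // store 2, also on node 1
	s3 := withNodeTier(base, 2) // store 3 on node 2
	fmt.Println(diversity(s1, s2)) // 0     -- same node: no diversity
	fmt.Println(diversity(s1, s3)) // ≈0.33 -- differs only at the node tier
}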
lunevalex committed Aug 17, 2020
1 parent 7b1abbf commit 2332697
Showing 13 changed files with 841 additions and 246 deletions.
17 changes: 8 additions & 9 deletions pkg/kv/kvserver/allocator.go
@@ -502,8 +502,8 @@ func (a *Allocator) allocateTargetFromList(
 	analyzedConstraints := constraint.AnalyzeConstraints(
 		ctx, a.storePool.getStoreDescriptor, candidateReplicas, zone)
 	candidates := allocateCandidates(
-		sl, analyzedConstraints, candidateReplicas, a.storePool.getLocalities(candidateReplicas),
-		options,
+		ctx, sl, analyzedConstraints, candidateReplicas,
+		a.storePool.getLocalitiesByStore(candidateReplicas), options,
 	)
 	log.VEventf(ctx, 3, "allocate candidates: %s", candidates)
 	if target := candidates.selectGood(a.randGen); target != nil {
@@ -566,9 +566,10 @@ func (a Allocator) RemoveTarget(
 		ctx, a.storePool.getStoreDescriptor, existingReplicas, zone)
 	options := a.scorerOptions()
 	rankedCandidates := removeCandidates(
+		ctx,
 		sl,
 		analyzedConstraints,
-		a.storePool.getLocalities(existingReplicas),
+		a.storePool.getLocalitiesByStore(existingReplicas),
 		options,
 	)
 	log.VEventf(ctx, 3, "remove candidates: %s", rankedCandidates)
@@ -585,7 +586,6 @@ func (a Allocator) RemoveTarget(
 			}
 		}
 	}
-
 	return roachpb.ReplicaDescriptor{}, "", errors.New("could not select an appropriate replica to be removed")
 }

@@ -663,8 +663,7 @@ func (a Allocator) RebalanceTarget(
 		sl,
 		analyzedConstraints,
 		existingReplicas,
-		a.storePool.getLocalities(existingReplicas),
-		a.storePool.getNodeLocalityString,
+		a.storePool.getLocalitiesByStore(existingReplicas),
 		options,
 	)

@@ -742,7 +741,7 @@
 	}
 	detailsBytes, err := json.Marshal(dDetails)
 	if err != nil {
-		log.Warningf(ctx, "failed to marshal details for choosing rebalance target: %+v", err)
+		log.VEventf(ctx, 2, "failed to marshal details for choosing rebalance target: %+v", err)
 	}
 
 	addTarget := roachpb.ReplicationTarget{
@@ -800,7 +799,7 @@ func (a *Allocator) TransferLeaseTarget(
 			filteredDescs = append(filteredDescs, s)
 		}
 	}
-	sl = makeStoreList(filteredDescs)
+	sl = a.storePool.makeStoreList(filteredDescs)
 
 	source, ok := a.storePool.getStoreDescriptor(leaseStoreID)
 	if !ok {
@@ -1005,7 +1004,7 @@ func (a Allocator) shouldTransferLeaseUsingStats(
 	if stats == nil || !enableLoadBasedLeaseRebalancing.Get(&a.storePool.st.SV) {
 		return decideWithoutStats, roachpb.ReplicaDescriptor{}
 	}
-	replicaLocalities := a.storePool.getLocalities(existing)
+	replicaLocalities := a.storePool.getLocalitiesByNode(existing)
 	for _, locality := range replicaLocalities {
 		if len(locality.Tiers) == 0 {
 			return decideWithoutStats, roachpb.ReplicaDescriptor{}
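
The recurring change in the hunks above is the split of the single getLocalities helper into getLocalitiesByStore and getLocalitiesByNode. Below is a minimal sketch of that distinction using simplified stand-ins for the roachpb types; the real methods live on storePool and take replica descriptors, so treat the signatures here as illustrative only.

package main

import "fmt"

// Simplified stand-ins for the roachpb types.
type (
	NodeID   int
	StoreID  int
	Locality struct{ Tiers []string }
	Replica  struct {
		NodeID  NodeID
		StoreID StoreID
	}
)

type storePool struct {
	nodeLocalities map[NodeID]Locality // gossiped locality per node
}

// getLocalitiesByStore returns localities keyed by StoreID, with the node ID
// appended as a synthetic last tier, for allocator paths that must tell
// apart stores sharing a node.
func (sp *storePool) getLocalitiesByStore(replicas []Replica) map[StoreID]Locality {
	out := make(map[StoreID]Locality, len(replicas))
	for _, r := range replicas {
		loc := sp.nodeLocalities[r.NodeID]
		tiers := append(append([]string{}, loc.Tiers...), fmt.Sprintf("node=%d", r.NodeID))
		out[r.StoreID] = Locality{Tiers: tiers}
	}
	return out
}

// getLocalitiesByNode returns localities keyed by NodeID, unchanged, for
// paths that reason per node (such as load-based lease rebalancing above).
func (sp *storePool) getLocalitiesByNode(replicas []Replica) map[NodeID]Locality {
	out := make(map[NodeID]Locality, len(replicas))
	for _, r := range replicas {
		out[r.NodeID] = sp.nodeLocalities[r.NodeID]
	}
	return out
}

func main() {
	sp := &storePool{nodeLocalities: map[NodeID]Locality{
		1: {Tiers: []string{"region=us-east1"}},
	}}
	rs := []Replica{{NodeID: 1, StoreID: 1}, {NodeID: 1, StoreID: 2}}
	fmt.Println(sp.getLocalitiesByStore(rs)) // two entries, node tier appended
	fmt.Println(sp.getLocalitiesByNode(rs))  // one entry for node 1
}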
(diff for the remaining 12 changed files not loaded)
