Skip to content

Commit

Permalink
kvcoord: fix sorting of replicas
Browse files Browse the repository at this point in the history
This patch fixes a bug in replica sorting. When it think that a
particular request can be served through follower reads, the DistSender
sorts the replicas by latency, falling back to locality attributes. It
also has a special provision for ordering the local replica(s) first,
but that provision was buggy: the `Less` function was only correctly
comparing a local replica with a non-local one when the local replica
was on the left side of the compairison. This patch fixes the bug by
extracting the prioritization of local replicas out of the main sort,
giving them a dedicate phase for clarity.

Release note (bug fix): A certain percentage of cases in which a node
could have served a follower read were not handled correctly, resulting
in the node routing the request to another nearby node for no reason.
This has been fixed.
  • Loading branch information
andreimatei committed May 19, 2021
1 parent bc58328 commit acdda53
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 7 deletions.
13 changes: 11 additions & 2 deletions pkg/kv/kvclient/kvcoord/replica_slice.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,12 +206,21 @@ func (rs ReplicaSlice) OptimizeReplicaOrder(
shuffle.Shuffle(rs)
return
}

// Sort replicas by latency and then attribute affinity.
sort.Slice(rs, func(i, j int) bool {
// If there is a replica in local node, it sorts first.
// Replicas on the same node have the same latency.
if rs[i].NodeID == rs[j].NodeID {
return false // i == j
}
// Replicas on the local node sort first.
if rs[i].NodeID == nodeDesc.NodeID {
return true
return true // i < j
}
if rs[j].NodeID == nodeDesc.NodeID {
return false // j < i
}

if latencyFn != nil {
latencyI, okI := latencyFn(rs[i].addr())
latencyJ, okJ := latencyFn(rs[j].addr())
Expand Down
26 changes: 22 additions & 4 deletions pkg/kv/kvclient/kvcoord/replica_slice_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,6 @@ func desc(nid roachpb.NodeID, sid roachpb.StoreID) roachpb.ReplicaDescriptor {
return roachpb.ReplicaDescriptor{NodeID: nid, StoreID: sid}
}

func addr(nid roachpb.NodeID, sid roachpb.StoreID) util.UnresolvedAddr {
return util.MakeUnresolvedAddr("tcp", fmt.Sprintf("%d:%d", nid, sid))
}

func locality(t *testing.T, locStrs []string) roachpb.Locality {
var locality roachpb.Locality
for _, l := range locStrs {
Expand All @@ -163,6 +159,7 @@ func locality(t *testing.T, locStrs []string) roachpb.Locality {

func nodeDesc(t *testing.T, nid roachpb.NodeID, locStrs []string) *roachpb.NodeDescriptor {
return &roachpb.NodeDescriptor{
NodeID: nid,
Locality: locality(t, locStrs),
Address: util.MakeUnresolvedAddr("tcp", fmt.Sprintf("%d:26257", nid)),
}
Expand Down Expand Up @@ -218,6 +215,27 @@ func TestReplicaSliceOptimizeReplicaOrder(t *testing.T) {
},
expOrdered: []roachpb.NodeID{4, 3, 2},
},
{
// Test that replicas on the local node sort first, regardless of factors
// like their latency measurement (in production they won't have any
// latency measurement).
name: "local node comes first",
node: nodeDesc(t, 1, nil),
latencies: map[string]time.Duration{
"1:26257": 10 * time.Hour,
"2:26257": time.Hour,
"3:26257": time.Minute,
"4:26257": time.Second,
},
slice: ReplicaSlice{
info(t, 1, 1, nil),
info(t, 1, 2, nil),
info(t, 2, 2, nil),
info(t, 3, 3, nil),
info(t, 4, 4, nil),
},
expOrdered: []roachpb.NodeID{1, 4, 3, 2},
},
}
for _, test := range testCases {
t.Run(test.name, func(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/physicalplan/replicaoracle/oracle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func TestClosest(t *testing.T) {
nil, /* txn */
&roachpb.RangeDescriptor{
InternalReplicas: []roachpb.ReplicaDescriptor{
{NodeID: 1, StoreID: 1},
{NodeID: 4, StoreID: 4},
{NodeID: 2, StoreID: 2},
{NodeID: 3, StoreID: 3},
},
Expand Down

0 comments on commit acdda53

Please sign in to comment.