From 419fe7144385dcea4328afca79459dcd9956400d Mon Sep 17 00:00:00 2001 From: Peter Mattis Date: Sun, 6 Nov 2016 11:43:06 -0500 Subject: [PATCH] storage: prioritize up-replication over dead-replica removal Previously, dead-replica removal was prioritized over up-replication which could lead to situations in which the up-replication work created by the removal of a dead-replica was starved for a significant period of time. Additionally, dead-replica removal is quite fast in comparison to up-replication so prioritizing it allows a whole bunch of up-replication work to be generated fairly quickly. Now we prioritize up-replication work over dead-replica removal. If a stable cluster (no replication work needed), when a node dies each replicateQueue will alternate between removing a dead replica from a range and bringing that same range back to full replication. Fixes #10476 --- pkg/storage/allocator.go | 4 +- pkg/storage/allocator_test.go | 78 +++++++++++++++++------------------ 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/pkg/storage/allocator.go b/pkg/storage/allocator.go index 0dfd3896956a..43b07ee05bbb 100644 --- a/pkg/storage/allocator.go +++ b/pkg/storage/allocator.go @@ -40,8 +40,8 @@ const ( maxFractionUsedThreshold = 0.95 // priorities for various repair operations. - removeDeadReplicaPriority float64 = 10000 - addMissingReplicaPriority float64 = 1000 + addMissingReplicaPriority float64 = 10000 + removeDeadReplicaPriority float64 = 1000 removeExtraReplicaPriority float64 = 100 ) diff --git a/pkg/storage/allocator_test.go b/pkg/storage/allocator_test.go index cf70d8e7c09e..98316c6afaa0 100644 --- a/pkg/storage/allocator_test.go +++ b/pkg/storage/allocator_test.go @@ -799,7 +799,7 @@ func TestAllocatorComputeAction(t *testing.T) { desc roachpb.RangeDescriptor expectedAction AllocatorAction }{ - // Needs three replicas, two are on dead stores. + // Needs Three replicas, have two { zone: config.ZoneConfig{ NumReplicas: 3, @@ -815,23 +815,18 @@ func TestAllocatorComputeAction(t *testing.T) { ReplicaID: 1, }, { - StoreID: 7, - NodeID: 7, - ReplicaID: 7, - }, - { - StoreID: 6, - NodeID: 6, - ReplicaID: 6, + StoreID: 2, + NodeID: 2, + ReplicaID: 2, }, }, }, - expectedAction: AllocatorRemoveDead, + expectedAction: AllocatorAdd, }, - // Needs three replicas, one is on a dead store. + // Needs Five replicas, have four. { zone: config.ZoneConfig{ - NumReplicas: 3, + NumReplicas: 5, Constraints: config.Constraints{Constraints: []config.Constraint{{Value: "us-east"}}}, RangeMinBytes: 0, RangeMaxBytes: 64000, @@ -849,15 +844,20 @@ func TestAllocatorComputeAction(t *testing.T) { ReplicaID: 2, }, { - StoreID: 6, - NodeID: 6, - ReplicaID: 6, + StoreID: 3, + NodeID: 3, + ReplicaID: 3, + }, + { + StoreID: 4, + NodeID: 4, + ReplicaID: 4, }, }, }, - expectedAction: AllocatorRemoveDead, + expectedAction: AllocatorAdd, }, - // Needs three replicas, one is dead. + // Needs three replicas, two are on dead stores. { zone: config.ZoneConfig{ NumReplicas: 3, @@ -873,20 +873,20 @@ func TestAllocatorComputeAction(t *testing.T) { ReplicaID: 1, }, { - StoreID: 2, - NodeID: 2, - ReplicaID: 2, + StoreID: 7, + NodeID: 7, + ReplicaID: 7, }, { - StoreID: 8, - NodeID: 8, - ReplicaID: 8, + StoreID: 6, + NodeID: 6, + ReplicaID: 6, }, }, }, expectedAction: AllocatorRemoveDead, }, - // Needs five replicas, one is on a dead store. + // Needs three replicas, one is on a dead store. { zone: config.ZoneConfig{ NumReplicas: 3, @@ -906,16 +906,6 @@ func TestAllocatorComputeAction(t *testing.T) { NodeID: 2, ReplicaID: 2, }, - { - StoreID: 3, - NodeID: 3, - ReplicaID: 3, - }, - { - StoreID: 4, - NodeID: 4, - ReplicaID: 4, - }, { StoreID: 6, NodeID: 6, @@ -925,7 +915,7 @@ func TestAllocatorComputeAction(t *testing.T) { }, expectedAction: AllocatorRemoveDead, }, - // Needs Three replicas, have two + // Needs three replicas, one is dead. { zone: config.ZoneConfig{ NumReplicas: 3, @@ -945,14 +935,19 @@ func TestAllocatorComputeAction(t *testing.T) { NodeID: 2, ReplicaID: 2, }, + { + StoreID: 8, + NodeID: 8, + ReplicaID: 8, + }, }, }, - expectedAction: AllocatorAdd, + expectedAction: AllocatorRemoveDead, }, - // Needs Five replicas, have four. + // Needs five replicas, one is on a dead store. { zone: config.ZoneConfig{ - NumReplicas: 5, + NumReplicas: 3, Constraints: config.Constraints{Constraints: []config.Constraint{{Value: "us-east"}}}, RangeMinBytes: 0, RangeMaxBytes: 64000, @@ -979,9 +974,14 @@ func TestAllocatorComputeAction(t *testing.T) { NodeID: 4, ReplicaID: 4, }, + { + StoreID: 6, + NodeID: 6, + ReplicaID: 6, + }, }, }, - expectedAction: AllocatorAdd, + expectedAction: AllocatorRemoveDead, }, // Need three replicas, have four. {