From 4aa0b528dcef2c6a729abc32efaf7a5ce17a074d Mon Sep 17 00:00:00 2001
From: Alex Robinson
Date: Fri, 7 Dec 2018 17:23:54 -0800
Subject: [PATCH] storage: Fix GetNeededReplicas to count non-live, non-dead
 nodes

Release note (bug fix): Fixes an issue where ranges with high replication
factors would be incorrectly down-replicated when enough nodes stop running
that the desired number of replicas exceeds the number of running nodes.
This could cause under-replication and, potentially, unavailability.
---
 pkg/storage/allocator.go      |  2 +-
 pkg/storage/allocator_test.go | 83 ++++++++++++++++++++++++++++++++---
 pkg/storage/store.go          |  6 +--
 pkg/storage/store_pool.go     | 16 ++++---
 4 files changed, 91 insertions(+), 16 deletions(-)

diff --git a/pkg/storage/allocator.go b/pkg/storage/allocator.go
index d500999c5108..8639ee9caa8f 100644
--- a/pkg/storage/allocator.go
+++ b/pkg/storage/allocator.go
@@ -241,7 +241,7 @@ func MakeAllocator(
 
 // GetNeededReplicas calculates the number of replicas a range should
 // have given its zone config and the number of nodes available for
-// up-replication (i.e. live and not decommissioning).
+// up-replication (i.e. not dead and not decommissioning).
 func GetNeededReplicas(zoneConfigReplicaCount int32, availableNodes int) int {
 	numZoneReplicas := int(zoneConfigReplicaCount)
 	need := numZoneReplicas
diff --git a/pkg/storage/allocator_test.go b/pkg/storage/allocator_test.go
index ead55335fe5e..64a404b04f1d 100644
--- a/pkg/storage/allocator_test.go
+++ b/pkg/storage/allocator_test.go
@@ -329,7 +329,11 @@ func createTestAllocator(
 // ranges in deadReplicas.
 func mockStorePool(
 	storePool *StorePool,
-	aliveStoreIDs, deadStoreIDs, decommissioningStoreIDs, decommissionedStoreIDs []roachpb.StoreID,
+	aliveStoreIDs []roachpb.StoreID,
+	unavailableStoreIDs []roachpb.StoreID,
+	deadStoreIDs []roachpb.StoreID,
+	decommissioningStoreIDs []roachpb.StoreID,
+	decommissionedStoreIDs []roachpb.StoreID,
 	deadReplicas []roachpb.ReplicaIdent,
 ) {
 	storePool.detailsMu.Lock()
@@ -345,6 +349,14 @@ func mockStorePool(
 			Node:    roachpb.NodeDescriptor{NodeID: roachpb.NodeID(storeID)},
 		}
 	}
+	for _, storeID := range unavailableStoreIDs {
+		liveNodeSet[roachpb.NodeID(storeID)] = NodeLivenessStatus_UNAVAILABLE
+		detail := storePool.getStoreDetailLocked(storeID)
+		detail.desc = &roachpb.StoreDescriptor{
+			StoreID: storeID,
+			Node:    roachpb.NodeDescriptor{NodeID: roachpb.NodeID(storeID)},
+		}
+	}
 	for _, storeID := range deadStoreIDs {
 		liveNodeSet[roachpb.NodeID(storeID)] = NodeLivenessStatus_DEAD
 		detail := storePool.getStoreDetailLocked(storeID)
@@ -923,6 +935,7 @@ func TestAllocatorRebalanceDeadNodes(t *testing.T) {
 	mockStorePool(
 		sp,
 		[]roachpb.StoreID{1, 2, 3, 4, 5, 6},
+		nil,
 		[]roachpb.StoreID{7, 8},
 		nil,
 		nil,
@@ -4460,6 +4473,7 @@ func TestAllocatorComputeAction(t *testing.T) {
 	// is dead.
 	mockStorePool(sp, []roachpb.StoreID{1, 2, 3, 4, 5, 8},
+		nil,
 		[]roachpb.StoreID{6, 7},
 		nil,
 		nil,
@@ -4569,7 +4583,7 @@ func TestAllocatorComputeActionRemoveDead(t *testing.T) {
 	defer stopper.Stop(ctx)
 
 	for i, tcase := range testCases {
-		mockStorePool(sp, tcase.live, tcase.dead, nil, nil, nil)
+		mockStorePool(sp, tcase.live, nil, tcase.dead, nil, nil, nil)
 
 		action, _ := a.ComputeAction(ctx, zone, RangeInfo{Desc: &tcase.desc})
 		if tcase.expectedAction != action {
@@ -4790,7 +4804,7 @@ func TestAllocatorComputeActionDecommission(t *testing.T) {
 	defer stopper.Stop(ctx)
 
 	for i, tcase := range testCases {
-		mockStorePool(sp, tcase.live, tcase.dead, tcase.decommissioning, tcase.decommissioned, nil)
+		mockStorePool(sp, tcase.live, nil, tcase.dead, tcase.decommissioning, tcase.decommissioned, nil)
 
 		action, _ := a.ComputeAction(ctx, tcase.zone, RangeInfo{Desc: &tcase.desc})
 		if tcase.expectedAction != action {
@@ -4807,14 +4821,15 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) {
 		storeList       []roachpb.StoreID
 		expectedAction  AllocatorAction
 		live            []roachpb.StoreID
+		unavailable     []roachpb.StoreID
 		dead            []roachpb.StoreID
 		decommissioning []roachpb.StoreID
-		decommissioned  []roachpb.StoreID
 	}{
 		{
 			storeList:       []roachpb.StoreID{1, 2, 3, 4},
 			expectedAction:  AllocatorRemoveDecommissioning,
 			live:            []roachpb.StoreID{4},
+			unavailable:     nil,
 			dead:            nil,
 			decommissioning: []roachpb.StoreID{1, 2, 3},
 		},
@@ -4822,6 +4837,7 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) {
 			storeList:       []roachpb.StoreID{1, 2, 3},
 			expectedAction:  AllocatorAdd,
 			live:            []roachpb.StoreID{4, 5},
+			unavailable:     nil,
 			dead:            nil,
 			decommissioning: []roachpb.StoreID{1, 2, 3},
 		},
@@ -4829,6 +4845,7 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) {
 			storeList:       []roachpb.StoreID{1, 2, 3, 4},
 			expectedAction:  AllocatorRemoveDead,
 			live:            []roachpb.StoreID{1, 2, 3, 5},
+			unavailable:     nil,
 			dead:            []roachpb.StoreID{4},
 			decommissioning: nil,
 		},
@@ -4836,6 +4853,7 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) {
 			storeList:       []roachpb.StoreID{1, 4},
 			expectedAction:  AllocatorAdd,
 			live:            []roachpb.StoreID{1, 2, 3, 5},
+			unavailable:     nil,
 			dead:            []roachpb.StoreID{4},
 			decommissioning: nil,
 		},
@@ -4843,6 +4861,7 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) {
 			storeList:       []roachpb.StoreID{1, 2, 3},
 			expectedAction:  AllocatorConsiderRebalance,
 			live:            []roachpb.StoreID{1, 2, 3, 4},
+			unavailable:     nil,
 			dead:            nil,
 			decommissioning: nil,
 		},
@@ -4850,6 +4869,7 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) {
 			storeList:       []roachpb.StoreID{1, 2},
 			expectedAction:  AllocatorAdd,
 			live:            []roachpb.StoreID{1, 2},
+			unavailable:     nil,
 			dead:            nil,
 			decommissioning: nil,
 		},
@@ -4857,6 +4877,7 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) {
 			storeList:       []roachpb.StoreID{1, 2, 3},
 			expectedAction:  AllocatorConsiderRebalance,
 			live:            []roachpb.StoreID{1, 2, 3},
+			unavailable:     nil,
 			dead:            nil,
 			decommissioning: nil,
 		},
@@ -4864,9 +4885,58 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) {
 			storeList:       []roachpb.StoreID{1, 2, 3, 4},
 			expectedAction:  AllocatorRemove,
 			live:            []roachpb.StoreID{1, 2, 3, 4},
+			unavailable:     nil,
 			dead:            nil,
 			decommissioning: nil,
 		},
+		{
+			storeList:       []roachpb.StoreID{1, 2, 3, 4, 5},
+			expectedAction:  AllocatorConsiderRebalance,
+			live:            []roachpb.StoreID{1, 2, 3, 4, 5},
+			unavailable:     nil,
+			dead:            nil,
+			decommissioning: nil,
+		},
+		{
+			storeList:       []roachpb.StoreID{1, 2, 3, 4, 5},
+			expectedAction:  AllocatorConsiderRebalance,
+			live:            []roachpb.StoreID{1, 2, 3, 4},
+			unavailable:     []roachpb.StoreID{5},
+			dead:            nil,
+			decommissioning: nil,
+		},
+		{
+			storeList:       []roachpb.StoreID{1, 2, 3, 4, 5},
+			expectedAction:  AllocatorConsiderRebalance,
+			live:            []roachpb.StoreID{1, 2, 3},
+			unavailable:     []roachpb.StoreID{4, 5},
+			dead:            nil,
+			decommissioning: nil,
+		},
+		{
+			storeList:       []roachpb.StoreID{1, 2, 3, 4, 5},
+			expectedAction:  AllocatorNoop,
+			live:            []roachpb.StoreID{1, 2},
+			unavailable:     []roachpb.StoreID{3, 4, 5},
+			dead:            nil,
+			decommissioning: nil,
+		},
+		{
+			storeList:       []roachpb.StoreID{1, 2, 3, 4, 5},
+			expectedAction:  AllocatorRemoveDead,
+			live:            []roachpb.StoreID{1, 2, 3},
+			unavailable:     []roachpb.StoreID{4},
+			dead:            []roachpb.StoreID{5},
+			decommissioning: nil,
+		},
+		{
+			storeList:       []roachpb.StoreID{1, 2, 3, 4, 5},
+			expectedAction:  AllocatorRemoveDecommissioning,
+			live:            []roachpb.StoreID{1, 2, 3},
+			unavailable:     []roachpb.StoreID{4},
+			dead:            nil,
+			decommissioning: []roachpb.StoreID{5},
+		},
 	}
 
 	stopper, _, sp, a, _ := createTestAllocator( /* deterministic */ false)
@@ -4878,12 +4948,13 @@ func TestAllocatorComputeActionDynamicNumReplicas(t *testing.T) {
 	for _, prefixKey := range []roachpb.RKey{roachpb.RKey(keys.NodeLivenessPrefix), roachpb.RKey(keys.SystemPrefix)} {
 		for i, tcase := range testCases {
-			mockStorePool(sp, tcase.live, tcase.dead, tcase.decommissioning, tcase.decommissioned, nil)
+			mockStorePool(sp, tcase.live, tcase.unavailable, tcase.dead, tcase.decommissioning, []roachpb.StoreID{}, nil)
 			desc := makeDescriptor(tcase.storeList)
 			desc.EndKey = prefixKey
 
 			action, _ := a.ComputeAction(ctx, zone, RangeInfo{Desc: &desc})
 			if tcase.expectedAction != action {
-				t.Errorf("Test case %d expected action %d, got action %d", i, tcase.expectedAction, action)
+				t.Errorf("test case %d expected action %q, got action %q",
+					i, allocatorActionNames[tcase.expectedAction], allocatorActionNames[action])
 				continue
 			}
 		}
diff --git a/pkg/storage/store.go b/pkg/storage/store.go
index cb503629e250..4cecd4b7e5cf 100644
--- a/pkg/storage/store.go
+++ b/pkg/storage/store.go
@@ -139,9 +139,9 @@ func TestStoreConfig(clock *hlc.Clock) StoreConfig {
 	}
 	st := cluster.MakeTestingClusterSettings()
 	sc := StoreConfig{
-		Settings:   st,
-		AmbientCtx: log.AmbientContext{Tracer: st.Tracer},
-		Clock:      clock,
+		Settings:                    st,
+		AmbientCtx:                  log.AmbientContext{Tracer: st.Tracer},
+		Clock:                       clock,
 		CoalescedHeartbeatsInterval: 50 * time.Millisecond,
 		RaftHeartbeatIntervalTicks:  1,
 		ScanInterval:                10 * time.Minute,
diff --git a/pkg/storage/store_pool.go b/pkg/storage/store_pool.go
index f0030c46ad15..099e197b73c4 100644
--- a/pkg/storage/store_pool.go
+++ b/pkg/storage/store_pool.go
@@ -435,9 +435,10 @@ func (sp *StorePool) decommissioningReplicas(
 	return
 }
 
-// AvailableNodeCount returns the number of nodes which are
-// considered available for use as allocation targets. This includes
-// only live nodes which are not decommissioning.
+// AvailableNodeCount returns the number of nodes which are considered
+// available for use as allocation targets. This includes only nodes which are
+// not dead or decommissioning. It notably does include nodes that are not
+// considered live by node liveness but are also not yet considered dead.
 func (sp *StorePool) AvailableNodeCount() int {
 	sp.detailsMu.RLock()
 	defer sp.detailsMu.RUnlock()
@@ -447,11 +448,14 @@ func (sp *StorePool) AvailableNodeCount() int {
 	timeUntilStoreDead := TimeUntilStoreDead.Get(&sp.st.SV)
 
 	for _, detail := range sp.detailsMu.storeDetails {
+		if detail.desc == nil {
+			continue
+		}
 		switch s := detail.status(now, timeUntilStoreDead, 0, sp.nodeLivenessFn); s {
-		case storeStatusThrottled, storeStatusAvailable:
+		case storeStatusThrottled, storeStatusAvailable, storeStatusUnknown, storeStatusReplicaCorrupted:
 			availableNodes[detail.desc.Node.NodeID] = struct{}{}
-		case storeStatusReplicaCorrupted, storeStatusDead, storeStatusUnknown, storeStatusDecommissioning:
-			// Do nothing; this node cannot be used.
+		case storeStatusDead, storeStatusDecommissioning:
+			// Do nothing; this node can't/shouldn't have any replicas on it.
 		default:
 			panic(fmt.Sprintf("unknown store status: %d", s))
 		}
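
The effect of counting not-yet-dead nodes can be illustrated with a minimal, self-contained sketch. This is not the CockroachDB implementation: the neededReplicas helper below only mimics the clamping behavior the patch relies on (the assumption being that GetNeededReplicas reduces the target replica count to the number of available nodes and avoids even replication factors), and the scenario in main mirrors the new test cases.

package main

import "fmt"

// neededReplicas is a simplified stand-in for GetNeededReplicas (assumption:
// the real function clamps the zone config's replica count to the number of
// nodes available for up-replication and prefers odd replication factors).
func neededReplicas(zoneReplicas, availableNodes int) int {
	need := zoneReplicas
	if availableNodes < need {
		need = availableNodes
	}
	// Prefer an odd replication factor so a quorum survives a single failure.
	if need%2 == 0 && need > 1 {
		need--
	}
	return need
}

func main() {
	// A 5x-replicated range on a 5-node cluster where 2 nodes briefly stop
	// heartbeating (non-live) but have not yet crossed the dead threshold.
	const zoneReplicas = 5

	// Old behavior: only live nodes counted as available, so the target
	// dropped to 3 and the allocator would down-replicate.
	fmt.Println(neededReplicas(zoneReplicas, 3)) // 3

	// New behavior: non-live but not-yet-dead nodes still count, so the
	// target stays at 5 and no down-replication happens.
	fmt.Println(neededReplicas(zoneReplicas, 5)) // 5
}

Once a node actually crosses TimeUntilStoreDead it is excluded from the count again, at which point down-replication (AllocatorRemoveDead) is the desired outcome, as exercised by the new test case with a dead store.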