diff --git a/roachpb/metadata.go b/roachpb/metadata.go index a54170ba4b30..409c10eb663a 100644 --- a/roachpb/metadata.go +++ b/roachpb/metadata.go @@ -193,15 +193,6 @@ func (sc StoreCapacity) FractionUsed() float64 { return float64(sc.Capacity-sc.Available) / float64(sc.Capacity) } -// CombinedAttrs returns the full list of attributes for the store, including -// both the node and store attributes. -func (s StoreDescriptor) CombinedAttrs() *Attributes { - var a []string - a = append(a, s.Node.Attrs.Attrs...) - a = append(a, s.Attrs.Attrs...) - return &Attributes{Attrs: a} -} - // String returns a string representation of the Tier. func (t Tier) String() string { return fmt.Sprintf("%s=%s", t.Key, t.Value) diff --git a/storage/allocator.go b/storage/allocator.go index b7b58c4247dd..038785146c53 100644 --- a/storage/allocator.go +++ b/storage/allocator.go @@ -20,7 +20,6 @@ package storage import ( "fmt" - "math/rand" "golang.org/x/net/context" @@ -28,7 +27,6 @@ import ( "github.com/cockroachdb/cockroach/roachpb" "github.com/cockroachdb/cockroach/util" "github.com/cockroachdb/cockroach/util/log" - "github.com/cockroachdb/cockroach/util/syncutil" "github.com/pkg/errors" ) @@ -88,58 +86,28 @@ func (*allocatorError) purgatoryErrorMarker() {} var _ purgatoryError = &allocatorError{} -// allocatorRand pairs a rand.Rand with a mutex. -// TODO: Allocator is typically only accessed from a single thread (the -// replication queue), but this assumption is broken in tests which force -// replication scans. If those tests can be modified to suspend the normal -// replication queue during the forced scan, then this rand could be used -// without a mutex. -type allocatorRand struct { - *syncutil.Mutex - *rand.Rand -} - -func makeAllocatorRand(source rand.Source) allocatorRand { - return allocatorRand{ - Mutex: &syncutil.Mutex{}, - Rand: rand.New(source), - } -} - // AllocatorOptions are configurable options which effect the way that the // replicate queue will handle rebalancing opportunities. type AllocatorOptions struct { // AllowRebalance allows this store to attempt to rebalance its own // replicas to other stores. AllowRebalance bool - - // Deterministic makes allocation decisions deterministic, based on - // current cluster statistics. If this flag is not set, allocation operations - // will have random behavior. This flag is intended to be set for testing - // purposes only. - Deterministic bool } // Allocator tries to spread replicas as evenly as possible across the stores // in the cluster. type Allocator struct { - storePool *StorePool - randGen allocatorRand - options AllocatorOptions + storePool *StorePool + options AllocatorOptions + ruleSolver *ruleSolver } // MakeAllocator creates a new allocator using the specified StorePool. func MakeAllocator(storePool *StorePool, options AllocatorOptions) Allocator { - var randSource rand.Source - if options.Deterministic { - randSource = rand.NewSource(777) - } else { - randSource = rand.NewSource(rand.Int63()) - } return Allocator{ - storePool: storePool, - options: options, - randGen: makeAllocatorRand(randSource), + storePool: storePool, + options: options, + ruleSolver: makeDefaultRuleSolver(storePool), } } @@ -186,62 +154,42 @@ func (a *Allocator) ComputeAction(zone config.ZoneConfig, desc *roachpb.RangeDes // AllocateTarget returns a suitable store for a new allocation with the // required attributes. Nodes already accommodating existing replicas are ruled -// out as targets. 
If relaxConstraints is true, then the required attributes -// will be relaxed as necessary, from least specific to most specific, in order -// to allocate a target. +// out as targets. func (a *Allocator) AllocateTarget( constraints config.Constraints, existing []roachpb.ReplicaDescriptor, - relaxConstraints bool, ) (*roachpb.StoreDescriptor, error) { - existingNodes := make(nodeIDSet, len(existing)) - for _, repl := range existing { - existingNodes[repl.NodeID] = struct{}{} + candidates, err := a.ruleSolver.Solve(constraints, existing) + if err != nil { + return nil, err } - // Because more redundancy is better than less, if relaxConstraints, the - // matching here is lenient, and tries to find a target by relaxing an - // attribute constraint, from last attribute to first. - for attrs := append([]config.Constraint(nil), constraints.Constraints...); ; attrs = attrs[:len(attrs)-1] { - sl, aliveStoreCount, throttledStoreCount := a.storePool.getStoreList( - config.Constraints{Constraints: attrs}, - a.options.Deterministic, - ) - if target := a.selectGood(sl, existingNodes); target != nil { - return target, nil - } - - // When there are throttled stores that do match, we shouldn't send - // the replica to purgatory or even consider relaxing the constraints. - if throttledStoreCount > 0 { - return nil, errors.Errorf("%d matching stores are currently throttled", throttledStoreCount) - } - if len(attrs) == 0 || !relaxConstraints { - return nil, &allocatorError{ - required: constraints.Constraints, - relaxConstraints: relaxConstraints, - aliveStoreCount: aliveStoreCount, - } + if len(candidates) == 0 { + return nil, &allocatorError{ + required: constraints.Constraints, } } + return &candidates[0].store, nil } // RemoveTarget returns a suitable replica to remove from the provided replica // set. It attempts to consider which of the provided replicas would be the best // candidate for removal. It also will exclude any replica that belongs to the // range lease holder's store ID. -// -// TODO(mrtracy): removeTarget eventually needs to accept the attributes from -// the zone config associated with the provided replicas. This will allow it to -// make correct decisions in the case of ranges with heterogeneous replica -// requirements (i.e. multiple data centers). -func (a Allocator) RemoveTarget(existing []roachpb.ReplicaDescriptor, leaseStoreID roachpb.StoreID) (roachpb.ReplicaDescriptor, error) { +func (a Allocator) RemoveTarget( + constraints config.Constraints, + existing []roachpb.ReplicaDescriptor, + leaseStoreID roachpb.StoreID, +) (roachpb.ReplicaDescriptor, error) { if len(existing) == 0 { return roachpb.ReplicaDescriptor{}, errors.Errorf("must supply at least one replica to allocator.RemoveTarget()") } - // Retrieve store descriptors for the provided replicas from the StorePool. - sl := StoreList{} + sl, _, _ := a.storePool.getStoreList() + + found := false + var worst roachpb.ReplicaDescriptor + var worstScore float64 for _, exist := range existing { if exist.StoreID == leaseStoreID { continue @@ -250,16 +198,26 @@ func (a Allocator) RemoveTarget(existing []roachpb.ReplicaDescriptor, leaseStore if !ok { continue } - sl.add(desc) + // If it's not a valid candidate, score will be zero. 
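+		// Such candidates score below every valid candidate under the default
+		// rules, so replicas on stores that no longer satisfy the constraints
+		// become the preferred removal targets.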
+		candidate, _ := a.ruleSolver.computeCandidate(solveState{
+			constraints: constraints,
+			store:       desc,
+			existing:    nil,
+			sl:          sl,
+			tierOrder:   canonicalTierOrder(sl),
+			tiers:       storeTierMap(sl),
+		})
+		if !found || candidate.score < worstScore {
+			worstScore = candidate.score
+			worst = exist
+			found = true
+		}
 	}
-	if bad := a.selectBad(sl); bad != nil {
-		for _, exist := range existing {
-			if exist.StoreID == bad.StoreID {
-				return exist, nil
-			}
-		}
+	if found {
+		return worst, nil
 	}
+
 	return roachpb.ReplicaDescriptor{}, errors.Errorf("RemoveTarget() could not select an appropriate replica to be removed")
 }
@@ -285,12 +243,12 @@ func (a Allocator) RebalanceTarget(
 	constraints config.Constraints,
 	existing []roachpb.ReplicaDescriptor,
 	leaseStoreID roachpb.StoreID,
-) *roachpb.StoreDescriptor {
+) (*roachpb.StoreDescriptor, error) {
 	if !a.options.AllowRebalance {
-		return nil
+		return nil, nil
 	}
-	sl, _, _ := a.storePool.getStoreList(constraints, a.options.Deterministic)
+	sl, _, _ := a.storePool.getStoreList()
 	if log.V(3) {
 		log.Infof(context.TODO(), "rebalance-target (lease-holder=%d):\n%s", leaseStoreID, sl)
 	}
@@ -307,42 +265,36 @@ func (a Allocator) RebalanceTarget(
 		}
 	}
 	if !shouldRebalance {
-		return nil
+		return nil, nil
 	}
-	existingNodes := make(nodeIDSet, len(existing))
-	for _, repl := range existing {
-		existingNodes[repl.NodeID] = struct{}{}
+	// Get candidate stores.
+	candidates, err := a.ruleSolver.Solve(constraints, nil)
+	if err != nil {
+		return nil, err
 	}
-	return a.improve(sl, existingNodes)
-}
-
-// selectGood attempts to select a store from the supplied store list that it
-// considers to be 'Good' relative to the other stores in the list. Any nodes
-// in the supplied 'exclude' list will be disqualified from selection. Returns
-// the selected store or nil if no such store can be found.
-func (a Allocator) selectGood(sl StoreList, excluded nodeIDSet) *roachpb.StoreDescriptor {
-	rcb := rangeCountBalancer{a.randGen}
-	return rcb.selectGood(sl, excluded)
-}
-// selectBad attempts to select a store from the supplied store list that it
-// considers to be 'Bad' relative to the other stores in the list. Returns the
-// selected store or nil if no such store can be found.
-func (a Allocator) selectBad(sl StoreList) *roachpb.StoreDescriptor {
-	rcb := rangeCountBalancer{a.randGen}
-	return rcb.selectBad(sl)
-}
-
-// improve attempts to select an improvement over the given store from the
-// stores in the given store list. Any nodes in the supplied 'exclude' list
-// will be disqualified from selection. Returns the selected store, or nil if
-// no such store can be found.
-func (a Allocator) improve(
-	sl StoreList, excluded nodeIDSet,
-) *roachpb.StoreDescriptor {
-	rcb := rangeCountBalancer{a.randGen}
-	return rcb.improve(sl, excluded)
+	// Find a candidate that is better than one of the existing stores, otherwise
+	// return nil.
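+	// Candidates are sorted by score in descending order, so the first store
+	// that is not already holding a replica of this range is the best
+	// available target.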
+ candidatesFound := 0 + for _, candidate := range candidates { + store := candidate.store + found := false + for _, repl := range existing { + if repl.StoreID == store.StoreID { + found = true + break + } + } + if !found { + return &store, nil + } + candidatesFound++ + if candidatesFound > len(existing) { + break + } + } + return nil, nil } // shouldRebalance returns whether the specified store is a candidate for @@ -350,8 +302,23 @@ func (a Allocator) improve( func (a Allocator) shouldRebalance( store roachpb.StoreDescriptor, sl StoreList, ) bool { - rcb := rangeCountBalancer{a.randGen} - return rcb.shouldRebalance(store, sl) + const replicaInbalanceTolerance = 1 + + // Moving a replica from the given store makes its range count converge on + // the mean range count. + // + // TODO(peter,bram,cuong): The FractionUsed check seems suspicious. When a + // node becomes fuller than maxFractionUsedThreshold we will always select it + // for rebalancing. This is currently utilized by tests. + shouldRebalance := store.Capacity.FractionUsed() >= maxFractionUsedThreshold || (float64(store.Capacity.RangeCount)-sl.candidateCount.mean) >= replicaInbalanceTolerance + + if log.V(2) { + log.Infof(context.TODO(), + "%d: should-rebalance=%t: fraction-used=%.2f range-count=%d (mean=%.1f)", + store.StoreID, shouldRebalance, store.Capacity.FractionUsed(), + store.Capacity.RangeCount, sl.candidateCount.mean) + } + return shouldRebalance } // computeQuorum computes the quorum value for the given number of nodes. diff --git a/storage/allocator_test.go b/storage/allocator_test.go index d34b50935dd0..ac0c9f0a9e31 100644 --- a/storage/allocator_test.go +++ b/storage/allocator_test.go @@ -20,6 +20,7 @@ package storage import ( "fmt" + "math/rand" "reflect" "sort" "sync" @@ -180,13 +181,19 @@ func mockStorePool(storePool *StorePool, aliveStoreIDs, deadStoreIDs []roachpb.S storePool.mu.stores = make(map[roachpb.StoreID]*storeDetail) for _, storeID := range aliveStoreIDs { detail := newStoreDetail() - detail.desc = &roachpb.StoreDescriptor{StoreID: storeID} + detail.desc = &roachpb.StoreDescriptor{ + StoreID: storeID, + Node: roachpb.NodeDescriptor{NodeID: roachpb.NodeID(storeID)}, + } storePool.mu.stores[storeID] = detail } for _, storeID := range deadStoreIDs { detail := newStoreDetail() detail.dead = true - detail.desc = &roachpb.StoreDescriptor{StoreID: storeID} + detail.desc = &roachpb.StoreDescriptor{ + StoreID: storeID, + Node: roachpb.NodeDescriptor{NodeID: roachpb.NodeID(storeID)}, + } storePool.mu.stores[storeID] = detail } for storeID, detail := range storePool.mu.stores { @@ -204,7 +211,7 @@ func TestAllocatorSimpleRetrieval(t *testing.T) { stopper, g, _, a, _ := createTestAllocator() defer stopper.Stop() gossiputil.NewStoreGossiper(g).GossipStores(singleStore, t) - result, err := a.AllocateTarget(simpleZoneConfig.Constraints, []roachpb.ReplicaDescriptor{}, false) + result, err := a.AllocateTarget(simpleZoneConfig.Constraints, []roachpb.ReplicaDescriptor{}) if err != nil { t.Fatalf("Unable to perform allocation: %v", err) } @@ -217,7 +224,7 @@ func TestAllocatorNoAvailableDisks(t *testing.T) { defer leaktest.AfterTest(t)() stopper, _, _, a, _ := createTestAllocator() defer stopper.Stop() - result, err := a.AllocateTarget(simpleZoneConfig.Constraints, []roachpb.ReplicaDescriptor{}, false) + result, err := a.AllocateTarget(simpleZoneConfig.Constraints, []roachpb.ReplicaDescriptor{}) if result != nil { t.Errorf("expected nil result: %+v", result) } @@ -231,14 +238,14 @@ func TestAllocatorTwoDatacenters(t 
*testing.T) { stopper, g, _, a, _ := createTestAllocator() defer stopper.Stop() gossiputil.NewStoreGossiper(g).GossipStores(multiDCStores, t) - result1, err := a.AllocateTarget(multiDCConfig.Constraints, []roachpb.ReplicaDescriptor{}, false) + result1, err := a.AllocateTarget(multiDCConfig.Constraints, []roachpb.ReplicaDescriptor{}) if err != nil { t.Fatalf("Unable to perform allocation: %v", err) } result2, err := a.AllocateTarget(multiDCConfig.Constraints, []roachpb.ReplicaDescriptor{{ NodeID: result1.Node.NodeID, StoreID: result1.StoreID, - }}, false) + }}) if err != nil { t.Fatalf("Unable to perform allocation: %v", err) } @@ -257,7 +264,7 @@ func TestAllocatorTwoDatacenters(t *testing.T) { NodeID: result2.Node.NodeID, StoreID: result2.StoreID, }, - }, false) + }) if err == nil { t.Errorf("expected error on allocation without available stores: %+v", result3) } @@ -280,7 +287,7 @@ func TestAllocatorExistingReplica(t *testing.T) { NodeID: 2, StoreID: 2, }, - }, false) + }) if err != nil { t.Fatalf("Unable to perform allocation: %v", err) } @@ -299,30 +306,121 @@ func TestAllocatorRelaxConstraints(t *testing.T) { gossiputil.NewStoreGossiper(g).GossipStores(multiDCStores, t) testCases := []struct { - required []config.Constraint // attribute strings - existing []int // existing store/node ID - relaxConstraints bool // allow constraints to be relaxed? - expID int // expected store/node ID on allocate - expErr bool + required []config.Constraint // attribute strings + existing []int // existing store/node ID + expID int // expected store/node ID on allocate + expErr bool }{ // The two stores in the system have attributes: // storeID=1 {"a", "ssd"} // storeID=2 {"b", "ssd"} - {[]config.Constraint{{Value: "a"}, {Value: "ssd"}}, []int{}, true, 1, false}, - {[]config.Constraint{{Value: "a"}, {Value: "ssd"}}, []int{1}, true, 2, false}, - {[]config.Constraint{{Value: "a"}, {Value: "ssd"}}, []int{1}, false, 0, true}, - {[]config.Constraint{{Value: "a"}, {Value: "ssd"}}, []int{1, 2}, true, 0, true}, - {[]config.Constraint{{Value: "b"}, {Value: "ssd"}}, []int{}, true, 2, false}, - {[]config.Constraint{{Value: "b"}, {Value: "ssd"}}, []int{1}, true, 2, false}, - {[]config.Constraint{{Value: "b"}, {Value: "ssd"}}, []int{2}, false, 0, true}, - {[]config.Constraint{{Value: "b"}, {Value: "ssd"}}, []int{2}, true, 1, false}, - {[]config.Constraint{{Value: "b"}, {Value: "ssd"}}, []int{1, 2}, true, 0, true}, - {[]config.Constraint{{Value: "b"}, {Value: "hdd"}}, []int{}, true, 2, false}, - {[]config.Constraint{{Value: "b"}, {Value: "hdd"}}, []int{2}, true, 1, false}, - {[]config.Constraint{{Value: "b"}, {Value: "hdd"}}, []int{2}, false, 0, true}, - {[]config.Constraint{{Value: "b"}, {Value: "hdd"}}, []int{1, 2}, true, 0, true}, - {[]config.Constraint{{Value: "b"}, {Value: "ssd"}, {Value: "gpu"}}, []int{}, true, 2, false}, - {[]config.Constraint{{Value: "b"}, {Value: "hdd"}, {Value: "gpu"}}, []int{}, true, 2, false}, + { + []config.Constraint{ + {Value: "a"}, + {Value: "ssd"}, + }, + []int{}, 1, false, + }, + { + []config.Constraint{ + {Value: "a"}, + {Value: "ssd"}, + }, + []int{1}, 2, false, + }, + { + []config.Constraint{ + {Value: "a", Type: config.Constraint_REQUIRED}, + {Value: "ssd", Type: config.Constraint_REQUIRED}, + }, + []int{1}, 0, true, + }, + { + []config.Constraint{ + {Value: "a"}, + {Value: "ssd"}, + }, + []int{1, 2}, 0, true, + }, + { + []config.Constraint{ + {Value: "b"}, + {Value: "ssd"}, + }, + []int{}, 2, false, + }, + { + []config.Constraint{ + {Value: "b"}, + {Value: "ssd"}, + }, + 
[]int{1}, 2, false, + }, + { + []config.Constraint{ + {Value: "b", Type: config.Constraint_REQUIRED}, + {Value: "ssd", Type: config.Constraint_REQUIRED}, + }, + []int{2}, 0, true, + }, + { + []config.Constraint{ + {Value: "b"}, + {Value: "ssd"}, + }, + []int{2}, 1, false, + }, + { + []config.Constraint{ + {Value: "b"}, + {Value: "ssd"}, + }, + []int{1, 2}, 0, true, + }, + { + []config.Constraint{ + {Value: "b"}, + {Value: "hdd"}, + }, + []int{}, 2, false, + }, + { + []config.Constraint{ + {Value: "b"}, + {Value: "hdd"}, + }, + []int{2}, 1, false, + }, + { + []config.Constraint{ + {Value: "b", Type: config.Constraint_REQUIRED}, + {Value: "hdd", Type: config.Constraint_REQUIRED}, + }, + []int{2}, 0, true, + }, + { + []config.Constraint{ + {Value: "b"}, + {Value: "hdd"}, + }, + []int{1, 2}, 0, true, + }, + { + []config.Constraint{ + {Value: "b"}, + {Value: "ssd"}, + {Value: "gpu"}, + }, + []int{}, 2, false, + }, + { + []config.Constraint{ + {Value: "b"}, + {Value: "hdd"}, + {Value: "gpu"}, + }, + []int{}, 2, false, + }, } for i, test := range testCases { var existing []roachpb.ReplicaDescriptor @@ -330,7 +428,7 @@ func TestAllocatorRelaxConstraints(t *testing.T) { existing = append(existing, roachpb.ReplicaDescriptor{NodeID: roachpb.NodeID(id), StoreID: roachpb.StoreID(id)}) } constraints := config.Constraints{Constraints: test.required} - result, err := a.AllocateTarget(constraints, existing, test.relaxConstraints) + result, err := a.AllocateTarget(constraints, existing) if haveErr := (err != nil); haveErr != test.expErr { t.Errorf("%d: expected error %t; got %t: %s", i, test.expErr, haveErr, err) } else if err == nil && roachpb.StoreID(test.expID) != result.StoreID { @@ -392,7 +490,10 @@ func TestAllocatorRebalance(t *testing.T) { // Every rebalance target must be either stores 1 or 2. for i := 0; i < 10; i++ { - result := a.RebalanceTarget(config.Constraints{}, []roachpb.ReplicaDescriptor{{StoreID: 3}}, 0) + result, err := a.RebalanceTarget(config.Constraints{}, []roachpb.ReplicaDescriptor{{StoreID: 3}}, 0) + if err != nil { + t.Fatal(err) + } if result == nil { t.Fatal("nil result") } @@ -402,13 +503,12 @@ func TestAllocatorRebalance(t *testing.T) { } // Verify shouldRebalance results. - a.options.Deterministic = true for i, store := range stores { desc, ok := a.storePool.getStoreDescriptor(store.StoreID) if !ok { t.Fatalf("%d: unable to get store %d descriptor", i, store.StoreID) } - sl, _, _ := a.storePool.getStoreList(config.Constraints{}, true) + sl, _, _ := a.storePool.getStoreList() result := a.shouldRebalance(desc, sl) if expResult := (i >= 2); expResult != result { t.Errorf("%d: expected rebalance %t; got %t", i, expResult, result) @@ -451,20 +551,22 @@ func TestAllocatorRebalanceByCount(t *testing.T) { // Every rebalance target must be store 4 (or nil for case of missing the only option). for i := 0; i < 10; i++ { - result := a.RebalanceTarget(config.Constraints{}, []roachpb.ReplicaDescriptor{{StoreID: 1}}, 0) + result, err := a.RebalanceTarget(config.Constraints{}, []roachpb.ReplicaDescriptor{{StoreID: 1}}, 0) + if err != nil { + t.Fatal(err) + } if result != nil && result.StoreID != 4 { t.Errorf("expected store 4; got %d", result.StoreID) } } // Verify shouldRebalance results. 
- a.options.Deterministic = true for i, store := range stores { desc, ok := a.storePool.getStoreDescriptor(store.StoreID) if !ok { t.Fatalf("%d: unable to get store %d descriptor", i, store.StoreID) } - sl, _, _ := a.storePool.getStoreList(config.Constraints{}, true) + sl, _, _ := a.storePool.getStoreList() result := a.shouldRebalance(desc, sl) if expResult := (i < 3); expResult != result { t.Errorf("%d: expected rebalance %t; got %t", i, expResult, result) @@ -530,7 +632,7 @@ func TestAllocatorRemoveTarget(t *testing.T) { sg := gossiputil.NewStoreGossiper(g) sg.GossipStores(stores, t) - targetRepl, err := a.RemoveTarget(replicas, stores[0].StoreID) + targetRepl, err := a.RemoveTarget(config.Constraints{}, replicas, stores[0].StoreID) if err != nil { t.Fatal(err) } @@ -540,7 +642,7 @@ func TestAllocatorRemoveTarget(t *testing.T) { // Now perform the same test, but pass in the store ID of store 3 so it's // excluded. - targetRepl, err = a.RemoveTarget(replicas, stores[2].StoreID) + targetRepl, err = a.RemoveTarget(config.Constraints{}, replicas, stores[2].StoreID) if err != nil { t.Fatal(err) } @@ -970,7 +1072,7 @@ func TestAllocatorThrottled(t *testing.T) { _, err := a.AllocateTarget( simpleZoneConfig.Constraints, []roachpb.ReplicaDescriptor{}, - false) + ) if _, ok := err.(purgatoryError); !ok { t.Fatalf("expected a purgatory error, got: %v", err) } @@ -980,7 +1082,7 @@ func TestAllocatorThrottled(t *testing.T) { result, err := a.AllocateTarget( simpleZoneConfig.Constraints, []roachpb.ReplicaDescriptor{}, - false) + ) if err != nil { t.Fatalf("unable to perform allocation: %v", err) } @@ -1000,7 +1102,7 @@ func TestAllocatorThrottled(t *testing.T) { _, err = a.AllocateTarget( simpleZoneConfig.Constraints, []roachpb.ReplicaDescriptor{}, - false) + ) if _, ok := err.(purgatoryError); ok { t.Fatalf("expected a non purgatory error, got: %v", err) } @@ -1044,13 +1146,18 @@ func Example_rebalancing() { TestTimeUntilStoreDeadOff, stopper, ) - alloc := MakeAllocator(sp, AllocatorOptions{AllowRebalance: true, Deterministic: true}) + sp.TestSetDeterministic(true) + alloc := MakeAllocator(sp, AllocatorOptions{AllowRebalance: true}) var wg sync.WaitGroup g.RegisterCallback(gossip.MakePrefixPattern(gossip.KeyStorePrefix), func(_ string, _ roachpb.Value) { wg.Done() }) - const generations = 100 const nodes = 20 + const generations = 100 + const printGenerations = generations / 2 + const generationToStopAdding = generations * 9 / 10 + + randGen := rand.New(rand.NewSource(777)) // Initialize testStores. var testStores [nodes]testStore @@ -1060,16 +1167,21 @@ func Example_rebalancing() { testStores[i].Capacity = roachpb.StoreCapacity{Capacity: 1 << 30, Available: 1 << 30} } // Initialize the cluster with a single range. - testStores[0].add(alloc.randGen.Int63n(1 << 20)) + testStores[0].add(randGen.Int63n(1 << 20)) for i := 0; i < generations; i++ { - // First loop through test stores and add data. + if i < generationToStopAdding { + // First loop through test stores and add data. + for j := 0; j < len(testStores); j++ { + // Add a pretend range to the testStore if there's already one. + if testStores[j].Capacity.RangeCount > 0 { + testStores[j].add(randGen.Int63n(1 << 20)) + } + } + } + // Gossip the new store info. wg.Add(len(testStores)) for j := 0; j < len(testStores); j++ { - // Add a pretend range to the testStore if there's already one. 
- if testStores[j].Capacity.RangeCount > 0 { - testStores[j].add(alloc.randGen.Int63n(1 << 20)) - } if err := g.AddInfoProto(gossip.MakeStoreKey(roachpb.StoreID(j)), &testStores[j].StoreDescriptor, 0); err != nil { panic(err) } @@ -1079,35 +1191,33 @@ func Example_rebalancing() { // Next loop through test stores and maybe rebalance. for j := 0; j < len(testStores); j++ { ts := &testStores[j] - target := alloc.RebalanceTarget( + target, err := alloc.RebalanceTarget( config.Constraints{}, []roachpb.ReplicaDescriptor{{NodeID: ts.Node.NodeID, StoreID: ts.StoreID}}, -1) + if err != nil { + panic(err) + } if target != nil { - testStores[j].rebalance(&testStores[int(target.StoreID)], alloc.randGen.Int63n(1<<20)) + testStores[j].rebalance(&testStores[int(target.StoreID)], randGen.Int63n(1<<20)) } } - // Output store capacities as hexadecimal 2-character values. - if i%(generations/50) == 0 { - var maxBytes int64 + if i%(generations/printGenerations) == 0 { + var totalBytes int64 for j := 0; j < len(testStores); j++ { - bytes := testStores[j].Capacity.Capacity - testStores[j].Capacity.Available - if bytes > maxBytes { - maxBytes = bytes - } + totalBytes += testStores[j].Capacity.Capacity - testStores[j].Capacity.Available } - if maxBytes > 0 { - for j := 0; j < len(testStores); j++ { - endStr := " " - if j == len(testStores)-1 { - endStr = "" - } - bytes := testStores[j].Capacity.Capacity - testStores[j].Capacity.Available - fmt.Printf("%03d%s", (999*bytes)/maxBytes, endStr) + fmt.Printf("generation %4d: ", i) + for j := 0; j < len(testStores); j++ { + if j != 0 && j != len(testStores)-1 { + fmt.Printf(",") } - fmt.Printf("\n") + ts := testStores[j] + bytes := ts.Capacity.Capacity - ts.Capacity.Available + fmt.Printf("%3d %2d%%", ts.Capacity.RangeCount, (100*bytes)/totalBytes) } + fmt.Printf("\n") } } @@ -1120,55 +1230,55 @@ func Example_rebalancing() { fmt.Printf("Total bytes=%d, ranges=%d\n", totBytes, totRanges) // Output: - // 999 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 - // 999 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 - // 999 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 - // 999 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 - // 999 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 - // 999 000 000 000 014 000 000 118 000 000 000 000 111 000 000 000 000 000 000 000 - // 999 113 095 000 073 064 000 221 003 000 020 178 182 000 057 000 027 000 055 000 - // 999 398 222 000 299 366 000 525 239 135 263 385 424 261 261 000 260 194 207 322 - // 999 307 170 335 380 357 336 539 233 373 218 444 539 336 258 479 267 352 384 387 - // 999 558 318 587 623 602 453 719 409 506 414 617 638 411 359 486 502 507 454 673 - // 999 588 404 854 616 642 559 705 482 622 505 554 673 489 410 390 524 607 535 671 - // 999 651 498 922 668 696 612 809 619 691 682 674 682 584 533 449 619 724 646 702 - // 999 710 505 832 645 732 719 867 605 794 743 693 717 645 602 503 683 733 686 776 - // 999 773 688 810 658 761 812 957 663 875 856 797 871 670 733 602 839 781 736 909 - // 959 778 750 879 685 797 786 999 751 944 870 786 882 670 828 611 880 817 714 883 - // 946 843 781 892 726 887 876 993 717 999 940 802 879 672 842 634 862 818 736 906 - // 924 826 754 859 742 878 836 927 721 999 893 762 874 672 882 684 918 818 745 897 - // 910 872 789 858 752 878 824 976 715 999 860 739 848 684 890 699 968 846 751 833 - // 938 892 789 879 754 916 861 997 774 983 887 805 827 690 912 751 999 
927 800 893 - // 895 887 792 845 784 920 800 999 770 961 890 747 871 701 907 733 970 893 811 858 - // 887 843 742 839 792 938 826 999 778 971 859 792 857 731 889 777 979 900 779 833 - // 891 861 802 819 802 966 826 999 776 946 843 792 836 769 914 792 968 879 775 874 - // 923 840 830 842 778 969 820 999 791 950 843 820 838 767 893 794 995 915 789 885 - // 932 816 783 830 805 926 783 999 790 977 824 856 838 789 866 787 992 892 760 896 - // 917 799 781 813 800 901 759 999 776 983 795 861 813 799 852 776 944 891 739 883 - // 895 759 757 827 799 894 741 999 772 955 779 864 823 812 835 785 956 882 746 865 - // 906 762 773 867 848 874 747 999 763 992 766 866 831 812 839 820 973 906 765 885 - // 915 795 781 884 854 899 782 983 756 999 744 890 840 791 848 806 992 934 774 904 - // 935 768 813 893 859 881 788 948 758 999 748 892 828 803 857 834 989 940 798 900 - // 953 752 816 919 852 882 806 966 771 976 733 877 804 802 854 822 999 957 800 898 - // 909 732 804 882 874 885 814 956 758 937 703 877 805 783 849 833 999 955 796 903 - // 885 744 788 859 851 881 802 929 732 905 702 843 801 774 847 810 999 936 778 880 - // 856 741 790 827 842 897 771 922 732 873 719 849 771 789 845 828 999 914 764 859 - // 880 787 825 841 867 941 782 962 752 918 749 886 797 819 899 862 999 935 792 891 - // 902 829 841 857 903 979 786 979 760 935 767 903 816 839 907 880 999 963 827 927 - // 873 809 831 837 906 964 786 952 772 928 780 904 810 817 914 878 999 974 827 914 - // 879 810 855 843 936 977 806 956 799 930 801 931 823 835 928 895 997 999 864 935 - // 885 806 858 825 921 971 791 965 784 930 809 936 813 829 904 893 999 974 858 902 - // 865 776 855 811 903 966 771 958 770 906 809 923 810 825 896 901 999 964 841 895 - // 880 789 876 816 918 987 772 972 776 912 814 935 836 833 913 901 999 978 863 903 - // 866 779 861 824 926 986 773 958 776 920 810 936 836 855 894 899 999 989 859 904 - // 880 798 862 826 910 997 795 948 767 910 798 923 838 835 872 911 999 975 856 894 - // 885 785 845 807 906 973 783 943 782 918 789 920 832 838 861 894 999 965 849 877 - // 889 793 855 802 918 985 786 948 793 920 800 941 818 849 846 899 999 982 851 886 - // 866 796 854 801 911 969 782 958 791 907 788 940 800 844 843 890 999 977 851 873 - // 849 794 855 815 912 970 790 942 792 898 789 938 794 850 843 884 999 964 854 886 - // 856 806 867 837 930 980 787 944 789 903 804 947 800 863 840 891 999 977 860 874 - // 847 796 852 849 925 980 777 948 786 905 792 922 798 853 835 887 999 968 868 866 - // 851 801 866 846 936 999 795 945 774 909 793 931 794 860 846 908 985 976 882 854 - // 861 815 861 845 934 999 808 958 784 913 780 924 800 860 844 912 986 974 897 844 - // Total bytes=941960698, ranges=1750 + // generation 0: 1 88%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0% 1 11% + // generation 2: 1 32%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 2 21%, 3 24% 1 21% + // generation 4: 1 8%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 5 21%, 4 14%, 2 22%, 3 25% 1 7% + // generation 6: 2 9%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 7 21%, 4 23%, 5 15%, 4 2%, 2 3%, 3 19% 2 5% + // generation 8: 3 7%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 6 14%, 0 0%, 0 0%, 0 0%, 0 0%, 0 0%, 8 14%, 7 12%, 4 15%, 5 9%, 4 4%, 3 2%, 3 13% 3 5% + // generation 10: 4 5%, 10 13%, 0 0%, 0 0%, 0 0%, 0 0%, 6 8%, 0 0%, 0 0%, 0 0%, 0 0%, 7 9%, 8 5%, 7 9%, 4 9%, 5 8%, 4 6%, 4 3%, 4 12% 4 6% + // generation 12: 5 
6%, 10 9%, 7 6%, 0 0%, 0 0%, 0 0%, 6 5%, 12 13%, 0 0%, 0 0%, 0 0%, 7 7%, 8 3%, 7 6%, 5 7%, 5 7%, 5 4%, 5 5%, 5 9% 5 5% + // generation 14: 7 6%, 10 5%, 7 4%, 7 9%, 0 0%, 0 0%, 7 5%, 12 8%, 7 6%, 0 0%, 0 0%, 7 4%, 8 3%, 7 4%, 7 6%, 7 7%, 7 4%, 7 5%, 7 9% 7 6% + // generation 16: 8 5%, 10 4%, 8 4%, 8 7%, 16 11%, 4 4%, 8 5%, 12 5%, 8 6%, 0 0%, 0 0%, 8 4%, 8 1%, 8 4%, 8 5%, 8 7%, 8 4%, 8 4%, 8 9% 8 4% + // generation 18: 10 5%, 10 3%, 10 4%, 10 6%, 16 9%, 6 4%, 10 6%, 12 4%, 10 5%, 3 0%, 4 1%, 10 4%, 10 2%, 10 3%, 10 5%, 10 7%, 10 4%, 10 4%, 10 7% 10 5% + // generation 20: 12 5%, 12 4%, 12 3%, 12 6%, 16 8%, 8 4%, 12 6%, 12 4%, 12 5%, 7 2%, 8 3%, 12 4%, 12 3%, 12 4%, 12 5%, 12 6%, 12 5%, 12 4%, 12 6% 12 5% + // generation 22: 14 5%, 14 3%, 14 4%, 14 5%, 16 7%, 11 5%, 14 6%, 14 3%, 14 5%, 10 2%, 10 3%, 14 4%, 14 3%, 14 4%, 14 5%, 14 6%, 14 5%, 14 4%, 14 6% 14 5% + // generation 24: 16 5%, 16 4%, 16 4%, 16 5%, 16 7%, 13 5%, 16 6%, 16 4%, 16 5%, 13 2%, 13 3%, 16 4%, 16 3%, 16 3%, 16 5%, 16 6%, 16 5%, 16 5%, 16 5% 16 4% + // generation 26: 18 4%, 18 4%, 18 5%, 18 5%, 18 7%, 15 5%, 18 6%, 18 4%, 18 5%, 15 3%, 15 3%, 18 4%, 18 4%, 18 3%, 18 5%, 18 6%, 18 5%, 18 5%, 18 5% 18 4% + // generation 28: 20 4%, 20 4%, 20 4%, 20 5%, 20 7%, 17 5%, 20 6%, 20 4%, 20 5%, 17 3%, 17 3%, 20 4%, 20 4%, 20 3%, 20 5%, 20 6%, 20 5%, 20 5%, 20 5% 20 4% + // generation 30: 22 4%, 22 4%, 22 4%, 22 5%, 22 6%, 19 5%, 22 6%, 22 4%, 22 5%, 19 3%, 19 3%, 22 4%, 22 4%, 22 3%, 22 5%, 22 5%, 22 5%, 22 5%, 22 5% 22 5% + // generation 32: 24 4%, 24 4%, 24 4%, 24 5%, 24 6%, 21 5%, 24 5%, 24 4%, 24 5%, 21 3%, 21 3%, 24 4%, 24 4%, 24 3%, 24 5%, 24 5%, 24 5%, 24 5%, 24 5% 24 4% + // generation 34: 26 4%, 26 4%, 26 4%, 26 5%, 26 5%, 23 4%, 26 5%, 26 4%, 26 5%, 23 3%, 23 3%, 26 4%, 26 4%, 26 3%, 26 5%, 26 6%, 26 5%, 26 5%, 26 5% 26 5% + // generation 36: 28 4%, 28 4%, 28 4%, 28 5%, 28 6%, 25 4%, 28 5%, 28 4%, 28 5%, 25 3%, 25 4%, 28 4%, 28 4%, 28 3%, 28 5%, 28 5%, 28 5%, 28 5%, 28 5% 28 5% + // generation 38: 30 4%, 30 5%, 30 4%, 30 5%, 30 6%, 27 4%, 30 5%, 30 4%, 30 5%, 27 4%, 27 3%, 30 4%, 30 4%, 30 3%, 30 6%, 30 5%, 30 5%, 30 5%, 30 5% 30 5% + // generation 40: 32 4%, 32 5%, 32 4%, 32 5%, 32 6%, 29 4%, 32 5%, 32 4%, 32 5%, 29 4%, 29 4%, 32 4%, 32 4%, 32 3%, 32 5%, 32 5%, 32 5%, 32 5%, 32 5% 32 4% + // generation 42: 34 4%, 34 5%, 34 4%, 34 5%, 34 6%, 31 4%, 34 5%, 34 4%, 34 5%, 31 4%, 31 4%, 34 4%, 34 4%, 34 3%, 34 5%, 34 5%, 34 5%, 34 5%, 34 4% 34 4% + // generation 44: 36 4%, 36 5%, 36 4%, 36 5%, 36 6%, 33 4%, 36 5%, 36 4%, 36 5%, 33 4%, 33 4%, 36 4%, 36 4%, 36 4%, 36 5%, 36 5%, 36 5%, 36 5%, 36 4% 36 4% + // generation 46: 38 4%, 38 5%, 38 4%, 38 5%, 38 6%, 35 4%, 38 5%, 38 4%, 38 5%, 35 4%, 35 4%, 38 4%, 38 4%, 38 4%, 38 5%, 38 5%, 38 5%, 38 5%, 38 4% 38 4% + // generation 48: 40 4%, 40 4%, 40 4%, 40 5%, 40 6%, 37 4%, 40 5%, 40 4%, 40 5%, 37 4%, 37 4%, 40 4%, 40 4%, 40 4%, 40 5%, 40 5%, 40 5%, 40 5%, 40 4% 40 4% + // generation 50: 42 4%, 42 5%, 42 4%, 42 5%, 42 6%, 39 4%, 42 5%, 42 4%, 42 5%, 39 4%, 39 4%, 42 4%, 42 4%, 42 4%, 42 5%, 42 5%, 42 5%, 42 5%, 42 4% 42 4% + // generation 52: 44 4%, 44 4%, 44 4%, 44 5%, 44 6%, 41 4%, 44 5%, 44 4%, 44 5%, 41 4%, 41 4%, 44 4%, 44 4%, 44 4%, 44 5%, 44 5%, 44 5%, 44 5%, 44 4% 44 4% + // generation 54: 46 4%, 46 4%, 46 4%, 46 5%, 46 6%, 43 4%, 46 5%, 46 4%, 46 5%, 43 4%, 43 4%, 46 4%, 46 4%, 46 4%, 46 5%, 46 5%, 46 5%, 46 5%, 46 4% 46 4% + // generation 56: 48 4%, 48 4%, 48 4%, 48 5%, 48 6%, 45 4%, 48 5%, 48 4%, 48 5%, 45 4%, 45 4%, 48 4%, 48 4%, 48 4%, 48 5%, 48 5%, 48 5%, 48 5%, 48 4% 48 4% 
+ // generation 58: 50 4%, 50 4%, 50 4%, 50 5%, 50 6%, 47 4%, 50 5%, 50 4%, 50 5%, 47 4%, 47 4%, 50 4%, 50 5%, 50 4%, 50 5%, 50 5%, 50 5%, 50 5%, 50 4% 50 4% + // generation 60: 52 4%, 52 5%, 52 4%, 52 5%, 52 6%, 49 4%, 52 5%, 52 4%, 52 5%, 49 4%, 49 4%, 52 4%, 52 5%, 52 4%, 52 5%, 52 5%, 52 5%, 52 5%, 52 4% 52 4% + // generation 62: 54 4%, 54 5%, 54 4%, 54 5%, 54 5%, 51 4%, 54 5%, 54 4%, 54 5%, 51 4%, 51 4%, 54 4%, 54 5%, 54 4%, 54 5%, 54 5%, 54 5%, 54 5%, 54 4% 54 4% + // generation 64: 56 4%, 56 5%, 56 4%, 56 5%, 56 5%, 53 4%, 56 5%, 56 4%, 56 5%, 53 4%, 53 4%, 56 4%, 56 5%, 56 4%, 56 5%, 56 5%, 56 5%, 56 5%, 56 4% 56 4% + // generation 66: 58 4%, 58 4%, 58 4%, 58 5%, 58 5%, 55 4%, 58 5%, 58 4%, 58 5%, 55 4%, 55 4%, 58 4%, 58 5%, 58 4%, 58 5%, 58 5%, 58 5%, 58 5%, 58 4% 58 5% + // generation 68: 60 4%, 60 5%, 60 4%, 60 5%, 60 5%, 57 4%, 60 5%, 60 4%, 60 5%, 57 4%, 57 4%, 60 4%, 60 5%, 60 4%, 60 5%, 60 5%, 60 5%, 60 5%, 60 4% 60 5% + // generation 70: 62 4%, 62 5%, 62 4%, 62 5%, 62 5%, 59 4%, 62 5%, 62 4%, 62 5%, 59 4%, 59 4%, 62 4%, 62 5%, 62 4%, 62 5%, 62 5%, 62 5%, 62 5%, 62 4% 62 5% + // generation 72: 64 4%, 64 5%, 64 4%, 64 5%, 64 5%, 61 4%, 64 5%, 64 4%, 64 5%, 61 4%, 61 4%, 64 4%, 64 5%, 64 4%, 64 5%, 64 5%, 64 4%, 64 5%, 64 4% 64 5% + // generation 74: 66 4%, 66 5%, 66 4%, 66 5%, 66 5%, 63 4%, 66 5%, 66 4%, 66 5%, 63 4%, 63 4%, 66 4%, 66 5%, 66 4%, 66 5%, 66 5%, 66 4%, 66 5%, 66 4% 66 4% + // generation 76: 68 4%, 68 5%, 68 4%, 68 5%, 68 5%, 65 4%, 68 5%, 68 4%, 68 5%, 65 4%, 65 4%, 68 4%, 68 4%, 68 4%, 68 5%, 68 5%, 68 4%, 68 5%, 68 4% 68 5% + // generation 78: 70 4%, 70 5%, 70 4%, 70 5%, 70 5%, 67 4%, 70 5%, 70 4%, 70 5%, 67 4%, 67 4%, 70 4%, 70 4%, 70 4%, 70 5%, 70 5%, 70 5%, 70 5%, 70 4% 70 4% + // generation 80: 72 4%, 72 5%, 72 4%, 72 5%, 72 5%, 69 4%, 72 5%, 72 4%, 72 5%, 69 4%, 69 4%, 72 4%, 72 4%, 72 4%, 72 5%, 72 5%, 72 4%, 72 5%, 72 4% 72 5% + // generation 82: 74 4%, 74 5%, 74 4%, 74 5%, 74 5%, 71 4%, 74 5%, 74 4%, 74 5%, 71 4%, 71 4%, 74 4%, 74 4%, 74 4%, 74 5%, 74 5%, 74 4%, 74 5%, 74 4% 74 5% + // generation 84: 76 4%, 76 5%, 76 4%, 76 5%, 76 5%, 73 4%, 76 5%, 76 4%, 76 5%, 73 4%, 73 4%, 76 4%, 76 4%, 76 4%, 76 5%, 76 5%, 76 4%, 76 5%, 76 4% 76 5% + // generation 86: 78 4%, 78 5%, 78 4%, 78 5%, 78 5%, 75 4%, 78 5%, 78 4%, 78 5%, 75 4%, 75 4%, 78 4%, 78 4%, 78 4%, 78 5%, 78 5%, 78 4%, 78 5%, 78 4% 78 4% + // generation 88: 80 4%, 80 5%, 80 4%, 80 5%, 80 5%, 77 4%, 80 5%, 80 4%, 80 5%, 77 4%, 77 4%, 80 4%, 80 4%, 80 4%, 80 5%, 80 5%, 80 5%, 80 5%, 80 4% 80 4% + // generation 90: 81 4%, 81 5%, 81 4%, 81 5%, 81 5%, 78 4%, 81 5%, 81 5%, 81 5%, 78 4%, 78 4%, 81 4%, 81 4%, 81 4%, 81 5%, 81 5%, 81 5%, 81 5%, 81 4% 81 4% + // generation 92: 81 4%, 81 5%, 81 4%, 81 5%, 81 5%, 78 4%, 81 5%, 81 5%, 81 5%, 78 4%, 78 4%, 81 4%, 81 4%, 81 4%, 81 5%, 81 5%, 81 5%, 81 5%, 81 4% 81 4% + // generation 94: 81 4%, 81 5%, 81 4%, 81 5%, 81 5%, 78 4%, 81 5%, 81 5%, 81 5%, 78 4%, 78 4%, 81 4%, 81 4%, 81 4%, 81 5%, 81 5%, 81 5%, 81 5%, 81 4% 81 4% + // generation 96: 81 4%, 81 5%, 81 4%, 81 5%, 81 5%, 78 4%, 81 5%, 81 5%, 81 5%, 78 4%, 78 4%, 81 4%, 81 4%, 81 4%, 81 5%, 81 5%, 81 5%, 81 5%, 81 4% 81 4% + // generation 98: 81 4%, 81 5%, 81 4%, 81 5%, 81 5%, 78 4%, 81 5%, 81 5%, 81 5%, 78 4%, 78 4%, 81 4%, 81 4%, 81 4%, 81 5%, 81 5%, 81 5%, 81 5%, 81 4% 81 4% + // Total bytes=839900871, ranges=1611 } diff --git a/storage/balancer.go b/storage/balancer.go deleted file mode 100644 index 40ed76318651..000000000000 --- a/storage/balancer.go +++ /dev/null @@ -1,198 +0,0 @@ -// Copyright 2014 The 
Cockroach Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -// implied. See the License for the specific language governing -// permissions and limitations under the License. -// -// Author: Matt Tracy (matt@cockroachlabs.com) - -package storage - -import ( - "bytes" - "fmt" - "math" - - "golang.org/x/net/context" - - "github.com/cockroachdb/cockroach/roachpb" - "github.com/cockroachdb/cockroach/util/log" -) - -const allocatorRandomCount = 10 - -type nodeIDSet map[roachpb.NodeID]struct{} - -func formatCandidates( - selected *roachpb.StoreDescriptor, - candidates []roachpb.StoreDescriptor, -) string { - var buf bytes.Buffer - _, _ = buf.WriteString("[") - for i := range candidates { - candidate := &candidates[i] - if i > 0 { - _, _ = buf.WriteString(" ") - } - fmt.Fprintf(&buf, "%d:%d", candidate.StoreID, candidate.Capacity.RangeCount) - if candidate == selected { - _, _ = buf.WriteString("*") - } - } - _, _ = buf.WriteString("]") - return buf.String() -} - -// rangeCountBalancer attempts to balance ranges across the cluster while -// considering only the number of ranges being serviced each store. -type rangeCountBalancer struct { - rand allocatorRand -} - -func (rcb rangeCountBalancer) selectBest(sl StoreList) *roachpb.StoreDescriptor { - var best *roachpb.StoreDescriptor - for i := range sl.stores { - candidate := &sl.stores[i] - if best == nil { - best = candidate - continue - } - if candidate.Capacity.RangeCount < best.Capacity.RangeCount { - best = candidate - } - } - - // NB: logging of the best candidate is performed by the caller (selectGood - // or improve). - return best -} - -func (rcb rangeCountBalancer) selectGood( - sl StoreList, excluded nodeIDSet, -) *roachpb.StoreDescriptor { - // Consider a random sample of stores from the store list. - sl.stores = selectRandom(rcb.rand, allocatorRandomCount, sl, excluded) - good := rcb.selectBest(sl) - - if log.V(2) { - log.Infof(context.TODO(), "selected good: mean=%.1f %s", - sl.candidateCount.mean, formatCandidates(good, sl.stores)) - } - return good -} - -func (rcb rangeCountBalancer) selectBad(sl StoreList) *roachpb.StoreDescriptor { - var worst *roachpb.StoreDescriptor - for i := range sl.stores { - candidate := &sl.stores[i] - if worst == nil { - worst = candidate - continue - } - if candidate.Capacity.RangeCount > worst.Capacity.RangeCount { - worst = candidate - } - } - - if log.V(2) { - log.Infof(context.TODO(), "selected bad: mean=%.1f %s", - sl.candidateCount.mean, formatCandidates(worst, sl.stores)) - } - return worst -} - -// improve returns a candidate StoreDescriptor to rebalance a replica to. The -// strategy is to always converge on the mean range count. If that isn't -// possible, we don't return any candidate. -func (rcb rangeCountBalancer) improve( - sl StoreList, excluded nodeIDSet, -) *roachpb.StoreDescriptor { - // Attempt to select a better candidate from the supplied list. 
- sl.stores = selectRandom(rcb.rand, allocatorRandomCount, sl, excluded) - candidate := rcb.selectBest(sl) - if candidate == nil { - if log.V(2) { - log.Infof(context.TODO(), "not rebalancing: no valid candidate targets: %s", - formatCandidates(nil, sl.stores)) - } - return nil - } - - // Adding a replica to the candidate must make its range count converge on the - // mean range count. - if math.Abs(float64(candidate.Capacity.RangeCount+1)-sl.candidateCount.mean) >= - math.Abs(float64(candidate.Capacity.RangeCount)-sl.candidateCount.mean) { - if log.V(2) { - log.Infof(context.TODO(), "not rebalancing: %s wouldn't converge on the mean %.1f", - formatCandidates(candidate, sl.stores), sl.candidateCount.mean) - } - return nil - } - - if log.V(2) { - log.Infof(context.TODO(), "rebalancing: mean=%.1f %s", - sl.candidateCount.mean, formatCandidates(candidate, sl.stores)) - } - return candidate -} - -func (rcb rangeCountBalancer) shouldRebalance( - store roachpb.StoreDescriptor, sl StoreList, -) bool { - // Moving a replica from the given store makes its range count converge on - // the mean range count. - // - // TODO(peter,bram,cuong): The FractionUsed check seems suspicious. When a - // node becomes fuller than maxFractionUsedThreshold we will always select it - // for rebalancing. This is currently utilized by tests. - rebalanceNotNeeded := store.Capacity.FractionUsed() <= maxFractionUsedThreshold && - (math.Abs(float64(store.Capacity.RangeCount-1)-sl.candidateCount.mean) > - math.Abs(float64(store.Capacity.RangeCount)-sl.candidateCount.mean)) - shouldRebalance := !rebalanceNotNeeded - - if log.V(2) { - log.Infof(context.TODO(), - "%d: should-rebalance=%t: fraction-used=%.2f range-count=%d (mean=%.1f)", - store.StoreID, shouldRebalance, store.Capacity.FractionUsed(), - store.Capacity.RangeCount, sl.candidateCount.mean) - } - return shouldRebalance -} - -// selectRandom chooses up to count random store descriptors from the given -// store list, excluding any stores that are too full to accept more replicas. -func selectRandom( - randGen allocatorRand, count int, sl StoreList, excluded nodeIDSet, -) []roachpb.StoreDescriptor { - var descs []roachpb.StoreDescriptor - // Randomly permute available stores matching the required attributes. - randGen.Lock() - defer randGen.Unlock() - for _, idx := range randGen.Perm(len(sl.stores)) { - desc := sl.stores[idx] - // Skip if store is in excluded set. - if _, ok := excluded[desc.Node.NodeID]; ok { - continue - } - - // Don't overfill stores. - if desc.Capacity.FractionUsed() > maxFractionUsedThreshold { - continue - } - - // Add this store; exit loop if we've satisfied count. - descs = append(descs, sl.stores[idx]) - if len(descs) >= count { - break - } - } - return descs -} diff --git a/storage/client_raft_test.go b/storage/client_raft_test.go index 370501c40b34..fe0934b356e2 100644 --- a/storage/client_raft_test.go +++ b/storage/client_raft_test.go @@ -1709,7 +1709,6 @@ func TestStoreRangeRebalance(t *testing.T) { sc := storage.TestStoreContext() sc.AllocatorOptions = storage.AllocatorOptions{ AllowRebalance: true, - Deterministic: true, } mtc := &multiTestContext{ storeContext: &sc, @@ -1795,7 +1794,7 @@ func TestStoreRangeRebalance(t *testing.T) { // Exit when all stores have a single replica. 
actual := countReplicas() if !reflect.DeepEqual(expected, actual) { - return errors.Errorf("replicas are not distributed as expected %s", pretty.Diff(expected, actual)) + return errors.Errorf("replicas are not distributed as expected = %+v, want %+v, %s", actual, expected, pretty.Diff(expected, actual)) } return nil }) diff --git a/storage/client_test.go b/storage/client_test.go index fad9a8986190..d716f7edf853 100644 --- a/storage/client_test.go +++ b/storage/client_test.go @@ -44,7 +44,6 @@ import ( "google.golang.org/grpc" "github.com/cockroachdb/cockroach/base" - "github.com/cockroachdb/cockroach/config" "github.com/cockroachdb/cockroach/gossip" "github.com/cockroachdb/cockroach/gossip/resolver" "github.com/cockroachdb/cockroach/internal/client" @@ -397,10 +396,7 @@ func (m *multiTestContext) initGossipNetwork() { m.gossipStores() util.SucceedsSoon(m.t, func() error { for i := 0; i < len(m.stores); i++ { - _, alive, _ := m.storePools[i].GetStoreList( - config.Constraints{}, - /* deterministic */ false, - ) + _, alive, _ := m.storePools[i].GetStoreList() if alive != len(m.stores) { return errors.Errorf("node %d's store pool only has %d alive stores, expected %d", m.stores[i].Ident.NodeID, alive, len(m.stores)) diff --git a/storage/helpers_test.go b/storage/helpers_test.go index febde3616c38..0b77f756c0d9 100644 --- a/storage/helpers_test.go +++ b/storage/helpers_test.go @@ -22,7 +22,6 @@ package storage import ( - "github.com/cockroachdb/cockroach/config" "github.com/cockroachdb/cockroach/internal/client" "github.com/cockroachdb/cockroach/roachpb" "github.com/cockroachdb/cockroach/storage/engine/enginepb" @@ -170,6 +169,6 @@ func (r *Replica) GetTimestampCacheLowWater() hlc.Timestamp { } // GetStoreList is the same function as GetStoreList exposed for tests only. -func (sp *StorePool) GetStoreList(constraints config.Constraints, deterministic bool) (StoreList, int, int) { - return sp.getStoreList(constraints, deterministic) +func (sp *StorePool) GetStoreList() (StoreList, int, int) { + return sp.getStoreList() } diff --git a/storage/replicate_queue.go b/storage/replicate_queue.go index 984ad4944103..5fcfb7b9af4e 100644 --- a/storage/replicate_queue.go +++ b/storage/replicate_queue.go @@ -109,8 +109,10 @@ func (rq *replicateQueue) shouldQueue( if lease, _ := repl.getLease(); lease != nil { leaseStoreID = lease.Replica.StoreID } - target := rq.allocator.RebalanceTarget( - zone.Constraints, desc.Replicas, leaseStoreID) + target, err := rq.allocator.RebalanceTarget(zone.Constraints, desc.Replicas, leaseStoreID) + if err != nil { + return false, 0 + } return target != nil, 0 } @@ -140,7 +142,7 @@ func (rq *replicateQueue) process( switch action { case AllocatorAdd: log.Trace(ctx, "adding a new replica") - newStore, err := rq.allocator.AllocateTarget(zone.Constraints, desc.Replicas, true) + newStore, err := rq.allocator.AllocateTarget(zone.Constraints, desc.Replicas) if err != nil { return err } @@ -157,7 +159,7 @@ func (rq *replicateQueue) process( log.Trace(ctx, "removing a replica") // We require the lease in order to process replicas, so // repl.store.StoreID() corresponds to the lease-holder's store ID. 
- removeReplica, err := rq.allocator.RemoveTarget(desc.Replicas, repl.store.StoreID()) + removeReplica, err := rq.allocator.RemoveTarget(zone.Constraints, desc.Replicas, repl.store.StoreID()) if err != nil { return err } @@ -189,13 +191,13 @@ func (rq *replicateQueue) process( // // We require the lease in order to process replicas, so // repl.store.StoreID() corresponds to the lease-holder's store ID. - rebalanceStore := rq.allocator.RebalanceTarget( + rebalanceStore, err := rq.allocator.RebalanceTarget( zone.Constraints, desc.Replicas, repl.store.StoreID()) - if rebalanceStore == nil { + if rebalanceStore == nil || err != nil { log.VTracef(1, ctx, "%s: no suitable rebalance target", repl) // No action was necessary and no rebalance target was found. Return // without re-queuing this replica. - return nil + return err } rebalanceReplica := roachpb.ReplicaDescriptor{ NodeID: rebalanceStore.Node.NodeID, diff --git a/storage/rule_solver.go b/storage/rule_solver.go new file mode 100644 index 000000000000..20a0f74bdaa8 --- /dev/null +++ b/storage/rule_solver.go @@ -0,0 +1,282 @@ +// Copyright 2016 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. +// +// Author: Tristan Rice (rice@fn.lc) + +package storage + +import ( + "math" + "sort" + + "github.com/cockroachdb/cockroach/config" + "github.com/cockroachdb/cockroach/roachpb" + "github.com/pkg/errors" +) + +// candidate represents a candidate for allocation. +type candidate struct { + store roachpb.StoreDescriptor + score float64 +} + +// solveState is used to pass solution state information into a rule. +type solveState struct { + constraints config.Constraints + store roachpb.StoreDescriptor + existing []roachpb.ReplicaDescriptor + sl StoreList + tiers map[roachpb.StoreID]map[string]roachpb.Tier + tierOrder []roachpb.Tier +} + +// rule is a generic rule that can be used to solve a constraint problem. +// Returning false will remove the store from the list of candidate stores. The +// score will be weighted and then summed together with the other rule scores to +// create a store ranking (higher is better). +type rule struct { + weight float64 + run func(state solveState) (candidate bool, score float64) +} + +// defaultRules is the default rule set to use. +var defaultRules = []rule{ + { + weight: 1.0, + run: ruleReplicasUniqueNodes, + }, + { + weight: 1.0, + run: ruleConstraints, + }, + { + weight: 0.01, + run: ruleCapacity, + }, + { + weight: 0.1, + run: ruleDiversity, + }, +} + +// makeDefaultRuleSolver returns a ruleSolver with defaultRules. +func makeDefaultRuleSolver(storePool *StorePool) *ruleSolver { + return makeRuleSolver(storePool, defaultRules) +} + +// makeRuleSolver makes a new ruleSolver. The order of the rules is the order in +// which they are run. For optimization purposes, less computationally intense +// rules should run first to eliminate candidates. 
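+//
+// The tests exercise individual rules by building solvers with a single rule,
+// e.g. makeRuleSolver(storePool, []rule{{weight: 1, run: ruleCapacity}}).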
+func makeRuleSolver(storePool *StorePool, rules []rule) *ruleSolver { + return &ruleSolver{ + storePool: storePool, + rules: rules, + } +} + +// ruleSolver solves a set of rules for a store. +type ruleSolver struct { + storePool *StorePool + rules []rule +} + +// solve given constraints and return the score. +func (rs *ruleSolver) Solve( + c config.Constraints, existing []roachpb.ReplicaDescriptor, +) ([]candidate, error) { + sl, _, throttledStoreCount := rs.storePool.getStoreList() + + // When there are throttled stores that do match, we shouldn't send + // the replica to purgatory or even consider relaxing the constraints. + if throttledStoreCount > 0 { + return nil, errors.Errorf("%d matching stores are currently throttled", throttledStoreCount) + } + + candidates := make([]candidate, 0, len(sl.stores)) + state := solveState{ + constraints: c, + existing: existing, + sl: sl, + tierOrder: canonicalTierOrder(sl), + tiers: storeTierMap(sl), + } + + for _, store := range sl.stores { + state.store = store + if cand, ok := rs.computeCandidate(state); ok { + candidates = append(candidates, cand) + } + } + sort.Sort(byScore(candidates)) + return candidates, nil +} + +// computeCandidate runs all the rules for the store and returns the candidacy +// information. Returns false if not a candidate. +func (rs *ruleSolver) computeCandidate( + state solveState, +) (candidate, bool) { + var totalScore float64 + for _, rule := range rs.rules { + isCandidate, score := rule.run(state) + if !isCandidate { + return candidate{}, false + } + if !math.IsNaN(score) { + totalScore += score * rule.weight + } + } + return candidate{store: state.store, score: totalScore}, true +} + +// ruleReplicasUniqueNodes ensures that no two replicas are put on the same +// node. +func ruleReplicasUniqueNodes(state solveState) (candidate bool, score float64) { + for _, r := range state.existing { + if r.NodeID == state.store.Node.NodeID { + return false, 0 + } + } + return true, 0 +} + +// storeHasConstraint returns whether a store descriptor attributes or locality +// matches the key value pair in the constraint. +func storeHasConstraint(store roachpb.StoreDescriptor, c config.Constraint) bool { + var found bool + if c.Key == "" { + for _, attrs := range []roachpb.Attributes{store.Attrs, store.Node.Attrs} { + for _, attr := range attrs.Attrs { + if attr == c.Value { + return true + } + } + } + } else { + for _, tier := range store.Locality.Tiers { + if c.Key == tier.Key && c.Value == tier.Value { + return true + } + } + } + return found +} + +// ruleConstraints enforces that required and prohibited constraints are +// followed, and that stores with more positive constraints are ranked higher. +func ruleConstraints(state solveState) (candidate bool, score float64) { + matched := 0 + for _, c := range state.constraints.Constraints { + hasConstraint := storeHasConstraint(state.store, c) + switch { + case c.Type == config.Constraint_POSITIVE && hasConstraint: + matched++ + case c.Type == config.Constraint_REQUIRED && !hasConstraint: + return false, 0 + case c.Type == config.Constraint_PROHIBITED && hasConstraint: + return false, 0 + } + } + + return true, float64(matched) / float64(len(state.constraints.Constraints)) +} + +// ruleDiversity ensures that nodes that have the fewest locality tiers in +// common are given higher priority. 
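+// Each tier level i contributes 1/(i+1) per existing replica whose value for
+// that tier differs from the candidate's, normalized by the maximum possible
+// contribution. For example, with tierOrder [datacenter, rack], a candidate at
+// datacenter=eur,rack=1 scored against one existing replica at
+// datacenter=us,rack=1 gets a score of (1 + 0) / (1 + 0.5) = 2/3.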
+func ruleDiversity(state solveState) (candidate bool, score float64) { + storeTiers := state.tiers[state.store.StoreID] + var maxScore float64 + for i, tier := range state.tierOrder { + storeTier, ok := storeTiers[tier.Key] + if !ok { + continue + } + tierScore := 1 / (float64(i) + 1) + for _, existing := range state.existing { + existingTier, ok := state.tiers[existing.StoreID][tier.Key] + if ok && existingTier.Value != storeTier.Value { + score += tierScore + } + maxScore += tierScore + } + } + return true, score / maxScore +} + +// ruleCapacity prioritizes placing data on empty nodes when the choice is +// available and prevents data from going onto mostly full nodes. +func ruleCapacity(state solveState) (candidate bool, score float64) { + // Don't overfill stores. + if state.store.Capacity.FractionUsed() > maxFractionUsedThreshold { + return false, 0 + } + + return true, 1 / float64(state.store.Capacity.RangeCount+1) +} + +// canonicalTierOrder returns the most common key at each tier level. +func canonicalTierOrder(sl StoreList) []roachpb.Tier { + maxTierCount := 0 + for _, store := range sl.stores { + if count := len(store.Locality.Tiers); maxTierCount < count { + maxTierCount = count + } + } + + // Might have up to maxTierCount of tiers. + tiers := make([]roachpb.Tier, 0, maxTierCount) + for i := 0; i < maxTierCount; i++ { + // At each tier, count the number of occurrences of each key. + counts := map[string]int{} + maxKey := "" + for _, store := range sl.stores { + key := "" + if i < len(store.Locality.Tiers) { + key = store.Locality.Tiers[i].Key + } + counts[key]++ + if counts[key] > counts[maxKey] { + maxKey = key + } + } + // Don't add the tier if most nodes don't have that many tiers. + if maxKey != "" { + tiers = append(tiers, roachpb.Tier{Key: maxKey}) + } + } + return tiers +} + +// storeTierMap indexes a store list so you can look up the locality tier +// value from store ID and tier key. +func storeTierMap(sl StoreList) map[roachpb.StoreID]map[string]roachpb.Tier { + m := map[roachpb.StoreID]map[string]roachpb.Tier{} + for _, store := range sl.stores { + sm := map[string]roachpb.Tier{} + m[store.StoreID] = sm + for _, tier := range store.Locality.Tiers { + sm[tier.Key] = tier + } + } + return m +} + +// byScore implements sort.Interface for candidate slices. +type byScore []candidate + +var _ sort.Interface = byScore(nil) + +func (c byScore) Len() int { return len(c) } +func (c byScore) Less(i, j int) bool { return c[i].score > c[j].score } +func (c byScore) Swap(i, j int) { c[i], c[j] = c[j], c[i] } diff --git a/storage/rule_solver_test.go b/storage/rule_solver_test.go new file mode 100644 index 000000000000..2cea11aeb5e5 --- /dev/null +++ b/storage/rule_solver_test.go @@ -0,0 +1,330 @@ +// Copyright 2016 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. 
+// +// Author: Tristan Rice (rice@fn.lc) + +package storage + +import ( + "reflect" + "sort" + "testing" + + "github.com/cockroachdb/cockroach/config" + "github.com/cockroachdb/cockroach/roachpb" + "github.com/cockroachdb/cockroach/util/leaktest" +) + +type byScoreAndID []candidate + +func (c byScoreAndID) Len() int { return len(c) } +func (c byScoreAndID) Less(i, j int) bool { + if c[i].score == c[j].score { + return c[i].store.StoreID < c[j].store.StoreID + } + return c[i].score > c[j].score +} +func (c byScoreAndID) Swap(i, j int) { c[i], c[j] = c[j], c[i] } + +// TestRuleSolver tests the mechanics of ruleSolver. +func TestRuleSolver(t *testing.T) { + defer leaktest.AfterTest(t)() + stopper, _, _, storePool := createTestStorePool(TestTimeUntilStoreDeadOff) + defer stopper.Stop() + // 3 alive replicas, 1 dead + mockStorePool(storePool, []roachpb.StoreID{1, 2, 3, 5}, []roachpb.StoreID{4}, nil) + + storePool.mu.Lock() + storePool.mu.stores[1].desc.Attrs.Attrs = []string{"a"} + storePool.mu.stores[2].desc.Attrs.Attrs = []string{"a", "b"} + storePool.mu.stores[3].desc.Attrs.Attrs = []string{"a", "b", "c"} + + storePool.mu.stores[1].desc.Locality.Tiers = []roachpb.Tier{ + {Key: "datacenter", Value: "us"}, + {Key: "rack", Value: "1"}, + {Key: "slot", Value: "5"}, + } + storePool.mu.stores[2].desc.Locality.Tiers = []roachpb.Tier{ + {Key: "datacenter", Value: "us"}, + {Key: "rack", Value: "1"}, + } + storePool.mu.stores[3].desc.Locality.Tiers = []roachpb.Tier{ + {Key: "datacenter", Value: "us"}, + {Key: "floor", Value: "1"}, + {Key: "rack", Value: "2"}, + } + storePool.mu.stores[5].desc.Locality.Tiers = []roachpb.Tier{ + {Key: "datacenter", Value: "eur"}, + {Key: "rack", Value: "1"}, + } + + storePool.mu.stores[1].desc.Capacity = roachpb.StoreCapacity{ + Capacity: 100, + Available: 1, + RangeCount: 99, + } + storePool.mu.stores[2].desc.Capacity = roachpb.StoreCapacity{ + Capacity: 100, + Available: 100, + RangeCount: 0, + } + storePool.mu.stores[3].desc.Capacity = roachpb.StoreCapacity{ + Capacity: 100, + Available: 50, + RangeCount: 50, + } + storePool.mu.stores[5].desc.Capacity = roachpb.StoreCapacity{ + Capacity: 100, + Available: 60, + RangeCount: 40, + } + storePool.mu.Unlock() + + testCases := []struct { + rules []rule + c config.Constraints + existing []roachpb.ReplicaDescriptor + expected []roachpb.StoreID + }{ + // No constraints or rules. + { + expected: []roachpb.StoreID{1, 2, 3, 5}, + }, + // Store 1: score 0; Store 3: score 1; everything else fails. + { + rules: []rule{ + { + weight: 1, + run: func(state solveState) (candidate bool, score float64) { + switch state.store.StoreID { + case 1: + return true, 0 + case 3: + return true, 1 + default: + return false, 0 + } + }, + }, + }, + expected: []roachpb.StoreID{3, 1}, + }, + // Don't put a replica on the same node as another. + { + rules: []rule{{weight: 1, run: ruleReplicasUniqueNodes}}, + existing: []roachpb.ReplicaDescriptor{ + {NodeID: 1}, + {NodeID: 3}, + }, + expected: []roachpb.StoreID{2, 5}, + }, + { + rules: []rule{{weight: 1, run: ruleReplicasUniqueNodes}}, + existing: []roachpb.ReplicaDescriptor{ + {NodeID: 1}, + {NodeID: 2}, + {NodeID: 3}, + {NodeID: 5}, + }, + expected: nil, + }, + // Only put replicas on nodes with required constraints. + { + rules: []rule{{weight: 1, run: ruleConstraints}}, + c: config.Constraints{ + Constraints: []config.Constraint{ + {Value: "b", Type: config.Constraint_REQUIRED}, + }, + }, + expected: []roachpb.StoreID{2, 3}, + }, + // Required locality constraints. 
+ { + rules: []rule{{weight: 1, run: ruleConstraints}}, + c: config.Constraints{ + Constraints: []config.Constraint{ + {Key: "datacenter", Value: "us", Type: config.Constraint_REQUIRED}, + }, + }, + expected: []roachpb.StoreID{1, 2, 3}, + }, + // Don't put a replica on a node with a prohibited constraint. + { + rules: []rule{{weight: 1, run: ruleConstraints}}, + c: config.Constraints{ + Constraints: []config.Constraint{ + {Value: "b", Type: config.Constraint_PROHIBITED}, + }, + }, + expected: []roachpb.StoreID{1, 5}, + }, + // Prohibited locality constraints. + { + rules: []rule{{weight: 1, run: ruleConstraints}}, + c: config.Constraints{ + Constraints: []config.Constraint{ + {Key: "datacenter", Value: "us", Type: config.Constraint_PROHIBITED}, + }, + }, + expected: []roachpb.StoreID{5}, + }, + // Positive constraints ordered by number of matches. + { + rules: []rule{{weight: 1, run: ruleConstraints}}, + c: config.Constraints{ + Constraints: []config.Constraint{ + {Value: "a"}, + {Value: "b"}, + {Value: "c"}, + }, + }, + expected: []roachpb.StoreID{3, 2, 1, 5}, + }, + // Positive locality constraints. + { + rules: []rule{{weight: 1, run: ruleConstraints}}, + c: config.Constraints{ + Constraints: []config.Constraint{ + {Key: "datacenter", Value: "eur"}, + }, + }, + expected: []roachpb.StoreID{5, 1, 2, 3}, + }, + // Diversity with no existing. + { + rules: []rule{{weight: 1, run: ruleDiversity}}, + existing: nil, + expected: []roachpb.StoreID{1, 2, 3, 5}, + }, + // Diversity with one existing. + { + rules: []rule{{weight: 1, run: ruleDiversity}}, + existing: []roachpb.ReplicaDescriptor{ + {StoreID: 1}, + }, + expected: []roachpb.StoreID{5, 3, 1, 2}, + }, + // Prioritize lower capacity nodes, and don't overfill. + { + rules: []rule{{weight: 1, run: ruleCapacity}}, + expected: []roachpb.StoreID{2, 5, 3}, + }, + } + + for i, tc := range testCases { + solver := makeRuleSolver(storePool, tc.rules) + candidates, err := solver.Solve(tc.c, tc.existing) + if err != nil { + t.Fatal(err) + } + sort.Sort(byScoreAndID(candidates)) + if len(candidates) != len(tc.expected) { + t.Errorf("%d: length of %+v should match %+v", i, candidates, tc.expected) + continue + } + for j, expected := range tc.expected { + if out := candidates[j].store.StoreID; out != expected { + t.Errorf("%d: candidates[%d].store.StoreID = %d; not %d; %+v", i, j, out, expected, candidates) + } + } + } +} + +func TestCanonicalTierOrder(t *testing.T) { + defer leaktest.AfterTest(t)() + + testCases := []struct { + stores [][]roachpb.Tier + want []roachpb.Tier + }{ + { + nil, + []roachpb.Tier{}, + }, + { + [][]roachpb.Tier{nil, nil}, + []roachpb.Tier{}, + }, + { + [][]roachpb.Tier{ + { + {Key: "a"}, + {Key: "b"}, + {Key: "c"}, + }, + }, + []roachpb.Tier{ + {Key: "a"}, + {Key: "b"}, + {Key: "c"}, + }, + }, + { + [][]roachpb.Tier{ + {{Key: "a"}, + {Key: "b"}, + {Key: "c"}, + }, + { + {Key: "a"}, + {Key: "b"}, + {Key: "c"}, + }, + { + {Key: "b"}, + {Key: "c"}, + {Key: "a"}, + {Key: "d"}, + }, + }, + []roachpb.Tier{ + {Key: "a"}, + {Key: "b"}, + {Key: "c"}, + }, + }, + { + [][]roachpb.Tier{ + { + {Key: "a"}, + {Key: "b"}, + {Key: "c"}, + }, + { + {Key: "e"}, + {Key: "f"}, + {Key: "g"}, + }, + }, + []roachpb.Tier{ + {Key: "a"}, + {Key: "b"}, + {Key: "c"}, + }, + }, + } + + for i, tc := range testCases { + sl := StoreList{} + for _, tiers := range tc.stores { + sl.stores = append(sl.stores, roachpb.StoreDescriptor{ + Locality: roachpb.Locality{Tiers: tiers}, + }) + } + + if out := canonicalTierOrder(sl); !reflect.DeepEqual(out, tc.want) { + 
t.Errorf("%d: canonicalTierOrder(%+v) = %+v; not %+v", i, tc.stores, out, tc.want) + } + } +} diff --git a/storage/simulation/cluster.go b/storage/simulation/cluster.go index 352e84d5230b..500c8d878dbe 100644 --- a/storage/simulation/cluster.go +++ b/storage/simulation/cluster.go @@ -99,7 +99,6 @@ func createCluster( storePool: storePool, allocator: storage.MakeAllocator(storePool, storage.AllocatorOptions{ AllowRebalance: true, - Deterministic: true, }), storeGossiper: gossiputil.NewStoreGossiper(g), nodes: make(map[roachpb.NodeID]*Node), @@ -292,7 +291,7 @@ func (c *Cluster) prepareActions() { for storeID, rep := range r.replicas { rep.action, rep.priority = r.allocator.ComputeAction(r.zone, &r.desc) if rep.action == storage.AllocatorNoop { - if _, ok := r.getRebalanceTarget(storeID); ok { + if _, ok := r.getRebalanceTarget(storeID); ok && c.rand.Float64() < 0.5 { rep.rebalance = true // Set the priority to 1 so that rebalances will occur in // performActions. diff --git a/storage/simulation/range.go b/storage/simulation/range.go index a4a2dd6c2874..9555d703b768 100644 --- a/storage/simulation/range.go +++ b/storage/simulation/range.go @@ -114,7 +114,7 @@ func (r *Range) splitRange(originalRange *Range) { // getAllocateTarget queries the allocator for the store that would be the best // candidate to take on a new replica. func (r *Range) getAllocateTarget() (roachpb.StoreID, error) { - newStore, err := r.allocator.AllocateTarget(r.zone.Constraints, r.desc.Replicas, true) + newStore, err := r.allocator.AllocateTarget(r.zone.Constraints, r.desc.Replicas) if err != nil { return 0, err } @@ -126,7 +126,7 @@ func (r *Range) getAllocateTarget() (roachpb.StoreID, error) { func (r *Range) getRemoveTarget() (roachpb.StoreID, error) { // Pass in an invalid store ID since we don't consider range leases as part // of the simulator. - removeStore, err := r.allocator.RemoveTarget(r.desc.Replicas, roachpb.StoreID(-1)) + removeStore, err := r.allocator.RemoveTarget(r.zone.Constraints, r.desc.Replicas, roachpb.StoreID(-1)) if err != nil { return 0, err } @@ -137,7 +137,10 @@ func (r *Range) getRemoveTarget() (roachpb.StoreID, error) { // candidate to add a replica for rebalancing. Returns true only if a target is // found. func (r *Range) getRebalanceTarget(storeID roachpb.StoreID) (roachpb.StoreID, bool) { - rebalanceTarget := r.allocator.RebalanceTarget(r.zone.Constraints, r.desc.Replicas, storeID) + rebalanceTarget, err := r.allocator.RebalanceTarget(r.zone.Constraints, r.desc.Replicas, storeID) + if err != nil { + panic(err) + } if rebalanceTarget == nil { return 0, false } diff --git a/storage/store_pool.go b/storage/store_pool.go index 18a7995f0168..546b5696fca0 100644 --- a/storage/store_pool.go +++ b/storage/store_pool.go @@ -26,7 +26,6 @@ import ( "github.com/pkg/errors" "golang.org/x/net/context" - "github.com/cockroachdb/cockroach/config" "github.com/cockroachdb/cockroach/gossip" "github.com/cockroachdb/cockroach/roachpb" "github.com/cockroachdb/cockroach/rpc" @@ -93,43 +92,35 @@ func (sd *storeDetail) markAlive(foundAliveOn hlc.Timestamp, storeDesc *roachpb. sd.lastUpdatedTime = foundAliveOn } -// storeMatch is the return value for match(). -type storeMatch int +// storeStatus is the current status of a store. +type storeStatus int -// These are the possible values for a storeMatch. +// These are the possible values for a storeStatus. const ( - storeMatchDead storeMatch = iota // The store is not yet available or has been timed out. 
- storeMatchAlive // The store is alive, but its attributes didn't match the required ones. - storeMatchThrottled // The store is alive and its attributes matched, but it is throttled. - storeMatchAvailable // The store is alive, available and its attributes matched. + storeStatusDead storeStatus = iota // The store is not yet available or has been timed out. + storeStatusThrottled // The store is alive but throttled. + storeStatusAvailable // The store is alive. ) -// match checks the store against the attributes and returns a storeMatch. -func (sd *storeDetail) match(now time.Time, constraints config.Constraints) storeMatch { +// status returns the current status of the store. +func (sd *storeDetail) status(now time.Time) storeStatus { // The store must be alive and it must have a descriptor to be considered // alive. if sd.dead || sd.desc == nil { - return storeMatchDead - } - - // Does the store match the attributes? - m := map[string]struct{}{} - for _, s := range sd.desc.CombinedAttrs().Attrs { - m[s] = struct{}{} - } - for _, c := range constraints.Constraints { - // TODO(d4l3k): Locality constraints, number of matches. - if _, ok := m[c.Value]; !ok { - return storeMatchAlive - } + return storeStatusDead } // The store must not have a recent declined reservation to be available. - if sd.throttledUntil.After(now) { - return storeMatchThrottled + if sd.isThrottled(now) { + return storeStatusThrottled } - return storeMatchAvailable + return storeStatusAvailable +} + +// isThrottled returns whether the store is currently throttled. +func (sd storeDetail) isThrottled(now time.Time) bool { + return sd.throttledUntil.After(now) } // storePoolPQ implements the heap.Interface (which includes sort.Interface) @@ -211,8 +202,9 @@ type StorePool struct { syncutil.RWMutex // Each storeDetail is contained in both a map and a priorityQueue; // pointers are used so that data can be kept in sync. - stores map[roachpb.StoreID]*storeDetail - queue storePoolPQ + stores map[roachpb.StoreID]*storeDetail + queue storePoolPQ + deterministic bool } } @@ -471,44 +463,45 @@ func (sl *StoreList) add(s roachpb.StoreDescriptor) { } } -// getStoreList returns a storeList that contains all active stores that -// contain the required attributes and their associated stats. It also returns -// the total number of alive and throttled stores. -// TODO(embark, spencer): consider using a reverse index map from -// Attr->stores, for efficiency. Ensure that entries in this map still -// have an opportunity to be garbage collected. -func (sp *StorePool) getStoreList(constraints config.Constraints, deterministic bool) (StoreList, int, int) { +var _ sort.Interface = StoreList{} + +// Len implements sort.Interface. +func (sl StoreList) Len() int { return len(sl.stores) } + +// Less implements sort.Interface. +func (sl StoreList) Less(i, j int) bool { return sl.stores[i].StoreID < sl.stores[j].StoreID } + +// Swap implements sort.Interface. +func (sl StoreList) Swap(i, j int) { sl.stores[i], sl.stores[j] = sl.stores[j], sl.stores[i] } + +// getStoreList returns a storeList that contains all active stores and their +// associated stats. It also returns the total number of alive and throttled +// stores. +func (sp *StorePool) getStoreList() (StoreList, int, int) { sp.mu.RLock() defer sp.mu.RUnlock() - var storeIDs roachpb.StoreIDSlice - for storeID := range sp.mu.stores { - storeIDs = append(storeIDs, storeID) - } - // Sort the stores by key if deterministic is requested. This is only for - // unit testing. 
- if deterministic { - sort.Sort(storeIDs) - } now := sp.clock.Now().GoTime() sl := StoreList{} var aliveStoreCount int var throttledStoreCount int - for _, storeID := range storeIDs { + for storeID := range sp.mu.stores { detail := sp.mu.stores[storeID] - // TODO(d4l3k): Sort by number of matches. - matched := detail.match(now, constraints) - switch matched { - case storeMatchAlive: - aliveStoreCount++ - case storeMatchThrottled: + + switch detail.status(now) { + case storeStatusThrottled: aliveStoreCount++ throttledStoreCount++ - case storeMatchAvailable: + case storeStatusAvailable: aliveStoreCount++ sl.add(*detail.desc) } } + + if sp.mu.deterministic { + sort.Sort(sl) + } + return sl, aliveStoreCount, throttledStoreCount } diff --git a/storage/store_pool_test.go b/storage/store_pool_test.go index 0b390021669b..63c7b6220ba2 100644 --- a/storage/store_pool_test.go +++ b/storage/store_pool_test.go @@ -26,7 +26,6 @@ import ( "golang.org/x/net/context" "github.com/cockroachdb/cockroach/base" - "github.com/cockroachdb/cockroach/config" "github.com/cockroachdb/cockroach/gossip" "github.com/cockroachdb/cockroach/roachpb" "github.com/cockroachdb/cockroach/rpc" @@ -42,6 +41,13 @@ import ( "github.com/pkg/errors" ) +// TestSetDeterministic makes StorePool return results in a deterministic way. +func (sp *StorePool) TestSetDeterministic(deterministic bool) { + sp.mu.Lock() + defer sp.mu.Unlock() + sp.mu.deterministic = deterministic +} + var uniqueStore = []*roachpb.StoreDescriptor{ { StoreID: 2, @@ -216,13 +222,12 @@ func TestStorePoolDies(t *testing.T) { // verifyStoreList ensures that the returned list of stores is correct. func verifyStoreList( sp *StorePool, - constraints config.Constraints, expected []int, expectedAliveStoreCount int, expectedThrottledStoreCount int, ) error { var actual []int - sl, aliveStoreCount, throttledStoreCount := sp.getStoreList(constraints, false) + sl, aliveStoreCount, throttledStoreCount := sp.getStoreList() if aliveStoreCount != expectedAliveStoreCount { return errors.Errorf("expected AliveStoreCount %d does not match actual %d", expectedAliveStoreCount, aliveStoreCount) @@ -243,67 +248,51 @@ func verifyStoreList( } // TestStorePoolGetStoreList ensures that the store list returns only stores -// that are alive and match the attribute criteria. +// that are alive. func TestStorePoolGetStoreList(t *testing.T) { defer leaktest.AfterTest(t)() // We're going to manually mark stores dead in this test. stopper, g, _, sp := createTestStorePool(TestTimeUntilStoreDeadOff) defer stopper.Stop() sg := gossiputil.NewStoreGossiper(g) - constraints := config.Constraints{Constraints: []config.Constraint{{Value: "ssd"}, {Value: "dc"}}} - required := []string{"ssd", "dc"} // Nothing yet. 
- if sl, _, _ := sp.getStoreList(constraints, false); len(sl.stores) != 0 { + if sl, _, _ := sp.getStoreList(); len(sl.stores) != 0 { t.Errorf("expected no stores, instead %+v", sl.stores) } matchingStore := roachpb.StoreDescriptor{ StoreID: 1, Node: roachpb.NodeDescriptor{NodeID: 1}, - Attrs: roachpb.Attributes{Attrs: required}, } supersetStore := roachpb.StoreDescriptor{ StoreID: 2, Node: roachpb.NodeDescriptor{NodeID: 1}, - Attrs: roachpb.Attributes{Attrs: append(required, "db")}, - } - unmatchingStore := roachpb.StoreDescriptor{ - StoreID: 3, - Node: roachpb.NodeDescriptor{NodeID: 1}, - Attrs: roachpb.Attributes{Attrs: []string{"ssd", "otherdc"}}, - } - emptyStore := roachpb.StoreDescriptor{ - StoreID: 4, - Node: roachpb.NodeDescriptor{NodeID: 1}, - Attrs: roachpb.Attributes{}, } deadStore := roachpb.StoreDescriptor{ - StoreID: 5, + StoreID: 3, Node: roachpb.NodeDescriptor{NodeID: 1}, - Attrs: roachpb.Attributes{Attrs: required}, } declinedStore := roachpb.StoreDescriptor{ - StoreID: 6, + StoreID: 4, Node: roachpb.NodeDescriptor{NodeID: 1}, - Attrs: roachpb.Attributes{Attrs: required}, } - // Mark all alive initially. - sg.GossipStores([]*roachpb.StoreDescriptor{ + allStores := []*roachpb.StoreDescriptor{ &matchingStore, &supersetStore, - &unmatchingStore, - &emptyStore, &deadStore, &declinedStore, - }, t) + } + + // Mark all alive initially. + sg.GossipStores(allStores, t) - if err := verifyStoreList(sp, constraints, []int{ + if err := verifyStoreList(sp, []int{ int(matchingStore.StoreID), int(supersetStore.StoreID), int(deadStore.StoreID), int(declinedStore.StoreID), - }, 6, 0); err != nil { + }, len(allStores), 0); err != nil { t.Error(err) } @@ -313,10 +302,10 @@ func TestStorePoolGetStoreList(t *testing.T) { sp.mu.stores[declinedStore.StoreID].throttledUntil = sp.clock.Now().GoTime().Add(time.Hour) sp.mu.Unlock() - if err := verifyStoreList(sp, constraints, []int{ + if err := verifyStoreList(sp, []int{ int(matchingStore.StoreID), int(supersetStore.StoreID), - }, 5, 1); err != nil { + }, len(allStores)-1, 1); err != nil { t.Error(err) } } @@ -430,7 +419,7 @@ func TestStorePoolDefaultState(t *testing.T) { t.Errorf("expected 0 dead replicas; got %v", dead) } - sl, alive, throttled := sp.getStoreList(config.Constraints{}, true) + sl, alive, throttled := sp.getStoreList() if len(sl.stores) > 0 { t.Errorf("expected no live stores; got list of %v", sl) }