Skip to content

Commit

Permalink
storage: replace TestStoreRangeDownReplicate with TestReplicateQueueD…
Browse files Browse the repository at this point in the history
…ownReplicate

The new test tests the down replication functionality using TestCluster.

fixes cockroachdb#10536 fixes cockroachdb#10171 fixes cockroachdb#9603
  • Loading branch information
vivekmenezes committed Nov 22, 2016
1 parent b3a50e7 commit 44ea60d
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 78 deletions.
78 changes: 0 additions & 78 deletions pkg/storage/client_raft_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1148,84 +1148,6 @@ func TestUnreplicateFirstRange(t *testing.T) {
mtc.replicateRange(rangeID, 2)
}

// TestStoreRangeDownReplicate verifies that the replication queue will notice
// over-replicated ranges and remove replicas from them.
func TestStoreRangeDownReplicate(t *testing.T) {
defer leaktest.AfterTest(t)()
t.Skip("#9603, #10171, #10536")
mtc := startMultiTestContext(t, 5)
defer mtc.Stop()
mtc.initGossipNetwork()
store0 := mtc.stores[0]

// Split off a range from the initial range for testing; there are
// complications if the metadata ranges are removed from store 1, this
// simplifies the test.
splitKey := roachpb.Key("m")
rightKey := roachpb.Key("z")
{
replica := store0.LookupReplica(roachpb.RKeyMin, nil)
mtc.replicateRange(replica.RangeID, 1, 2)
splitArgs := adminSplitArgs(splitKey, splitKey)
if _, err := replica.AdminSplit(context.Background(), splitArgs); err != nil {
t.Fatal(err)
}
}
// Replicate the new range to all five stores.
rightKeyAddr, err := keys.Addr(rightKey)
if err != nil {
t.Fatal(err)
}
replica := store0.LookupReplica(rightKeyAddr, nil)
desc := replica.Desc()
mtc.replicateRange(desc.RangeID, 3, 4)

maxTimeout := time.After(10 * time.Second)
succeeded := false
i := 0
for !succeeded {
select {
case <-maxTimeout:
t.Fatalf("Failed to achieve proper replication within 10 seconds")
case <-time.After(10 * time.Millisecond):
rangeDesc := getRangeMetadata(rightKeyAddr, mtc, t)
if count := len(rangeDesc.Replicas); count < 3 {
t.Fatalf("Removed too many replicas; expected at least 3 replicas, found %d", count)
} else if count == 3 {
succeeded = true
break
}

// Cycle the lease to the next replica (on the next store) if that
// replica still exists. This avoids the condition in which we try
// to continuously remove the replica on a store when
// down-replicating while it also still holds the lease.
for {
i++
if i >= len(mtc.stores) {
i = 0
}
rep := mtc.stores[i].LookupReplica(rightKeyAddr, nil)
if rep != nil {
mtc.expireLeases()
// Force the read command request a new lease.
getArgs := getArgs(rightKey)
if _, err := client.SendWrapped(context.Background(), mtc.distSenders[i], &getArgs); err != nil {
t.Fatal(err)
}
mtc.stores[i].ForceReplicationScanAndProcess()
break
}
}
}
}

// Expire range leases one more time, so that any remaining resolutions can
// get a range lease.
// TODO(bdarnell): understand why some tests need this.
mtc.expireLeases()
}

// TestChangeReplicasDescriptorInvariant tests that a replica change aborts if
// another change has been made to the RangeDescriptor since it was initiated.
//
Expand Down
70 changes: 70 additions & 0 deletions pkg/storage/replicate_queue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ import (
"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/gossip"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
Expand Down Expand Up @@ -97,3 +99,71 @@ func TestReplicateQueueRebalance(t *testing.T) {
return nil
})
}

// TestReplicateQueueDownReplicate verifies that the replication queue will notice
// over-replicated ranges and remove replicas from them.
func TestReplicateQueueDownReplicate(t *testing.T) {
defer leaktest.AfterTest(t)()

const replicaCount = 3

tc := testcluster.StartTestCluster(t, replicaCount+2,
base.TestClusterArgs{ReplicationMode: base.ReplicationAuto},
)
defer tc.Stopper().Stop()

// Split off a range from the initial range for testing; there are
// complications if the metadata ranges are moved.
testKey := roachpb.Key("m")
if _, _, err := tc.SplitRange(testKey); err != nil {
t.Fatal(err)
}

desc, err := tc.LookupRange(testKey)
if err != nil {
t.Fatal(err)
}
rangeID := desc.RangeID

countReplicas := func() int {
count := 0
for _, s := range tc.Servers {
if err := s.Stores().VisitStores(func(store *storage.Store) error {
if _, err := store.GetReplica(rangeID); err == nil {
count++
}
return nil
}); err != nil {
t.Fatal(err)
}
}
return count
}

// Up-replicate the new range to all servers to create redundant replicas.
// Add replicas to all of the nodes. Only 2 of these calls will succeed
// because the range is already replicated to the other 3 nodes.
util.SucceedsSoon(t, func() error {
for i := 0; i < tc.NumServers(); i++ {
_, err := tc.AddReplicas(testKey, tc.Target(i))
if err != nil {
if testutils.IsError(err, "unable to add replica .* which is already present") {
continue
}
return err
}
}
if c := countReplicas(); c != tc.NumServers() {
return errors.Errorf("replica count = %d", c)
}
return nil
})

// Ensure that the replicas for the new range down replicate.
util.SucceedsSoon(t, func() error {
if c := countReplicas(); c != replicaCount {
return errors.Errorf("replica count = %d", c)
}
return nil
})
}

0 comments on commit 44ea60d

Please sign in to comment.