Skip to content

Commit

Permalink
Merge pull request #7700 from tschottdorf/split-race-test
Browse files Browse the repository at this point in the history
storage: add test to prevent regression of #7600
  • Loading branch information
tbg authored Jul 9, 2016
2 parents dafba1f + 52eaaa0 commit ed11c3d
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 3 deletions.
103 changes: 103 additions & 0 deletions storage/client_split_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ import (
"github.com/cockroachdb/cockroach/util/protoutil"
"github.com/cockroachdb/cockroach/util/randutil"
"github.com/cockroachdb/cockroach/util/tracing"
"github.com/coreos/etcd/raft/raftpb"
"github.com/pkg/errors"
)

Expand Down Expand Up @@ -1010,6 +1011,108 @@ func TestStoreSplitReadRace(t *testing.T) {
}
}

// TestStoreRangeSplitRaceUninitializedRHS reproduces #7600 (before it was
// fixed). While splits are happening, we simulate incoming messages for the
// right-hand side to trigger a race between the creation of the proper replica
// and the uninitialized replica reacting to messages.
func TestStoreRangeSplitRaceUninitializedRHS(t *testing.T) {
defer leaktest.AfterTest(t)()
mtc := &multiTestContext{}
storeCtx := storage.TestStoreContext()
// An aggressive tick interval lets groups communicate more and thus
// triggers test failures much more reliably. We can't go too aggressive
// or race tests never make any progress.
storeCtx.RaftTickInterval = 50 * time.Millisecond
storeCtx.RaftElectionTimeoutTicks = 2
currentTrigger := make(chan *roachpb.SplitTrigger)
seen := make(map[storagebase.CmdIDKey]struct{})
storeCtx.TestingKnobs.TestingCommandFilter = func(args storagebase.FilterArgs) *roachpb.Error {
et, ok := args.Req.(*roachpb.EndTransactionRequest)
if !ok || et.InternalCommitTrigger == nil {
return nil
}
trigger := protoutil.Clone(et.InternalCommitTrigger.GetSplitTrigger()).(*roachpb.SplitTrigger)
if trigger != nil && len(trigger.NewDesc.Replicas) == 2 && args.Hdr.Txn.Epoch == 0 && args.Sid == mtc.stores[0].StoreID() {
if _, ok := seen[args.CmdID]; ok {
return nil
}
// Without replay protection, a single reproposal locks up the
// test.
seen[args.CmdID] = struct{}{}
currentTrigger <- trigger
return roachpb.NewError(roachpb.NewReadWithinUncertaintyIntervalError(args.Hdr.Timestamp, args.Hdr.Timestamp))
}
return nil
}

mtc.storeContext = &storeCtx
mtc.Start(t, 2)
defer mtc.Stop()

leftRange := mtc.stores[0].LookupReplica(roachpb.RKey("a"), nil)

// We'll fake messages from term 1, ..., .magicIters-1. The exact number
// doesn't matter for anything but for its likelihood of triggering the
// race.
const magicIters = 5

// Replicate the left range onto the second node. We don't wait since we
// don't actually care what the second node does. All we want is that the
// first node isn't surprised by messages from that node.
mtc.replicateRange(leftRange.RangeID, 1)

for i := 0; i < 10; i++ {
var wg sync.WaitGroup
wg.Add(2)

go func() {
defer wg.Done()
// Split the data range. The split keys are chosen so that they move
// towards "a" (so that the range being split is always the first
// range).
splitKey := roachpb.Key(encoding.EncodeVarintDescending([]byte("a"), int64(i)))
splitArgs := adminSplitArgs(keys.SystemMax, splitKey)
if _, pErr := client.SendWrapped(mtc.distSenders[0], nil, &splitArgs); pErr != nil {
t.Fatal(pErr)
}
}()
go func() {
defer wg.Done()

trigger := <-currentTrigger // our own copy
// Make sure the first node is first for convenience.
replicas := trigger.NewDesc.Replicas
if replicas[0].NodeID > replicas[1].NodeID {
tmp := replicas[1]
replicas[1] = replicas[0]
replicas[0] = tmp
}

// Send a few vote requests which look like they're from the other
// node's right hand side of the split. This triggers a race which
// is discussed in #7600 (briefly, the creation of the right hand
// side in the split trigger was racing with the uninitialized
// version for the same group, resulting in clobbered HardState).
for term := uint64(1); term < magicIters; term++ {
if err := mtc.stores[0].HandleRaftMessage(&storage.RaftMessageRequest{
RangeID: trigger.NewDesc.RangeID,
ToReplica: replicas[0],
FromReplica: replicas[1],
Message: raftpb.Message{
Type: raftpb.MsgVote,
To: uint64(replicas[0].ReplicaID),
From: uint64(replicas[1].ReplicaID),
Term: term,
},
}); err != nil {
t.Error(err)
}
}
}()
wg.Wait()
}
}

// TestLeaderAfterSplit verifies that a raft group created by a split
// elects a leader without waiting for an election timeout.
func TestLeaderAfterSplit(t *testing.T) {
Expand Down
5 changes: 5 additions & 0 deletions storage/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ import (
"github.com/cockroachdb/cockroach/storage/engine/enginepb"
)

// HandleRaftMessage delegates to handleRaftMessage.
func (s *Store) HandleRaftMessage(req *RaftMessageRequest) error {
return s.handleRaftMessage(req)
}

// ComputeMVCCStats immediately computes correct total MVCC usage statistics
// for the store, returning the computed values (but without modifying the
// store).
Expand Down
6 changes: 3 additions & 3 deletions storage/replica.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@ const (
// simpler with this being turned off.
var txnAutoGC = true

// raftInitialLogIndex is the starting point for the raft log. We bootstrap
// the raft membership by synthesizing a snapshot as if there were some
// discarded prefix to the log, so we must begin the log at an arbitrary
// raftInitialLog{Index,Term} are the starting points for the raft log. We
// bootstrap the raft membership by synthesizing a snapshot as if there were
// some discarded prefix to the log, so we must begin the log at an arbitrary
// index greater than 1.
const (
raftInitialLogIndex = 10
Expand Down

0 comments on commit ed11c3d

Please sign in to comment.