Skip to content

Commit

Permalink
Merge pull request #10395 from bdarnell/bdarnell/disable-prevote
Browse files Browse the repository at this point in the history
storage: Switch back from PreVote to CheckQuorum
  • Loading branch information
a-robinson authored Nov 2, 2016
2 parents 99e14f7 + f61b94d commit 9ce8833
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 1 deletion.
25 changes: 25 additions & 0 deletions pkg/storage/replica.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ const (
// simpler with this being turned off.
var txnAutoGC = true

// tickQuiesced controls whether a quiesced replica still advances its Raft
// logical clock (via TickQuiesced) when it is ticked. It is read from the
// COCKROACH_TICK_QUIESCED environment variable and defaults to true.
var tickQuiesced = envutil.EnvOrDefaultBool("COCKROACH_TICK_QUIESCED", true)

// raftInitialLog{Index,Term} are the starting points for the raft log. We
// bootstrap the raft membership by synthesizing a snapshot as if there were
// some discarded prefix to the log, so we must begin the log at an arbitrary
Expand Down Expand Up @@ -2216,6 +2218,29 @@ func (r *Replica) tickRaftMuLocked() (bool, error) {
return false, nil
}
if r.mu.quiescent {
// While a replica is quiesced we still advance its logical clock. This is
// necessary to avoid a scenario where the leader quiesces and a follower
// does not. The follower calls an election but the election fails because
// the leader and other follower believe that no time in the current term
// has passed. The Raft group is then in a state where one member has a
// term that is advanced which will then cause subsequent heartbeats from
// the existing leader to be rejected in a way that the leader will step
// down. This situation is caused by an interaction between quiescence and
// the Raft CheckQuorum feature which relies on the logical clock ticking
// at roughly the same rate on all members of the group.
//
// By ticking the logical clock (incrementing an integer) we avoid this
// situation. If one of the followers does not quiesce it will call an
// election but the election will succeed. Note that while we expect such
// elections from quiesced followers to be extremely rare, it is very
// difficult to completely eliminate them so we want to minimize the
// disruption when they do occur.
//
// For more details, see #9372.
// TODO(bdarnell): remove this once we have fully switched to PreVote
if tickQuiesced {
r.mu.internalRaftGroup.TickQuiesced()
}
return false, nil
}
if r.maybeQuiesceLocked() {
Expand Down
12 changes: 11 additions & 1 deletion pkg/storage/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ var changeTypeInternalToRaft = map[roachpb.ReplicaChangeType]raftpb.ConfChangeTy
var storeSchedulerConcurrency = envutil.EnvOrDefaultInt(
"COCKROACH_SCHEDULER_CONCURRENCY", 2*runtime.NumCPU())

// enablePreVote selects which of the Raft PreVote and CheckQuorum features
// is turned on when the raft config is built: exactly one of the two is
// enabled. It is read from the COCKROACH_ENABLE_PREVOTE environment variable
// and defaults to false, i.e. CheckQuorum is used.
var enablePreVote = envutil.EnvOrDefaultBool(
"COCKROACH_ENABLE_PREVOTE", false)

// RaftElectionTimeout returns the raft election timeout, as computed
// from the specified tick interval and number of election timeout
// ticks. If raftElectionTimeoutTicks is 0, uses the value of
Expand Down Expand Up @@ -150,7 +153,14 @@ func newRaftConfig(
HeartbeatTick: storeCfg.RaftHeartbeatIntervalTicks,
Storage: strg,
Logger: logger,
PreVote: true,

// TODO(bdarnell): PreVote and CheckQuorum are two ways of
// achieving the same thing. PreVote is more compatible with
// quiesced ranges, so we want to switch to it once we've worked
// out the bugs.
PreVote: enablePreVote,
CheckQuorum: !enablePreVote,

// TODO(bdarnell): make these configurable; evaluate defaults.
MaxSizePerMsg: 1024 * 1024,
MaxInflightMsgs: 256,
Expand Down

0 comments on commit 9ce8833

Please sign in to comment.