Skip to content

Commit

Permalink
storage: always write a HardState
Browse files Browse the repository at this point in the history
As discovered in
#6991 (comment),
it's possible that we apply a Raft snapshot without writing a corresponding
HardState since we write the snapshot in its own batch first and only then
write a HardState.

If that happens, the server is going to panic on restart: It will have a
nontrivial first index, but a committed index of zero (from the empty
HardState).

This change prevents us from applying a snapshot when there is no HardState
supplied along with it, except when applying a preemptive snapshot (in which
case we synthesize a HardState).
  • Loading branch information
tbg committed Jul 1, 2016
1 parent a62a0cf commit df9d77d
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 9 deletions.
2 changes: 1 addition & 1 deletion storage/replica.go
Original file line number Diff line number Diff line change
Expand Up @@ -1409,7 +1409,7 @@ func (r *Replica) handleRaftReady() error {

if !raft.IsEmptySnap(rd.Snapshot) {
var err error
lastIndex, err = r.applySnapshot(rd.Snapshot)
lastIndex, err = r.applySnapshot(rd.Snapshot, rd.HardState)
if err != nil {
return err
}
Expand Down
23 changes: 17 additions & 6 deletions storage/replica_raftstorage.go
Original file line number Diff line number Diff line change
Expand Up @@ -511,9 +511,14 @@ func (r *Replica) updateRangeInfo(desc *roachpb.RangeDescriptor) error {
return nil
}

// applySnapshot updates the replica based on the given snapshot.
// applySnapshot updates the replica based on the given snapshot and
// corresponding HardState. When the Replica's replicaID is zero, the
// supplied HardState must be empty - we are applying a preemptive snapshot
// and will be synthesizing our HardState.
// Otherwise, the HardState must be nonempty and is persisted along with the
// snapshot.
// Returns the new last index.
func (r *Replica) applySnapshot(snap raftpb.Snapshot) (uint64, error) {
func (r *Replica) applySnapshot(snap raftpb.Snapshot, hs raftpb.HardState) (uint64, error) {
// We use a separate batch to apply the snapshot since the Replica (and in
// particular the last index) is updated after the batch commits. Using a
// separate batch also allows for future optimization (such as using a
Expand All @@ -529,6 +534,9 @@ func (r *Replica) applySnapshot(snap raftpb.Snapshot) (uint64, error) {

// Extract the updated range descriptor.
desc := snapData.RangeDescriptor
// Fill the reservation if there was any one for this range, regardless of
// whether the application succeeded.
defer r.store.bookie.Fill(desc.RangeID)

r.mu.Lock()
replicaID := r.mu.replicaID
Expand Down Expand Up @@ -597,12 +605,19 @@ func (r *Replica) applySnapshot(snap raftpb.Snapshot) (uint64, error) {
}

if replicaID == 0 {
if !raft.IsEmptyHardState(hs) {
return 0, errors.Errorf("cannot apply HardState on preemptive snapshot")
}
// The replica is not part of the Raft group so we need to write the Raft
// hard state for the replica in order for the Raft state machine to start
// correctly.
if err := updateHardState(batch, s); err != nil {
return 0, err
}
} else {
if raft.IsEmptyHardState(hs) {
return 0, errors.Errorf("cannot apply empty HardState on non-preemptive snapshot")
}
}

if err := batch.Commit(); err != nil {
Expand Down Expand Up @@ -634,10 +649,6 @@ func (r *Replica) applySnapshot(snap raftpb.Snapshot) (uint64, error) {
if err := r.updateRangeInfo(&desc); err != nil {
panic(err)
}

// Fill the reservation if there was any one for this range.
r.store.bookie.Fill(desc.RangeID)

// Update the range descriptor. This is done last as this is the step that
// makes the Replica visible in the Store.
if err := r.setDesc(&desc); err != nil {
Expand Down
6 changes: 5 additions & 1 deletion storage/replica_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"golang.org/x/net/context"

"github.com/coreos/etcd/raft"
"github.com/coreos/etcd/raft/raftpb"
"github.com/gogo/protobuf/proto"
"github.com/pkg/errors"

Expand Down Expand Up @@ -6163,7 +6164,10 @@ func TestReserveAndApplySnapshot(t *testing.T) {
t.Fatalf("Can't reserve the replica")
}
checkReservations(t, 1)
if _, err := firstRng.applySnapshot(snap); err != nil {

// Run a (failing) attempt to apply the snapshot (which will still fill the
// reservation).
if _, err := firstRng.applySnapshot(snap, raftpb.HardState{}); !testutils.IsError(err, "empty HardState") {
t.Fatal(err)
}
checkReservations(t, 0)
Expand Down
2 changes: 1 addition & 1 deletion storage/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -2101,7 +2101,7 @@ func (s *Store) handleRaftMessage(req *RaftMessageRequest) error {
// the raft group (i.e. replicas with an ID of 0). This is the only
// operation that can be performed on a replica before it is part of the
// raft group.
_, err := r.applySnapshot(req.Message.Snapshot)
_, err := r.applySnapshot(req.Message.Snapshot, raftpb.HardState{})
return err
}
// We disallow non-snapshot messages to replica ID 0. Note that
Expand Down

0 comments on commit df9d77d

Please sign in to comment.