Skip to content

Commit

Permalink
storage: add replica placeholders
Browse files Browse the repository at this point in the history
Add replica placeholders as a separate type. Replica placeholders are
added to the store.mu.replicasByKey BTree when a pre-emptive snapshot
is approved, and atomically swapped with replicas once the snapshot is
applied, preventing two overlapping snapshots being approved
simultaneously. Closes cockroachdb#7830.

Also move some replica functions from store.go into replica.go.
  • Loading branch information
Arjun Narayan committed Aug 16, 2016
1 parent 15dd44b commit b0f10a8
Show file tree
Hide file tree
Showing 6 changed files with 337 additions and 85 deletions.
41 changes: 41 additions & 0 deletions storage/replica.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (

"github.com/coreos/etcd/raft"
"github.com/coreos/etcd/raft/raftpb"
"github.com/google/btree"
"github.com/kr/pretty"
"github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
Expand Down Expand Up @@ -303,6 +304,17 @@ type Replica struct {
}
}

// KeyRange is an interface type for the replicasByKey BTree, to compare
// Replica and ReplicaPlaceholder.
type KeyRange interface {
Desc() *roachpb.RangeDescriptor
rangeKeyItem
btree.Item
fmt.Stringer
}

var _ KeyRange = &Replica{}

// withRaftGroupLocked calls the supplied function with the (lazily
// initialized) Raft group. It assumes that the Replica lock is held.
func (r *Replica) withRaftGroupLocked(f func(r *raft.RawNode) error) error {
Expand Down Expand Up @@ -1562,6 +1574,26 @@ func (r *Replica) handleRaftReady() error {
if err := r.applySnapshot(ctx, rd.Snapshot, rd.HardState); err != nil {
return err
}

// handleRaftReady is called under the processRaftMu lock, so it is
// safe to lock the store here.
if err := func() error {
r.store.mu.Lock()
defer r.store.mu.Unlock()

if _, exists := r.store.mu.replicaPlaceholders[r.RangeID]; exists {
if err := r.store.removePlaceholderLocked(r.RangeID); err != nil {
return errors.Wrapf(err, "could not remove placeholder before applySnapshot")
}
}
if err := r.store.processRangeDescriptorUpdateLocked(r); err != nil {
return errors.Wrapf(err, "could not processRangeDescriptorUpdate after applySnapshot")
}
return nil
}(); err != nil {
return err
}

var err error
if lastIndex, err = loadLastIndex(ctx, r.store.Engine(), r.RangeID); err != nil {
return err
Expand Down Expand Up @@ -2812,6 +2844,15 @@ func (r *Replica) maybeAddToRaftLogQueue(appliedIndex uint64) {
}
}

// endKey returns the EndKey of the replica's current range descriptor, as
// obtained from Desc().
func (r *Replica) endKey() roachpb.RKey {
	desc := r.Desc()
	return desc.EndKey
}

// Less implements the btree.Item interface. Items are ordered by end key:
// the receiver sorts before i when its end key is less than i's end key.
// The argument must implement rangeKeyItem; the type assertion panics
// otherwise.
func (r *Replica) Less(i btree.Item) bool {
	mine := r.endKey()
	theirs := i.(rangeKeyItem).endKey()
	return mine.Less(theirs)
}

// panic aborts with a message made of the replica's string form, a colon
// separator, and the text of err.
func (r *Replica) panic(err error) {
	msg := r.String() + ": " + err.Error()
	panic(msg)
}
Expand Down
2 changes: 1 addition & 1 deletion storage/replica_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -2357,7 +2357,7 @@ func (r *Replica) AdminSplit(
// it isn't until C receives a snapshot of range 2 from the leader that it
// discovers the span of keys it covers. In order to prevent C from fully
// initializing range 2 in this instance, we prohibit applying a snapshot to a
// range if the snapshot overlaps another range. See Store.canApplySnapshot.
// range if the snapshot overlaps another range. See Store.canApplySnapshotLocked.
//
// But while a snapshot may not have been applied at C, an uninitialized
// Replica was created. An uninitialized Replica is one which belongs to a Raft
Expand Down
51 changes: 51 additions & 0 deletions storage/replica_placeholder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright 2016 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
//
// Author: Arjun Narayan ([email protected])

package storage

import (
"fmt"

"github.com/cockroachdb/cockroach/roachpb"
"github.com/google/btree"
)

// ReplicaPlaceholder stands in for a Replica that a Store expects to
// create later. It carries only a RangeDescriptor.
type ReplicaPlaceholder struct {
	// rangeDesc is the descriptor of the range this placeholder reserves.
	rangeDesc roachpb.RangeDescriptor
}

// Compile-time check that *ReplicaPlaceholder satisfies KeyRange.
var _ KeyRange = (*ReplicaPlaceholder)(nil)

// Desc returns a pointer to the placeholder's own range descriptor (not a
// copy).
func (r *ReplicaPlaceholder) Desc() *roachpb.RangeDescriptor {
	desc := &r.rangeDesc
	return desc
}

// endKey returns the EndKey of the placeholder's range descriptor.
func (r *ReplicaPlaceholder) endKey() roachpb.RKey {
	desc := r.Desc()
	return desc.EndKey
}

// Less implements the btree.Item interface. Items are ordered by end key:
// the placeholder sorts before i when its end key is less than i's end key.
// The argument must implement rangeKeyItem; the type assertion panics
// otherwise.
func (r *ReplicaPlaceholder) Less(i btree.Item) bool {
	mine := r.endKey()
	theirs := i.(rangeKeyItem).endKey()
	return mine.Less(theirs)
}

// String implements fmt.Stringer. The output contains the range ID and the
// [start-end) key span, followed by a "placeholder" marker.
func (r *ReplicaPlaceholder) String() string {
	id := r.Desc().RangeID
	start, end := r.rangeDesc.StartKey, r.rangeDesc.EndKey
	return fmt.Sprintf("range=%d [%s-%s): placeholder", id, start, end)
}
10 changes: 4 additions & 6 deletions storage/replica_raftstorage.go
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,8 @@ func (r *Replica) updateRangeInfo(desc *roachpb.RangeDescriptor) error {
// HardState (which may be empty, as Raft may apply some snapshots which don't
// require an update to the HardState). All snapshots must pass through Raft
// for correctness, i.e. the parameters to this method must be taken from
// a raft.Ready.
// a raft.Ready. It is the caller's responsibility to call
// r.store.processRangeDescriptorUpdate(r) after a successful applySnapshot.
func (r *Replica) applySnapshot(
ctx context.Context, snap raftpb.Snapshot, hs raftpb.HardState,
) error {
Expand Down Expand Up @@ -700,11 +701,8 @@ func (r *Replica) applySnapshot(
if err := r.updateRangeInfo(&desc); err != nil {
panic(err)
}
// Update the range descriptor. This is done last as this is the step that
// makes the Replica visible in the Store.
if err := r.setDesc(&desc); err != nil {
panic(err)
}

r.setDescWithoutProcessUpdate(&desc)

if !isPreemptive {
r.store.metrics.RangeSnapshotsNormalApplied.Inc(1)
Expand Down
Loading

0 comments on commit b0f10a8

Please sign in to comment.