Skip to content

Commit

Permalink
storage: indicate when a split causes a Raft snapshot
Browse files Browse the repository at this point in the history
Touches cockroachdb#31409.

Release note: None
  • Loading branch information
tbg committed Nov 26, 2018
1 parent 7125d09 commit 5f865f3
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 5 deletions.
1 change: 1 addition & 0 deletions pkg/storage/allocator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ func replicas(storeIDs ...roachpb.StoreID) []roachpb.ReplicaDescriptor {
for i, storeID := range storeIDs {
res[i].NodeID = roachpb.NodeID(storeID)
res[i].StoreID = storeID
res[i].ReplicaID = roachpb.ReplicaID(i + 1)
}
return res
}
Expand Down
29 changes: 24 additions & 5 deletions pkg/storage/replica_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ import (
"strings"
"time"

"github.com/pkg/errors"
"go.etcd.io/etcd/raft/raftpb"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/internal/client"
"github.com/cockroachdb/cockroach/pkg/keys"
Expand All @@ -42,6 +39,9 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/retry"
"github.com/pkg/errors"
"go.etcd.io/etcd/raft"
"go.etcd.io/etcd/raft/raftpb"
)

// evaluateCommand delegates to the eval method for the given
Expand Down Expand Up @@ -205,6 +205,23 @@ func maybeDescriptorChangedError(desc *roachpb.RangeDescriptor, err error) (stri
return "", false
}

func splitSnapshotWarningStr(rangeID roachpb.RangeID, status *raft.Status) string {
var s string
if status != nil && status.RaftState == raft.StateLeader {
for replicaID, pr := range status.Progress {
if replicaID == status.Lead {
// TODO(tschottdorf): remove this line once we have picked up
// https://github.com/etcd-io/etcd/pull/10279
continue
}
if pr.State != raft.ProgressStateReplicate {
s += fmt.Sprintf("; may cause Raft snapshot to r%d/%d: %v", rangeID, replicaID, &pr)
}
}
}
return s
}

// adminSplitWithDescriptor divides the range into into two ranges, using
// either args.SplitKey (if provided) or an internally computed key that aims
// to roughly equipartition the range by size. The split is done inside of a
Expand Down Expand Up @@ -303,8 +320,10 @@ func (r *Replica) adminSplitWithDescriptor(
}
leftDesc.EndKey = splitKey

log.Infof(ctx, "initiating a split of this range at key %s [r%d]",
splitKey, rightDesc.RangeID)
extra := splitSnapshotWarningStr(r.RangeID, r.RaftStatus())

log.Infof(ctx, "initiating a split of this range at key %s [r%d]%s",
splitKey, rightDesc.RangeID, extra)

if err := r.store.DB().Txn(ctx, func(ctx context.Context, txn *client.Txn) error {
log.Event(ctx, "split closure begins")
Expand Down
35 changes: 35 additions & 0 deletions pkg/storage/replica_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"github.com/gogo/protobuf/proto"
"github.com/kr/pretty"
"github.com/pkg/errors"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.etcd.io/etcd/raft"
"go.etcd.io/etcd/raft/raftpb"
Expand Down Expand Up @@ -135,6 +136,22 @@ func leaseExpiry(repl *Replica) int64 {
return l.Expiration.WallTime + 1
}

// Create a Raft status that shows everyone fully up to date.
func upToDateRaftStatus(repls []roachpb.ReplicaDescriptor) *raft.Status {
prs := make(map[uint64]raft.Progress)
for _, repl := range repls {
prs[uint64(repl.ReplicaID)] = raft.Progress{
State: raft.ProgressStateReplicate,
Match: 100,
}
}
return &raft.Status{
HardState: raftpb.HardState{Commit: 100},
SoftState: raft.SoftState{Lead: 1, RaftState: raft.StateLeader},
Progress: prs,
}
}

// testContext contains all the objects necessary to test a Range.
// In most cases, simply call Start(t) (and later Stop()) on a zero-initialized
// testContext{}. Any fields which are initialized to non-nil values
Expand Down Expand Up @@ -10977,3 +10994,21 @@ func TestRollbackMissingTxnRecordNoError(t *testing.T) {
t.Errorf("expected %s; got %v", expErr, pErr)
}
}

func TestSplitSnapshotWarningStr(t *testing.T) {
defer leaktest.AfterTest(t)()

status := upToDateRaftStatus(replicas(1, 3, 5))
assert.Equal(t, "", splitSnapshotWarningStr(12, status))

pr := status.Progress[2]
pr.State = raft.ProgressStateProbe
status.Progress[2] = pr

assert.Equal(
t,
"; may cause Raft snapshot to r12/2: next = 0, match = 100, state = ProgressStateProbe,"+
" waiting = false, pendingSnapshot = 0",
splitSnapshotWarningStr(12, status),
)
}

0 comments on commit 5f865f3

Please sign in to comment.