From 8c12c3ec4f05601380384e82eb98a39ca409822a Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Mon, 26 Nov 2018 12:39:31 +0100 Subject: [PATCH] storage: indicate when a split causes a Raft snapshot Touches #31409. Release note: None --- pkg/storage/allocator_test.go | 1 + pkg/storage/replica_command.go | 29 +++++++++++++++++++++++----- pkg/storage/replica_test.go | 35 ++++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 5 deletions(-) diff --git a/pkg/storage/allocator_test.go b/pkg/storage/allocator_test.go index 6ebea2da836f..f9381c6b65bb 100644 --- a/pkg/storage/allocator_test.go +++ b/pkg/storage/allocator_test.go @@ -306,6 +306,7 @@ func replicas(storeIDs ...roachpb.StoreID) []roachpb.ReplicaDescriptor { for i, storeID := range storeIDs { res[i].NodeID = roachpb.NodeID(storeID) res[i].StoreID = storeID + res[i].ReplicaID = roachpb.ReplicaID(i + 1) } return res } diff --git a/pkg/storage/replica_command.go b/pkg/storage/replica_command.go index 83e990373d35..13972719b93a 100644 --- a/pkg/storage/replica_command.go +++ b/pkg/storage/replica_command.go @@ -22,9 +22,6 @@ import ( "strings" "time" - "github.com/pkg/errors" - "go.etcd.io/etcd/raft/raftpb" - "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/internal/client" "github.com/cockroachdb/cockroach/pkg/keys" @@ -40,6 +37,9 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/cockroachdb/cockroach/pkg/util/retry" + "github.com/pkg/errors" + "go.etcd.io/etcd/raft" + "go.etcd.io/etcd/raft/raftpb" ) // evaluateCommand delegates to the eval method for the given @@ -197,6 +197,23 @@ func maybeDescriptorChangedError(desc *roachpb.RangeDescriptor, err error) (stri return "", false } +func splitSnapshotWarningStr(rangeID roachpb.RangeID, status *raft.Status) string { + var s string + if status != nil && status.RaftState == raft.StateLeader { + for replicaID, pr := range status.Progress { + if replicaID == status.Lead { + // TODO(tschottdorf): remove this line once we have picked up + // https://github.com/etcd-io/etcd/pull/10279 + continue + } + if pr.State != raft.ProgressStateReplicate { + s += fmt.Sprintf("; may cause Raft snapshot to r%d/%d: %v", rangeID, replicaID, &pr) + } + } + } + return s +} + // adminSplitWithDescriptor divides the range into into two ranges, using // either args.SplitKey (if provided) or an internally computed key that aims // to roughly equipartition the range by size. The split is done inside of a @@ -295,8 +312,10 @@ func (r *Replica) adminSplitWithDescriptor( } leftDesc.EndKey = splitKey - log.Infof(ctx, "initiating a split of this range at key %s [r%d]", - splitKey, rightDesc.RangeID) + extra := splitSnapshotWarningStr(r.RangeID, r.RaftStatus()) + + log.Infof(ctx, "initiating a split of this range at key %s [r%d]%s", + splitKey, rightDesc.RangeID, extra) if err := r.store.DB().Txn(ctx, func(ctx context.Context, txn *client.Txn) error { log.Event(ctx, "split closure begins") diff --git a/pkg/storage/replica_test.go b/pkg/storage/replica_test.go index d70f10decd25..34c5eb675037 100644 --- a/pkg/storage/replica_test.go +++ b/pkg/storage/replica_test.go @@ -33,6 +33,7 @@ import ( "github.com/gogo/protobuf/proto" "github.com/kr/pretty" "github.com/pkg/errors" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.etcd.io/etcd/raft" "go.etcd.io/etcd/raft/raftpb" @@ -134,6 +135,22 @@ func leaseExpiry(repl *Replica) int64 { return l.Expiration.WallTime + 1 } +// Create a Raft status that shows everyone fully up to date. +func upToDateRaftStatus(repls []roachpb.ReplicaDescriptor) *raft.Status { + prs := make(map[uint64]raft.Progress) + for _, repl := range repls { + prs[uint64(repl.ReplicaID)] = raft.Progress{ + State: raft.ProgressStateReplicate, + Match: 100, + } + } + return &raft.Status{ + HardState: raftpb.HardState{Commit: 100}, + SoftState: raft.SoftState{Lead: 1, RaftState: raft.StateLeader}, + Progress: prs, + } +} + // testContext contains all the objects necessary to test a Range. // In most cases, simply call Start(t) (and later Stop()) on a zero-initialized // testContext{}. Any fields which are initialized to non-nil values @@ -10856,3 +10873,21 @@ func TestRollbackMissingTxnRecordNoError(t *testing.T) { t.Errorf("expected %s; got %v", expErr, pErr) } } + +func TestSplitSnapshotWarningStr(t *testing.T) { + defer leaktest.AfterTest(t)() + + status := upToDateRaftStatus(replicas(1, 3, 5)) + assert.Equal(t, "", splitSnapshotWarningStr(12, status)) + + pr := status.Progress[2] + pr.State = raft.ProgressStateProbe + status.Progress[2] = pr + + assert.Equal( + t, + "; may cause Raft snapshot to r12/2: next = 0, match = 100, state = ProgressStateProbe,"+ + " waiting = false, pendingSnapshot = 0", + splitSnapshotWarningStr(12, status), + ) +}