Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
74654: loqrecovery: verify pending descriptor changes in raft log r=tbg a=aliher1911

Previously, when generating unsafe loss of quorum recovery plan recovery command could pick replicas that have unapplied changes to local range descriptor. That would cause nodes to crash because of unexpected change.
To resolve this, this patch adds a check that would extract information about descriptor changes from raft log and subsequently fail if recovered range would need to use replica with pending descriptor changes.

It adds the flag --force to allow creation of the plan when some ranges have potentially dangerous changes.

For those cases prompt will provide following info:
```
Found replica inconsistencies:

range gap /Table/{59-60}
  r88: /Table/5{8/1/"washington dc"/"Tz\xe1G\xae\x14L\x00\x80\x00\x00\x00\x00\x00\x00\xa5"-9}
  r49: /Table/6{0-1}

Only proceed as a last resort!
Proceed with plan creation [f/N] f
Plan created
To complete recovery, distribute the plan to the below nodes and invoke `debug recover apply-plan` on:
- node n6, store(s) s6
- node n5, store(s) s5
- node n1, store(s) s1
```

--confirm y together with --force:
```
Found replica inconsistencies:

range gap /Table/{59-60}
  r88: /Table/5{8/1/"washington dc"/"Tz\xe1G\xae\x14L\x00\x80\x00\x00\x00\x00\x00\x00\xa5"-9}
  r49: /Table/6{0-1}

Plan created
To complete recovery, distribute the plan to the below nodes and invoke `debug recover apply-plan` on:
- node n5, store(s) s5
- node n1, store(s) s1
- node n6, store(s) s6
```

while --confirm y without --force will result in:
```
Found replica inconsistencies:

range gap /Table/{59-60}
  r88: /Table/5{8/1/"washington dc"/"Tz\xe1G\xae\x14L\x00\x80\x00\x00\x00\x00\x00\x00\xa5"-9}
  r49: /Table/6{0-1}

ERROR: can not create plan because of errors and no --force flag is given
Failed running "debug recover make-plan"
```

Fixes: cockroachdb#74630

Co-authored-by: Oleg Afanasyev <[email protected]>
  • Loading branch information
craig[bot] and aliher1911 committed Feb 4, 2022
2 parents 3eb782c + c4d4df9 commit 4a741cd
Show file tree
Hide file tree
Showing 16 changed files with 1,251 additions and 85 deletions.
1 change: 1 addition & 0 deletions pkg/cli/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ go_library(
"//pkg/workload/ycsb",
"@com_github_cockroachdb_apd_v3//:apd",
"@com_github_cockroachdb_errors//:errors",
"@com_github_cockroachdb_errors//hintdetail",
"@com_github_cockroachdb_errors//oserror",
"@com_github_cockroachdb_logtags//:logtags",
"@com_github_cockroachdb_pebble//tool",
Expand Down
3 changes: 3 additions & 0 deletions pkg/cli/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -1654,6 +1654,9 @@ func init() {
"list of dead store IDs")
f.VarP(&debugRecoverPlanOpts.confirmAction, cliflags.ConfirmActions.Name, cliflags.ConfirmActions.Shorthand,
cliflags.ConfirmActions.Usage())
f.BoolVar(&debugRecoverPlanOpts.force, "force", false,
"force creation of plan even when problems were encountered; applying this plan may "+
"result in additional problems and should be done only with care and as a last resort")

f = debugRecoverExecuteCmd.Flags()
f.VarP(&debugRecoverExecuteOpts.Stores, cliflags.RecoverStore.Name, cliflags.RecoverStore.Shorthand, cliflags.RecoverStore.Usage())
Expand Down
65 changes: 51 additions & 14 deletions pkg/cli/debug_recover_loss_of_quorum.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/errors/hintdetail"
"github.com/spf13/cobra"
)

Expand Down Expand Up @@ -264,6 +265,7 @@ var debugRecoverPlanOpts struct {
outputFileName string
deadStoreIDs []int
confirmAction confirmActionFlag
force bool
}

func runDebugPlanReplicaRemoval(cmd *cobra.Command, args []string) error {
Expand Down Expand Up @@ -302,28 +304,63 @@ Discarded live replicas: %d
_, _ = fmt.Fprintf(stderr, "%s\n\n", deadStoreMsg)
}

if len(plan.Updates) == 0 {
_, _ = fmt.Fprintf(stderr, "No recoverable ranges found.\n")
return nil
planningErr := report.Error()
if planningErr != nil {
// Need to warn user before they make a decision that ignoring
// inconsistencies is a really bad idea.
_, _ = fmt.Fprintf(stderr,
"Found replica inconsistencies:\n\n%s\n\nOnly proceed as a last resort!\n",
hintdetail.FlattenDetails(planningErr))
}

if debugRecoverPlanOpts.confirmAction == allNo {
return errors.New("abort")
}

switch debugRecoverPlanOpts.confirmAction {
case prompt:
_, _ = fmt.Fprintf(stderr, "Proceed with plan creation [y/N] ")
reader := bufio.NewReader(os.Stdin)
line, err := reader.ReadString('\n')
if err != nil {
return errors.Wrap(err, "failed to read user input")
opts := "y/N"
if planningErr != nil {
opts = "f/N"
}
_, _ = fmt.Fprintf(stderr, "\n")
if len(line) < 1 || (line[0] != 'y' && line[0] != 'Y') {
_, _ = fmt.Fprint(stderr, "Aborted at user request\n")
return nil
done := false
for !done {
_, _ = fmt.Fprintf(stderr, "Proceed with plan creation [%s] ", opts)
reader := bufio.NewReader(os.Stdin)
line, err := reader.ReadString('\n')
if err != nil {
return errors.Wrap(err, "failed to read user input")
}
line = strings.ToLower(strings.TrimSpace(line))
if len(line) == 0 {
line = "n"
}
switch line {
case "y":
// We ignore y if we have errors. In that case you can only force or
// abandon attempt.
if planningErr != nil {
continue
}
done = true
case "f":
done = true
case "n":
return errors.New("abort")
}
}
case allYes:
// All actions enabled by default.
if planningErr != nil && !debugRecoverPlanOpts.force {
return errors.Errorf(
"can not create plan because of errors and no --force flag is given")
}
default:
return errors.New("Aborted by --confirm option")
return errors.New("unexpected CLI error, try using different --confirm option value")
}

if len(plan.Updates) == 0 {
_, _ = fmt.Fprintln(stderr, "Found no ranges in need of recovery, nothing to do.")
return nil
}

var writer io.Writer = os.Stdout
Expand Down
14 changes: 14 additions & 0 deletions pkg/kv/kvserver/loqrecovery/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -19,37 +19,51 @@ go_library(
"//pkg/kv/kvserver/stateloader",
"//pkg/roachpb:with-mocks",
"//pkg/storage",
"//pkg/storage/enginepb",
"//pkg/util/hlc",
"//pkg/util/log",
"//pkg/util/protoutil",
"//pkg/util/timeutil",
"//pkg/util/uuid",
"@com_github_cockroachdb_errors//:errors",
"@io_etcd_go_etcd_raft_v3//raftpb",
],
)

go_test(
name = "loqrecovery_test",
srcs = [
"collect_raft_log_test.go",
"main_test.go",
"record_test.go",
"recovery_env_test.go",
"recovery_test.go",
],
data = glob(["testdata/**"]),
embed = [":loqrecovery"],
deps = [
"//pkg/base",
"//pkg/keys",
"//pkg/kv/kvserver",
"//pkg/kv/kvserver/kvserverbase",
"//pkg/kv/kvserver/kvserverpb",
"//pkg/kv/kvserver/loqrecovery/loqrecoverypb",
"//pkg/kv/kvserver/stateloader",
"//pkg/roachpb:with-mocks",
"//pkg/security",
"//pkg/security/securitytest",
"//pkg/server",
"//pkg/storage",
"//pkg/storage/enginepb",
"//pkg/testutils",
"//pkg/testutils/serverutils",
"//pkg/testutils/testcluster",
"//pkg/util/hlc",
"//pkg/util/keysutil",
"//pkg/util/leaktest",
"//pkg/util/log",
"//pkg/util/protoutil",
"//pkg/util/randutil",
"//pkg/util/timeutil",
"//pkg/util/uuid",
"@com_github_cockroachdb_datadriven//:datadriven",
Expand Down
136 changes: 129 additions & 7 deletions pkg/kv/kvserver/loqrecovery/collect.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,19 @@ package loqrecovery

import (
"context"
"math"

"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/loqrecovery/loqrecoverypb"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/errors"
"go.etcd.io/etcd/raft/v3/raftpb"
)

// CollectReplicaInfo captures states of all replicas in all stores for the sake of quorum recovery.
Expand All @@ -45,14 +51,24 @@ func CollectReplicaInfo(
if err != nil {
return err
}
// Check raft log for un-applied range descriptor changes. We start from
// applied+1 (inclusive) and read until the end of the log. We also look
// at potentially uncommitted entries as we have no way to determine their
// outcome, and they will become committed as soon as the replica is
// designated as a survivor.
rangeUpdates, err := GetDescriptorChangesFromRaftLog(desc.RangeID,
rstate.RaftAppliedIndex+1, math.MaxInt64, reader)
if err != nil {
return err
}

replicaData := loqrecoverypb.ReplicaInfo{
StoreID: storeIdent.StoreID,
NodeID: storeIdent.NodeID,
Desc: desc,
RaftAppliedIndex: rstate.RaftAppliedIndex,
RaftCommittedIndex: hstate.Commit,
// TODO(oleg): #73282 Track presence of uncommitted descriptors
HasUncommittedDescriptors: false,
StoreID: storeIdent.StoreID,
NodeID: storeIdent.NodeID,
Desc: desc,
RaftAppliedIndex: rstate.RaftAppliedIndex,
RaftCommittedIndex: hstate.Commit,
RaftLogDescriptorChanges: rangeUpdates,
}
replicas = append(replicas, replicaData)
return nil
Expand All @@ -62,3 +78,109 @@ func CollectReplicaInfo(
}
return loqrecoverypb.NodeReplicaInfo{Replicas: replicas}, nil
}

// GetDescriptorChangesFromRaftLog iterates over raft log between indicies
// lo (inclusive) and hi (exclusive) and searches for changes to range
// descriptor. Changes are identified by commit trigger content which is
// extracted either from EntryNormal where change updates key range info
// (split/merge) or from EntryConfChange* for changes in replica set.
func GetDescriptorChangesFromRaftLog(
rangeID roachpb.RangeID, lo, hi uint64, reader storage.Reader,
) ([]loqrecoverypb.DescriptorChangeInfo, error) {
var changes []loqrecoverypb.DescriptorChangeInfo

key := keys.RaftLogKey(rangeID, lo)
endKey := keys.RaftLogKey(rangeID, hi)
iter := reader.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{
UpperBound: endKey,
})
defer iter.Close()

var meta enginepb.MVCCMetadata
var ent raftpb.Entry

decodeRaftChange := func(ccI raftpb.ConfChangeI) ([]byte, error) {
var ccC kvserver.ConfChangeContext
if err := protoutil.Unmarshal(ccI.AsV2().Context, &ccC); err != nil {
return nil, errors.Wrap(err, "while unmarshaling CCContext")
}
return ccC.Payload, nil
}

iter.SeekGE(storage.MakeMVCCMetadataKey(key))
for ; ; iter.Next() {
ok, err := iter.Valid()
if err != nil {
return nil, err
}
if !ok {
return changes, nil
}
if err := protoutil.Unmarshal(iter.UnsafeValue(), &meta); err != nil {
return nil, errors.Wrap(err, "unable to decode raft log MVCCMetadata")
}
if err := storage.MakeValue(meta).GetProto(&ent); err != nil {
return nil, errors.Wrap(err, "unable to unmarshal raft Entry")
}
if len(ent.Data) == 0 {
continue
}
// Following code extracts our raft command from raft log entry. Depending
// on entry type we either need to extract encoded command from configuration
// change (for replica changes) or from normal command (for splits and
// merges).
var payload []byte
switch ent.Type {
case raftpb.EntryConfChange:
var cc raftpb.ConfChange
if err := protoutil.Unmarshal(ent.Data, &cc); err != nil {
return nil, errors.Wrap(err, "while unmarshaling ConfChange")
}
payload, err = decodeRaftChange(cc)
if err != nil {
return nil, err
}
case raftpb.EntryConfChangeV2:
var cc raftpb.ConfChangeV2
if err := protoutil.Unmarshal(ent.Data, &cc); err != nil {
return nil, errors.Wrap(err, "while unmarshaling ConfChangeV2")
}
payload, err = decodeRaftChange(cc)
if err != nil {
return nil, err
}
case raftpb.EntryNormal:
_, payload = kvserver.DecodeRaftCommand(ent.Data)
default:
continue
}
if len(payload) == 0 {
continue
}
var raftCmd kvserverpb.RaftCommand
if err := protoutil.Unmarshal(payload, &raftCmd); err != nil {
return nil, errors.Wrap(err, "unable to unmarshal raft command")
}
switch {
case raftCmd.ReplicatedEvalResult.Split != nil:
changes = append(changes,
loqrecoverypb.DescriptorChangeInfo{
ChangeType: loqrecoverypb.DescriptorChangeType_Split,
Desc: &raftCmd.ReplicatedEvalResult.Split.LeftDesc,
OtherDesc: &raftCmd.ReplicatedEvalResult.Split.RightDesc,
})
case raftCmd.ReplicatedEvalResult.Merge != nil:
changes = append(changes,
loqrecoverypb.DescriptorChangeInfo{
ChangeType: loqrecoverypb.DescriptorChangeType_Merge,
Desc: &raftCmd.ReplicatedEvalResult.Merge.LeftDesc,
OtherDesc: &raftCmd.ReplicatedEvalResult.Merge.RightDesc,
})
case raftCmd.ReplicatedEvalResult.ChangeReplicas != nil:
changes = append(changes, loqrecoverypb.DescriptorChangeInfo{
ChangeType: loqrecoverypb.DescriptorChangeType_ReplicaChange,
Desc: raftCmd.ReplicatedEvalResult.ChangeReplicas.Desc,
})
}
}
}
Loading

0 comments on commit 4a741cd

Please sign in to comment.