Skip to content

Commit

Permalink
kvserver/loqrecovery: record and post replica recovery events
Browse files Browse the repository at this point in the history
Previously, when a replica was rewritten offline to recover from loss
of quorum, no trace of this event was kept in the system.
This is not ideal, as it is not possible to understand what happened
unless the person performing the recovery documented their actions and
then informed the person doing the investigation.
This diff adds records of such actions. Records are created offline
in the store and then propagated to server logs when the node is restarted.

Release note: None
  • Loading branch information
aliher1911 committed Jan 12, 2022
1 parent 4149ca7 commit ce71b50
Show file tree
Hide file tree
Showing 23 changed files with 622 additions and 82 deletions.
24 changes: 24 additions & 0 deletions docs/generated/eventlog.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,30 @@ e.g. directly access and mutate internal state, breaking system invariants.
Events in this category are logged to the `OPS` channel.


### `debug_recover_replica`

An event of type `debug_recover_replica` is recorded when unsafe loss of quorum recovery is performed.


| Field | Description | Sensitive |
|--|--|--|
| `RangeID` | | no |
| `StoreID` | | no |
| `SurvivorReplicaID` | | no |
| `UpdatedReplicaID` | | no |
| `StartKey` | | yes |
| `EndKey` | | yes |


#### Common fields

| Field | Description | Sensitive |
|--|--|--|
| `Timestamp` | The timestamp of the event. Expressed as nanoseconds since the Unix epoch. | no |
| `EventType` | The type of the event. | no |
| `NodeID` | The node ID where the event originated. | no |
| `User` | The user which performed the operation. | yes |

### `debug_send_kv_batch`

An event of type `debug_send_kv_batch` is recorded when an arbitrary KV BatchRequest is submitted
Expand Down
13 changes: 8 additions & 5 deletions pkg/cli/debug_recover_loss_of_quorum.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
"github.com/spf13/cobra"
)
Expand Down Expand Up @@ -411,16 +413,16 @@ func runDebugExecuteRecoverPlan(cmd *cobra.Command, args []string) error {
var localNodeID roachpb.NodeID
batches := make(map[roachpb.StoreID]storage.Batch)
for _, storeSpec := range debugRecoverExecuteOpts.Stores.Specs {
db, err := OpenExistingStore(storeSpec.Path, stopper, false /* readOnly */)
store, err := OpenExistingStore(storeSpec.Path, stopper, false /* readOnly */)
if err != nil {
return errors.Wrapf(err, "failed to open store at path %q. ensure that store path is "+
"correct and that it is not used by another process", storeSpec.Path)
}
batch := db.NewBatch()
defer db.Close()
batch := store.NewBatch()
defer store.Close()
defer batch.Close()

storeIdent, err := kvserver.ReadStoreIdent(cmd.Context(), db)
storeIdent, err := kvserver.ReadStoreIdent(cmd.Context(), store)
if err != nil {
return err
}
Expand All @@ -434,8 +436,9 @@ func runDebugExecuteRecoverPlan(cmd *cobra.Command, args []string) error {
batches[storeIdent.StoreID] = batch
}

updateTime := timeutil.Now()
prepReport, err := loqrecovery.PrepareUpdateReplicas(
cmd.Context(), nodeUpdates, localNodeID, batches)
cmd.Context(), nodeUpdates, uuid.DefaultGenerator, updateTime, localNodeID, batches)
if err != nil {
return err
}
Expand Down
31 changes: 25 additions & 6 deletions pkg/keys/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ const (
tenantPrefixByte = '\xfe'
)

// Constants to subdivide unsafe loss of quorum recovery data into groups.
// Currently we only store keys as they are applied, but might benefit from
// archiving them to make them more "durable".
const (
appliedUnsafeReplicaRecoveryPrefix = "applied"
)

// Constants for system-reserved keys in the KV map.
//
// Note: Preserve group-wise ordering when adding new constants.
Expand Down Expand Up @@ -160,9 +167,27 @@ var (
// localStoreIdentSuffix stores an immutable identifier for this
// store, created when the store is first bootstrapped.
localStoreIdentSuffix = []byte("iden")
// localStoreUnsafeReplicaRecoverySuffix is the suffix for temporary record
// entries written when loss of quorum recovery operations are performed
// offline on the store.
// See StoreUnsafeReplicaRecoveryKey for details.
localStoreUnsafeReplicaRecoverySuffix = makeKey([]byte("loqr"),
[]byte(appliedUnsafeReplicaRecoveryPrefix))
// LocalStoreUnsafeReplicaRecoveryKeyMin is the start of keyspace used to store
// loss of quorum recovery record entries.
LocalStoreUnsafeReplicaRecoveryKeyMin = MakeStoreKey(localStoreUnsafeReplicaRecoverySuffix, nil)
// LocalStoreUnsafeReplicaRecoveryKeyMax is the end of keyspace used to store
// loss of quorum recovery record entries.
LocalStoreUnsafeReplicaRecoveryKeyMax = LocalStoreUnsafeReplicaRecoveryKeyMin.PrefixEnd()
// localStoreNodeTombstoneSuffix stores key value pairs that map
// nodeIDs to time of removal from cluster.
localStoreNodeTombstoneSuffix = []byte("ntmb")
// localStoreCachedSettingsSuffix stores the cached settings for node.
localStoreCachedSettingsSuffix = []byte("stng")
// LocalStoreCachedSettingsKeyMin is the start of span of possible cached settings keys.
LocalStoreCachedSettingsKeyMin = MakeStoreKey(localStoreCachedSettingsSuffix, nil)
// LocalStoreCachedSettingsKeyMax is the end of span of possible cached settings keys.
LocalStoreCachedSettingsKeyMax = LocalStoreCachedSettingsKeyMin.PrefixEnd()
// localStoreLastUpSuffix stores the last timestamp that a store's node
// acknowledged that it was still running. This value will be regularly
// refreshed on all stores for a running node; the intention of this value
Expand All @@ -172,12 +197,6 @@ var (
// localRemovedLeakedRaftEntriesSuffix is DEPRECATED and remains to prevent
// reuse.
localRemovedLeakedRaftEntriesSuffix = []byte("dlre")
// localStoreCachedSettingsSuffix stores the cached settings for node.
localStoreCachedSettingsSuffix = []byte("stng")
// LocalStoreCachedSettingsKeyMin is the start of span of possible cached settings keys.
LocalStoreCachedSettingsKeyMin = MakeStoreKey(localStoreCachedSettingsSuffix, nil)
// LocalStoreCachedSettingsKeyMax is the end of span of possible cached settings keys.
LocalStoreCachedSettingsKeyMax = LocalStoreCachedSettingsKeyMin.PrefixEnd()

// 5. Lock table keys
//
Expand Down
15 changes: 8 additions & 7 deletions pkg/keys/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,13 +214,14 @@ var _ = [...]interface{}{
// 4. Store local keys: These contain metadata about an individual store.
// They are unreplicated and unaddressable. The typical example is the
// store 'ident' record. They all share `localStorePrefix`.
StoreClusterVersionKey, // "cver"
StoreGossipKey, // "goss"
StoreHLCUpperBoundKey, // "hlcu"
StoreIdentKey, // "iden"
StoreNodeTombstoneKey, // "ntmb"
StoreLastUpKey, // "uptm"
StoreCachedSettingsKey, // "stng"
StoreClusterVersionKey, // "cver"
StoreGossipKey, // "goss"
StoreHLCUpperBoundKey, // "hlcu"
StoreIdentKey, // "iden"
StoreUnsafeReplicaRecoveryKey, // "loqr"
StoreNodeTombstoneKey, // "ntmb"
StoreCachedSettingsKey, // "stng"
StoreLastUpKey, // "uptm"

// 5. Range lock keys for all replicated locks. All range locks share
// LocalRangeLockTablePrefix. Locks can be acquired on global keys and on
Expand Down
27 changes: 27 additions & 0 deletions pkg/keys/keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,33 @@ func DecodeStoreCachedSettingsKey(key roachpb.Key) (settingKey roachpb.Key, err
return
}

// StoreUnsafeReplicaRecoveryKey builds the store-local key under which a
// single loss of quorum replica recovery entry is stored. The key is the
// LocalStoreUnsafeReplicaRecoveryKeyMin prefix followed by the raw bytes of
// the entry's UUID.
//
// Such keys are written by the `debug recover apply-plan` command on the
// store while the node is stopped. Once the node boots up, the entries are
// translated into structured log events to leave an audit trail of the
// recovery operation.
func StoreUnsafeReplicaRecoveryKey(uuid uuid.UUID) roachpb.Key {
	prefix := LocalStoreUnsafeReplicaRecoveryKeyMin
	// Allocate once with room for both the prefix and the UUID bytes so the
	// appends below never regrow the slice.
	buf := make(roachpb.Key, 0, len(prefix)+len(uuid))
	return append(append(buf, prefix...), uuid.GetBytes()...)
}

// DecodeStoreUnsafeReplicaRecoveryKey extracts the record UUID from a key
// produced by StoreUnsafeReplicaRecoveryKey.
//
// It returns an error if key does not start with
// LocalStoreUnsafeReplicaRecoveryKeyMin, or if the bytes following that
// prefix cannot be parsed as a UUID. On error the returned UUID is the zero
// value and must not be used.
func DecodeStoreUnsafeReplicaRecoveryKey(key roachpb.Key) (uuid.UUID, error) {
	if !bytes.HasPrefix(key, LocalStoreUnsafeReplicaRecoveryKeyMin) {
		// Report the prefix that was actually checked; both the key and the
		// prefix are rendered as raw quoted bytes so they can be compared.
		return uuid.UUID{}, errors.Errorf("key %q does not have %q prefix",
			string(key), string(LocalStoreUnsafeReplicaRecoveryKeyMin))
	}
	remainder := key[len(LocalStoreUnsafeReplicaRecoveryKeyMin):]
	entryID, err := uuid.FromBytes(remainder)
	if err != nil {
		return uuid.UUID{}, errors.Wrap(err, "failed to get uuid from unsafe replica recovery key")
	}
	return entryID, nil
}

// NodeLivenessKey returns the key for the node liveness record.
func NodeLivenessKey(nodeID roachpb.NodeID) roachpb.Key {
key := make(roachpb.Key, 0, len(NodeLivenessPrefix)+9)
Expand Down
22 changes: 21 additions & 1 deletion pkg/keys/printer.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ var constSubKeyDict = []struct {
{"/clusterVersion", localStoreClusterVersionSuffix},
{"/nodeTombstone", localStoreNodeTombstoneSuffix},
{"/cachedSettings", localStoreCachedSettingsSuffix},
{"/lossOfQuorumRecovery/applied", localStoreUnsafeReplicaRecoverySuffix},
}

func nodeTombstoneKeyPrint(key roachpb.Key) string {
Expand Down Expand Up @@ -223,6 +224,10 @@ func localStoreKeyPrint(_ []encoding.Direction, key roachpb.Key) string {
return v.name + "/" + cachedSettingsKeyPrint(
append(roachpb.Key(nil), append(LocalStorePrefix, key...)...),
)
} else if v.key.Equal(localStoreUnsafeReplicaRecoverySuffix) {
return v.name + "/" + lossOfQuorumRecoveryEntryKeyPrint(
append(roachpb.Key(nil), append(LocalStorePrefix, key...)...),
)
}
return v.name
}
Expand All @@ -231,6 +236,14 @@ func localStoreKeyPrint(_ []encoding.Direction, key roachpb.Key) string {
return fmt.Sprintf("%q", []byte(key))
}

// lossOfQuorumRecoveryEntryKeyPrint renders the UUID portion of an unsafe
// replica recovery key for pretty-printing. If the key cannot be decoded by
// DecodeStoreUnsafeReplicaRecoveryKey, the decode error is embedded in an
// "<invalid: ...>" placeholder instead.
func lossOfQuorumRecoveryEntryKeyPrint(key roachpb.Key) string {
	recordID, decodeErr := DecodeStoreUnsafeReplicaRecoveryKey(key)
	if decodeErr == nil {
		return recordID.String()
	}
	return fmt.Sprintf("<invalid: %s>", decodeErr)
}

func localStoreKeyParse(input string) (remainder string, output roachpb.Key) {
for _, s := range constSubKeyDict {
if strings.HasPrefix(input, s.name) {
Expand All @@ -239,9 +252,16 @@ func localStoreKeyParse(input string) (remainder string, output roachpb.Key) {
s.key.Equal(localStoreNodeTombstoneSuffix),
s.key.Equal(localStoreCachedSettingsSuffix):
panic(&ErrUglifyUnsupported{errors.Errorf("cannot parse local store key with suffix %s", s.key)})
case s.key.Equal(localStoreUnsafeReplicaRecoverySuffix):
recordIDString := input[len(localStoreUnsafeReplicaRecoverySuffix):]
recordUUID, err := uuid.FromString(recordIDString)
if err != nil {
panic(&ErrUglifyUnsupported{errors.Errorf("cannot parse local store key with suffix %s", s.key)})
}
output = StoreUnsafeReplicaRecoveryKey(recordUUID)
default:
output = MakeStoreKey(s.key, nil)
}
output = MakeStoreKey(s.key, nil)
return
}
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/keys/printer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func TestPrettyPrint(t *testing.T) {
durationDesc, _ := encoding.EncodeDurationDescending(nil, duration)
bitArray := bitarray.MakeBitArrayFromInt64(8, 58, 7)
txnID := uuid.MakeV4()
loqRecoveryID := uuid.MakeV4()

// Support for asserting that the ugly printer supports a key was added after
// most of the tests here were written.
Expand All @@ -66,6 +67,7 @@ func TestPrettyPrint(t *testing.T) {
{keys.StoreClusterVersionKey(), "/Local/Store/clusterVersion", revertSupportUnknown},
{keys.StoreNodeTombstoneKey(123), "/Local/Store/nodeTombstone/n123", revertSupportUnknown},
{keys.StoreCachedSettingsKey(roachpb.Key("a")), `/Local/Store/cachedSettings/"a"`, revertSupportUnknown},
{keys.StoreUnsafeReplicaRecoveryKey(loqRecoveryID), fmt.Sprintf(`/Local/Store/lossOfQuorumRecovery/applied/%s`, loqRecoveryID), revertSupportUnknown},

{keys.AbortSpanKey(roachpb.RangeID(1000001), txnID), fmt.Sprintf(`/Local/RangeID/1000001/r/AbortSpan/%q`, txnID), revertSupportUnknown},
{keys.RangeAppliedStateKey(roachpb.RangeID(1000001)), "/Local/RangeID/1000001/r/RangeAppliedState", revertSupportUnknown},
Expand Down
3 changes: 3 additions & 0 deletions pkg/kv/kvserver/loqrecovery/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ go_library(
"apply.go",
"collect.go",
"plan.go",
"record.go",
"utils.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/loqrecovery",
Expand All @@ -19,6 +20,7 @@ go_library(
"//pkg/storage",
"//pkg/util/hlc",
"//pkg/util/log",
"//pkg/util/protoutil",
"//pkg/util/uuid",
"@com_github_cockroachdb_errors//:errors",
],
Expand All @@ -44,6 +46,7 @@ go_test(
"//pkg/util/hlc",
"//pkg/util/keysutil",
"//pkg/util/leaktest",
"//pkg/util/timeutil",
"//pkg/util/uuid",
"@com_github_cockroachdb_datadriven//:datadriven",
"@com_github_cockroachdb_errors//:errors",
Expand Down
Loading

0 comments on commit ce71b50

Please sign in to comment.