Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
95582: loqrecovery,server: apply staged loqrecovery plans on start r=erikgrinaker a=aliher1911

This commit adds loss of quorum recovery plan application functionality
to server startup. Plans are staged during application phase and then
applied when rolling restart of affected nodes is performed.
Plan application peforms same actions that are performed by debug
recovery apply-plan on offline storage. It is also updating loss of
quorum recovery status stored in a store local key.

Release note: None

Fixes #93121

96126: sql: refactor FunctionProperties and Overload r=mgartner a=mgartner

#### sql: remove FuncExpr.IsGeneratorApplication

`FuncExpr.IsGeneratorApplication` has been removed and its single usage
has been replaced with with `FuncExpr.IsGeneratorClass`.

Release note: None

#### sql: move FunctionClass from FunctionProperties to Overload

`FunctionProperties` are attached to a `FunctionDefinition`, which is
simply a collection of overloads that share the same name. Most of the
fields in `FunctionProperties` are, however, overload-specific. They
should be moved to the `Overload` struct. In the long-term,
the hierarchy of function definitions, each with child function overloads,
should be flattened to a just overloads.

This commit takes one step in this direction.

Epic: CRDB-20370

Release note: None


96207: rpc,security: simplify and enhance the client cert validation r=stevendanna a=knz

Fixes #96174.
Prerequisite for #96153.
Epic: CRDB-14537

TLDR: this change enhances various aspects of TLS client cert
validation. Between other things, it makes the error clearer in case
of authentication failure.

Example, before:
```
ERROR: requested user demo is not authorized for tenant {2}
```

Example, after:
```
ERROR: certificate authentication failed for user "demo"
DETAIL: This is tenant system; cert is valid for "root" on all tenants.
```


Co-authored-by: Oleg Afanasyev <[email protected]>
Co-authored-by: Marcus Gartner <[email protected]>
Co-authored-by: Raphael 'kena' Poss <[email protected]>
  • Loading branch information
4 people committed Feb 1, 2023
4 parents 4162a72 + 9c6b9e9 + ddb8efb + cca7a9b commit 8b35f22
Show file tree
Hide file tree
Showing 49 changed files with 1,226 additions and 482 deletions.
3 changes: 2 additions & 1 deletion pkg/ccl/changefeedccl/encoder_json.go
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,8 @@ func init() {
}
return tree.NewDBytes(tree.DBytes(o)), nil
},
Info: "Strings can be of the form 'resolved' or 'resolved=1s'.",
Class: tree.NormalClass,
Info: "Strings can be of the form 'resolved' or 'resolved=1s'.",
// Probably actually stable, but since this is tightly coupled to changefeed logic by design,
// best to be defensive.
Volatility: volatility.Volatile,
Expand Down
1 change: 0 additions & 1 deletion pkg/ccl/utilccl/builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import (
// RegisterCCLBuiltin adds a builtin defined in CCL code to the global builtins registry.
func RegisterCCLBuiltin(name string, description string, overload tree.Overload) {
props := tree.FunctionProperties{
Class: tree.NormalClass,
Category: `CCL-only internal function`,
}

Expand Down
1 change: 1 addition & 0 deletions pkg/cli/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ go_library(
"//pkg/util/retry",
"//pkg/util/sdnotify",
"//pkg/util/stop",
"//pkg/util/strutil",
"//pkg/util/syncutil",
"//pkg/util/sysutil",
"//pkg/util/timeutil",
Expand Down
30 changes: 12 additions & 18 deletions pkg/cli/debug_recover_loss_of_quorum.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/strutil"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
Expand Down Expand Up @@ -375,7 +376,8 @@ func runDebugPlanReplicaRemoval(cmd *cobra.Command, args []string) error {
ctx,
replicas,
deadStoreIDs,
deadNodeIDs)
deadNodeIDs,
uuid.DefaultGenerator)
if err != nil {
return err
}
Expand Down Expand Up @@ -505,7 +507,7 @@ To stage recovery application in half-online mode invoke:
Alternatively distribute plan to below nodes and invoke 'debug recover apply-plan --store=<store-dir> %s' on:
`, remoteArgs, planFile, planFile)
for _, node := range report.UpdatedNodes {
_, _ = fmt.Fprintf(stderr, "- node n%d, store(s) %s\n", node.NodeID, joinIDs("s", node.StoreIDs))
_, _ = fmt.Fprintf(stderr, "- node n%d, store(s) %s\n", node.NodeID, strutil.JoinIDs("s", node.StoreIDs))
}

return nil
Expand Down Expand Up @@ -612,10 +614,10 @@ func stageRecoveryOntoCluster(
// Proposed report
_, _ = fmt.Fprintf(stderr, "Proposed changes in plan %s:\n", plan.PlanID)
for _, u := range plan.Updates {
_, _ = fmt.Fprintf(stderr, " range r%d:%s updating replica %s to %s and discarding all others.\n",
u.RangeID, roachpb.Key(u.StartKey), u.OldReplicaID, u.NextReplicaID)
_, _ = fmt.Fprintf(stderr, " range r%d:%s updating replica %s to %s on node n%d and discarding all others.\n",
u.RangeID, roachpb.Key(u.StartKey), u.OldReplicaID, u.NextReplicaID, u.NodeID())
}
_, _ = fmt.Fprintf(stderr, "\nNodes %s will be marked as decommissioned.\n", joinIDs("n", plan.DecommissionedNodeIDs))
_, _ = fmt.Fprintf(stderr, "\nNodes %s will be marked as decommissioned.\n", strutil.JoinIDs("n", plan.DecommissionedNodeIDs))

if len(conflicts) > 0 {
_, _ = fmt.Fprintf(stderr, "\nConflicting staged plans will be replaced:\n")
Expand Down Expand Up @@ -685,11 +687,11 @@ func stageRecoveryOntoCluster(
To verify recovery status invoke:
'cockroach debug recover verify %s %s'
`, joinIDs("n", sortedKeys(nodeSet)), remoteArgs, planFile)
`, strutil.JoinIDs("n", sortedKeys(nodeSet)), remoteArgs, planFile)
return nil
}

func sortedKeys[T ~int32](set map[T]any) []T {
func sortedKeys[T ~int | ~int32 | ~int64](set map[T]any) []T {
var sorted []T
for k := range set {
sorted = append(sorted, k)
Expand Down Expand Up @@ -747,7 +749,7 @@ func applyRecoveryToLocalStore(
if len(prepReport.UpdatedReplicas) == 0 {
if len(prepReport.MissingStores) > 0 {
return errors.Newf("stores %s expected on the node but no paths were provided",
joinIDs("s", prepReport.MissingStores))
strutil.JoinIDs("s", prepReport.MissingStores))
}
_, _ = fmt.Fprintf(stderr, "No updates planned on this node.\n")
return nil
Expand Down Expand Up @@ -785,18 +787,10 @@ func applyRecoveryToLocalStore(

// Apply batches to the stores.
applyReport, err := loqrecovery.CommitReplicaChanges(batches)
_, _ = fmt.Fprintf(stderr, "Updated store(s): %s\n", joinIDs("s", applyReport.UpdatedStores))
_, _ = fmt.Fprintf(stderr, "Updated store(s): %s\n", strutil.JoinIDs("s", applyReport.UpdatedStores))
return err
}

func joinIDs[T ~int32](prefix string, ids []T) string {
storeNames := make([]string, 0, len(ids))
for _, id := range ids {
storeNames = append(storeNames, fmt.Sprintf("%s%d", prefix, id))
}
return strings.Join(storeNames, ", ")
}

func formatNodeStores(locations []loqrecovery.NodeStores, indent string) string {
hasMultiStore := false
for _, v := range locations {
Expand All @@ -813,7 +807,7 @@ func formatNodeStores(locations []loqrecovery.NodeStores, indent string) string
nodeDetails := make([]string, 0, len(locations))
for _, node := range locations {
nodeDetails = append(nodeDetails,
indent+fmt.Sprintf("n%d: store(s): %s", node.NodeID, joinIDs("s", node.StoreIDs)))
indent+fmt.Sprintf("n%d: store(s): %s", node.NodeID, strutil.JoinIDs("s", node.StoreIDs)))
}
return strings.Join(nodeDetails, "\n")
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/docgen/funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ func generateFunctions(from []string, categorize bool) []byte {
}
// We generate docs for both aggregates and window functions in separate
// files, so we want to omit them when processing all builtins.
if categorize && (props.Class == tree.AggregateClass || props.Class == tree.WindowClass) {
if categorize && (fn.Class == tree.AggregateClass || fn.Class == tree.WindowClass) {
continue
}
args := fn.Types.String()
Expand Down
7 changes: 4 additions & 3 deletions pkg/internal/sqlsmith/scalar.go
Original file line number Diff line number Diff line change
Expand Up @@ -440,19 +440,20 @@ func makeFunc(s *Smither, ctx Context, typ *types.T, refs colRefs) (tree.TypedEx
args = append(args, castType(arg, argTyp))
}

if fn.def.Class == tree.WindowClass && s.disableWindowFuncs {
if fn.overload.Class == tree.WindowClass && s.disableWindowFuncs {
return nil, false
}

if fn.def.Class == tree.AggregateClass && s.disableAggregateFuncs {
if fn.overload.Class == tree.AggregateClass && s.disableAggregateFuncs {
return nil, false
}

var window *tree.WindowDef
// Use a window function if:
// - we chose an aggregate function, then 1/6 chance, but not if we're in a HAVING (noWindow == true)
// - we explicitly chose a window function
if fn.def.Class == tree.WindowClass || (!s.disableWindowFuncs && !ctx.noWindow && s.d6() == 1 && fn.def.Class == tree.AggregateClass) {
if fn.overload.Class == tree.WindowClass ||
(!s.disableWindowFuncs && !ctx.noWindow && s.d6() == 1 && fn.overload.Class == tree.AggregateClass) {
var parts tree.Exprs
s.sample(len(refs), 2, func(i int) {
parts = append(parts, refs[i].item)
Expand Down
8 changes: 4 additions & 4 deletions pkg/internal/sqlsmith/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -519,9 +519,6 @@ var functions = func() map[tree.FunctionClass]map[oid.Oid][]function {
if skip {
continue
}
if _, ok := m[def.Class]; !ok {
m[def.Class] = map[oid.Oid][]function{}
}
// Ignore pg compat functions since many are unimplemented.
if def.Category == "Compatibility" {
continue
Expand All @@ -530,6 +527,9 @@ var functions = func() map[tree.FunctionClass]map[oid.Oid][]function {
continue
}
for _, ov := range def.Definition {
if m[ov.Class] == nil {
m[ov.Class] = map[oid.Oid][]function{}
}
// Ignore documented unusable functions.
if strings.Contains(ov.Info, "Not usable") {
continue
Expand All @@ -544,7 +544,7 @@ var functions = func() map[tree.FunctionClass]map[oid.Oid][]function {
if !found {
continue
}
m[def.Class][typ.Oid()] = append(m[def.Class][typ.Oid()], function{
m[ov.Class][typ.Oid()] = append(m[ov.Class][typ.Oid()], function{
def: def,
overload: ov,
})
Expand Down
24 changes: 15 additions & 9 deletions pkg/keys/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,6 @@ const (
tenantPrefixByte = '\xfe'
)

// Constants to subdivide unsafe loss of quorum recovery data into groups.
// Currently we only store keys as they are applied, but might benefit from
// archiving them to make them more "durable".
const (
appliedUnsafeReplicaRecoveryPrefix = "applied"
)

// Constants for system-reserved keys in the KV map.
//
// Note: Preserve group-wise ordering when adding new constants.
Expand Down Expand Up @@ -181,18 +174,31 @@ var (
// localStoreIdentSuffix stores an immutable identifier for this
// store, created when the store is first bootstrapped.
localStoreIdentSuffix = []byte("iden")
// localStoreLossOfQuorumRecoveryInfix is an infix for the group of keys used
// by loss of quorum recovery operations to track progress and outcome.
// This infix is followed by the suffix defining type of recovery key.
localStoreLossOfQuorumRecoveryInfix = []byte("loqr")
// LocalStoreUnsafeReplicaRecoverySuffix is a suffix for temporary record
// entries put when loss of quorum recovery operations are performed offline
// on the store.
// See StoreUnsafeReplicaRecoveryKey for details.
localStoreUnsafeReplicaRecoverySuffix = makeKey([]byte("loqr"),
[]byte(appliedUnsafeReplicaRecoveryPrefix))
localStoreUnsafeReplicaRecoverySuffix = makeKey(localStoreLossOfQuorumRecoveryInfix,
[]byte("applied"))
// LocalStoreUnsafeReplicaRecoveryKeyMin is the start of keyspace used to store
// loss of quorum recovery record entries.
LocalStoreUnsafeReplicaRecoveryKeyMin = MakeStoreKey(localStoreUnsafeReplicaRecoverySuffix, nil)
// LocalStoreUnsafeReplicaRecoveryKeyMax is the end of keyspace used to store
// loss of quorum recovery record entries.
LocalStoreUnsafeReplicaRecoveryKeyMax = LocalStoreUnsafeReplicaRecoveryKeyMin.PrefixEnd()
// localStoreLossOfQuorumRecoveryStatusSuffix is a local key store suffix to
// store results of loss of quorum recovery plan application.
localStoreLossOfQuorumRecoveryStatusSuffix = makeKey(localStoreLossOfQuorumRecoveryInfix,
[]byte("status"))
// localStoreLossOfQuorumRecoveryCleanupActionsSuffix is a local key store
// suffix to store information for loss of quorum recovery cleanup actions
// performed after node restart.
localStoreLossOfQuorumRecoveryCleanupActionsSuffix = makeKey(localStoreLossOfQuorumRecoveryInfix,
[]byte("cleanup"))
// localStoreNodeTombstoneSuffix stores key value pairs that map
// nodeIDs to time of removal from cluster.
localStoreNodeTombstoneSuffix = []byte("ntmb")
Expand Down
13 changes: 13 additions & 0 deletions pkg/keys/keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,19 @@ func DecodeStoreCachedSettingsKey(key roachpb.Key) (settingKey roachpb.Key, err
return
}

// StoreLossOfQuorumRecoveryStatusKey is a key used for storing results of loss
// of quorum recovery plan application.
func StoreLossOfQuorumRecoveryStatusKey() roachpb.Key {
return MakeStoreKey(localStoreLossOfQuorumRecoveryStatusSuffix, nil)
}

// StoreLossOfQuorumRecoveryCleanupActionsKey is a key used for storing data for
// post recovery cleanup actions node would perform after restart if plan was
// applied.
func StoreLossOfQuorumRecoveryCleanupActionsKey() roachpb.Key {
return MakeStoreKey(localStoreLossOfQuorumRecoveryCleanupActionsSuffix, nil)
}

// StoreUnsafeReplicaRecoveryKey creates a key for loss of quorum replica
// recovery entry. Those keys are written by `debug recover apply-plan` command
// on the store while node is stopped. Once node boots up, entries are
Expand Down
2 changes: 2 additions & 0 deletions pkg/keys/printer.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ var constSubKeyDict = []struct {
{"/nodeTombstone", localStoreNodeTombstoneSuffix},
{"/cachedSettings", localStoreCachedSettingsSuffix},
{"/lossOfQuorumRecovery/applied", localStoreUnsafeReplicaRecoverySuffix},
{"/lossOfQuorumRecovery/status", localStoreLossOfQuorumRecoveryStatusSuffix},
{"/lossOfQuorumRecovery/cleanup", localStoreLossOfQuorumRecoveryCleanupActionsSuffix},
}

func nodeTombstoneKeyPrint(buf *redact.StringBuilder, key roachpb.Key) {
Expand Down
2 changes: 2 additions & 0 deletions pkg/keys/printer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,8 @@ func TestPrettyPrint(t *testing.T) {
{keys.StoreNodeTombstoneKey(123), "/Local/Store/nodeTombstone/n123", revertSupportUnknown},
{keys.StoreCachedSettingsKey(roachpb.Key("a")), `/Local/Store/cachedSettings/"a"`, revertSupportUnknown},
{keys.StoreUnsafeReplicaRecoveryKey(loqRecoveryID), fmt.Sprintf(`/Local/Store/lossOfQuorumRecovery/applied/%s`, loqRecoveryID), revertSupportUnknown},
{keys.StoreLossOfQuorumRecoveryStatusKey(), "/Local/Store/lossOfQuorumRecovery/status", revertSupportUnknown},
{keys.StoreLossOfQuorumRecoveryCleanupActionsKey(), "/Local/Store/lossOfQuorumRecovery/cleanup", revertSupportUnknown},

{keys.AbortSpanKey(roachpb.RangeID(1000001), txnID), fmt.Sprintf(`/Local/RangeID/1000001/r/AbortSpan/%q`, txnID), revertSupportUnknown},
{keys.RangeAppliedStateKey(roachpb.RangeID(1000001)), "/Local/RangeID/1000001/r/RangeAppliedState", revertSupportUnknown},
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvserver/loqrecovery/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ go_library(
"//pkg/util/log",
"//pkg/util/protoutil",
"//pkg/util/retry",
"//pkg/util/strutil",
"//pkg/util/timeutil",
"//pkg/util/uuid",
"@com_github_cockroachdb_errors//:errors",
Expand Down
Loading

0 comments on commit 8b35f22

Please sign in to comment.