From 7fdbc41c9a893da53f34380c8b8cfeb4be3c03d6 Mon Sep 17 00:00:00 2001 From: Xiang Gu Date: Wed, 30 Nov 2022 17:17:38 -0500 Subject: [PATCH] scrun: added a cluster setting to skip planner sanity checks in prod Previously, there is a niche case where we might run into a backward incompatible issue (see #91528). We decided to fix it by relaxing the sanity check that caused the issue and backport the change to relase-22.2, so this backward incompatibility issue is contained only between release-22.2.0 and master, which we think should be a rare occurrance in the wild. Epic: None Release note: None --- pkg/sql/schemachanger/scdeps/exec_deps.go | 5 +++ pkg/sql/schemachanger/scexec/BUILD.bazel | 1 + pkg/sql/schemachanger/scexec/dependencies.go | 2 + .../scexec/mocks_generated_test.go | 15 +++++++ .../scplan/internal/scstage/build.go | 44 +++++++++++-------- pkg/sql/schemachanger/scplan/plan.go | 8 ++-- pkg/sql/schemachanger/scrun/BUILD.bazel | 2 + pkg/sql/schemachanger/scrun/scrun.go | 10 +++++ 8 files changed, 65 insertions(+), 22 deletions(-) diff --git a/pkg/sql/schemachanger/scdeps/exec_deps.go b/pkg/sql/schemachanger/scdeps/exec_deps.go index 64f2f3108866..bf22ffb26d63 100644 --- a/pkg/sql/schemachanger/scdeps/exec_deps.go +++ b/pkg/sql/schemachanger/scdeps/exec_deps.go @@ -419,6 +419,11 @@ func (d *execDeps) User() username.SQLUsername { return d.user } +// ClusterSettings implements the scexec.Dependencies interface. +func (d *execDeps) ClusterSettings() *cluster.Settings { + return d.settings +} + // DescriptorMetadataUpdater implements the scexec.Dependencies interface. func (d *execDeps) DescriptorMetadataUpdater(ctx context.Context) scexec.DescriptorMetadataUpdater { return d.metadataUpdater diff --git a/pkg/sql/schemachanger/scexec/BUILD.bazel b/pkg/sql/schemachanger/scexec/BUILD.bazel index e0cf0b901e9e..f5340e494df7 100644 --- a/pkg/sql/schemachanger/scexec/BUILD.bazel +++ b/pkg/sql/schemachanger/scexec/BUILD.bazel @@ -22,6 +22,7 @@ go_library( "//pkg/keys", "//pkg/roachpb", "//pkg/security/username", + "//pkg/settings/cluster", "//pkg/sql/catalog", "//pkg/sql/catalog/descpb", "//pkg/sql/catalog/nstree", diff --git a/pkg/sql/schemachanger/scexec/dependencies.go b/pkg/sql/schemachanger/scexec/dependencies.go index 4a1450b1d54a..f3b536e4128b 100644 --- a/pkg/sql/schemachanger/scexec/dependencies.go +++ b/pkg/sql/schemachanger/scexec/dependencies.go @@ -19,6 +19,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/security/username" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scexec/scmutationexec" @@ -48,6 +49,7 @@ type Dependencies interface { // Statements returns the statements behind this schema change. Statements() []string User() username.SQLUsername + ClusterSettings() *cluster.Settings } // Catalog encapsulates the catalog-related dependencies for the executor. diff --git a/pkg/sql/schemachanger/scexec/mocks_generated_test.go b/pkg/sql/schemachanger/scexec/mocks_generated_test.go index 49a3189d201d..edbf7b7a91f9 100644 --- a/pkg/sql/schemachanger/scexec/mocks_generated_test.go +++ b/pkg/sql/schemachanger/scexec/mocks_generated_test.go @@ -9,6 +9,7 @@ import ( reflect "reflect" username "github.com/cockroachdb/cockroach/pkg/security/username" + cluster "github.com/cockroachdb/cockroach/pkg/settings/cluster" catalog "github.com/cockroachdb/cockroach/pkg/sql/catalog" scexec "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scexec" scmutationexec "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scexec/scmutationexec" @@ -180,6 +181,20 @@ func (mr *MockDependenciesMockRecorder) Clock() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Clock", reflect.TypeOf((*MockDependencies)(nil).Clock)) } +// ClusterSettings mocks base method. +func (m *MockDependencies) ClusterSettings() *cluster.Settings { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ClusterSettings") + ret0, _ := ret[0].(*cluster.Settings) + return ret0 +} + +// ClusterSettings indicates an expected call of ClusterSettings. +func (mr *MockDependenciesMockRecorder) ClusterSettings() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ClusterSettings", reflect.TypeOf((*MockDependencies)(nil).ClusterSettings)) +} + // DescriptorMetadataUpdater mocks base method. func (m *MockDependencies) DescriptorMetadataUpdater(arg0 context.Context) scexec.DescriptorMetadataUpdater { m.ctrl.T.Helper() diff --git a/pkg/sql/schemachanger/scplan/internal/scstage/build.go b/pkg/sql/schemachanger/scplan/internal/scstage/build.go index cc9634f8e1af..6ec7fb3111fc 100644 --- a/pkg/sql/schemachanger/scplan/internal/scstage/build.go +++ b/pkg/sql/schemachanger/scplan/internal/scstage/build.go @@ -37,15 +37,17 @@ func BuildStages( phase scop.Phase, g *scgraph.Graph, scJobIDSupplier func() jobspb.JobID, + enforcePlannerSanityChecks bool, ) []Stage { c := buildContext{ - rollback: init.InRollback, - g: g, - isRevertibilityIgnored: true, - targetState: init.TargetState, - startingStatuses: init.Current, - startingPhase: phase, - descIDs: screl.AllTargetDescIDs(init.TargetState), + rollback: init.InRollback, + g: g, + isRevertibilityIgnored: true, + targetState: init.TargetState, + startingStatuses: init.Current, + startingPhase: phase, + descIDs: screl.AllTargetDescIDs(init.TargetState), + enforcePlannerSanityChecks: enforcePlannerSanityChecks, } // Try building stages while ignoring revertibility constraints. // This is fine as long as there are no post-commit stages. @@ -92,14 +94,15 @@ func BuildStages( // buildContext contains the global constants for building the stages. // Only the BuildStages function mutates it, it's read-only everywhere else. type buildContext struct { - rollback bool - g *scgraph.Graph - scJobID jobspb.JobID - isRevertibilityIgnored bool - targetState scpb.TargetState - startingStatuses []scpb.Status - startingPhase scop.Phase - descIDs catalog.DescriptorIDSet + rollback bool + g *scgraph.Graph + scJobID jobspb.JobID + isRevertibilityIgnored bool + targetState scpb.TargetState + startingStatuses []scpb.Status + startingPhase scop.Phase + descIDs catalog.DescriptorIDSet + enforcePlannerSanityChecks bool } func buildStages(bc buildContext) (stages []Stage) { @@ -432,10 +435,13 @@ func (sb stageBuilder) hasUnmeetableOutboundDeps(n *screl.Node) (ret bool) { // Mark this node as having been visited in this traversal. sb.visited[n] = sb.visitEpoch // Do some sanity checks. - if _, isFulfilling := sb.bs.fulfilled[n]; isFulfilling { - // This should never happen. - panic(errors.AssertionFailedf("%s should not yet be scheduled for this stage", - screl.NodeString(n))) + if _, isFulfilled := sb.bs.fulfilled[n]; isFulfilled { + if sb.bc.enforcePlannerSanityChecks { + panic(errors.AssertionFailedf("%s should not yet be scheduled for this stage", + screl.NodeString(n))) + } else { + return false + } } // Look up the current target state for this node, via the lookup table. if t := sb.lut[n.Target]; t == nil { diff --git a/pkg/sql/schemachanger/scplan/plan.go b/pkg/sql/schemachanger/scplan/plan.go index 4d181610d544..3978a0aa6fb5 100644 --- a/pkg/sql/schemachanger/scplan/plan.go +++ b/pkg/sql/schemachanger/scplan/plan.go @@ -42,6 +42,10 @@ type Params struct { // SchemaChangerJobIDSupplier is used to return the JobID for a // job if one should exist. SchemaChangerJobIDSupplier func() jobspb.JobID + + // enforcePlannerSanityCheck, if true, strictly enforces sanity checks in the + // declarative schema changer planner. + EnforcePlannerSanityCheck bool } // Exported internal types @@ -105,9 +109,7 @@ func makePlan(ctx context.Context, p *Plan) (err error) { } { start := timeutil.Now() - p.Stages = scstage.BuildStages( - ctx, p.CurrentState, p.Params.ExecutionPhase, p.Graph, p.Params.SchemaChangerJobIDSupplier, - ) + p.Stages = scstage.BuildStages(ctx, p.CurrentState, p.Params.ExecutionPhase, p.Graph, p.Params.SchemaChangerJobIDSupplier, p.Params.EnforcePlannerSanityCheck) if log.ExpensiveLogEnabled(ctx, 2) { log.Infof(ctx, "stage generation took %v", timeutil.Since(start)) } diff --git a/pkg/sql/schemachanger/scrun/BUILD.bazel b/pkg/sql/schemachanger/scrun/BUILD.bazel index db75cea6687d..a93d5ea12a3a 100644 --- a/pkg/sql/schemachanger/scrun/BUILD.bazel +++ b/pkg/sql/schemachanger/scrun/BUILD.bazel @@ -13,6 +13,7 @@ go_library( "//pkg/jobs", "//pkg/jobs/jobspb", "//pkg/roachpb", + "//pkg/settings", "//pkg/settings/cluster", "//pkg/sql/catalog", "//pkg/sql/catalog/descpb", @@ -21,6 +22,7 @@ go_library( "//pkg/sql/schemachanger/scop", "//pkg/sql/schemachanger/scpb", "//pkg/sql/schemachanger/scplan", + "//pkg/util/buildutil", "@com_github_cockroachdb_errors//:errors", "@com_github_cockroachdb_redact//:redact", ], diff --git a/pkg/sql/schemachanger/scrun/scrun.go b/pkg/sql/schemachanger/scrun/scrun.go index 70c0d7a4a175..efc1607b026b 100644 --- a/pkg/sql/schemachanger/scrun/scrun.go +++ b/pkg/sql/schemachanger/scrun/scrun.go @@ -17,6 +17,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/jobs" "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" @@ -25,10 +26,18 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scop" "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scpb" "github.com/cockroachdb/cockroach/pkg/sql/schemachanger/scplan" + "github.com/cockroachdb/cockroach/pkg/util/buildutil" "github.com/cockroachdb/errors" "github.com/cockroachdb/redact" ) +var enforcePlannerSanityCheck = settings.RegisterBoolSetting( + settings.TenantWritable, + "sql.schemachanger.strict_planning_sanity_check.enabled", + "enforce strict sanity checks in the declarative schema changer planner", + buildutil.CrdbTestBuild, +) + // RunStatementPhase executes in-transaction schema changes for the targeted // state. These are the immediate changes which take place at DDL statement // execution time (scop.StatementPhase). @@ -67,6 +76,7 @@ func runTransactionPhase( sc, err := scplan.MakePlan(ctx, state, scplan.Params{ ExecutionPhase: phase, SchemaChangerJobIDSupplier: deps.TransactionalJobRegistry().SchemaChangerJobID, + EnforcePlannerSanityCheck: enforcePlannerSanityCheck.Get(&deps.ClusterSettings().SV), }) if err != nil { return scpb.CurrentState{}, jobspb.InvalidJobID, err