From 503f8a4eb156377fb321c1291b6b57db50c1188f Mon Sep 17 00:00:00 2001 From: Drew Kimball Date: Wed, 5 Apr 2023 17:49:12 -0700 Subject: [PATCH 1/2] opt: fix ordering-related optimizer panics It is possible for some functional-dependency information to be visible to a child operator but invisible to its parent. This could previously cause panics when a child provided an ordering that could be proven to satisfy the required ordering with the child FDs, but not with the parent's FDs. This patch adds a step to the logic that builds provided orderings that ensures a provided ordering can be proven to respect the required ordering without needing additional FD information. This ensures that a parent never needs to know its child's FDs in order to prove that the provided ordering is correct. The extra step is a no-op in the common case when the provided ordering can already be proven to respect the required ordering. Informs #85393 Informs #87806 Fixes #96288 Release note (bug fix): Fixed a rare internal error in the optimizer that has existed since before version 22.1, which could occur while enforcing orderings between SQL operators. --- pkg/sql/opt/ordering/ordering.go | 56 ++++++++++++++++--- pkg/sql/opt/xform/testdata/physprops/ordering | 41 +++++++++++++- pkg/sql/opt/xform/testdata/rules/join | 2 +- 3 files changed, 89 insertions(+), 10 deletions(-) diff --git a/pkg/sql/opt/ordering/ordering.go b/pkg/sql/opt/ordering/ordering.go index 8a0c510f70fb..ca88e96d8e36 100644 --- a/pkg/sql/opt/ordering/ordering.go +++ b/pkg/sql/opt/ordering/ordering.go @@ -88,6 +88,7 @@ func BuildProvided(expr memo.RelExpr, required *props.OrderingChoice) opt.Orderi return nil } provided := funcMap[expr.Op()].buildProvidedOrdering(expr, required) + provided = finalizeProvided(provided, required, expr.Relational().OutputCols) if buildutil.CrdbTestBuild { checkProvided(expr, required, provided) @@ -418,6 +419,53 @@ func trimProvided( return provided[:provIdx] } +// finalizeProvided ensures that the provided ordering satisfies the following +// properties: +// 1. The provided ordering can be proven to satisfy the required ordering +// without the use of additional (e.g. functional dependency) information. +// 2. The provided ordering is simplified, such that it does not contain any +// columns from the required ordering optional set. +// 3. The provided ordering only refers to output columns for the operator. +// +// This step is necessary because it is possible for child operators to have +// different functional dependency information than their parents as well as +// different output columns. We have to protect against the case where a parent +// operator cannot prove that its child's provided ordering satisfies its +// required ordering. +func finalizeProvided( + provided opt.Ordering, required *props.OrderingChoice, outCols opt.ColSet, +) (newProvided opt.Ordering) { + // First check if the given provided is already suitable. + providedCols := provided.ColSet() + if len(provided) == len(required.Columns) && providedCols.SubsetOf(outCols) { + needsRemap := false + for i := range provided { + choice, ordCol := required.Columns[i], provided[i] + if !choice.Group.Contains(ordCol.ID()) || choice.Descending != ordCol.Descending() { + needsRemap = true + break + } + } + if !needsRemap { + return provided + } + } + newProvided = make(opt.Ordering, len(required.Columns)) + for i, choice := range required.Columns { + group := choice.Group.Intersection(outCols) + if group.Intersects(providedCols) { + // Prefer using columns from the provided ordering if possible. + group.IntersectionWith(providedCols) + } + col, ok := group.Next(0) + if !ok { + panic(errors.AssertionFailedf("no output column equivalent to %d", redact.Safe(col))) + } + newProvided[i] = opt.MakeOrderingColumn(col, choice.Descending) + } + return newProvided +} + // checkRequired runs sanity checks on the ordering required of an operator. func checkRequired(expr memo.RelExpr, required *props.OrderingChoice) { rel := expr.Relational() @@ -467,12 +515,4 @@ func checkProvided(expr memo.RelExpr, required *props.OrderingChoice, provided o )) } } - - // The provided ordering should not have unnecessary columns. - fds := &expr.Relational().FuncDeps - if trimmed := trimProvided(provided, required, fds); len(trimmed) != len(provided) { - panic(errors.AssertionFailedf( - "provided %s can be trimmed to %s (FDs: %s)", redact.Safe(provided), redact.Safe(trimmed), redact.Safe(fds), - )) - } } diff --git a/pkg/sql/opt/xform/testdata/physprops/ordering b/pkg/sql/opt/xform/testdata/physprops/ordering index 4ba892309051..9cb3490cb0b7 100644 --- a/pkg/sql/opt/xform/testdata/physprops/ordering +++ b/pkg/sql/opt/xform/testdata/physprops/ordering @@ -2988,7 +2988,7 @@ sort ├── cardinality: [0 - 0] ├── key: () ├── fd: ()-->(6,12,23) - ├── ordering: +(12|23),-6 [actual: ] + ├── ordering: +(12|23),-6 [actual: +12,-6] └── values ├── columns: tab_922.crdb_internal_mvcc_timestamp:6!null col1_4:12!null col_2150:23!null ├── cardinality: [0 - 0] @@ -3069,3 +3069,42 @@ top-k ├── cardinality: [0 - 3] └── scan t106678 └── columns: a:1 b:2 + +exec-ddl +CREATE TABLE v0 (c1 BYTES PRIMARY KEY, c2 TIMESTAMP, INDEX i3(c2)) +---- + +opt +SELECT c1 FROM v0 +WHERE (c1 = c1 AND c2 = '01-31-2023 00:00:00'::TIMESTAMP) + OR (c1 = b'00' AND c1 = b'0') + OR (c1 IS NULL AND c2 IS NULL) +ORDER BY c1; +---- +project + ├── columns: c1:1!null + ├── key: (1) + ├── ordering: +1 + └── distinct-on + ├── columns: c1:1!null c2:2 + ├── grouping columns: c1:1!null + ├── key: (1) + ├── fd: (1)-->(2) + ├── ordering: +1 + ├── project + │ ├── columns: c1:1!null c2:2 + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ ├── ordering: +1 + │ ├── scan v0@i3 + │ │ ├── columns: c1:5!null c2:6 + │ │ ├── constraint: /6/5: [/'2023-01-31 00:00:00' - /'2023-01-31 00:00:00'] + │ │ ├── key: (5) + │ │ ├── fd: (5)-->(6) + │ │ └── ordering: +5 + │ └── projections + │ ├── c1:5 [as=c1:1, outer=(5)] + │ └── c2:6 [as=c2:2, outer=(6)] + └── aggregations + └── const-agg [as=c2:2, outer=(2)] + └── c2:2 diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index 282214d795fc..0598e80a0d8d 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -9824,7 +9824,7 @@ project │ ├── cardinality: [0 - 1] │ ├── key: (25) │ ├── fd: (25)-->(26,27), (27)~~>(25,26) - │ ├── ordering: +25 [actual: ] + │ ├── ordering: +25 │ └── project │ ├── columns: a:25!null b:26!null c:27!null q:31!null r:32!null │ ├── cardinality: [0 - 1] From 9b408a3c4f103f9af9642bd98addd06756a6f7e2 Mon Sep 17 00:00:00 2001 From: Drew Kimball Date: Wed, 25 Oct 2023 15:30:22 -0600 Subject: [PATCH 2/2] opt: add setting for provided ordering fix This patch adds a new session setting, `optimizer_use_provided_ordering_fix`, which toggles whether to use the `finalizeProvided` function introduced in required ordering. This setting is on by default, and will be used when backporting #100776. Informs #113072 Release note: None --- pkg/sql/exec_util.go | 4 ++++ .../testdata/logic_test/information_schema | 1 + .../logictest/testdata/logic_test/pg_catalog | 3 +++ .../logictest/testdata/logic_test/show_source | 1 + pkg/sql/opt/memo/memo.go | 5 ++++- pkg/sql/opt/memo/memo_test.go | 6 ++++++ pkg/sql/opt/optgen/exprgen/expr_gen.go | 2 +- pkg/sql/opt/ordering/BUILD.bazel | 1 + pkg/sql/opt/ordering/ordering.go | 9 +++++++-- pkg/sql/opt/testutils/opttester/opt_tester.go | 1 + pkg/sql/opt/xform/optimizer.go | 2 +- .../sessiondatapb/local_only_session_data.proto | 5 +++++ pkg/sql/vars.go | 17 +++++++++++++++++ 13 files changed, 52 insertions(+), 5 deletions(-) diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index 7f026550f504..116dadaeb7fa 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -3423,6 +3423,10 @@ func (m *sessionDataMutator) SetAllowRoleMembershipsToChangeDuringTransaction(va m.data.AllowRoleMembershipsToChangeDuringTransaction = val } +func (m *sessionDataMutator) SetOptimizerUseProvidedOrderingFix(val bool) { + m.data.OptimizerUseProvidedOrderingFix = val +} + // Utility functions related to scrubbing sensitive information on SQL Stats. // quantizeCounts ensures that the Count field in the diff --git a/pkg/sql/logictest/testdata/logic_test/information_schema b/pkg/sql/logictest/testdata/logic_test/information_schema index bd4398963200..24e067dad696 100644 --- a/pkg/sql/logictest/testdata/logic_test/information_schema +++ b/pkg/sql/logictest/testdata/logic_test/information_schema @@ -4810,6 +4810,7 @@ optimizer_use_improved_split_disjunction_for_joins off optimizer_use_limit_ordering_for_streaming_group_by off optimizer_use_multicol_stats on optimizer_use_not_visible_indexes off +optimizer_use_provided_ordering_fix on override_multi_region_zone_config off parallelize_multi_key_lookup_joins_enabled off password_encryption scram-sha-256 diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index 96b8132faa80..756297708fc0 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -2855,6 +2855,7 @@ optimizer_use_improved_split_disjunction_for_joins off N optimizer_use_limit_ordering_for_streaming_group_by off NULL NULL NULL string optimizer_use_multicol_stats on NULL NULL NULL string optimizer_use_not_visible_indexes off NULL NULL NULL string +optimizer_use_provided_ordering_fix on NULL NULL NULL string override_multi_region_zone_config off NULL NULL NULL string parallelize_multi_key_lookup_joins_enabled off NULL NULL NULL string password_encryption scram-sha-256 NULL NULL NULL string @@ -3003,6 +3004,7 @@ optimizer_use_improved_split_disjunction_for_joins off N optimizer_use_limit_ordering_for_streaming_group_by off NULL user NULL off off optimizer_use_multicol_stats on NULL user NULL on on optimizer_use_not_visible_indexes off NULL user NULL off off +optimizer_use_provided_ordering_fix on NULL user NULL on on override_multi_region_zone_config off NULL user NULL off off parallelize_multi_key_lookup_joins_enabled off NULL user NULL false false password_encryption scram-sha-256 NULL user NULL scram-sha-256 scram-sha-256 @@ -3149,6 +3151,7 @@ optimizer_use_improved_split_disjunction_for_joins NULL NULL NULL optimizer_use_limit_ordering_for_streaming_group_by NULL NULL NULL NULL NULL optimizer_use_multicol_stats NULL NULL NULL NULL NULL optimizer_use_not_visible_indexes NULL NULL NULL NULL NULL +optimizer_use_provided_ordering_fix NULL NULL NULL NULL NULL override_multi_region_zone_config NULL NULL NULL NULL NULL parallelize_multi_key_lookup_joins_enabled NULL NULL NULL NULL NULL password_encryption NULL NULL NULL NULL NULL diff --git a/pkg/sql/logictest/testdata/logic_test/show_source b/pkg/sql/logictest/testdata/logic_test/show_source index 9fd9b1e2bfcf..6be3d2612cdf 100644 --- a/pkg/sql/logictest/testdata/logic_test/show_source +++ b/pkg/sql/logictest/testdata/logic_test/show_source @@ -116,6 +116,7 @@ optimizer_use_improved_split_disjunction_for_joins off optimizer_use_limit_ordering_for_streaming_group_by off optimizer_use_multicol_stats on optimizer_use_not_visible_indexes off +optimizer_use_provided_ordering_fix on override_multi_region_zone_config off parallelize_multi_key_lookup_joins_enabled off password_encryption scram-sha-256 diff --git a/pkg/sql/opt/memo/memo.go b/pkg/sql/opt/memo/memo.go index 146b647675ce..a2a654d4250e 100644 --- a/pkg/sql/opt/memo/memo.go +++ b/pkg/sql/opt/memo/memo.go @@ -165,6 +165,7 @@ type Memo struct { alwaysUseHistograms bool hoistUncorrelatedEqualitySubqueries bool useImprovedComputedColumnFiltersDerivation bool + useProvidedOrderingFix bool // curRank is the highest currently in-use scalar expression rank. curRank opt.ScalarRank @@ -225,6 +226,7 @@ func (m *Memo) Init(evalCtx *eval.Context) { alwaysUseHistograms: evalCtx.SessionData().OptimizerAlwaysUseHistograms, hoistUncorrelatedEqualitySubqueries: evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries, useImprovedComputedColumnFiltersDerivation: evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation, + useProvidedOrderingFix: evalCtx.SessionData().OptimizerUseProvidedOrderingFix, } m.metadata.Init() m.logPropsBuilder.init(evalCtx, m) @@ -368,7 +370,8 @@ func (m *Memo) IsStale( m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins || m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms || m.hoistUncorrelatedEqualitySubqueries != evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries || - m.useImprovedComputedColumnFiltersDerivation != evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation { + m.useImprovedComputedColumnFiltersDerivation != evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation || + m.useProvidedOrderingFix != evalCtx.SessionData().OptimizerUseProvidedOrderingFix { return true, nil } diff --git a/pkg/sql/opt/memo/memo_test.go b/pkg/sql/opt/memo/memo_test.go index 66cbb367682f..b541d8d8b5e2 100644 --- a/pkg/sql/opt/memo/memo_test.go +++ b/pkg/sql/opt/memo/memo_test.go @@ -358,6 +358,12 @@ func TestMemoIsStale(t *testing.T) { evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation = false notStale() + // Stale optimizer_use_provided_ordering_fix. + evalCtx.SessionData().OptimizerUseProvidedOrderingFix = true + stale() + evalCtx.SessionData().OptimizerUseProvidedOrderingFix = false + notStale() + // User no longer has access to view. catalog.View(tree.NewTableNameWithSchema("t", tree.PublicSchemaName, "abcview")).Revoked = true _, err = o.Memo().IsStale(ctx, &evalCtx, catalog) diff --git a/pkg/sql/opt/optgen/exprgen/expr_gen.go b/pkg/sql/opt/optgen/exprgen/expr_gen.go index 1759cd5e7e99..fe53a7dd09d5 100644 --- a/pkg/sql/opt/optgen/exprgen/expr_gen.go +++ b/pkg/sql/opt/optgen/exprgen/expr_gen.go @@ -400,7 +400,7 @@ func (eg *exprGen) populateBestProps(expr opt.Expr, required *physical.Required) provided := &physical.Provided{} // BuildProvided relies on ProvidedPhysical() being set in the children, so // it must run after the recursive calls on the children. - provided.Ordering = ordering.BuildProvided(rel, &required.Ordering) + provided.Ordering = ordering.BuildProvided(eg.f.EvalContext(), rel, &required.Ordering) provided.Distribution = distribution.BuildProvided(eg.f.EvalContext(), rel, &required.Distribution) cost += eg.coster.ComputeCost(rel, required) diff --git a/pkg/sql/opt/ordering/BUILD.bazel b/pkg/sql/opt/ordering/BUILD.bazel index 489e17a4b8c2..89a0d1cf73e7 100644 --- a/pkg/sql/opt/ordering/BUILD.bazel +++ b/pkg/sql/opt/ordering/BUILD.bazel @@ -31,6 +31,7 @@ go_library( "//pkg/sql/opt/cat", "//pkg/sql/opt/memo", "//pkg/sql/opt/props", + "//pkg/sql/sem/eval", "//pkg/sql/sem/tree", "//pkg/util/buildutil", "@com_github_cockroachdb_errors//:errors", diff --git a/pkg/sql/opt/ordering/ordering.go b/pkg/sql/opt/ordering/ordering.go index ca88e96d8e36..32fc6849feb9 100644 --- a/pkg/sql/opt/ordering/ordering.go +++ b/pkg/sql/opt/ordering/ordering.go @@ -14,6 +14,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt" "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" "github.com/cockroachdb/cockroach/pkg/sql/opt/props" + "github.com/cockroachdb/cockroach/pkg/sql/sem/eval" "github.com/cockroachdb/cockroach/pkg/util/buildutil" "github.com/cockroachdb/errors" "github.com/cockroachdb/redact" @@ -83,12 +84,16 @@ func BuildChildRequired( // // This function assumes that the provided orderings have already been set in // the children of the expression. -func BuildProvided(expr memo.RelExpr, required *props.OrderingChoice) opt.Ordering { +func BuildProvided( + evalCtx *eval.Context, expr memo.RelExpr, required *props.OrderingChoice, +) opt.Ordering { if required.Any() { return nil } provided := funcMap[expr.Op()].buildProvidedOrdering(expr, required) - provided = finalizeProvided(provided, required, expr.Relational().OutputCols) + if evalCtx.SessionData().OptimizerUseProvidedOrderingFix { + provided = finalizeProvided(provided, required, expr.Relational().OutputCols) + } if buildutil.CrdbTestBuild { checkProvided(expr, required, provided) diff --git a/pkg/sql/opt/testutils/opttester/opt_tester.go b/pkg/sql/opt/testutils/opttester/opt_tester.go index ab0b0d6fef2e..9b7eb39a1073 100644 --- a/pkg/sql/opt/testutils/opttester/opt_tester.go +++ b/pkg/sql/opt/testutils/opttester/opt_tester.go @@ -292,6 +292,7 @@ func New(catalog cat.Catalog, sql string) *OptTester { ot.evalCtx.SessionData().OptSplitScanLimit = tabledesc.MaxBucketAllowed ot.evalCtx.SessionData().VariableInequalityLookupJoinEnabled = true ot.evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins = true + ot.evalCtx.SessionData().OptimizerUseProvidedOrderingFix = true return ot } diff --git a/pkg/sql/opt/xform/optimizer.go b/pkg/sql/opt/xform/optimizer.go index 50ac5fe7dca5..8c537dbd1ce7 100644 --- a/pkg/sql/opt/xform/optimizer.go +++ b/pkg/sql/opt/xform/optimizer.go @@ -775,7 +775,7 @@ func (o *Optimizer) setLowestCostTree(parent opt.Expr, parentProps *physical.Req var provided physical.Provided // BuildProvided relies on ProvidedPhysical() being set in the children, so // it must run after the recursive calls on the children. - provided.Ordering = ordering.BuildProvided(relParent, &parentProps.Ordering) + provided.Ordering = ordering.BuildProvided(o.evalCtx, relParent, &parentProps.Ordering) provided.Distribution = distribution.BuildProvided(o.evalCtx, relParent, &parentProps.Distribution) o.mem.SetBestProps(relParent, parentProps, &provided, relCost) } diff --git a/pkg/sql/sessiondatapb/local_only_session_data.proto b/pkg/sql/sessiondatapb/local_only_session_data.proto index e8688208efc2..11fc373321da 100644 --- a/pkg/sql/sessiondatapb/local_only_session_data.proto +++ b/pkg/sql/sessiondatapb/local_only_session_data.proto @@ -356,6 +356,11 @@ message LocalOnlySessionData { // column involved a single column and that column was equated with a single // constant value in a WHERE clause filter. bool optimizer_use_improved_computed_column_filters_derivation = 104; + // OptimizerUseProvidedOrderingFix, when true, causes the optimizer to + // reconcile provided orderings with required ordering choices. This prevents + // internal errors due to incomplete functional dependencies, and also + // fixes a bug that incorrectly truncated the provided ordering (see #113072). + bool optimizer_use_provided_ordering_fix = 115; /////////////////////////////////////////////////////////////////////////// // WARNING: consider whether a session parameter you're adding needs to // diff --git a/pkg/sql/vars.go b/pkg/sql/vars.go index 8676d7e7a570..ce021a274a82 100644 --- a/pkg/sql/vars.go +++ b/pkg/sql/vars.go @@ -2526,6 +2526,23 @@ var varGen = map[string]sessionVar{ }, GlobalDefault: globalFalse, }, + + // CockroachDB extension. + `optimizer_use_provided_ordering_fix`: { + GetStringVal: makePostgresBoolGetStringValFn(`optimizer_use_provided_ordering_fix`), + Set: func(_ context.Context, m sessionDataMutator, s string) error { + b, err := paramparse.ParseBoolVar("optimizer_use_provided_ordering_fix", s) + if err != nil { + return err + } + m.SetOptimizerUseProvidedOrderingFix(b) + return nil + }, + Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) { + return formatBoolAsPostgresSetting(evalCtx.SessionData().OptimizerUseProvidedOrderingFix), nil + }, + GlobalDefault: globalTrue, + }, } // We want test coverage for this on and off so make it metamorphic.