From 5072ac2c152c5f4ef97a1073f9652fac887670d5 Mon Sep 17 00:00:00 2001 From: Drew Kimball Date: Thu, 6 Apr 2023 00:49:12 +0000 Subject: [PATCH 1/4] opt: fix ordering-related optimizer panics It is possible for some functional-dependency information to be visible to a child operator but invisible to its parent. This could previously cause panics when a child provided an ordering that could be proven to satisfy the required ordering with the child FDs, but not with the parent's FDs. This patch adds a step to the logic that builds provided orderings that ensures a provided ordering can be proven to respect the required ordering without needing additional FD information. This ensures that a parent never needs to know its child's FDs in order to prove that the provided ordering is correct. The extra step is a no-op in the common case when the provided ordering can already be proven to respect the required ordering. Informs #85393 Informs #87806 Fixes #96288 Release note (bug fix): Fixed a rare internal error in the optimizer that has existed since before version 22.1, which could occur while enforcing orderings between SQL operators. 
--- pkg/sql/opt/ordering/ordering.go | 56 ++++++++++++++++--- pkg/sql/opt/xform/testdata/physprops/ordering | 41 +++++++++++++- pkg/sql/opt/xform/testdata/rules/join | 2 +- 3 files changed, 89 insertions(+), 10 deletions(-) diff --git a/pkg/sql/opt/ordering/ordering.go b/pkg/sql/opt/ordering/ordering.go index 8a0c510f70fb..ca88e96d8e36 100644 --- a/pkg/sql/opt/ordering/ordering.go +++ b/pkg/sql/opt/ordering/ordering.go @@ -88,6 +88,7 @@ func BuildProvided(expr memo.RelExpr, required *props.OrderingChoice) opt.Orderi return nil } provided := funcMap[expr.Op()].buildProvidedOrdering(expr, required) + provided = finalizeProvided(provided, required, expr.Relational().OutputCols) if buildutil.CrdbTestBuild { checkProvided(expr, required, provided) @@ -418,6 +419,53 @@ func trimProvided( return provided[:provIdx] } +// finalizeProvided ensures that the provided ordering satisfies the following +// properties: +// 1. The provided ordering can be proven to satisfy the required ordering +// without the use of additional (e.g. functional dependency) information. +// 2. The provided ordering is simplified, such that it does not contain any +// columns from the required ordering optional set. +// 3. The provided ordering only refers to output columns for the operator. +// +// This step is necessary because it is possible for child operators to have +// different functional dependency information than their parents as well as +// different output columns. We have to protect against the case where a parent +// operator cannot prove that its child's provided ordering satisfies its +// required ordering. +func finalizeProvided( + provided opt.Ordering, required *props.OrderingChoice, outCols opt.ColSet, +) (newProvided opt.Ordering) { + // First check if the given provided is already suitable. 
+ providedCols := provided.ColSet() + if len(provided) == len(required.Columns) && providedCols.SubsetOf(outCols) { + needsRemap := false + for i := range provided { + choice, ordCol := required.Columns[i], provided[i] + if !choice.Group.Contains(ordCol.ID()) || choice.Descending != ordCol.Descending() { + needsRemap = true + break + } + } + if !needsRemap { + return provided + } + } + newProvided = make(opt.Ordering, len(required.Columns)) + for i, choice := range required.Columns { + group := choice.Group.Intersection(outCols) + if group.Intersects(providedCols) { + // Prefer using columns from the provided ordering if possible. + group.IntersectionWith(providedCols) + } + col, ok := group.Next(0) + if !ok { + panic(errors.AssertionFailedf("no output column equivalent to %d", redact.Safe(col))) + } + newProvided[i] = opt.MakeOrderingColumn(col, choice.Descending) + } + return newProvided +} + // checkRequired runs sanity checks on the ordering required of an operator. func checkRequired(expr memo.RelExpr, required *props.OrderingChoice) { rel := expr.Relational() @@ -467,12 +515,4 @@ func checkProvided(expr memo.RelExpr, required *props.OrderingChoice, provided o )) } } - - // The provided ordering should not have unnecessary columns. 
- fds := &expr.Relational().FuncDeps - if trimmed := trimProvided(provided, required, fds); len(trimmed) != len(provided) { - panic(errors.AssertionFailedf( - "provided %s can be trimmed to %s (FDs: %s)", redact.Safe(provided), redact.Safe(trimmed), redact.Safe(fds), - )) - } } diff --git a/pkg/sql/opt/xform/testdata/physprops/ordering b/pkg/sql/opt/xform/testdata/physprops/ordering index 4ba892309051..9cb3490cb0b7 100644 --- a/pkg/sql/opt/xform/testdata/physprops/ordering +++ b/pkg/sql/opt/xform/testdata/physprops/ordering @@ -2988,7 +2988,7 @@ sort ├── cardinality: [0 - 0] ├── key: () ├── fd: ()-->(6,12,23) - ├── ordering: +(12|23),-6 [actual: ] + ├── ordering: +(12|23),-6 [actual: +12,-6] └── values ├── columns: tab_922.crdb_internal_mvcc_timestamp:6!null col1_4:12!null col_2150:23!null ├── cardinality: [0 - 0] @@ -3069,3 +3069,42 @@ top-k ├── cardinality: [0 - 3] └── scan t106678 └── columns: a:1 b:2 + +exec-ddl +CREATE TABLE v0 (c1 BYTES PRIMARY KEY, c2 TIMESTAMP, INDEX i3(c2)) +---- + +opt +SELECT c1 FROM v0 +WHERE (c1 = c1 AND c2 = '01-31-2023 00:00:00'::TIMESTAMP) + OR (c1 = b'00' AND c1 = b'0') + OR (c1 IS NULL AND c2 IS NULL) +ORDER BY c1; +---- +project + ├── columns: c1:1!null + ├── key: (1) + ├── ordering: +1 + └── distinct-on + ├── columns: c1:1!null c2:2 + ├── grouping columns: c1:1!null + ├── key: (1) + ├── fd: (1)-->(2) + ├── ordering: +1 + ├── project + │ ├── columns: c1:1!null c2:2 + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ ├── ordering: +1 + │ ├── scan v0@i3 + │ │ ├── columns: c1:5!null c2:6 + │ │ ├── constraint: /6/5: [/'2023-01-31 00:00:00' - /'2023-01-31 00:00:00'] + │ │ ├── key: (5) + │ │ ├── fd: (5)-->(6) + │ │ └── ordering: +5 + │ └── projections + │ ├── c1:5 [as=c1:1, outer=(5)] + │ └── c2:6 [as=c2:2, outer=(6)] + └── aggregations + └── const-agg [as=c2:2, outer=(2)] + └── c2:2 diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index 282214d795fc..0598e80a0d8d 100644 --- 
a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -9824,7 +9824,7 @@ project │ ├── cardinality: [0 - 1] │ ├── key: (25) │ ├── fd: (25)-->(26,27), (27)~~>(25,26) - │ ├── ordering: +25 [actual: ] + │ ├── ordering: +25 │ └── project │ ├── columns: a:25!null b:26!null c:27!null q:31!null r:32!null │ ├── cardinality: [0 - 1] From 080a5f10f9e2e5f439d3a1dec3379645b4cc61fa Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Wed, 25 Oct 2023 01:05:58 +0000 Subject: [PATCH 2/4] sql: fix usage of streamer with multiple column families Previously, we had the following bug in how the streamer was used under the following conditions: - we're looking up into a table with at least 3 column families - not all column families are needed, so we end up creating "family-specific" spans (either Gets or Scans - the latter is possible when multiple contiguous families are needed) - the streamer is used with OutOfOrder mode (which is the case for index joins when `MaintainOrdering` is `false` and for lookup joins when `MaintainLookupOrdering` is `false`) - the index we're looking up into is split across multiple ranges. In such a scenario we could end up with KVs from different SQL rows being intertwined because the streamer could execute a separate BatchRequest for those rows, and in case `TargetBytes` limit estimate was insufficient, we'd end up creating "resume" batches, at which point the "results" stream would be incorrect. Later, at the SQL fetcher level this could either manifest as an internal "non-nullable column with no value" error or with silent incorrect output (we'd create multiple output SQL rows from a true single one). This problem is now fixed by asking the streamer to maintain the ordering of the requests whenever we have `SplitFamilyIDs` with more than one family, meaning that we might end up creating family-specific spans. 
Note that the non-streamer code path doesn't suffer from this problem because there we only parallelize BatchRequests when neither `TargetBytes` nor `MaxSpanRequestKeys` limits are set, which is the requirement for having "resume" batches. Release note (bug fix): CockroachDB previously could incorrectly evaluate lookup and index joins into tables with at least 3 column families in some cases (either "non-nullable column with no value" internal error would occur or the query would return incorrect results), and this is now fixed. The bug was introduced in 22.2. --- pkg/sql/colfetcher/index_join.go | 18 ++++--- .../logictest/testdata/logic_test/lookup_join | 18 +++++++ .../execbuilder/testdata/lookup_join_limit | 2 +- pkg/sql/rowexec/joinreader.go | 49 +++++++++++++------ 4 files changed, 65 insertions(+), 22 deletions(-) diff --git a/pkg/sql/colfetcher/index_join.go b/pkg/sql/colfetcher/index_join.go index c8fe34a74d35..694101ed0ee5 100644 --- a/pkg/sql/colfetcher/index_join.go +++ b/pkg/sql/colfetcher/index_join.go @@ -471,8 +471,8 @@ func getIndexJoinBatchSize( // NewColIndexJoin creates a new ColIndexJoin operator. // -// If spec.MaintainOrdering is true, then the diskMonitor argument must be -// non-nil. +// If spec.MaintainOrdering is true, or spec.SplitFamilyIDs has more than one +// family, then the diskMonitor argument must be non-nil. func NewColIndexJoin( ctx context.Context, allocator *colmem.Allocator, @@ -518,15 +518,21 @@ func NewColIndexJoin( if streamerBudgetAcc == nil { return nil, errors.AssertionFailedf("streamer budget account is nil when the Streamer API is desired") } - if spec.MaintainOrdering && diskMonitor == nil { - return nil, errors.AssertionFailedf("diskMonitor is nil when ordering needs to be maintained") - } // Keep 1/16th of the memory limit for the output batch of the cFetcher, // another 1/16th of the limit for the input tuples buffered by the index // joiner, and we'll give the remaining memory to the streamer budget // below. 
cFetcherMemoryLimit = int64(math.Ceil(float64(totalMemoryLimit) / 16.0)) streamerBudgetLimit := 14 * cFetcherMemoryLimit + // When we have SplitFamilyIDs with more than one family ID, then it's + // possible for a single lookup span to be split into multiple "family" + // spans, and in order to preserve the invariant that all KVs for a + // single SQL row are contiguous we must ask the streamer to preserve + // the ordering. See #113013 for an example. + maintainOrdering := spec.MaintainOrdering || len(spec.SplitFamilyIDs) > 1 + if maintainOrdering && diskMonitor == nil { + return nil, errors.AssertionFailedf("diskMonitor is nil when ordering needs to be maintained") + } kvFetcher = row.NewStreamingKVFetcher( flowCtx.Cfg.DistSender, flowCtx.Stopper(), @@ -536,7 +542,7 @@ func NewColIndexJoin( spec.LockingStrength, streamerBudgetLimit, streamerBudgetAcc, - spec.MaintainOrdering, + maintainOrdering, true, /* singleRowLookup */ int(spec.FetchSpec.MaxKeysPerRow), rowcontainer.NewKVStreamerResultDiskBuffer( diff --git a/pkg/sql/logictest/testdata/logic_test/lookup_join b/pkg/sql/logictest/testdata/logic_test/lookup_join index 94c468c38503..e439dee7cf6a 100644 --- a/pkg/sql/logictest/testdata/logic_test/lookup_join +++ b/pkg/sql/logictest/testdata/logic_test/lookup_join @@ -1516,3 +1516,21 @@ FROM t89576 AS t1 LEFT LOOKUP JOIN t89576 AS t2 ON (t2.v) = (t1.v) AND (t2.s) = (t1.s) + +# Regression test for incorrectly using OutOfOrder mode of the streamer with +# multiple column families (#113013). 
+ +skipif config 3node-tenant-default-configs +statement ok +CREATE TABLE l_113013 (r_id INT, l_id INT, PRIMARY KEY (r_id, l_id), INDEX l_id_idx(l_id)); +CREATE TABLE r_113013 (id INT PRIMARY KEY, c1 STRING NOT NULL, c2 STRING NOT NULL, c3 STRING NOT NULL, FAMILY f1 (id, c1), FAMILY f2 (c2), FAMILY f3 (c3)); +ALTER TABLE r_113013 SPLIT AT VALUES (2); +INSERT INTO l_113013 VALUES (1, 1), (2, 1); +INSERT INTO r_113013 VALUES (1, 'c1', 'c2', repeat('c3', 2000)), (2, 'c1', 'c2', 'c3'); + +skipif config 3node-tenant-default-configs +query II rowsort +SELECT length(c1), length(c3) FROM l_113013 l INNER JOIN r_113013 r ON l.r_id = r.id WHERE l.l_id = 1; +---- +2 4000 +2 2 diff --git a/pkg/sql/opt/exec/execbuilder/testdata/lookup_join_limit b/pkg/sql/opt/exec/execbuilder/testdata/lookup_join_limit index dee9c316da21..d0daab2a2255 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/lookup_join_limit +++ b/pkg/sql/opt/exec/execbuilder/testdata/lookup_join_limit @@ -4,7 +4,7 @@ # rows eagerly in the presence of limit hints. statement ok -CREATE TABLE a (x INT PRIMARY KEY, y INT, z INT, INDEX (y)); +CREATE TABLE a (x INT PRIMARY KEY, y INT, z INT, INDEX (y), FAMILY (x, y, z)); CREATE TABLE b (x INT PRIMARY KEY); INSERT INTO a VALUES (1, 1, 1), (2, 1, 1), (3, 2, 2), (4, 2, 2); INSERT INTO b VALUES (1), (2), (3), (4); diff --git a/pkg/sql/rowexec/joinreader.go b/pkg/sql/rowexec/joinreader.go index cda65d730531..c6fb3d34fea7 100644 --- a/pkg/sql/rowexec/joinreader.go +++ b/pkg/sql/rowexec/joinreader.go @@ -129,15 +129,20 @@ type joinReader struct { unlimitedMemMonitor *mon.BytesMonitor budgetAcc mon.BoundAccount // maintainOrdering indicates whether the ordering of the input stream - // needs to be maintained AND that we rely on the streamer for that. - // We currently only rely on the streamer in two cases: - // 1. We are performing an index join and joinReader.maintainOrdering is + // needs to be maintained AND that we rely on the streamer for that. 
We + // currently rely on the streamer in the following cases: + // 1. When spec.SplitFamilyIDs has more than one family, for both + // index and lookup joins (this is needed to ensure that all KVs + // for a single row are returned contiguously). + // 2. We are performing an index join and spec.MaintainOrdering is // true. - // 2. We are performing a lookup join and maintainLookupOrdering is true. - // Except for case (2), we don't rely on the streamer for maintaining - // the ordering for lookup joins due to implementation details (since we - // still buffer all looked up rows and restore the ordering explicitly via - // the joinReaderOrderingStrategy). + // 3. We are performing a lookup join and spec.MaintainLookupOrdering + // is true. + // Note that in case (3), we don't rely on the streamer for maintaining + // the ordering for lookup joins when spec.MaintainOrdering is true due + // to implementation details (since we still buffer all looked up rows + // and restore the ordering explicitly via the + // joinReaderOrderingStrategy). maintainOrdering bool diskMonitor *mon.BytesMonitor txnKVStreamerMemAcc mon.BoundAccount @@ -491,13 +496,27 @@ func newJoinReader( jr.streamerInfo.unlimitedMemMonitor.StartNoReserved(flowCtx.EvalCtx.Ctx(), flowCtx.EvalCtx.Mon) jr.streamerInfo.budgetAcc = jr.streamerInfo.unlimitedMemMonitor.MakeBoundAccount() jr.streamerInfo.txnKVStreamerMemAcc = jr.streamerInfo.unlimitedMemMonitor.MakeBoundAccount() - // The index joiner can rely on the streamer to maintain the input ordering, - // but the lookup joiner currently handles this logic itself, so the - // streamer can operate in OutOfOrder mode. The exception is when the - // results of each lookup need to be returned in index order - in this case, - // InOrder mode must be used for the streamer. 
- jr.streamerInfo.maintainOrdering = (jr.maintainOrdering && readerType == indexJoinReaderType) || spec.MaintainLookupOrdering + // When we have SplitFamilyIDs with more than one family ID, then it's + // possible for a single lookup span to be split into multiple "family" + // spans, and in order to preserve the invariant that all KVs for a + // single SQL row are contiguous we must ask the streamer to preserve + // the ordering. See #113013 for an example. + jr.streamerInfo.maintainOrdering = len(spec.SplitFamilyIDs) > 1 + if readerType == indexJoinReaderType { + if spec.MaintainOrdering { + // The index join can rely on the streamer to maintain the input + // ordering. + jr.streamerInfo.maintainOrdering = true + } + } else { + // Due to implementation details (the join reader strategy restores + // the desired order when spec.MaintainOrdering is set) we only need + // to ask the streamer to maintain ordering if the results of each + // lookup need to be returned in index order. + if spec.MaintainLookupOrdering { + jr.streamerInfo.maintainOrdering = true + } + } var diskBuffer kvstreamer.ResultDiskBuffer if jr.streamerInfo.maintainOrdering { From 398541e9931c40e2e47f5e1d194ed0ed13181e28 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Wed, 25 Oct 2023 20:28:21 +0000 Subject: [PATCH 3/4] sql: add session variable to force streamer to maintain ordering This commit adds `streamer_always_maintain_ordering` session variable that - when set to `true` - forces all current usages of the streamer in SQL layer (lookup and index joins) to ask it to maintain the ordering, even if this is not strictly necessary by the query. This variable is introduced as a possible workaround in case we find more scenarios where we currently are incorrectly using the OutOfOrder mode of the streamer. 
Release note: None --- pkg/sql/colfetcher/index_join.go | 6 +++--- pkg/sql/exec_util.go | 4 ++++ .../testdata/logic_test/information_schema | 1 + .../logictest/testdata/logic_test/pg_catalog | 3 +++ .../logictest/testdata/logic_test/show_source | 1 + pkg/sql/rowexec/joinreader.go | 3 +++ pkg/sql/sessiondatapb/session_data.proto | 7 +++++++ pkg/sql/vars.go | 17 +++++++++++++++++ 8 files changed, 39 insertions(+), 3 deletions(-) diff --git a/pkg/sql/colfetcher/index_join.go b/pkg/sql/colfetcher/index_join.go index 694101ed0ee5..1e7564d4ab27 100644 --- a/pkg/sql/colfetcher/index_join.go +++ b/pkg/sql/colfetcher/index_join.go @@ -470,9 +470,6 @@ func getIndexJoinBatchSize( } // NewColIndexJoin creates a new ColIndexJoin operator. -// -// If spec.MaintainOrdering is true, or spec.SplitFamilyIDs has more than one -// family, then the diskMonitor argument must be non-nil. func NewColIndexJoin( ctx context.Context, allocator *colmem.Allocator, @@ -530,6 +527,9 @@ func NewColIndexJoin( // single SQL row are contiguous we must ask the streamer to preserve // the ordering. See #113013 for an example. 
maintainOrdering := spec.MaintainOrdering || len(spec.SplitFamilyIDs) > 1 + if flowCtx.EvalCtx.SessionData().StreamerAlwaysMaintainOrdering { + maintainOrdering = true + } if maintainOrdering && diskMonitor == nil { return nil, errors.AssertionFailedf("diskMonitor is nil when ordering needs to be maintained") } diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index 116dadaeb7fa..d16cd73c8601 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -3415,6 +3415,10 @@ func (m *sessionDataMutator) SetStreamerEnabled(val bool) { m.data.StreamerEnabled = val } +func (m *sessionDataMutator) SetStreamerAlwaysMaintainOrdering(val bool) { + m.data.StreamerAlwaysMaintainOrdering = val +} + func (m *sessionDataMutator) SetUnboundedParallelScans(val bool) { m.data.UnboundedParallelScans = val } diff --git a/pkg/sql/logictest/testdata/logic_test/information_schema b/pkg/sql/logictest/testdata/logic_test/information_schema index 24e067dad696..94366c085fe2 100644 --- a/pkg/sql/logictest/testdata/logic_test/information_schema +++ b/pkg/sql/logictest/testdata/logic_test/information_schema @@ -4836,6 +4836,7 @@ sql_safe_updates off ssl_renegotiation_limit 0 standard_conforming_strings on statement_timeout 0 +streamer_always_maintain_ordering off streamer_enabled on stub_catalog_tables on synchronize_seqscans on diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index 756297708fc0..29c201e7d265 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -2878,6 +2878,7 @@ show_primary_key_constraint_on_not_visible_columns on N sql_safe_updates off NULL NULL NULL string standard_conforming_strings on NULL NULL NULL string statement_timeout 0 NULL NULL NULL string +streamer_always_maintain_ordering off NULL NULL NULL string streamer_enabled on NULL NULL NULL string stub_catalog_tables on NULL NULL NULL string synchronize_seqscans on NULL NULL NULL 
string @@ -3027,6 +3028,7 @@ show_primary_key_constraint_on_not_visible_columns on N sql_safe_updates off NULL user NULL off off standard_conforming_strings on NULL user NULL on on statement_timeout 0 NULL user NULL 0s 0s +streamer_always_maintain_ordering off NULL user NULL off off streamer_enabled on NULL user NULL on on stub_catalog_tables on NULL user NULL on on synchronize_seqscans on NULL user NULL on on @@ -3175,6 +3177,7 @@ show_primary_key_constraint_on_not_visible_columns NULL NULL NULL sql_safe_updates NULL NULL NULL NULL NULL standard_conforming_strings NULL NULL NULL NULL NULL statement_timeout NULL NULL NULL NULL NULL +streamer_always_maintain_ordering NULL NULL NULL NULL NULL streamer_enabled NULL NULL NULL NULL NULL stub_catalog_tables NULL NULL NULL NULL NULL synchronize_seqscans NULL NULL NULL NULL NULL diff --git a/pkg/sql/logictest/testdata/logic_test/show_source b/pkg/sql/logictest/testdata/logic_test/show_source index 6be3d2612cdf..d2b09a3febb9 100644 --- a/pkg/sql/logictest/testdata/logic_test/show_source +++ b/pkg/sql/logictest/testdata/logic_test/show_source @@ -139,6 +139,7 @@ show_primary_key_constraint_on_not_visible_columns on sql_safe_updates off standard_conforming_strings on statement_timeout 0 +streamer_always_maintain_ordering off streamer_enabled on stub_catalog_tables on synchronize_seqscans on diff --git a/pkg/sql/rowexec/joinreader.go b/pkg/sql/rowexec/joinreader.go index c6fb3d34fea7..6fbadf171eb1 100644 --- a/pkg/sql/rowexec/joinreader.go +++ b/pkg/sql/rowexec/joinreader.go @@ -517,6 +517,9 @@ func newJoinReader( jr.streamerInfo.maintainOrdering = true } } + if jr.FlowCtx.EvalCtx.SessionData().StreamerAlwaysMaintainOrdering { + jr.streamerInfo.maintainOrdering = true + } var diskBuffer kvstreamer.ResultDiskBuffer if jr.streamerInfo.maintainOrdering { diff --git a/pkg/sql/sessiondatapb/session_data.proto b/pkg/sql/sessiondatapb/session_data.proto index d2f9e2b86a24..ed2d488ef763 100644 --- 
a/pkg/sql/sessiondatapb/session_data.proto +++ b/pkg/sql/sessiondatapb/session_data.proto @@ -104,6 +104,13 @@ message SessionData { // ColIndexJoin operator (when it is using the Streamer API) to construct a // single lookup KV batch. int64 index_join_streamer_batch_size = 24; + // StreamerAlwaysMaintainOrdering indicates that the SQL users of the Streamer + // should always ask it to maintain the ordering, even when it might not be + // strictly necessary for the query. + // + // This session variable is introduced as a possible workaround in case we + // have more bugs like #113013. + bool streamer_always_maintain_ordering = 27; } // DataConversionConfig contains the parameters that influence the output diff --git a/pkg/sql/vars.go b/pkg/sql/vars.go index ce021a274a82..a31303f27370 100644 --- a/pkg/sql/vars.go +++ b/pkg/sql/vars.go @@ -2493,6 +2493,23 @@ var varGen = map[string]sessionVar{ }, }, + // CockroachDB extension. + `streamer_always_maintain_ordering`: { + GetStringVal: makePostgresBoolGetStringValFn(`streamer_always_maintain_ordering`), + Set: func(_ context.Context, m sessionDataMutator, s string) error { + b, err := paramparse.ParseBoolVar("streamer_always_maintain_ordering", s) + if err != nil { + return err + } + m.SetStreamerAlwaysMaintainOrdering(b) + return nil + }, + Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) { + return formatBoolAsPostgresSetting(evalCtx.SessionData().StreamerAlwaysMaintainOrdering), nil + }, + GlobalDefault: globalFalse, + }, + // CockroachDB extension. 
`unbounded_parallel_scans`: { GetStringVal: makePostgresBoolGetStringValFn(`unbounded_parallel_scans`), From 3b9ce722cc377c87ec9cd389364e7b10c593c34e Mon Sep 17 00:00:00 2001 From: Drew Kimball Date: Wed, 25 Oct 2023 21:30:22 +0000 Subject: [PATCH 4/4] opt: add setting for provided ordering fix This patch adds a new session setting, `optimizer_use_provided_ordering_fix`, which toggles whether to use the `finalizeProvided` function that ensures a provided ordering can be proven to respect the required ordering. This setting is on by default, and will be used when backporting #100776. Informs #113072 Release note: None --- pkg/sql/exec_util.go | 4 ++++ .../testdata/logic_test/information_schema | 1 + .../logictest/testdata/logic_test/pg_catalog | 3 +++ .../logictest/testdata/logic_test/show_source | 1 + pkg/sql/opt/memo/memo.go | 5 ++++- pkg/sql/opt/memo/memo_test.go | 6 ++++++ pkg/sql/opt/optgen/exprgen/expr_gen.go | 2 +- pkg/sql/opt/ordering/BUILD.bazel | 1 + pkg/sql/opt/ordering/ordering.go | 9 +++++++-- pkg/sql/opt/testutils/opttester/opt_tester.go | 1 + pkg/sql/opt/xform/optimizer.go | 2 +- .../sessiondatapb/local_only_session_data.proto | 5 +++++ pkg/sql/vars.go | 17 +++++++++++++++++ 13 files changed, 52 insertions(+), 5 deletions(-) diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index 7f026550f504..116dadaeb7fa 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -3423,6 +3423,10 @@ func (m *sessionDataMutator) SetAllowRoleMembershipsToChangeDuringTransaction(va m.data.AllowRoleMembershipsToChangeDuringTransaction = val } +func (m *sessionDataMutator) SetOptimizerUseProvidedOrderingFix(val bool) { + m.data.OptimizerUseProvidedOrderingFix = val +} + // Utility functions related to scrubbing sensitive information on SQL Stats. 
// quantizeCounts ensures that the Count field in the diff --git a/pkg/sql/logictest/testdata/logic_test/information_schema b/pkg/sql/logictest/testdata/logic_test/information_schema index bd4398963200..24e067dad696 100644 --- a/pkg/sql/logictest/testdata/logic_test/information_schema +++ b/pkg/sql/logictest/testdata/logic_test/information_schema @@ -4810,6 +4810,7 @@ optimizer_use_improved_split_disjunction_for_joins off optimizer_use_limit_ordering_for_streaming_group_by off optimizer_use_multicol_stats on optimizer_use_not_visible_indexes off +optimizer_use_provided_ordering_fix on override_multi_region_zone_config off parallelize_multi_key_lookup_joins_enabled off password_encryption scram-sha-256 diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index 96b8132faa80..756297708fc0 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -2855,6 +2855,7 @@ optimizer_use_improved_split_disjunction_for_joins off N optimizer_use_limit_ordering_for_streaming_group_by off NULL NULL NULL string optimizer_use_multicol_stats on NULL NULL NULL string optimizer_use_not_visible_indexes off NULL NULL NULL string +optimizer_use_provided_ordering_fix on NULL NULL NULL string override_multi_region_zone_config off NULL NULL NULL string parallelize_multi_key_lookup_joins_enabled off NULL NULL NULL string password_encryption scram-sha-256 NULL NULL NULL string @@ -3003,6 +3004,7 @@ optimizer_use_improved_split_disjunction_for_joins off N optimizer_use_limit_ordering_for_streaming_group_by off NULL user NULL off off optimizer_use_multicol_stats on NULL user NULL on on optimizer_use_not_visible_indexes off NULL user NULL off off +optimizer_use_provided_ordering_fix on NULL user NULL on on override_multi_region_zone_config off NULL user NULL off off parallelize_multi_key_lookup_joins_enabled off NULL user NULL false false password_encryption scram-sha-256 NULL 
user NULL scram-sha-256 scram-sha-256 @@ -3149,6 +3151,7 @@ optimizer_use_improved_split_disjunction_for_joins NULL NULL NULL optimizer_use_limit_ordering_for_streaming_group_by NULL NULL NULL NULL NULL optimizer_use_multicol_stats NULL NULL NULL NULL NULL optimizer_use_not_visible_indexes NULL NULL NULL NULL NULL +optimizer_use_provided_ordering_fix NULL NULL NULL NULL NULL override_multi_region_zone_config NULL NULL NULL NULL NULL parallelize_multi_key_lookup_joins_enabled NULL NULL NULL NULL NULL password_encryption NULL NULL NULL NULL NULL diff --git a/pkg/sql/logictest/testdata/logic_test/show_source b/pkg/sql/logictest/testdata/logic_test/show_source index 9fd9b1e2bfcf..6be3d2612cdf 100644 --- a/pkg/sql/logictest/testdata/logic_test/show_source +++ b/pkg/sql/logictest/testdata/logic_test/show_source @@ -116,6 +116,7 @@ optimizer_use_improved_split_disjunction_for_joins off optimizer_use_limit_ordering_for_streaming_group_by off optimizer_use_multicol_stats on optimizer_use_not_visible_indexes off +optimizer_use_provided_ordering_fix on override_multi_region_zone_config off parallelize_multi_key_lookup_joins_enabled off password_encryption scram-sha-256 diff --git a/pkg/sql/opt/memo/memo.go b/pkg/sql/opt/memo/memo.go index 146b647675ce..a2a654d4250e 100644 --- a/pkg/sql/opt/memo/memo.go +++ b/pkg/sql/opt/memo/memo.go @@ -165,6 +165,7 @@ type Memo struct { alwaysUseHistograms bool hoistUncorrelatedEqualitySubqueries bool useImprovedComputedColumnFiltersDerivation bool + useProvidedOrderingFix bool // curRank is the highest currently in-use scalar expression rank. 
curRank opt.ScalarRank @@ -225,6 +226,7 @@ func (m *Memo) Init(evalCtx *eval.Context) { alwaysUseHistograms: evalCtx.SessionData().OptimizerAlwaysUseHistograms, hoistUncorrelatedEqualitySubqueries: evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries, useImprovedComputedColumnFiltersDerivation: evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation, + useProvidedOrderingFix: evalCtx.SessionData().OptimizerUseProvidedOrderingFix, } m.metadata.Init() m.logPropsBuilder.init(evalCtx, m) @@ -368,7 +370,8 @@ func (m *Memo) IsStale( m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins || m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms || m.hoistUncorrelatedEqualitySubqueries != evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries || - m.useImprovedComputedColumnFiltersDerivation != evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation { + m.useImprovedComputedColumnFiltersDerivation != evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation || + m.useProvidedOrderingFix != evalCtx.SessionData().OptimizerUseProvidedOrderingFix { return true, nil } diff --git a/pkg/sql/opt/memo/memo_test.go b/pkg/sql/opt/memo/memo_test.go index 66cbb367682f..b541d8d8b5e2 100644 --- a/pkg/sql/opt/memo/memo_test.go +++ b/pkg/sql/opt/memo/memo_test.go @@ -358,6 +358,12 @@ func TestMemoIsStale(t *testing.T) { evalCtx.SessionData().OptimizerUseImprovedComputedColumnFiltersDerivation = false notStale() + // Stale optimizer_use_provided_ordering_fix. + evalCtx.SessionData().OptimizerUseProvidedOrderingFix = true + stale() + evalCtx.SessionData().OptimizerUseProvidedOrderingFix = false + notStale() + // User no longer has access to view. 
catalog.View(tree.NewTableNameWithSchema("t", tree.PublicSchemaName, "abcview")).Revoked = true _, err = o.Memo().IsStale(ctx, &evalCtx, catalog) diff --git a/pkg/sql/opt/optgen/exprgen/expr_gen.go b/pkg/sql/opt/optgen/exprgen/expr_gen.go index 1759cd5e7e99..fe53a7dd09d5 100644 --- a/pkg/sql/opt/optgen/exprgen/expr_gen.go +++ b/pkg/sql/opt/optgen/exprgen/expr_gen.go @@ -400,7 +400,7 @@ func (eg *exprGen) populateBestProps(expr opt.Expr, required *physical.Required) provided := &physical.Provided{} // BuildProvided relies on ProvidedPhysical() being set in the children, so // it must run after the recursive calls on the children. - provided.Ordering = ordering.BuildProvided(rel, &required.Ordering) + provided.Ordering = ordering.BuildProvided(eg.f.EvalContext(), rel, &required.Ordering) provided.Distribution = distribution.BuildProvided(eg.f.EvalContext(), rel, &required.Distribution) cost += eg.coster.ComputeCost(rel, required) diff --git a/pkg/sql/opt/ordering/BUILD.bazel b/pkg/sql/opt/ordering/BUILD.bazel index 489e17a4b8c2..89a0d1cf73e7 100644 --- a/pkg/sql/opt/ordering/BUILD.bazel +++ b/pkg/sql/opt/ordering/BUILD.bazel @@ -31,6 +31,7 @@ go_library( "//pkg/sql/opt/cat", "//pkg/sql/opt/memo", "//pkg/sql/opt/props", + "//pkg/sql/sem/eval", "//pkg/sql/sem/tree", "//pkg/util/buildutil", "@com_github_cockroachdb_errors//:errors", diff --git a/pkg/sql/opt/ordering/ordering.go b/pkg/sql/opt/ordering/ordering.go index ca88e96d8e36..32fc6849feb9 100644 --- a/pkg/sql/opt/ordering/ordering.go +++ b/pkg/sql/opt/ordering/ordering.go @@ -14,6 +14,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt" "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" "github.com/cockroachdb/cockroach/pkg/sql/opt/props" + "github.com/cockroachdb/cockroach/pkg/sql/sem/eval" "github.com/cockroachdb/cockroach/pkg/util/buildutil" "github.com/cockroachdb/errors" "github.com/cockroachdb/redact" @@ -83,12 +84,16 @@ func BuildChildRequired( // // This function assumes that the provided 
orderings have already been set in // the children of the expression. -func BuildProvided(expr memo.RelExpr, required *props.OrderingChoice) opt.Ordering { +func BuildProvided( + evalCtx *eval.Context, expr memo.RelExpr, required *props.OrderingChoice, +) opt.Ordering { if required.Any() { return nil } provided := funcMap[expr.Op()].buildProvidedOrdering(expr, required) - provided = finalizeProvided(provided, required, expr.Relational().OutputCols) + if evalCtx.SessionData().OptimizerUseProvidedOrderingFix { + provided = finalizeProvided(provided, required, expr.Relational().OutputCols) + } if buildutil.CrdbTestBuild { checkProvided(expr, required, provided) diff --git a/pkg/sql/opt/testutils/opttester/opt_tester.go b/pkg/sql/opt/testutils/opttester/opt_tester.go index ab0b0d6fef2e..9b7eb39a1073 100644 --- a/pkg/sql/opt/testutils/opttester/opt_tester.go +++ b/pkg/sql/opt/testutils/opttester/opt_tester.go @@ -292,6 +292,7 @@ func New(catalog cat.Catalog, sql string) *OptTester { ot.evalCtx.SessionData().OptSplitScanLimit = tabledesc.MaxBucketAllowed ot.evalCtx.SessionData().VariableInequalityLookupJoinEnabled = true ot.evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins = true + ot.evalCtx.SessionData().OptimizerUseProvidedOrderingFix = true return ot } diff --git a/pkg/sql/opt/xform/optimizer.go b/pkg/sql/opt/xform/optimizer.go index 50ac5fe7dca5..8c537dbd1ce7 100644 --- a/pkg/sql/opt/xform/optimizer.go +++ b/pkg/sql/opt/xform/optimizer.go @@ -775,7 +775,7 @@ func (o *Optimizer) setLowestCostTree(parent opt.Expr, parentProps *physical.Req var provided physical.Provided // BuildProvided relies on ProvidedPhysical() being set in the children, so // it must run after the recursive calls on the children. 
- provided.Ordering = ordering.BuildProvided(relParent, &parentProps.Ordering) + provided.Ordering = ordering.BuildProvided(o.evalCtx, relParent, &parentProps.Ordering) provided.Distribution = distribution.BuildProvided(o.evalCtx, relParent, &parentProps.Distribution) o.mem.SetBestProps(relParent, parentProps, &provided, relCost) } diff --git a/pkg/sql/sessiondatapb/local_only_session_data.proto b/pkg/sql/sessiondatapb/local_only_session_data.proto index e8688208efc2..11fc373321da 100644 --- a/pkg/sql/sessiondatapb/local_only_session_data.proto +++ b/pkg/sql/sessiondatapb/local_only_session_data.proto @@ -356,6 +356,11 @@ message LocalOnlySessionData { // column involved a single column and that column was equated with a single // constant value in a WHERE clause filter. bool optimizer_use_improved_computed_column_filters_derivation = 104; + // OptimizerUseProvidedOrderingFix, when true, causes the optimizer to + // reconcile provided orderings with required ordering choices. This prevents + // internal errors due to incomplete functional dependencies, and also + // fixes a bug that incorrectly truncated the provided ordering (see #113072). + bool optimizer_use_provided_ordering_fix = 115; /////////////////////////////////////////////////////////////////////////// // WARNING: consider whether a session parameter you're adding needs to // diff --git a/pkg/sql/vars.go b/pkg/sql/vars.go index 8676d7e7a570..ce021a274a82 100644 --- a/pkg/sql/vars.go +++ b/pkg/sql/vars.go @@ -2526,6 +2526,23 @@ var varGen = map[string]sessionVar{ }, GlobalDefault: globalFalse, }, + + // CockroachDB extension. 
+ `optimizer_use_provided_ordering_fix`: { + GetStringVal: makePostgresBoolGetStringValFn(`optimizer_use_provided_ordering_fix`), + Set: func(_ context.Context, m sessionDataMutator, s string) error { + b, err := paramparse.ParseBoolVar("optimizer_use_provided_ordering_fix", s) + if err != nil { + return err + } + m.SetOptimizerUseProvidedOrderingFix(b) + return nil + }, + Get: func(evalCtx *extendedEvalContext, _ *kv.Txn) (string, error) { + return formatBoolAsPostgresSetting(evalCtx.SessionData().OptimizerUseProvidedOrderingFix), nil + }, + GlobalDefault: globalTrue, + }, } // We want test coverage for this on and off so make it metamorphic.