From b3c4d9e3e064a7dbf704d254b39ca39b142b775d Mon Sep 17 00:00:00 2001 From: Andrew Kimball Date: Fri, 20 Apr 2018 20:00:04 -0400 Subject: [PATCH 1/3] opt: Add OuterCols to relational properties Now that we have support for subqueries, column references to outer relations are possible. This commit calculates the OuterCols logical property for the various relational operators. Release note: None --- pkg/sql/opt/memo/expr_view.go | 4 + pkg/sql/opt/memo/logical_props.go | 2 - pkg/sql/opt/memo/logical_props_factory.go | 93 ++++++++-- .../opt/memo/logical_props_factory_test.go | 16 ++ pkg/sql/opt/memo/testdata/logprops/groupby | 174 ++++++++++++------ pkg/sql/opt/memo/testdata/logprops/join | 152 +++++++++------ pkg/sql/opt/memo/testdata/logprops/limit | 68 +++++-- pkg/sql/opt/memo/testdata/logprops/offset | 109 +++++++++++ pkg/sql/opt/memo/testdata/logprops/project | 87 +++++++-- pkg/sql/opt/memo/testdata/logprops/select | 102 ++++++---- pkg/sql/opt/memo/testdata/logprops/set | 159 +++++++++++----- pkg/sql/opt/memo/testdata/logprops/values | 38 ++++ pkg/sql/opt/memo/testdata/stats/limit | 2 +- pkg/sql/opt/xform/testdata/rules/limit | 2 +- 14 files changed, 770 insertions(+), 238 deletions(-) create mode 100644 pkg/sql/opt/memo/testdata/logprops/offset diff --git a/pkg/sql/opt/memo/expr_view.go b/pkg/sql/opt/memo/expr_view.go index a842747cce1a..ce6e9eff7418 100644 --- a/pkg/sql/opt/memo/expr_view.go +++ b/pkg/sql/opt/memo/expr_view.go @@ -333,6 +333,10 @@ func (ev ExprView) formatRelational(tp treeprinter.Node, flags ExprFmtFlags) { } } + if !flags.HasFlags(ExprFmtHideOuterCols) && !logProps.Relational.OuterCols.Empty() { + tp.Childf("outer: %s", logProps.Relational.OuterCols.String()) + } + if !flags.HasFlags(ExprFmtHideStats) { ev.formatStats(tp, &logProps.Relational.Stats) } diff --git a/pkg/sql/opt/memo/logical_props.go b/pkg/sql/opt/memo/logical_props.go index 019b85f0917f..665ed58c42be 100644 --- a/pkg/sql/opt/memo/logical_props.go +++ 
b/pkg/sql/opt/memo/logical_props.go @@ -90,8 +90,6 @@ type RelationalProps struct { // SELECT expression binds the b.x and b.y references, so they are not // part of the outer column set. The outer SELECT binds the a.x column, and // so its outer column set is empty. - // - // TODO(andyk): populate this when we have subquery support OuterCols opt.ColSet // Stats is the set of statistics that apply to this relational expression. diff --git a/pkg/sql/opt/memo/logical_props_factory.go b/pkg/sql/opt/memo/logical_props_factory.go index 3be2e111dfe5..c2a165c0b235 100644 --- a/pkg/sql/opt/memo/logical_props_factory.go +++ b/pkg/sql/opt/memo/logical_props_factory.go @@ -75,7 +75,7 @@ func (f logicalPropsFactory) constructRelationalProps(ev ExprView) LogicalProps return f.constructLimitProps(ev) case opt.OffsetOp: - return f.passThroughRelationalProps(ev, 0 /* childIdx */) + return f.constructOffsetProps(ev) case opt.Max1RowOp: return f.constructMax1RowProps(ev) @@ -117,10 +117,19 @@ func (f logicalPropsFactory) constructSelectProps(ev ExprView) LogicalProps { props := LogicalProps{Relational: &RelationalProps{}} inputProps := ev.childGroup(0).logical.Relational + filterProps := ev.childGroup(1).logical.Scalar // Inherit input properties as starting point. *props.Relational = *inputProps + // Any outer columns from the filter that are not bound by the input columns + // are outer columns for the Select expression, in addition to any outer + // columns inherited from the input expression. 
+ if !filterProps.OuterCols.SubsetOf(inputProps.OutputCols) { + props.Relational.OuterCols = filterProps.OuterCols.Difference(inputProps.OutputCols) + props.Relational.OuterCols.UnionWith(inputProps.OuterCols) + } + props.Relational.Stats.initSelect(f.evalCtx, ev.Child(1), &inputProps.Stats) return props @@ -130,6 +139,7 @@ func (f logicalPropsFactory) constructProjectProps(ev ExprView) LogicalProps { props := LogicalProps{Relational: &RelationalProps{}} inputProps := ev.childGroup(0).logical.Relational + projectionProps := ev.childGroup(1).logical.Scalar // Use output columns from projection list. props.Relational.OutputCols = opt.ColListToSet(ev.Child(1).Private().(opt.ColList)) @@ -139,8 +149,13 @@ func (f logicalPropsFactory) constructProjectProps(ev ExprView) LogicalProps { props.Relational.NotNullCols = inputProps.NotNullCols filterNullCols(props.Relational) - // Inherit outer columns from input. - props.Relational.OuterCols = inputProps.OuterCols + // Any outer columns from the projection list that are not bound by the input + // columns are outer columns from the Project expression, in addition to any + // outer columns inherited from the input expression. + if !projectionProps.OuterCols.SubsetOf(inputProps.OutputCols) { + props.Relational.OuterCols = projectionProps.OuterCols.Difference(inputProps.OutputCols) + } + props.Relational.OuterCols.UnionWith(inputProps.OuterCols) // Inherit weak keys that are composed entirely of output columns. props.Relational.WeakKeys = inputProps.WeakKeys @@ -156,6 +171,7 @@ func (f logicalPropsFactory) constructJoinProps(ev ExprView) LogicalProps { leftProps := ev.childGroup(0).logical.Relational rightProps := ev.childGroup(1).logical.Relational + onProps := ev.childGroup(2).logical.Scalar // Output columns are union of columns from left and right inputs, except // in case of semi and anti joins, which only project the left columns. 
@@ -186,6 +202,24 @@ func (f logicalPropsFactory) constructJoinProps(ev ExprView) LogicalProps { props.Relational.NotNullCols.UnionWith(leftProps.NotNullCols) } + // Any outer columns from the filter that are not bound by the input columns + // are outer columns for the Join expression, in addition to any outer columns + // inherited from the input expressions. + inputCols := leftProps.OutputCols.Union(rightProps.OutputCols) + if !onProps.OuterCols.SubsetOf(inputCols) { + props.Relational.OuterCols = onProps.OuterCols.Difference(inputCols) + } + if ev.IsJoinApply() { + // Outer columns of right side of apply join can be bound by output + // columns of left side of apply join. + if !rightProps.OuterCols.SubsetOf(leftProps.OutputCols) { + props.Relational.OuterCols.UnionWith(rightProps.OuterCols.Difference(leftProps.OutputCols)) + } + } else { + props.Relational.OuterCols.UnionWith(rightProps.OuterCols) + } + props.Relational.OuterCols.UnionWith(leftProps.OuterCols) + // TODO(andyk): Need to derive weak keys for joins, for example when weak // keys on both sides are equivalent cols. @@ -200,6 +234,7 @@ func (f logicalPropsFactory) constructGroupByProps(ev ExprView) LogicalProps { props := LogicalProps{Relational: &RelationalProps{}} inputProps := ev.childGroup(0).logical.Relational + aggProps := ev.childGroup(1).logical.Scalar // Output columns are the union of grouping columns with columns from the // aggregate projection list. @@ -212,6 +247,12 @@ func (f logicalPropsFactory) constructGroupByProps(ev ExprView) LogicalProps { props.Relational.NotNullCols = inputProps.NotNullCols.Copy() props.Relational.NotNullCols.IntersectionWith(groupingColSet) + // Any outer columns from aggregation expressions that are not bound by the + // input columns are outer columns. 
+ props.Relational.OuterCols = aggProps.OuterCols.Copy() + props.Relational.OuterCols.DifferenceWith(inputProps.OutputCols) + props.Relational.OuterCols.UnionWith(inputProps.OuterCols) + // Scalar group by has no grouping columns and always a single row. if groupingColSet.Empty() { // Any combination of columns is a weak key when there is one row. @@ -263,6 +304,9 @@ func (f logicalPropsFactory) constructSetProps(ev ExprView) LogicalProps { } } + // Outer columns from either side are outer columns for set operation. + props.Relational.OuterCols = leftProps.OuterCols.Union(rightProps.OuterCols) + props.Relational.Stats.initSetOp(ev.Operator(), &leftProps.Stats, &rightProps.Stats, &colMap) return props @@ -274,6 +318,11 @@ func (f logicalPropsFactory) constructValuesProps(ev ExprView) LogicalProps { // Use output columns that are attached to the values op. props.Relational.OutputCols = opt.ColListToSet(ev.Private().(opt.ColList)) + // Union outer columns from all row expressions. + for i := 0; i < ev.ChildCount(); i++ { + props.Relational.OuterCols.UnionWith(ev.childGroup(i).logical.Scalar.OuterCols) + } + props.Relational.Stats.initValues(ev, &props.Relational.OutputCols) return props @@ -284,34 +333,49 @@ func (f logicalPropsFactory) constructLimitProps(ev ExprView) LogicalProps { inputProps := ev.Child(0).Logical().Relational limit := ev.Child(1) + limitProps := limit.Logical().Scalar // Start with pass-through props from input. *props.Relational = *inputProps + // Inherit outer columns from limit expression. 
+ if !limitProps.OuterCols.Empty() { + props.Relational.OuterCols = limitProps.OuterCols.Union(inputProps.OuterCols) + } + props.Relational.Stats.initLimit(limit, &inputProps.Stats) return props } -func (f logicalPropsFactory) constructMax1RowProps(ev ExprView) LogicalProps { +func (f logicalPropsFactory) constructOffsetProps(ev ExprView) LogicalProps { props := LogicalProps{Relational: &RelationalProps{}} inputProps := ev.Child(0).Logical().Relational + offsetProps := ev.Child(1).Logical().Scalar // Start with pass-through props from input. *props.Relational = *inputProps - props.Relational.Stats.initMax1Row(&inputProps.Stats) + // Inherit outer columns from offset expression. + if !offsetProps.OuterCols.Empty() { + props.Relational.OuterCols = offsetProps.OuterCols.Union(inputProps.OuterCols) + } return props } -// passThroughRelationalProps returns the relational properties of the given -// child group. -func (f logicalPropsFactory) passThroughRelationalProps(ev ExprView, childIdx int) LogicalProps { - // Properties are immutable after construction, so just inherit relational - // props pointer from child. - return LogicalProps{Relational: ev.childGroup(childIdx).logical.Relational} +func (f logicalPropsFactory) constructMax1RowProps(ev ExprView) LogicalProps { + props := LogicalProps{Relational: &RelationalProps{}} + + inputProps := ev.Child(0).Logical().Relational + + // Start with pass-through props from input. + *props.Relational = *inputProps + + props.Relational.Stats.initMax1Row(&inputProps.Stats) + + return props } func (f logicalPropsFactory) constructScalarProps(ev ExprView) LogicalProps { @@ -322,6 +386,13 @@ func (f logicalPropsFactory) constructScalarProps(ev ExprView) LogicalProps { // Variable introduces outer column. 
props.Scalar.OuterCols.Add(int(ev.Private().(opt.ColumnID))) return props + + case opt.SubqueryOp: + inputProps := ev.childGroup(0).logical.Relational + projectionProps := ev.childGroup(1).logical.Scalar + props.Scalar.OuterCols = projectionProps.OuterCols.Difference(inputProps.OutputCols) + props.Scalar.OuterCols.UnionWith(inputProps.OuterCols) + return props } // By default, union outer cols from all children, both relational and scalar. diff --git a/pkg/sql/opt/memo/logical_props_factory_test.go b/pkg/sql/opt/memo/logical_props_factory_test.go index 90aefcfc3418..fd0552c14386 100644 --- a/pkg/sql/opt/memo/logical_props_factory_test.go +++ b/pkg/sql/opt/memo/logical_props_factory_test.go @@ -24,6 +24,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt/norm" "github.com/cockroachdb/cockroach/pkg/sql/opt/testutils" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/sql/sem/types" ) func TestLogicalPropsFactory(t *testing.T) { @@ -69,6 +70,21 @@ func TestLogicalJoinProps(t *testing.T) { joinFunc(opt.SemiJoinApplyOp, "a.x:1(int!null) a.y:2(int)") joinFunc(opt.AntiJoinOp, "a.x:1(int!null) a.y:2(int)") joinFunc(opt.AntiJoinApplyOp, "a.x:1(int!null) a.y:2(int)") + + // Ensure that OuterCols that refer to outer relation of apply join do not + // become OuterCols of the join (i.e. that they are bound). 
+ // (ApplyInnerJoin (Scan a) (Values (Tuple (Variable a.x)))) + leftGroup := f.ConstructScan(f.InternScanOpDef(constructScanOpDef(f.Metadata(), a))) + varGroup := f.ConstructVariable(f.InternColumnID(f.Metadata().TableColumn(a, 0))) + tupleGroup := f.ConstructTuple(f.InternList([]memo.GroupID{varGroup})) + rows := f.InternList([]memo.GroupID{tupleGroup}) + cols := f.InternColList(opt.ColList{f.Metadata().AddColumn("column1", types.Int)}) + valuesGroup := f.ConstructValues(rows, cols) + joinGroup := f.ConstructInnerJoinApply(leftGroup, valuesGroup, f.ConstructTrue()) + + if !f.Memo().GroupProperties(joinGroup).Relational.OuterCols.Empty() { + t.Fatalf("expected outer columns to be empty on apply join group") + } } func constructScanOpDef(md *opt.Metadata, tabID opt.TableID) *memo.ScanOpDef { diff --git a/pkg/sql/opt/memo/testdata/logprops/groupby b/pkg/sql/opt/memo/testdata/logprops/groupby index 89c203a89115..359d004991aa 100644 --- a/pkg/sql/opt/memo/testdata/logprops/groupby +++ b/pkg/sql/opt/memo/testdata/logprops/groupby @@ -1,7 +1,7 @@ exec-ddl -CREATE TABLE a (x INT PRIMARY KEY, y INT, z FLOAT NOT NULL, s STRING, UNIQUE (s DESC, z)) +CREATE TABLE xyzs (x INT PRIMARY KEY, y INT, z FLOAT NOT NULL, s STRING, UNIQUE (s DESC, z)) ---- -TABLE a +TABLE xyzs ├── x int not null ├── y int ├── z float not null @@ -13,160 +13,224 @@ TABLE a ├── z float not null └── x int not null (storing) +exec-ddl +CREATE TABLE kuv (k INT PRIMARY KEY, u FLOAT, v STRING) +---- +TABLE kuv + ├── k int not null + ├── u float + ├── v string + └── INDEX primary + └── k int not null + build -SELECT a.y, SUM(a.z), a.x, False FROM a GROUP BY a.x, a.y +SELECT y, SUM(z), x, False FROM xyzs GROUP BY x, y ---- project ├── columns: y:2(int) column5:5(float) x:1(int!null) column6:6(bool) ├── stats: [rows=100] ├── keys: (1) ├── group-by - │ ├── columns: a.x:1(int!null) a.y:2(int) column5:5(float) - │ ├── grouping columns: a.x:1(int!null) a.y:2(int) + │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) 
column5:5(float) + │ ├── grouping columns: xyzs.x:1(int!null) xyzs.y:2(int) │ ├── stats: [rows=100] │ ├── keys: (1) │ ├── project - │ │ ├── columns: a.x:1(int!null) a.y:2(int) a.z:3(float!null) + │ │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) │ │ ├── stats: [rows=1000] │ │ ├── keys: (1) - │ │ ├── scan a - │ │ │ ├── columns: a.x:1(int!null) a.y:2(int) a.z:3(float!null) a.s:4(string) + │ │ ├── scan xyzs + │ │ │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) │ │ │ ├── stats: [rows=1000] │ │ │ └── keys: (1) weak(3,4) │ │ └── projections [outer=(1-3)] - │ │ ├── variable: a.x [type=int, outer=(1)] - │ │ ├── variable: a.y [type=int, outer=(2)] - │ │ └── variable: a.z [type=float, outer=(3)] + │ │ ├── variable: xyzs.x [type=int, outer=(1)] + │ │ ├── variable: xyzs.y [type=int, outer=(2)] + │ │ └── variable: xyzs.z [type=float, outer=(3)] │ └── aggregations [outer=(3)] │ └── function: sum [type=float, outer=(3)] - │ └── variable: a.z [type=float, outer=(3)] + │ └── variable: xyzs.z [type=float, outer=(3)] └── projections [outer=(1,2,5)] - ├── variable: a.y [type=int, outer=(2)] + ├── variable: xyzs.y [type=int, outer=(2)] ├── variable: column5 [type=float, outer=(5)] - ├── variable: a.x [type=int, outer=(1)] + ├── variable: xyzs.x [type=int, outer=(1)] └── false [type=bool] # Scalar groupby. 
build -SELECT SUM(a.x), MAX(a.y) FROM a +SELECT SUM(x), MAX(y) FROM xyzs ---- group-by ├── columns: column5:5(decimal) column6:6(int) ├── stats: [rows=1] ├── keys: () ├── project - │ ├── columns: a.x:1(int!null) a.y:2(int) + │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) │ ├── stats: [rows=1000] │ ├── keys: (1) - │ ├── scan a - │ │ ├── columns: a.x:1(int!null) a.y:2(int) a.z:3(float!null) a.s:4(string) + │ ├── scan xyzs + │ │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) │ │ ├── stats: [rows=1000] │ │ └── keys: (1) weak(3,4) │ └── projections [outer=(1,2)] - │ ├── variable: a.x [type=int, outer=(1)] - │ └── variable: a.y [type=int, outer=(2)] + │ ├── variable: xyzs.x [type=int, outer=(1)] + │ └── variable: xyzs.y [type=int, outer=(2)] └── aggregations [outer=(1,2)] ├── function: sum [type=decimal, outer=(1)] - │ └── variable: a.x [type=int, outer=(1)] + │ └── variable: xyzs.x [type=int, outer=(1)] └── function: max [type=int, outer=(2)] - └── variable: a.y [type=int, outer=(2)] + └── variable: xyzs.y [type=int, outer=(2)] # Group by unique index columns. 
build -SELECT s FROM a GROUP BY z, s +SELECT s FROM xyzs GROUP BY z, s ---- project ├── columns: s:4(string) ├── stats: [rows=100] ├── group-by - │ ├── columns: a.z:3(float!null) a.s:4(string) - │ ├── grouping columns: a.z:3(float!null) a.s:4(string) + │ ├── columns: xyzs.z:3(float!null) xyzs.s:4(string) + │ ├── grouping columns: xyzs.z:3(float!null) xyzs.s:4(string) │ ├── stats: [rows=100] │ ├── keys: weak(3,4) │ ├── project - │ │ ├── columns: a.z:3(float!null) a.s:4(string) + │ │ ├── columns: xyzs.z:3(float!null) xyzs.s:4(string) │ │ ├── stats: [rows=1000] │ │ ├── keys: weak(3,4) - │ │ ├── scan a - │ │ │ ├── columns: a.x:1(int!null) a.y:2(int) a.z:3(float!null) a.s:4(string) + │ │ ├── scan xyzs + │ │ │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) │ │ │ ├── stats: [rows=1000] │ │ │ └── keys: (1) weak(3,4) │ │ └── projections [outer=(3,4)] - │ │ ├── variable: a.z [type=float, outer=(3)] - │ │ └── variable: a.s [type=string, outer=(4)] + │ │ ├── variable: xyzs.z [type=float, outer=(3)] + │ │ └── variable: xyzs.s [type=string, outer=(4)] │ └── aggregations └── projections [outer=(4)] - └── variable: a.s [type=string, outer=(4)] + └── variable: xyzs.s [type=string, outer=(4)] # Group by columns that otherwise wouldn't be weak key. 
build -SELECT y, SUM(z) FROM a GROUP BY z, y +SELECT y, SUM(z) FROM xyzs GROUP BY z, y ---- project ├── columns: y:2(int) column5:5(float) ├── stats: [rows=100] ├── group-by - │ ├── columns: a.y:2(int) a.z:3(float!null) column5:5(float) - │ ├── grouping columns: a.y:2(int) a.z:3(float!null) + │ ├── columns: xyzs.y:2(int) xyzs.z:3(float!null) column5:5(float) + │ ├── grouping columns: xyzs.y:2(int) xyzs.z:3(float!null) │ ├── stats: [rows=100] │ ├── keys: weak(2,3) │ ├── project - │ │ ├── columns: a.z:3(float!null) a.y:2(int) + │ │ ├── columns: xyzs.z:3(float!null) xyzs.y:2(int) │ │ ├── stats: [rows=1000] - │ │ ├── scan a - │ │ │ ├── columns: a.x:1(int!null) a.y:2(int) a.z:3(float!null) a.s:4(string) + │ │ ├── scan xyzs + │ │ │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) │ │ │ ├── stats: [rows=1000] │ │ │ └── keys: (1) weak(3,4) │ │ └── projections [outer=(2,3)] - │ │ ├── variable: a.z [type=float, outer=(3)] - │ │ └── variable: a.y [type=int, outer=(2)] + │ │ ├── variable: xyzs.z [type=float, outer=(3)] + │ │ └── variable: xyzs.y [type=int, outer=(2)] │ └── aggregations [outer=(3)] │ └── function: sum [type=float, outer=(3)] - │ └── variable: a.z [type=float, outer=(3)] + │ └── variable: xyzs.z [type=float, outer=(3)] └── projections [outer=(2,5)] - ├── variable: a.y [type=int, outer=(2)] + ├── variable: xyzs.y [type=int, outer=(2)] └── variable: column5 [type=float, outer=(5)] # Group by column that is subset of unique index. 
build -SELECT z, MAX(s) FROM a GROUP BY z +SELECT z, MAX(s) FROM xyzs GROUP BY z ---- group-by ├── columns: z:3(float!null) column5:5(string) - ├── grouping columns: a.z:3(float!null) + ├── grouping columns: xyzs.z:3(float!null) ├── stats: [rows=100] ├── keys: (3) ├── project - │ ├── columns: a.z:3(float!null) a.s:4(string) + │ ├── columns: xyzs.z:3(float!null) xyzs.s:4(string) │ ├── stats: [rows=1000] │ ├── keys: weak(3,4) - │ ├── scan a - │ │ ├── columns: a.x:1(int!null) a.y:2(int) a.z:3(float!null) a.s:4(string) + │ ├── scan xyzs + │ │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) │ │ ├── stats: [rows=1000] │ │ └── keys: (1) weak(3,4) │ └── projections [outer=(3,4)] - │ ├── variable: a.z [type=float, outer=(3)] - │ └── variable: a.s [type=string, outer=(4)] + │ ├── variable: xyzs.z [type=float, outer=(3)] + │ └── variable: xyzs.s [type=string, outer=(4)] └── aggregations [outer=(4)] └── function: max [type=string, outer=(4)] - └── variable: a.s [type=string, outer=(4)] + └── variable: xyzs.s [type=string, outer=(4)] # Group by all columns. 
build -SELECT s FROM a GROUP BY a.* +SELECT s FROM xyzs GROUP BY xyzs.* ---- project ├── columns: s:4(string) ├── stats: [rows=100] ├── group-by - │ ├── columns: a.x:1(int!null) a.y:2(int) a.z:3(float!null) a.s:4(string) - │ ├── grouping columns: a.x:1(int!null) a.y:2(int) a.z:3(float!null) a.s:4(string) + │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) + │ ├── grouping columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) │ ├── stats: [rows=100] │ ├── keys: (1) weak(3,4) - │ ├── scan a - │ │ ├── columns: a.x:1(int!null) a.y:2(int) a.z:3(float!null) a.s:4(string) + │ ├── scan xyzs + │ │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) │ │ ├── stats: [rows=1000] │ │ └── keys: (1) weak(3,4) │ └── aggregations └── projections [outer=(4)] - └── variable: a.s [type=string, outer=(4)] + └── variable: xyzs.s [type=string, outer=(4)] + +# Propagate outer columns. +build +SELECT * FROM xyzs WHERE (SELECT SUM(x) FROM (SELECT y, u FROM kuv) GROUP BY u) > 100 +---- +select + ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) + ├── stats: [rows=100] + ├── keys: (1) weak(3,4) + ├── scan xyzs + │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) + │ ├── stats: [rows=1000] + │ └── keys: (1) weak(3,4) + └── gt [type=bool, outer=(1,2)] + ├── subquery [type=decimal, outer=(1,2)] + │ ├── max1-row + │ │ ├── columns: column8:8(decimal) + │ │ ├── outer: (1,2) + │ │ ├── stats: [rows=1] + │ │ └── project + │ │ ├── columns: column8:8(decimal) + │ │ ├── outer: (1,2) + │ │ ├── stats: [rows=100] + │ │ ├── group-by + │ │ │ ├── columns: kuv.u:6(float) column8:8(decimal) + │ │ │ ├── grouping columns: kuv.u:6(float) + │ │ │ ├── outer: (1,2) + │ │ │ ├── stats: [rows=100] + │ │ │ ├── keys: weak(6) + │ │ │ ├── project + │ │ │ │ ├── columns: kuv.u:6(float) xyzs.x:1(int) + │ │ │ │ ├── outer: (1,2) + │ │ │ │ ├── stats: [rows=1000] + │ │ │ │ ├── project + │ │ │ 
│ │ ├── columns: xyzs.y:2(int) kuv.u:6(float) + │ │ │ │ │ ├── outer: (2) + │ │ │ │ │ ├── stats: [rows=1000] + │ │ │ │ │ ├── scan kuv + │ │ │ │ │ │ ├── columns: kuv.k:5(int!null) kuv.u:6(float) kuv.v:7(string) + │ │ │ │ │ │ ├── stats: [rows=1000] + │ │ │ │ │ │ └── keys: (5) + │ │ │ │ │ └── projections [outer=(2,6)] + │ │ │ │ │ ├── variable: xyzs.y [type=int, outer=(2)] + │ │ │ │ │ └── variable: kuv.u [type=float, outer=(6)] + │ │ │ │ └── projections [outer=(1,6)] + │ │ │ │ ├── variable: kuv.u [type=float, outer=(6)] + │ │ │ │ └── variable: xyzs.x [type=int, outer=(1)] + │ │ │ └── aggregations [outer=(1)] + │ │ │ └── function: sum [type=decimal, outer=(1)] + │ │ │ └── variable: xyzs.x [type=int, outer=(1)] + │ │ └── projections [outer=(8)] + │ │ └── variable: column8 [type=decimal, outer=(8)] + │ └── variable: column8 [type=decimal, outer=(8)] + └── const: 100 [type=decimal] diff --git a/pkg/sql/opt/memo/testdata/logprops/join b/pkg/sql/opt/memo/testdata/logprops/join index 4babb834700f..810a92abe912 100644 --- a/pkg/sql/opt/memo/testdata/logprops/join +++ b/pkg/sql/opt/memo/testdata/logprops/join @@ -1,7 +1,7 @@ exec-ddl -CREATE TABLE a (x INT PRIMARY KEY, y INT, s STRING, d DECIMAL NOT NULL, UNIQUE (s DESC, d)) +CREATE TABLE xysd (x INT PRIMARY KEY, y INT, s STRING, d DECIMAL NOT NULL, UNIQUE (s DESC, d)) ---- -TABLE a +TABLE xysd ├── x int not null ├── y int ├── s string @@ -14,125 +14,165 @@ TABLE a └── x int not null (storing) exec-ddl -CREATE TABLE b (x INT, z INT NOT NULL) +CREATE TABLE uv (u INT, v INT NOT NULL) ---- -TABLE b - ├── x int - ├── z int not null +TABLE uv + ├── u int + ├── v int not null ├── rowid int not null (hidden) └── INDEX primary └── rowid int not null (hidden) build -SELECT *, rowid FROM a INNER JOIN b ON a.x=b.x +SELECT *, rowid FROM xysd INNER JOIN uv ON x=u ---- inner-join - ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) x:5(int) z:6(int!null) rowid:7(int!null) + ├── columns: x:1(int!null) y:2(int) s:3(string) 
d:4(decimal!null) u:5(int) v:6(int!null) rowid:7(int!null) ├── stats: [rows=100000] - ├── scan a - │ ├── columns: a.x:1(int!null) a.y:2(int) a.s:3(string) a.d:4(decimal!null) + ├── scan xysd + │ ├── columns: xysd.x:1(int!null) xysd.y:2(int) xysd.s:3(string) xysd.d:4(decimal!null) │ ├── stats: [rows=1000] │ └── keys: (1) weak(3,4) - ├── scan b - │ ├── columns: b.x:5(int) b.z:6(int!null) b.rowid:7(int!null) + ├── scan uv + │ ├── columns: uv.u:5(int) uv.v:6(int!null) uv.rowid:7(int!null) │ ├── stats: [rows=1000] │ └── keys: (7) └── eq [type=bool, outer=(1,5)] - ├── variable: a.x [type=int, outer=(1)] - └── variable: b.x [type=int, outer=(5)] + ├── variable: xysd.x [type=int, outer=(1)] + └── variable: uv.u [type=int, outer=(5)] build -SELECT *, rowid FROM a LEFT JOIN b ON a.x=b.x +SELECT *, rowid FROM xysd LEFT JOIN uv ON x=u ---- left-join - ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) x:5(int) z:6(int) rowid:7(int) + ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int) rowid:7(int) ├── stats: [rows=100000] - ├── scan a - │ ├── columns: a.x:1(int!null) a.y:2(int) a.s:3(string) a.d:4(decimal!null) + ├── scan xysd + │ ├── columns: xysd.x:1(int!null) xysd.y:2(int) xysd.s:3(string) xysd.d:4(decimal!null) │ ├── stats: [rows=1000] │ └── keys: (1) weak(3,4) - ├── scan b - │ ├── columns: b.x:5(int) b.z:6(int!null) b.rowid:7(int!null) + ├── scan uv + │ ├── columns: uv.u:5(int) uv.v:6(int!null) uv.rowid:7(int!null) │ ├── stats: [rows=1000] │ └── keys: (7) └── eq [type=bool, outer=(1,5)] - ├── variable: a.x [type=int, outer=(1)] - └── variable: b.x [type=int, outer=(5)] + ├── variable: xysd.x [type=int, outer=(1)] + └── variable: uv.u [type=int, outer=(5)] build -SELECT *, rowid FROM a RIGHT JOIN b ON a.x=b.x +SELECT *, rowid FROM xysd RIGHT JOIN uv ON x=u ---- right-join - ├── columns: x:1(int) y:2(int) s:3(string) d:4(decimal) x:5(int) z:6(int!null) rowid:7(int!null) + ├── columns: x:1(int) y:2(int) s:3(string) d:4(decimal) 
u:5(int) v:6(int!null) rowid:7(int!null) ├── stats: [rows=100000] - ├── scan a - │ ├── columns: a.x:1(int!null) a.y:2(int) a.s:3(string) a.d:4(decimal!null) + ├── scan xysd + │ ├── columns: xysd.x:1(int!null) xysd.y:2(int) xysd.s:3(string) xysd.d:4(decimal!null) │ ├── stats: [rows=1000] │ └── keys: (1) weak(3,4) - ├── scan b - │ ├── columns: b.x:5(int) b.z:6(int!null) b.rowid:7(int!null) + ├── scan uv + │ ├── columns: uv.u:5(int) uv.v:6(int!null) uv.rowid:7(int!null) │ ├── stats: [rows=1000] │ └── keys: (7) └── eq [type=bool, outer=(1,5)] - ├── variable: a.x [type=int, outer=(1)] - └── variable: b.x [type=int, outer=(5)] + ├── variable: xysd.x [type=int, outer=(1)] + └── variable: uv.u [type=int, outer=(5)] build -SELECT *, rowid FROM a FULL JOIN b ON a.x=b.x +SELECT *, rowid FROM xysd FULL JOIN uv ON x=u ---- full-join - ├── columns: x:1(int) y:2(int) s:3(string) d:4(decimal) x:5(int) z:6(int) rowid:7(int) + ├── columns: x:1(int) y:2(int) s:3(string) d:4(decimal) u:5(int) v:6(int) rowid:7(int) ├── stats: [rows=100000] - ├── scan a - │ ├── columns: a.x:1(int!null) a.y:2(int) a.s:3(string) a.d:4(decimal!null) + ├── scan xysd + │ ├── columns: xysd.x:1(int!null) xysd.y:2(int) xysd.s:3(string) xysd.d:4(decimal!null) │ ├── stats: [rows=1000] │ └── keys: (1) weak(3,4) - ├── scan b - │ ├── columns: b.x:5(int) b.z:6(int!null) b.rowid:7(int!null) + ├── scan uv + │ ├── columns: uv.u:5(int) uv.v:6(int!null) uv.rowid:7(int!null) │ ├── stats: [rows=1000] │ └── keys: (7) └── eq [type=bool, outer=(1,5)] - ├── variable: a.x [type=int, outer=(1)] - └── variable: b.x [type=int, outer=(5)] + ├── variable: xysd.x [type=int, outer=(1)] + └── variable: uv.u [type=int, outer=(5)] build -SELECT * FROM a, b +SELECT * FROM xysd, uv ---- project - ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) x:5(int) z:6(int!null) + ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) u:5(int) v:6(int!null) ├── stats: [rows=1000000] ├── inner-join - │ ├── columns: 
a.x:1(int!null) a.y:2(int) a.s:3(string) a.d:4(decimal!null) b.x:5(int) b.z:6(int!null) b.rowid:7(int!null) + │ ├── columns: xysd.x:1(int!null) xysd.y:2(int) xysd.s:3(string) xysd.d:4(decimal!null) uv.u:5(int) uv.v:6(int!null) uv.rowid:7(int!null) │ ├── stats: [rows=1000000] - │ ├── scan a - │ │ ├── columns: a.x:1(int!null) a.y:2(int) a.s:3(string) a.d:4(decimal!null) + │ ├── scan xysd + │ │ ├── columns: xysd.x:1(int!null) xysd.y:2(int) xysd.s:3(string) xysd.d:4(decimal!null) │ │ ├── stats: [rows=1000] │ │ └── keys: (1) weak(3,4) - │ ├── scan b - │ │ ├── columns: b.x:5(int) b.z:6(int!null) b.rowid:7(int!null) + │ ├── scan uv + │ │ ├── columns: uv.u:5(int) uv.v:6(int!null) uv.rowid:7(int!null) │ │ ├── stats: [rows=1000] │ │ └── keys: (7) │ └── true [type=bool] └── projections [outer=(1-6)] - ├── variable: a.x [type=int, outer=(1)] - ├── variable: a.y [type=int, outer=(2)] - ├── variable: a.s [type=string, outer=(3)] - ├── variable: a.d [type=decimal, outer=(4)] - ├── variable: b.x [type=int, outer=(5)] - └── variable: b.z [type=int, outer=(6)] + ├── variable: xysd.x [type=int, outer=(1)] + ├── variable: xysd.y [type=int, outer=(2)] + ├── variable: xysd.s [type=string, outer=(3)] + ├── variable: xysd.d [type=decimal, outer=(4)] + ├── variable: uv.u [type=int, outer=(5)] + └── variable: uv.v [type=int, outer=(6)] build -SELECT * FROM a, a +SELECT * FROM xysd, xysd ---- inner-join ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) x:5(int!null) y:6(int) s:7(string) d:8(decimal!null) ├── stats: [rows=1000000] - ├── scan a - │ ├── columns: a.x:1(int!null) a.y:2(int) a.s:3(string) a.d:4(decimal!null) + ├── scan xysd + │ ├── columns: xysd.x:1(int!null) xysd.y:2(int) xysd.s:3(string) xysd.d:4(decimal!null) │ ├── stats: [rows=1000] │ └── keys: (1) weak(3,4) - ├── scan a - │ ├── columns: a.x:5(int!null) a.y:6(int) a.s:7(string) a.d:8(decimal!null) + ├── scan xysd + │ ├── columns: xysd.x:5(int!null) xysd.y:6(int) xysd.s:7(string) xysd.d:8(decimal!null) │ ├── 
stats: [rows=1000] │ └── keys: (5) weak(7,8) └── true [type=bool] + +# Propagate outer columns. +build +SELECT * FROM xysd WHERE EXISTS(SELECT * FROM (SELECT x) INNER JOIN (SELECT y) ON x::string = s) +---- +select + ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) + ├── stats: [rows=100] + ├── keys: (1) weak(3,4) + ├── scan xysd + │ ├── columns: xysd.x:1(int!null) xysd.y:2(int) xysd.s:3(string) xysd.d:4(decimal!null) + │ ├── stats: [rows=1000] + │ └── keys: (1) weak(3,4) + └── exists [type=bool, outer=(1-3)] + └── inner-join + ├── columns: xysd.x:1(int) xysd.y:2(int) + ├── outer: (1-3) + ├── stats: [rows=0] + ├── project + │ ├── columns: xysd.x:1(int) + │ ├── outer: (1) + │ ├── stats: [rows=1] + │ ├── values + │ │ ├── stats: [rows=1] + │ │ └── tuple [type=tuple{}] + │ └── projections [outer=(1)] + │ └── variable: xysd.x [type=int, outer=(1)] + ├── project + │ ├── columns: xysd.y:2(int) + │ ├── outer: (2) + │ ├── stats: [rows=1] + │ ├── values + │ │ ├── stats: [rows=1] + │ │ └── tuple [type=tuple{}] + │ └── projections [outer=(2)] + │ └── variable: xysd.y [type=int, outer=(2)] + └── eq [type=bool, outer=(1,3)] + ├── cast: string [type=string, outer=(1)] + │ └── variable: xysd.x [type=int, outer=(1)] + └── variable: xysd.s [type=string, outer=(3)] diff --git a/pkg/sql/opt/memo/testdata/logprops/limit b/pkg/sql/opt/memo/testdata/logprops/limit index 169c8b07bebf..e8bd03ce423d 100644 --- a/pkg/sql/opt/memo/testdata/logprops/limit +++ b/pkg/sql/opt/memo/testdata/logprops/limit @@ -1,7 +1,7 @@ exec-ddl -CREATE TABLE a (x INT PRIMARY KEY, y INT, z FLOAT NOT NULL, s STRING, UNIQUE (s DESC, z)) +CREATE TABLE xyzs (x INT PRIMARY KEY, y INT, z FLOAT NOT NULL, s STRING, UNIQUE (s DESC, z)) ---- -TABLE a +TABLE xyzs ├── x int not null ├── y int ├── z float not null @@ -13,31 +13,41 @@ TABLE a ├── z float not null └── x int not null (storing) +exec-ddl +CREATE TABLE kuv (k INT PRIMARY KEY, u FLOAT, v STRING) +---- +TABLE kuv + ├── k int not null + ├── u float + 
├── v string + └── INDEX primary + └── k int not null + build -SELECT * FROM a LIMIT 1 +SELECT * FROM xyzs LIMIT 1 ---- limit ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) ├── stats: [rows=1] ├── keys: (1) weak(3,4) - ├── scan a - │ ├── columns: a.x:1(int!null) a.y:2(int) a.z:3(float!null) a.s:4(string) + ├── scan xyzs + │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) │ ├── stats: [rows=1000] │ └── keys: (1) weak(3,4) └── const: 1 [type=int] build -SELECT * FROM a LIMIT (SELECT 1) +SELECT * FROM xyzs LIMIT (SELECT 1) ---- limit ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) ├── stats: [rows=1000] ├── keys: (1) weak(3,4) - ├── scan a - │ ├── columns: a.x:1(int!null) a.y:2(int) a.z:3(float!null) a.s:4(string) + ├── scan xyzs + │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) │ ├── stats: [rows=1000] │ └── keys: (1) weak(3,4) - └── subquery [type=int, outer=(5)] + └── subquery [type=int] ├── max1-row │ ├── columns: column5:5(int) │ ├── stats: [rows=1] @@ -52,14 +62,48 @@ limit └── variable: column5 [type=int, outer=(5)] build -SELECT * FROM a LIMIT 0 +SELECT * FROM xyzs LIMIT 0 ---- limit ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) ├── stats: [rows=1000] ├── keys: (1) weak(3,4) - ├── scan a - │ ├── columns: a.x:1(int!null) a.y:2(int) a.z:3(float!null) a.s:4(string) + ├── scan xyzs + │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) │ ├── stats: [rows=1000] │ └── keys: (1) weak(3,4) └── const: 0 [type=int] + +# Propagate outer columns. 
+build +SELECT (SELECT x FROM kuv LIMIT y) FROM xyzs +---- +project + ├── columns: column8:8(int) + ├── stats: [rows=1000] + ├── scan xyzs + │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) + │ ├── stats: [rows=1000] + │ └── keys: (1) weak(3,4) + └── projections [outer=(1,2)] + └── subquery [type=int, outer=(1,2)] + ├── max1-row + │ ├── columns: xyzs.x:1(int) + │ ├── outer: (1,2) + │ ├── stats: [rows=1] + │ └── limit + │ ├── columns: xyzs.x:1(int) + │ ├── outer: (1,2) + │ ├── stats: [rows=1000] + │ ├── project + │ │ ├── columns: xyzs.x:1(int) + │ │ ├── outer: (1) + │ │ ├── stats: [rows=1000] + │ │ ├── scan kuv + │ │ │ ├── columns: kuv.k:5(int!null) kuv.u:6(float) kuv.v:7(string) + │ │ │ ├── stats: [rows=1000] + │ │ │ └── keys: (5) + │ │ └── projections [outer=(1)] + │ │ └── variable: xyzs.x [type=int, outer=(1)] + │ └── variable: xyzs.y [type=int, outer=(2)] + └── variable: xyzs.x [type=int, outer=(1)] diff --git a/pkg/sql/opt/memo/testdata/logprops/offset b/pkg/sql/opt/memo/testdata/logprops/offset new file mode 100644 index 000000000000..8ea370d3a203 --- /dev/null +++ b/pkg/sql/opt/memo/testdata/logprops/offset @@ -0,0 +1,109 @@ +exec-ddl +CREATE TABLE xyzs (x INT PRIMARY KEY, y INT, z FLOAT NOT NULL, s STRING, UNIQUE (s DESC, z)) +---- +TABLE xyzs + ├── x int not null + ├── y int + ├── z float not null + ├── s string + ├── INDEX primary + │ └── x int not null + └── INDEX secondary + ├── s string desc + ├── z float not null + └── x int not null (storing) + +exec-ddl +CREATE TABLE kuv (k INT PRIMARY KEY, u FLOAT, v STRING) +---- +TABLE kuv + ├── k int not null + ├── u float + ├── v string + └── INDEX primary + └── k int not null + +build +SELECT * FROM xyzs OFFSET 1 +---- +offset + ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) + ├── stats: [rows=1000] + ├── keys: (1) weak(3,4) + ├── scan xyzs + │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) + │ ├── stats: [rows=1000] + │ 
└── keys: (1) weak(3,4) + └── const: 1 [type=int] + +build +SELECT * FROM xyzs OFFSET (SELECT 1) +---- +offset + ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) + ├── stats: [rows=1000] + ├── keys: (1) weak(3,4) + ├── scan xyzs + │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) + │ ├── stats: [rows=1000] + │ └── keys: (1) weak(3,4) + └── subquery [type=int] + ├── max1-row + │ ├── columns: column5:5(int) + │ ├── stats: [rows=1] + │ └── project + │ ├── columns: column5:5(int) + │ ├── stats: [rows=1] + │ ├── values + │ │ ├── stats: [rows=1] + │ │ └── tuple [type=tuple{}] + │ └── projections + │ └── const: 1 [type=int] + └── variable: column5 [type=int, outer=(5)] + +build +SELECT * FROM xyzs OFFSET 0 +---- +offset + ├── columns: x:1(int!null) y:2(int) z:3(float!null) s:4(string) + ├── stats: [rows=1000] + ├── keys: (1) weak(3,4) + ├── scan xyzs + │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) + │ ├── stats: [rows=1000] + │ └── keys: (1) weak(3,4) + └── const: 0 [type=int] + +# Propagate outer columns. 
+build +SELECT (SELECT x FROM kuv OFFSET y) FROM xyzs +---- +project + ├── columns: column8:8(int) + ├── stats: [rows=1000] + ├── scan xyzs + │ ├── columns: xyzs.x:1(int!null) xyzs.y:2(int) xyzs.z:3(float!null) xyzs.s:4(string) + │ ├── stats: [rows=1000] + │ └── keys: (1) weak(3,4) + └── projections [outer=(1,2)] + └── subquery [type=int, outer=(1,2)] + ├── max1-row + │ ├── columns: xyzs.x:1(int) + │ ├── outer: (1,2) + │ ├── stats: [rows=1] + │ └── offset + │ ├── columns: xyzs.x:1(int) + │ ├── outer: (1,2) + │ ├── stats: [rows=1000] + │ ├── project + │ │ ├── columns: xyzs.x:1(int) + │ │ ├── outer: (1) + │ │ ├── stats: [rows=1000] + │ │ ├── scan kuv + │ │ │ ├── columns: kuv.k:5(int!null) kuv.u:6(float) kuv.v:7(string) + │ │ │ ├── stats: [rows=1000] + │ │ │ └── keys: (5) + │ │ └── projections [outer=(1)] + │ │ └── variable: xyzs.x [type=int, outer=(1)] + │ └── variable: xyzs.y [type=int, outer=(2)] + └── variable: xyzs.x [type=int, outer=(1)] diff --git a/pkg/sql/opt/memo/testdata/logprops/project b/pkg/sql/opt/memo/testdata/logprops/project index 87a2f89626a5..5bbe2b553e49 100644 --- a/pkg/sql/opt/memo/testdata/logprops/project +++ b/pkg/sql/opt/memo/testdata/logprops/project @@ -1,7 +1,7 @@ exec-ddl -CREATE TABLE a (x INT PRIMARY KEY, y INT, s STRING, d DECIMAL NOT NULL, UNIQUE (s DESC, d)) +CREATE TABLE xysd (x INT PRIMARY KEY, y INT, s STRING, d DECIMAL NOT NULL, UNIQUE (s DESC, d)) ---- -TABLE a +TABLE xysd ├── x int not null ├── y int ├── s string @@ -13,34 +13,97 @@ TABLE a ├── d decimal not null └── x int not null (storing) +exec-ddl +CREATE TABLE kuv (k INT PRIMARY KEY, u FLOAT, v STRING) +---- +TABLE kuv + ├── k int not null + ├── u float + ├── v string + └── INDEX primary + └── k int not null + build -SELECT a.y, a.x+1, 1, a.x FROM a +SELECT y, x+1, 1, x FROM xysd ---- project ├── columns: y:2(int) column5:5(int) column6:6(int) x:1(int!null) ├── stats: [rows=1000] ├── keys: (1) - ├── scan a - │ ├── columns: a.x:1(int!null) a.y:2(int) a.s:3(string) 
a.d:4(decimal!null) + ├── scan xysd + │ ├── columns: xysd.x:1(int!null) xysd.y:2(int) xysd.s:3(string) xysd.d:4(decimal!null) │ ├── stats: [rows=1000] │ └── keys: (1) weak(3,4) └── projections [outer=(1,2)] - ├── variable: a.y [type=int, outer=(2)] + ├── variable: xysd.y [type=int, outer=(2)] ├── plus [type=int, outer=(1)] - │ ├── variable: a.x [type=int, outer=(1)] + │ ├── variable: xysd.x [type=int, outer=(1)] │ └── const: 1 [type=int] ├── const: 1 [type=int] - └── variable: a.x [type=int, outer=(1)] + └── variable: xysd.x [type=int, outer=(1)] build -SELECT s FROM a +SELECT s FROM xysd ---- project ├── columns: s:3(string) ├── stats: [rows=1000] - ├── scan a - │ ├── columns: a.x:1(int!null) a.y:2(int) a.s:3(string) a.d:4(decimal!null) + ├── scan xysd + │ ├── columns: xysd.x:1(int!null) xysd.y:2(int) xysd.s:3(string) xysd.d:4(decimal!null) │ ├── stats: [rows=1000] │ └── keys: (1) weak(3,4) └── projections [outer=(3)] - └── variable: a.s [type=string, outer=(3)] + └── variable: xysd.s [type=string, outer=(3)] + +# Propagate outer columns. 
+build +SELECT * FROM xysd WHERE (SELECT (SELECT y) FROM kuv WHERE k=x) > 5 +---- +select + ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) + ├── stats: [rows=100] + ├── keys: (1) weak(3,4) + ├── scan xysd + │ ├── columns: xysd.x:1(int!null) xysd.y:2(int) xysd.s:3(string) xysd.d:4(decimal!null) + │ ├── stats: [rows=1000] + │ └── keys: (1) weak(3,4) + └── gt [type=bool, outer=(1,2)] + ├── subquery [type=int, outer=(1,2)] + │ ├── max1-row + │ │ ├── columns: column8:8(int) + │ │ ├── outer: (1,2) + │ │ ├── stats: [rows=1] + │ │ └── project + │ │ ├── columns: column8:8(int) + │ │ ├── outer: (1,2) + │ │ ├── stats: [rows=100] + │ │ ├── select + │ │ │ ├── columns: kuv.k:5(int!null) kuv.u:6(float) kuv.v:7(string) + │ │ │ ├── outer: (1) + │ │ │ ├── stats: [rows=100] + │ │ │ ├── keys: (5) + │ │ │ ├── scan kuv + │ │ │ │ ├── columns: kuv.k:5(int!null) kuv.u:6(float) kuv.v:7(string) + │ │ │ │ ├── stats: [rows=1000] + │ │ │ │ └── keys: (5) + │ │ │ └── eq [type=bool, outer=(1,5)] + │ │ │ ├── variable: kuv.k [type=int, outer=(5)] + │ │ │ └── variable: xysd.x [type=int, outer=(1)] + │ │ └── projections [outer=(2)] + │ │ └── subquery [type=int, outer=(2)] + │ │ ├── max1-row + │ │ │ ├── columns: xysd.y:2(int) + │ │ │ ├── outer: (2) + │ │ │ ├── stats: [rows=1] + │ │ │ └── project + │ │ │ ├── columns: xysd.y:2(int) + │ │ │ ├── outer: (2) + │ │ │ ├── stats: [rows=1] + │ │ │ ├── values + │ │ │ │ ├── stats: [rows=1] + │ │ │ │ └── tuple [type=tuple{}] + │ │ │ └── projections [outer=(2)] + │ │ │ └── variable: xysd.y [type=int, outer=(2)] + │ │ └── variable: xysd.y [type=int, outer=(2)] + │ └── variable: column8 [type=int, outer=(8)] + └── const: 5 [type=int] diff --git a/pkg/sql/opt/memo/testdata/logprops/select b/pkg/sql/opt/memo/testdata/logprops/select index 96568cabab2d..163b319e53e4 100644 --- a/pkg/sql/opt/memo/testdata/logprops/select +++ b/pkg/sql/opt/memo/testdata/logprops/select @@ -1,63 +1,89 @@ exec-ddl -CREATE TABLE a (x INT PRIMARY KEY, y INT) +CREATE TABLE xy 
(x INT PRIMARY KEY, y INT) ---- -TABLE a +TABLE xy ├── x int not null ├── y int └── INDEX primary └── x int not null exec-ddl -CREATE TABLE b (x INT, z INT NOT NULL) +CREATE TABLE kuv (k INT PRIMARY KEY, u FLOAT, v STRING) ---- -TABLE b - ├── x int - ├── z int not null - ├── rowid int not null (hidden) +TABLE kuv + ├── k int not null + ├── u float + ├── v string └── INDEX primary - └── rowid int not null (hidden) + └── k int not null build -SELECT * FROM a WHERE x=1 +SELECT * FROM xy WHERE x=1 ---- select ├── columns: x:1(int!null) y:2(int) ├── stats: [rows=100] ├── keys: (1) - ├── scan a - │ ├── columns: a.x:1(int!null) a.y:2(int) + ├── scan xy + │ ├── columns: xy.x:1(int!null) xy.y:2(int) │ ├── stats: [rows=1000] │ └── keys: (1) └── eq [type=bool, outer=(1), constraints=(/1: [/1 - /1]; tight)] - ├── variable: a.x [type=int, outer=(1)] + ├── variable: xy.x [type=int, outer=(1)] └── const: 1 [type=int] build -SELECT * FROM a,b WHERE a.x=b.x +SELECT * FROM xy,kuv WHERE xy.x=kuv.k ---- -project - ├── columns: x:1(int!null) y:2(int) x:3(int) z:4(int!null) +select + ├── columns: x:1(int!null) y:2(int) k:3(int!null) u:4(float) v:5(string) ├── stats: [rows=100000] - ├── select - │ ├── columns: a.x:1(int!null) a.y:2(int) b.x:3(int) b.z:4(int!null) b.rowid:5(int!null) - │ ├── stats: [rows=100000] - │ ├── inner-join - │ │ ├── columns: a.x:1(int!null) a.y:2(int) b.x:3(int) b.z:4(int!null) b.rowid:5(int!null) - │ │ ├── stats: [rows=1000000] - │ │ ├── scan a - │ │ │ ├── columns: a.x:1(int!null) a.y:2(int) - │ │ │ ├── stats: [rows=1000] - │ │ │ └── keys: (1) - │ │ ├── scan b - │ │ │ ├── columns: b.x:3(int) b.z:4(int!null) b.rowid:5(int!null) - │ │ │ ├── stats: [rows=1000] - │ │ │ └── keys: (5) - │ │ └── true [type=bool] - │ └── eq [type=bool, outer=(1,3)] - │ ├── variable: a.x [type=int, outer=(1)] - │ └── variable: b.x [type=int, outer=(3)] - └── projections [outer=(1-4)] - ├── variable: a.x [type=int, outer=(1)] - ├── variable: a.y [type=int, outer=(2)] - ├── variable: b.x 
[type=int, outer=(3)] - └── variable: b.z [type=int, outer=(4)] + ├── inner-join + │ ├── columns: xy.x:1(int!null) xy.y:2(int) kuv.k:3(int!null) kuv.u:4(float) kuv.v:5(string) + │ ├── stats: [rows=1000000] + │ ├── scan xy + │ │ ├── columns: xy.x:1(int!null) xy.y:2(int) + │ │ ├── stats: [rows=1000] + │ │ └── keys: (1) + │ ├── scan kuv + │ │ ├── columns: kuv.k:3(int!null) kuv.u:4(float) kuv.v:5(string) + │ │ ├── stats: [rows=1000] + │ │ └── keys: (3) + │ └── true [type=bool] + └── eq [type=bool, outer=(1,3)] + ├── variable: xy.x [type=int, outer=(1)] + └── variable: kuv.k [type=int, outer=(3)] + +# Propagate outer columns. +build +SELECT * FROM xy WHERE EXISTS(SELECT * FROM (SELECT * FROM kuv WHERE k=y) WHERE k=x) +---- +select + ├── columns: x:1(int!null) y:2(int) + ├── stats: [rows=100] + ├── keys: (1) + ├── scan xy + │ ├── columns: xy.x:1(int!null) xy.y:2(int) + │ ├── stats: [rows=1000] + │ └── keys: (1) + └── exists [type=bool, outer=(1,2)] + └── select + ├── columns: kuv.k:3(int!null) kuv.u:4(float) kuv.v:5(string) + ├── outer: (1,2) + ├── stats: [rows=10] + ├── keys: (3) + ├── select + │ ├── columns: kuv.k:3(int!null) kuv.u:4(float) kuv.v:5(string) + │ ├── outer: (2) + │ ├── stats: [rows=100] + │ ├── keys: (3) + │ ├── scan kuv + │ │ ├── columns: kuv.k:3(int!null) kuv.u:4(float) kuv.v:5(string) + │ │ ├── stats: [rows=1000] + │ │ └── keys: (3) + │ └── eq [type=bool, outer=(2,3)] + │ ├── variable: kuv.k [type=int, outer=(3)] + │ └── variable: xy.y [type=int, outer=(2)] + └── eq [type=bool, outer=(1,3)] + ├── variable: kuv.k [type=int, outer=(3)] + └── variable: xy.x [type=int, outer=(1)] diff --git a/pkg/sql/opt/memo/testdata/logprops/set b/pkg/sql/opt/memo/testdata/logprops/set index 56b659a4e84d..6fe72291c443 100644 --- a/pkg/sql/opt/memo/testdata/logprops/set +++ b/pkg/sql/opt/memo/testdata/logprops/set @@ -1,123 +1,182 @@ exec-ddl -CREATE TABLE a (x INT PRIMARY KEY, y INT) +CREATE TABLE xy (x INT PRIMARY KEY, y INT) ---- -TABLE a +TABLE xy ├── x int not null 
├── y int └── INDEX primary └── x int not null exec-ddl -CREATE TABLE b (x INT, z INT NOT NULL) +CREATE TABLE uv (u INT, v INT NOT NULL) ---- -TABLE b - ├── x int - ├── z int not null +TABLE uv + ├── u int + ├── v int not null ├── rowid int not null (hidden) └── INDEX primary └── rowid int not null (hidden) build -SELECT * FROM a UNION SELECT * FROM b +SELECT * FROM xy UNION SELECT * FROM uv ---- union ├── columns: x:6(int) y:7(int) - ├── left columns: a.x:1(int) a.y:2(int) - ├── right columns: b.x:3(int) b.z:4(int) + ├── left columns: xy.x:1(int) xy.y:2(int) + ├── right columns: uv.u:3(int) uv.v:4(int) ├── stats: [rows=2000] - ├── scan a - │ ├── columns: a.x:1(int!null) a.y:2(int) + ├── scan xy + │ ├── columns: xy.x:1(int!null) xy.y:2(int) │ ├── stats: [rows=1000] │ └── keys: (1) └── project - ├── columns: b.x:3(int) b.z:4(int!null) + ├── columns: uv.u:3(int) uv.v:4(int!null) ├── stats: [rows=1000] - ├── scan b - │ ├── columns: b.x:3(int) b.z:4(int!null) b.rowid:5(int!null) + ├── scan uv + │ ├── columns: uv.u:3(int) uv.v:4(int!null) uv.rowid:5(int!null) │ ├── stats: [rows=1000] │ └── keys: (5) └── projections [outer=(3,4)] - ├── variable: b.x [type=int, outer=(3)] - └── variable: b.z [type=int, outer=(4)] + ├── variable: uv.u [type=int, outer=(3)] + └── variable: uv.v [type=int, outer=(4)] build -SELECT x, y, x FROM a INTERSECT SELECT z, x, rowid FROM (SELECT *, rowid FROM b WHERE b.x=1) b +SELECT x, y, x FROM xy INTERSECT SELECT v, u, rowid FROM (SELECT *, rowid FROM uv WHERE u=1) uv ---- intersect ├── columns: x:1(int!null) y:2(int) x:1(int!null) - ├── left columns: a.x:1(int!null) a.y:2(int) a.x:1(int!null) - ├── right columns: b.z:4(int) b.x:3(int) b.rowid:5(int) + ├── left columns: xy.x:1(int!null) xy.y:2(int) xy.x:1(int!null) + ├── right columns: uv.v:4(int) uv.u:3(int) uv.rowid:5(int) ├── stats: [rows=100] ├── project - │ ├── columns: a.x:1(int!null) a.y:2(int) + │ ├── columns: xy.x:1(int!null) xy.y:2(int) │ ├── stats: [rows=1000] │ ├── keys: (1) - │ ├── 
scan a - │ │ ├── columns: a.x:1(int!null) a.y:2(int) + │ ├── scan xy + │ │ ├── columns: xy.x:1(int!null) xy.y:2(int) │ │ ├── stats: [rows=1000] │ │ └── keys: (1) │ └── projections [outer=(1,2)] - │ ├── variable: a.x [type=int, outer=(1)] - │ └── variable: a.y [type=int, outer=(2)] + │ ├── variable: xy.x [type=int, outer=(1)] + │ └── variable: xy.y [type=int, outer=(2)] └── project - ├── columns: b.z:4(int!null) b.x:3(int) b.rowid:5(int!null) + ├── columns: uv.v:4(int!null) uv.u:3(int) uv.rowid:5(int!null) ├── stats: [rows=100] ├── keys: (5) ├── select - │ ├── columns: b.x:3(int) b.z:4(int!null) b.rowid:5(int!null) + │ ├── columns: uv.u:3(int) uv.v:4(int!null) uv.rowid:5(int!null) │ ├── stats: [rows=100] │ ├── keys: (5) - │ ├── scan b - │ │ ├── columns: b.x:3(int) b.z:4(int!null) b.rowid:5(int!null) + │ ├── scan uv + │ │ ├── columns: uv.u:3(int) uv.v:4(int!null) uv.rowid:5(int!null) │ │ ├── stats: [rows=1000] │ │ └── keys: (5) │ └── eq [type=bool, outer=(3), constraints=(/3: [/1 - /1]; tight)] - │ ├── variable: b.x [type=int, outer=(3)] + │ ├── variable: uv.u [type=int, outer=(3)] │ └── const: 1 [type=int] └── projections [outer=(3-5)] - ├── variable: b.z [type=int, outer=(4)] - ├── variable: b.x [type=int, outer=(3)] - └── variable: b.rowid [type=int, outer=(5)] + ├── variable: uv.v [type=int, outer=(4)] + ├── variable: uv.u [type=int, outer=(3)] + └── variable: uv.rowid [type=int, outer=(5)] build -SELECT x, x, y FROM a EXCEPT SELECT x, z, z FROM (SELECT * FROM b WHERE b.x=1) b +SELECT x, x, y FROM xy EXCEPT SELECT u, v, v FROM (SELECT * FROM uv WHERE u=1) uv ---- except ├── columns: x:1(int!null) x:1(int!null) y:2(int) - ├── left columns: a.x:1(int!null) a.x:1(int!null) a.y:2(int) - ├── right columns: b.x:3(int) b.z:4(int) b.z:4(int) + ├── left columns: xy.x:1(int!null) xy.x:1(int!null) xy.y:2(int) + ├── right columns: uv.u:3(int) uv.v:4(int) uv.v:4(int) ├── stats: [rows=1000] ├── project - │ ├── columns: a.x:1(int!null) a.y:2(int) + │ ├── columns: 
xy.x:1(int!null) xy.y:2(int) │ ├── stats: [rows=1000] │ ├── keys: (1) - │ ├── scan a - │ │ ├── columns: a.x:1(int!null) a.y:2(int) + │ ├── scan xy + │ │ ├── columns: xy.x:1(int!null) xy.y:2(int) │ │ ├── stats: [rows=1000] │ │ └── keys: (1) │ └── projections [outer=(1,2)] - │ ├── variable: a.x [type=int, outer=(1)] - │ └── variable: a.y [type=int, outer=(2)] + │ ├── variable: xy.x [type=int, outer=(1)] + │ └── variable: xy.y [type=int, outer=(2)] └── project - ├── columns: b.x:3(int) b.z:4(int!null) + ├── columns: uv.u:3(int) uv.v:4(int!null) ├── stats: [rows=100] ├── project - │ ├── columns: b.x:3(int) b.z:4(int!null) + │ ├── columns: uv.u:3(int) uv.v:4(int!null) │ ├── stats: [rows=100] │ ├── select - │ │ ├── columns: b.x:3(int) b.z:4(int!null) b.rowid:5(int!null) + │ │ ├── columns: uv.u:3(int) uv.v:4(int!null) uv.rowid:5(int!null) │ │ ├── stats: [rows=100] │ │ ├── keys: (5) - │ │ ├── scan b - │ │ │ ├── columns: b.x:3(int) b.z:4(int!null) b.rowid:5(int!null) + │ │ ├── scan uv + │ │ │ ├── columns: uv.u:3(int) uv.v:4(int!null) uv.rowid:5(int!null) │ │ │ ├── stats: [rows=1000] │ │ │ └── keys: (5) │ │ └── eq [type=bool, outer=(3), constraints=(/3: [/1 - /1]; tight)] - │ │ ├── variable: b.x [type=int, outer=(3)] + │ │ ├── variable: uv.u [type=int, outer=(3)] │ │ └── const: 1 [type=int] │ └── projections [outer=(3,4)] - │ ├── variable: b.x [type=int, outer=(3)] - │ └── variable: b.z [type=int, outer=(4)] + │ ├── variable: uv.u [type=int, outer=(3)] + │ └── variable: uv.v [type=int, outer=(4)] └── projections [outer=(3,4)] - ├── variable: b.x [type=int, outer=(3)] - └── variable: b.z [type=int, outer=(4)] + ├── variable: uv.u [type=int, outer=(3)] + └── variable: uv.v [type=int, outer=(4)] + +# Propagate outer columns. 
+build +SELECT * FROM xy WHERE (SELECT x, u FROM uv UNION SELECT y, v FROM uv) = (1, 2) +---- +select + ├── columns: x:1(int!null) y:2(int) + ├── stats: [rows=100] + ├── keys: (1) + ├── scan xy + │ ├── columns: xy.x:1(int!null) xy.y:2(int) + │ ├── stats: [rows=1000] + │ └── keys: (1) + └── eq [type=bool, outer=(1,2)] + ├── subquery [type=tuple{int, int}, outer=(1,2)] + │ ├── max1-row + │ │ ├── columns: column11:11(tuple{int, int}) + │ │ ├── outer: (1,2) + │ │ ├── stats: [rows=1] + │ │ └── project + │ │ ├── columns: column11:11(tuple{int, int}) + │ │ ├── outer: (1,2) + │ │ ├── stats: [rows=2000] + │ │ ├── union + │ │ │ ├── columns: x:9(int) u:10(int) + │ │ │ ├── left columns: xy.x:1(int) uv.u:3(int) + │ │ │ ├── right columns: xy.y:2(int) uv.v:7(int) + │ │ │ ├── outer: (1,2) + │ │ │ ├── stats: [rows=2000] + │ │ │ ├── project + │ │ │ │ ├── columns: xy.x:1(int) uv.u:3(int) + │ │ │ │ ├── outer: (1) + │ │ │ │ ├── stats: [rows=1000] + │ │ │ │ ├── scan uv + │ │ │ │ │ ├── columns: uv.u:3(int) uv.v:4(int!null) uv.rowid:5(int!null) + │ │ │ │ │ ├── stats: [rows=1000] + │ │ │ │ │ └── keys: (5) + │ │ │ │ └── projections [outer=(1,3)] + │ │ │ │ ├── variable: xy.x [type=int, outer=(1)] + │ │ │ │ └── variable: uv.u [type=int, outer=(3)] + │ │ │ └── project + │ │ │ ├── columns: xy.y:2(int) uv.v:7(int!null) + │ │ │ ├── outer: (2) + │ │ │ ├── stats: [rows=1000] + │ │ │ ├── scan uv + │ │ │ │ ├── columns: uv.u:6(int) uv.v:7(int!null) uv.rowid:8(int!null) + │ │ │ │ ├── stats: [rows=1000] + │ │ │ │ └── keys: (8) + │ │ │ └── projections [outer=(2,7)] + │ │ │ ├── variable: xy.y [type=int, outer=(2)] + │ │ │ └── variable: uv.v [type=int, outer=(7)] + │ │ └── projections [outer=(9,10)] + │ │ └── tuple [type=tuple{int, int}, outer=(9,10)] + │ │ ├── variable: x [type=int, outer=(9)] + │ │ └── variable: u [type=int, outer=(10)] + │ └── variable: column11 [type=tuple{int, int}, outer=(11)] + └── tuple [type=tuple{int, int}] + ├── const: 1 [type=int] + └── const: 2 [type=int] diff --git 
a/pkg/sql/opt/memo/testdata/logprops/values b/pkg/sql/opt/memo/testdata/logprops/values index d193435f0829..c19f83422a1a 100644 --- a/pkg/sql/opt/memo/testdata/logprops/values +++ b/pkg/sql/opt/memo/testdata/logprops/values @@ -1,3 +1,12 @@ +exec-ddl +CREATE TABLE xy (x INT PRIMARY KEY, y INT) +---- +TABLE xy + ├── x int not null + ├── y int + └── INDEX primary + └── x int not null + build SELECT * FROM (VALUES (1, 2), (3, 4), (NULL, 5)) ---- @@ -13,3 +22,32 @@ values └── tuple [type=tuple{unknown, int}] ├── null [type=unknown] └── const: 5 [type=int] + +# Propagate outer columns. +build +SELECT (VALUES (x), (y+1)) FROM xy +---- +project + ├── columns: column4:4(int) + ├── stats: [rows=1000] + ├── scan xy + │ ├── columns: xy.x:1(int!null) xy.y:2(int) + │ ├── stats: [rows=1000] + │ └── keys: (1) + └── projections [outer=(1,2)] + └── subquery [type=int, outer=(1,2)] + ├── max1-row + │ ├── columns: column1:3(int) + │ ├── outer: (1,2) + │ ├── stats: [rows=1] + │ └── values + │ ├── columns: column1:3(int) + │ ├── outer: (1,2) + │ ├── stats: [rows=2, distinct(3)=2] + │ ├── tuple [type=tuple{int}, outer=(1)] + │ │ └── variable: xy.x [type=int, outer=(1)] + │ └── tuple [type=tuple{int}, outer=(2)] + │ └── plus [type=int, outer=(2)] + │ ├── variable: xy.y [type=int, outer=(2)] + │ └── const: 1 [type=int] + └── variable: column1 [type=int, outer=(3)] diff --git a/pkg/sql/opt/memo/testdata/stats/limit b/pkg/sql/opt/memo/testdata/stats/limit index 41fb9d680f69..8a5d44023062 100644 --- a/pkg/sql/opt/memo/testdata/stats/limit +++ b/pkg/sql/opt/memo/testdata/stats/limit @@ -80,7 +80,7 @@ limit │ └── eq [type=bool, outer=(3), constraints=(/3: [/'foo' - /'foo']; tight)] │ ├── variable: a.s [type=string, outer=(3)] │ └── const: 'foo' [type=string] - └── subquery [type=int, outer=(5)] + └── subquery [type=int] ├── max1-row │ ├── columns: column5:5(int) │ ├── stats: [rows=1] diff --git a/pkg/sql/opt/xform/testdata/rules/limit b/pkg/sql/opt/xform/testdata/rules/limit index 
90b805c0c892..2925a462a265 100644 --- a/pkg/sql/opt/xform/testdata/rules/limit +++ b/pkg/sql/opt/xform/testdata/rules/limit @@ -95,7 +95,7 @@ limit ├── columns: s:4(string) ├── scan a │ └── columns: a.s:4(string) - └── subquery [type=int, outer=(6)] + └── subquery [type=int] ├── max1-row │ ├── columns: a.k:6(int!null) │ ├── keys: (6) From 19abbbdf67f5df509f585d158d4a618d21b7bbc0 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Tue, 24 Apr 2018 13:38:53 -0400 Subject: [PATCH 2/3] storage: bump LastHeartbeat timestamp when writing txn record Fixes #23945. See #20448. This change addresses a case where delayed BeginTxn requests can result in txn records looking inactive immediately upon being written. We now bump the txn record's LastHeartbeat timestamp when writing the record. Release note: None --- .../batcheval/cmd_begin_transaction.go | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/pkg/storage/batcheval/cmd_begin_transaction.go b/pkg/storage/batcheval/cmd_begin_transaction.go index b9371b95917f..d43ee84d5e10 100644 --- a/pkg/storage/batcheval/cmd_begin_transaction.go +++ b/pkg/storage/batcheval/cmd_begin_transaction.go @@ -70,7 +70,8 @@ func BeginTransaction( clonedTxn := h.Txn.Clone() reply.Txn = &clonedTxn - // Verify transaction does not already exist. + // Check whether the transaction record already exists. If it already + // exists, check its current status and react accordingly. tmpTxn := roachpb.Transaction{} ok, err := engine.MVCCGetProto(ctx, batch, key, hlc.Timestamp{}, true, nil, &tmpTxn) if err != nil { @@ -109,18 +110,26 @@ func BeginTransaction( } } - threshold := cArgs.EvalCtx.GetTxnSpanGCThreshold() - - // Disallow creation of a transaction record if it's at a timestamp before - // the TxnSpanGCThreshold, as in that case our transaction may already have - // been aborted by a concurrent actor which encountered one of our intents - // (which may have been written before this entry). 
+ // Disallow creation or modification of a transaction record if it's at a + // timestamp before the TxnSpanGCThreshold, as in that case our transaction + // may already have been aborted by a concurrent actor which encountered one + // of our intents (which may have been written before this entry). // // See #9265. + threshold := cArgs.EvalCtx.GetTxnSpanGCThreshold() if reply.Txn.LastActive().Less(threshold) { return result.Result{}, roachpb.NewTransactionAbortedError() } + // Transaction heartbeats don't begin until after the transaction record + // has been laid down and the response has returned to the transaction + // coordinator. This poses a problem for BeginTxn requests that get + // delayed, because it's possible that the transaction records they + // write will look inactive immediately after being written. To avoid + // this situation resulting in transactions being aborted unnecessarily, + // we bump the record's heartbeat timestamp right before laying it down. + reply.Txn.LastHeartbeat.Forward(cArgs.EvalCtx.Clock().Now()) + // Write the txn record. reply.Txn.Writing = true return result.Result{}, engine.MVCCPutProto(ctx, batch, cArgs.Stats, key, hlc.Timestamp{}, nil, reply.Txn) From d3c6e99893816f9bf5d5188eaa9f3a10d3dc607e Mon Sep 17 00:00:00 2001 From: Andrew Kimball Date: Sat, 21 Apr 2018 19:49:31 -0700 Subject: [PATCH 3/3] opt: Hoist Exists operator and try to decorrelate Transform EXISTS clause in WHERE clauses into a SemiJoinApply or AntiSemiJoinApply operator. Additional rules will attempt to decorrelate the right operand of the Apply by pushing the Apply down through any Select operator. 
Example: SELECT * FROM a WHERE EXISTS(SELECT * FROM b WHERE a.x=b.x) => SELECT * FROM a SEMI JOIN APPLY (SELECT * FROM b WHERE a.x=b.x) => SELECT * FROM a SEMI JOIN b WHERE a.x=b.x Release note: None --- pkg/sql/opt/memo/logical_props_factory.go | 3 +- pkg/sql/opt/norm/factory.go | 130 +++++--- pkg/sql/opt/norm/rules/citations.md | 10 +- pkg/sql/opt/norm/rules/join.opt | 80 ++++- pkg/sql/opt/norm/rules/scalar.opt | 21 ++ pkg/sql/opt/norm/rules/select.opt | 67 +++- pkg/sql/opt/norm/testdata/combo | 211 ++++++++++++ pkg/sql/opt/norm/testdata/join | 390 +++++++++++++++++++--- pkg/sql/opt/norm/testdata/scalar | 61 ++++ pkg/sql/opt/norm/testdata/select | 215 ++++++++++++ pkg/sql/opt/ops/scalar.opt | 98 ++++-- pkg/sql/opt/rule_name_string.go | 4 +- 12 files changed, 1137 insertions(+), 153 deletions(-) diff --git a/pkg/sql/opt/memo/logical_props_factory.go b/pkg/sql/opt/memo/logical_props_factory.go index c2a165c0b235..ea4ba66320c3 100644 --- a/pkg/sql/opt/memo/logical_props_factory.go +++ b/pkg/sql/opt/memo/logical_props_factory.go @@ -249,8 +249,7 @@ func (f logicalPropsFactory) constructGroupByProps(ev ExprView) LogicalProps { // Any outer columns from aggregation expressions that are not bound by the // input columns are outer columns. - props.Relational.OuterCols = aggProps.OuterCols.Copy() - props.Relational.OuterCols.DifferenceWith(inputProps.OutputCols) + props.Relational.OuterCols = aggProps.OuterCols.Difference(inputProps.OutputCols) props.Relational.OuterCols.UnionWith(inputProps.OuterCols) // Scalar group by has no grouping columns and always a single row. diff --git a/pkg/sql/opt/norm/factory.go b/pkg/sql/opt/norm/factory.go index 75cf2d85329a..740456f51e47 100644 --- a/pkg/sql/opt/norm/factory.go +++ b/pkg/sql/opt/norm/factory.go @@ -172,6 +172,25 @@ func (f *Factory) listOnlyHasNulls(list memo.ListID) bool { return true } +// removeListItem returns a new list that is a copy of the given list, except +// that it does not contain the given search item. 
If the list contains the item +// multiple times, then only the first instance is removed. If the list does not +// contain the item, then removeListItem will panic. +func (f *Factory) removeListItem(list memo.ListID, search memo.GroupID) memo.ListID { + existingList := f.mem.LookupList(list) + newList := make([]memo.GroupID, len(existingList)-1) + for i, item := range existingList { + if item == search { + newList = append(newList[:i], existingList[i+1:]...) + break + } + + // If the list does not contain the item, this will panic. + newList[i] = item + } + return f.mem.InternList(newList) +} + // isSortedUniqueList returns true if the list is in sorted order, with no // duplicates. See the comment for listSorter.compare for comparison rule // details. @@ -336,14 +355,17 @@ func (f *Factory) outerCols(group memo.GroupID) opt.ColSet { return f.lookupLogical(group).OuterCols() } -// synthesizedCols returns the set of columns which have been added by the given -// Project operator to its input columns. For example, the "x+1" column is a -// synthesized column in "SELECT x, x+1 FROM a". -func (f *Factory) synthesizedCols(project memo.GroupID) opt.ColSet { - synth := f.outputCols(project).Copy() - input := f.mem.NormExpr(project).AsProject().Input() - synth.DifferenceWith(f.outputCols(input)) - return synth +// hasOuterCols returns true if the given group has at least one outer column, +// or in other words, a reference to a variable that is not bound within its +// own scope. For example: +// +// SELECT * FROM a WHERE EXISTS(SELECT * FROM b WHERE b.x = a.x) +// +// The a.x variable in the EXISTS subquery references a column outside the scope +// of the subquery. It is an "outer column" for the subquery (see the comment on +// RelationalProps.OuterCols for more details). 
+func (f *Factory) hasOuterCols(group memo.GroupID) bool { + return !f.outerCols(group).Empty() } // onlyConstants returns true if the scalar expression is a "constant @@ -371,6 +393,13 @@ func (f *Factory) hasSubsetCols(left, right memo.GroupID) bool { return f.outputCols(left).SubsetOf(f.outputCols(right)) } +// isScalarGroupBy returns true if the given grouping columns come from a +// "scalar" GroupBy operator. A scalar GroupBy always returns exactly one row, +// with any aggregate functions operating over the entire input expression. +func (f *Factory) isScalarGroupBy(groupingCols memo.PrivateID) bool { + return f.mem.LookupPrivate(groupingCols).(opt.ColSet).Empty() +} + // ---------------------------------------------------------------------- // // Project Rules @@ -578,14 +607,8 @@ func (f *Factory) offsetNoCycle(input, limit memo.GroupID, ordering memo.Private // // ---------------------------------------------------------------------- -// emptyGroupingCols returns true if the given grouping columns for a GroupBy -// operator are empty. -func (f *Factory) emptyGroupingCols(cols memo.PrivateID) bool { - return f.mem.LookupPrivate(cols).(opt.ColSet).Empty() -} - -// isCorrelated returns true if variables in the source expression reference -// columns in the destination expression. For example: +// isCorrelated returns true if any variable in the source expression references +// a column from the destination expression. For example: // (InnerJoin // (Scan a) // (Scan b) @@ -599,22 +622,25 @@ func (f *Factory) isCorrelated(src, dst memo.GroupID) bool { return f.outerCols(src).Intersects(f.outputCols(dst)) } -// isCorrelatedCols is similar to isCorrelated, except that it checks whether -// variables in the given expression reference any of the given columns. This: +// isBoundBy returns true if all outer references in the source expression are +// bound by the destination expression. 
For example: // -// (IsCorrelated $src $dst) -// -// is equivalent to this: -// -// (IsCorrelatedCols $src (OutputCols $dts)) +// (InnerJoin +// (Scan a) +// (Scan b) +// (Eq (Variable a.x) (Const 1)) +// ) // -func (f *Factory) isCorrelatedCols(group memo.GroupID, cols opt.ColSet) bool { - return f.outerCols(group).Intersects(cols) +// The (Eq) expression is fully bound by the (Scan a) expression because all of +// its outer references are satisfied by the columns produced by the Scan. +func (f *Factory) isBoundBy(src, dst memo.GroupID) bool { + return f.outerCols(src).SubsetOf(f.outputCols(dst)) } -// extractCorrelatedConditions returns a new list containing only those -// expressions from the given list that are correlated with the given set of -// columns. For example: +// extractBoundConditions returns a new list containing only those expressions +// from the given list that are fully bound by the given expression (i.e. all +// outer references are satisfied by it). For example: +// // (InnerJoin // (Scan a) // (Scan b) @@ -624,26 +650,27 @@ func (f *Factory) isCorrelatedCols(group memo.GroupID, cols opt.ColSet) bool { // ]) // ) // -// Calling extractCorrelatedConditions with the filter conditions list and the -// output columns of (Scan b) would extract the (Eq) expression, since it -// references columns from b. -func (f *Factory) extractCorrelatedConditions(list memo.ListID, cols opt.ColSet) memo.ListID { +// Calling extractBoundConditions with the filter conditions list and the +// (Scan a) expression would extract the (Gt) expression, since all of its +// outer references are to columns produced by a.
+func (f *Factory) extractBoundConditions(list memo.ListID, group memo.GroupID) memo.ListID { extracted := make([]memo.GroupID, 0, list.Length) for _, item := range f.mem.LookupList(list) { - if f.isCorrelatedCols(item, cols) { + if f.isBoundBy(item, group) { extracted = append(extracted, item) } } return f.mem.InternList(extracted) } -// extractUncorrelatedConditions is the inverse of extractCorrelatedConditions. -// Instead of extracting correlated expressions, it extracts list expressions -// that are *not* correlated with the destination. -func (f *Factory) extractUncorrelatedConditions(list memo.ListID, cols opt.ColSet) memo.ListID { +// extractUnboundConditions is the inverse of extractBoundConditions. Instead of +// extracting expressions that are bound by the given expression, it extracts +// list expressions that have at least one outer reference that is *not* bound +// by the given expression (i.e. it has a "free" variable). +func (f *Factory) extractUnboundConditions(list memo.ListID, group memo.GroupID) memo.ListID { extracted := make([]memo.GroupID, 0, list.Length) for _, item := range f.mem.LookupList(list) { - if !f.isCorrelatedCols(item, cols) { + if !f.isBoundBy(item, group) { extracted = append(extracted, item) } } @@ -713,6 +740,33 @@ func (f *Factory) colsAreKey(cols memo.PrivateID, group memo.GroupID) bool { return false } +// ---------------------------------------------------------------------- +// +// Join Rules +// Custom match and replace functions used with join.opt rules. +// +// ---------------------------------------------------------------------- + +// removeApply replaces an apply join operator type with the corresponding non- +// apply join operator type. This is used when decorrelating subqueries. 
+func (f *Factory) removeApply(op opt.Operator, left, right, filter memo.GroupID) memo.GroupID { + switch op { + case opt.InnerJoinApplyOp: + return f.ConstructInnerJoin(left, right, filter) + case opt.LeftJoinApplyOp: + return f.ConstructLeftJoin(left, right, filter) + case opt.RightJoinApplyOp: + return f.ConstructRightJoin(left, right, filter) + case opt.FullJoinApplyOp: + return f.ConstructFullJoin(left, right, filter) + case opt.SemiJoinApplyOp: + return f.ConstructSemiJoin(left, right, filter) + case opt.AntiJoinApplyOp: + return f.ConstructAntiJoin(left, right, filter) + } + panic(fmt.Sprintf("unexpected join operator: %v", op)) +} + // ---------------------------------------------------------------------- // // Boolean Rules diff --git a/pkg/sql/opt/norm/rules/citations.md b/pkg/sql/opt/norm/rules/citations.md index c8b4759f37e2..d78a05e06994 100644 --- a/pkg/sql/opt/norm/rules/citations.md +++ b/pkg/sql/opt/norm/rules/citations.md @@ -8,5 +8,11 @@ further information, and in some cases proofs, can be found. ACM Trans. Database Syst.. 22. 43-73. 10.1145/244810.244812. https://www.researchgate.net/publication/220225172_Outerjoin_Simplification_and_Reordering_for_Query_Optimization -[2] M. M. Joshi and C. A. Galindo-Legaria. Properties of the GroupBy/Aggregate - relational operator. Technical report, Microsoft, 2001. MSR-TR-2001-13. +[2] M. M. Joshi and C. A. Galindo-Legaria. + Properties of the GroupBy/Aggregate relational operator. + Technical report, Microsoft, 2001. MSR-TR-2001-13. + +[3] Galindo-Legaria, C.A. & Joshi, Milind. (2001). + Orthogonal Optimization of Subqueries and Aggregation. + Sigmod Record. 30. 571-581. 10.1145/375663.375748. 
+ http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.563.8492&rep=rep1&type=pdf diff --git a/pkg/sql/opt/norm/rules/join.opt b/pkg/sql/opt/norm/rules/join.opt index fe20f61de6d2..4fa14b973696 100644 --- a/pkg/sql/opt/norm/rules/join.opt +++ b/pkg/sql/opt/norm/rules/join.opt @@ -49,38 +49,102 @@ # -- But if the filter is incorrectly pushed down, then no row is returned. # SELECT * FROM (SELECT * FROM a WHERE a.y < 0) a LEFT JOIN b ON a.x=b.x # +# In addition, AntiJoin is not eligible for this rule, as illustrated by this +# example: +# +# -- A row is returned for a.y=2. +# SELECT * FROM a ANTI JOIN b ON a.y < 0 +# +# -- But if the filter is incorrectly pushed down, then no row is returned. +# SELECT * FROM (SELECT * FROM a WHERE a.y < 0) a ANTI JOIN b ON True +# # Citations: [1] [PushFilterIntoJoinLeft, Normalize] -(InnerJoin | InnerJoinApply | RightJoin | RightJoinApply +(InnerJoin | InnerJoinApply | RightJoin | RightJoinApply | SemiJoin | SemiJoinApply $left:* $right:* - $on:(Filters $list:[ ... $condition:* & ^(IsCorrelated $condition $right) ... ]) + $on:(Filters $list:[ ... $condition:* & (IsBoundBy $condition $left) ... ]) ) => ((OpName) (Select $left - (Filters (ExtractUncorrelatedConditions $list (OutputCols $right))) + (Filters (ExtractBoundConditions $list $left)) ) $right - (Filters (ExtractCorrelatedConditions $list (OutputCols $right))) + (Filters (ExtractUnboundConditions $list $left)) ) # PushFilterIntoJoinRight is symmetric with PushFilterIntoJoinLeft. It pushes # Join filter conditions into the right side of the join rather than into the # left side. See that rule's comments for more details. +# +# This rule triggers a cycle with the TryDecorrelateSelect rule. That rule has +# the DetectCycle tag to break the cycle. 
[PushFilterIntoJoinRight, Normalize] -(InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply +(InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply | + SemiJoin | SemiJoinApply | AntiJoin | AntiJoinApply $left:* $right:* - $on:(Filters $list:[ ... $condition:* & ^(IsCorrelated $condition $left) ... ]) + $on:(Filters $list:[ ... $condition:* & (IsBoundBy $condition $right) ... ]) ) => ((OpName) $left (Select $right - (Filters (ExtractUncorrelatedConditions $list (OutputCols $left))) + (Filters (ExtractBoundConditions $list $right)) ) - (Filters (ExtractCorrelatedConditions $list (OutputCols $left))) + (Filters (ExtractUnboundConditions $list $right)) +) + +# DecorrelateJoin maps an apply join into the corresponding join without an +# apply if the right side of the join is not correlated with the left side. +# This allows the optimizer to consider additional physical join operators that +# are unable to handle correlated inputs. +# +# NOTE: Keep this before other decorrelation patterns, as if the correlated +# join can be removed first, it avoids unnecessarily matching other +# patterns that only exist to get to this pattern. +# +# Citations: [3] +[DecorrelateJoin, Normalize] +(JoinApply + $left:* + $right:* & ^(IsCorrelated $right $left) + $on:* +) +=> +(RemoveApply (OpName) $left $right $on) + +# TryDecorrelateSelect "pushes down" the join apply into the select operator, +# in order to eliminate any correlation between the select filter list and the +# left side of the join, and also to keep "digging" down to find and eliminate +# other unnecessary correlation. Eventually, the hope is to trigger the +# DecorrelateJoin pattern to turn the JoinApply operator into a non-apply Join +# operator. +# +# This rule triggers a cycle with the PushFilterIntoJoinRight rule. Because this +# rule has the DetectCycle tag, it is skipped once a cycle is detected. 
This +# gives the PushFilterIntoJoinRight rule one last chance to push Select into the +# right input, and therefore causes the normal form to be (Join (Select)) rather +# than (Select (Join)) when the choice is ambiguous. +# +# Note that citation [3] doesn't directly contain this identity, since it +# assumes that the Select will be hoisted above the Join rather than becoming +# part of its On condition. PushFilterIntoJoinRight allows the condition to be +# pushed down, so this rule can correctly pull it up. +# +# Citations: [3] +[TryDecorrelateSelect, Normalize, DetectCycle] +(InnerJoinApply | LeftJoinApply | SemiJoinApply | AntiJoinApply + $left:* + $right:(Select $input:* $filter:*) & (HasOuterCols $right) + $on:* +) +=> +((OpName) + $left + $input + (ConcatFilters $on $filter) ) diff --git a/pkg/sql/opt/norm/rules/scalar.opt b/pkg/sql/opt/norm/rules/scalar.opt index a2be8f1f2fcb..e769ce84c8e4 100644 --- a/pkg/sql/opt/norm/rules/scalar.opt +++ b/pkg/sql/opt/norm/rules/scalar.opt @@ -138,3 +138,24 @@ ) => (Null (BoolType)) + +# EliminateExistsProject discards a Project input to the Exists operator. The +# Project operator never changes the row cardinality of its input, and row +# cardinality is the only thing that Exists cares about, so Project is a no-op. +[EliminateExistsProject, Normalize] +(Exists (Project $input:*)) => (Exists $input) + +# EliminateExistsGroupBy discards a non-scalar GroupBy input to the Exists +# operator. While non-scalar GroupBy can change row cardinality, it always +# returns a non-empty set if its input is non-empty. Similarly, if its input is +# empty, then it returns the empty set. Therefore, it's a no-op for Exists. 
+[EliminateExistsGroupBy, Normalize] +(Exists + (GroupBy + $input:* + * + $groupingCols:* & ^(IsScalarGroupBy $groupingCols) + ) +) +=> +(Exists $input) diff --git a/pkg/sql/opt/norm/rules/select.opt b/pkg/sql/opt/norm/rules/select.opt index 7b445a4404cb..7300fdef7bbc 100644 --- a/pkg/sql/opt/norm/rules/select.opt +++ b/pkg/sql/opt/norm/rules/select.opt @@ -67,12 +67,12 @@ # rule which has the DetectCycle tag). [PushSelectIntoProject, Normalize] (Select - $project:(Project + (Project $input:* $projections:* ) (Filters - $list:[ ... $condition:* & ^(IsCorrelatedCols $condition (SynthesizedCols $project)) ... ] + $list:[ ... $condition:* & (IsBoundBy $condition $input) ... ] ) ) => @@ -80,11 +80,11 @@ (ProjectNoCycle (Select $input - (Filters (ExtractUncorrelatedConditions $list (SynthesizedCols $project))) + (Filters (ExtractBoundConditions $list $input)) ) $projections ) - (Filters (ExtractCorrelatedConditions $list (SynthesizedCols $project))) + (Filters (ExtractUnboundConditions $list $input)) ) # PushSelectIntoJoinLeft pushes Select filter conditions into the left side of @@ -108,19 +108,19 @@ $right:* $on:* ) - $filter:(Filters $list:[ ... $condition:* & ^(IsCorrelated $condition $right) ... ]) + $filter:(Filters $list:[ ... $condition:* & (IsBoundBy $condition $left) ... ]) ) => (Select ((OpName $input) (Select $left - (Filters (ExtractUncorrelatedConditions $list (OutputCols $right))) + (Filters (ExtractBoundConditions $list $left)) ) $right $on ) - (Filters (ExtractCorrelatedConditions $list (OutputCols $right))) + (Filters (ExtractUnboundConditions $list $left)) ) # PushSelectIntoJoinRight is symmetric with PushSelectIntoJoinLeft. It pushes @@ -133,7 +133,7 @@ $right:* $on:* ) - $filter:(Filters $list:[ ... $condition:* & ^(IsCorrelated $condition $left) ... ]) + $filter:(Filters $list:[ ... $condition:* & (IsBoundBy $condition $right) ... 
]) ) => (Select @@ -141,11 +141,11 @@ $left (Select $right - (Filters (ExtractUncorrelatedConditions $list (OutputCols $left))) + (Filters (ExtractBoundConditions $list $right)) ) $on ) - (Filters (ExtractCorrelatedConditions $list (OutputCols $left))) + (Filters (ExtractUnboundConditions $list $right)) ) # MergeSelectInnerJoin merges a Select operator with an InnerJoin input by @@ -175,9 +175,8 @@ # PushSelectIntoGroupBy pushes a Select condition below a GroupBy in the case # where it does not reference any of the aggregation columns. This only works -# if there are grouping columns. Otherwise, this is an instance of the "scalar" -# GroupBy, which returns only one row, and which exhibits different behavior if -# the input is empty: +# if this is not an instance of the "scalar" GroupBy, which returns only one +# row, and which exhibits different behavior if the input is empty: # SELECT MAX(y) FROM a # # If "a" is empty, this returns a single row containing a null value. This is @@ -191,19 +190,53 @@ (GroupBy $input:* $aggregations:* - $groupingCols:* & ^(EmptyGroupingCols $groupingCols) + $groupingCols:* & ^(IsScalarGroupBy $groupingCols) ) - (Filters $list:[ ... $condition:* & ^(IsCorrelated $condition $aggregations) ... ]) + (Filters $list:[ ... $condition:* & (IsBoundBy $condition $input) ... ]) ) => (Select (GroupBy (Select $input - (Filters (ExtractUncorrelatedConditions $list (OutputCols $aggregations))) + (Filters (ExtractBoundConditions $list $input)) ) $aggregations $groupingCols ) - (Filters (ExtractCorrelatedConditions $list (OutputCols $aggregations))) + (Filters (ExtractUnboundConditions $list $input)) +) + +# HoistSelectExists extracts existential subqueries from Select filters, +# turning them into semi-joins. This eliminates the subquery, which is often +# expensive to execute and restricts the optimizer's plan choices. +[HoistSelectExists, Normalize] +(Select + $input:* + $filter:(Filters + $list:[ ... 
$exists:(Exists $subquery:* & (HasOuterCols $subquery)) ... ] + ) +) +=> +(SemiJoinApply + $input + $subquery + (Filters (RemoveListItem $list $exists)) +) + +# HoistSelectNotExists extracts NOT EXISTS subqueries from Select filters, +# turning them into anti-joins. This eliminates the subquery, which is often +# expensive to execute and restricts the optimizer's plan choices. +[HoistSelectNotExists, Normalize] +(Select + $input:* + $filter:(Filters + $list:[ ... $exists:(Not (Exists $subquery:* & (HasOuterCols $subquery))) ... ] + ) +) +=> +(AntiJoinApply + (Select $input (Filters (RemoveListItem $list $exists))) + $subquery + (True) ) diff --git a/pkg/sql/opt/norm/testdata/combo b/pkg/sql/opt/norm/testdata/combo index c568772b029a..97039b69491a 100644 --- a/pkg/sql/opt/norm/testdata/combo +++ b/pkg/sql/opt/norm/testdata/combo @@ -1594,6 +1594,7 @@ Final best expression └── projections [outer=(6)] └── variable: column6 [type=decimal, outer=(6)] +# Exploration patterns with varying costs. optsteps SELECT s, x FROM a WHERE s='foo' AND f>100 ---- @@ -1780,3 +1781,213 @@ Final best expression └── projections [outer=(1,4)] ├── variable: a.s [type=string, outer=(4)] └── variable: a.x [type=int, outer=(1)] + +# Decorrelation pattern.
+optsteps +SELECT * FROM a WHERE EXISTS(SELECT * FROM b WHERE z=i) +---- +================================================================================ +Initial expression + Cost: 2200.00 +================================================================================ + select + ├── columns: x:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── keys: (1) weak(3,4) + ├── scan a + │ ├── columns: a.x:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) weak(3,4) + └── exists [type=bool, outer=(2)] + └── select + ├── columns: b.x:6(int!null) b.z:7(int) + ├── outer: (2) + ├── keys: (6) + ├── scan b + │ ├── columns: b.x:6(int!null) b.z:7(int) + │ └── keys: (6) + └── eq [type=bool, outer=(2,7)] + ├── variable: b.z [type=int, outer=(7)] + └── variable: a.i [type=int, outer=(2)] +================================================================================ +EnsureSelectFilters + Cost: 2200.00 +================================================================================ + select + ├── columns: x:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── keys: (1) weak(3,4) + ├── scan a + │ ├── columns: a.x:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) weak(3,4) + └── exists [type=bool, outer=(2)] + └── select + ├── columns: b.x:6(int!null) b.z:7(int) + ├── outer: (2) + ├── keys: (6) + ├── scan b + │ ├── columns: b.x:6(int!null) b.z:7(int) + │ └── keys: (6) + - └── eq [type=bool, outer=(2,7)] + - ├── variable: b.z [type=int, outer=(7)] + - └── variable: a.i [type=int, outer=(2)] + + └── filters [type=bool, outer=(2,7)] + + └── eq [type=bool, outer=(2,7)] + + ├── variable: b.z [type=int, outer=(7)] + + └── variable: a.i [type=int, outer=(2)] +================================================================================ +EnsureSelectFilters + Cost: 2200.00 +================================================================================ + select + ├── columns: x:1(int!null) i:2(int) 
f:3(float) s:4(string) j:5(jsonb) + ├── keys: (1) weak(3,4) + ├── scan a + │ ├── columns: a.x:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) weak(3,4) + - └── exists [type=bool, outer=(2)] + - └── select + - ├── columns: b.x:6(int!null) b.z:7(int) + - ├── outer: (2) + - ├── keys: (6) + - ├── scan b + - │ ├── columns: b.x:6(int!null) b.z:7(int) + - │ └── keys: (6) + - └── filters [type=bool, outer=(2,7)] + - └── eq [type=bool, outer=(2,7)] + - ├── variable: b.z [type=int, outer=(7)] + - └── variable: a.i [type=int, outer=(2)] + + └── filters [type=bool, outer=(2)] + + └── exists [type=bool, outer=(2)] + + └── select + + ├── columns: b.x:6(int!null) b.z:7(int) + + ├── outer: (2) + + ├── keys: (6) + + ├── scan b + + │ ├── columns: b.x:6(int!null) b.z:7(int) + + │ └── keys: (6) + + └── filters [type=bool, outer=(2,7)] + + └── eq [type=bool, outer=(2,7)] + + ├── variable: b.z [type=int, outer=(7)] + + └── variable: a.i [type=int, outer=(2)] +================================================================================ +HoistSelectExists + Cost: 2100.00 +================================================================================ + -select + +semi-join-apply + ├── columns: x:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + - ├── keys: (1) weak(3,4) + ├── scan a + │ ├── columns: a.x:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) weak(3,4) + - └── filters [type=bool, outer=(2)] + - └── exists [type=bool, outer=(2)] + - └── select + - ├── columns: b.x:6(int!null) b.z:7(int) + - ├── outer: (2) + - ├── keys: (6) + - ├── scan b + - │ ├── columns: b.x:6(int!null) b.z:7(int) + - │ └── keys: (6) + - └── filters [type=bool, outer=(2,7)] + - └── eq [type=bool, outer=(2,7)] + - ├── variable: b.z [type=int, outer=(7)] + - └── variable: a.i [type=int, outer=(2)] + + ├── select + + │ ├── columns: b.x:6(int!null) b.z:7(int) + + │ ├── outer: (2) + + │ ├── keys: (6) + + │ ├── scan b + + │ │ ├── columns: 
b.x:6(int!null) b.z:7(int) + + │ │ └── keys: (6) + + │ └── filters [type=bool, outer=(2,7)] + + │ └── eq [type=bool, outer=(2,7)] + + │ ├── variable: b.z [type=int, outer=(7)] + + │ └── variable: a.i [type=int, outer=(2)] + + └── filters [type=bool] +================================================================================ +EliminateEmptyAnd + Cost: 2100.00 +================================================================================ + semi-join-apply + ├── columns: x:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.x:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) weak(3,4) + ├── select + │ ├── columns: b.x:6(int!null) b.z:7(int) + │ ├── outer: (2) + │ ├── keys: (6) + │ ├── scan b + │ │ ├── columns: b.x:6(int!null) b.z:7(int) + │ │ └── keys: (6) + │ └── filters [type=bool, outer=(2,7)] + │ └── eq [type=bool, outer=(2,7)] + │ ├── variable: b.z [type=int, outer=(7)] + │ └── variable: a.i [type=int, outer=(2)] + - └── filters [type=bool] + + └── true [type=bool] +================================================================================ +TryDecorrelateSelect + Cost: 2000.00 +================================================================================ + semi-join-apply + ├── columns: x:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.x:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) weak(3,4) + - ├── select + + ├── scan b + │ ├── columns: b.x:6(int!null) b.z:7(int) + - │ ├── outer: (2) + - │ ├── keys: (6) + - │ ├── scan b + - │ │ ├── columns: b.x:6(int!null) b.z:7(int) + - │ │ └── keys: (6) + - │ └── filters [type=bool, outer=(2,7)] + - │ └── eq [type=bool, outer=(2,7)] + - │ ├── variable: b.z [type=int, outer=(7)] + - │ └── variable: a.i [type=int, outer=(2)] + - └── true [type=bool] + + │ └── keys: (6) + + └── filters [type=bool, outer=(2,7)] + + └── eq [type=bool, outer=(2,7)] + + ├── variable: b.z 
[type=int, outer=(7)] + + └── variable: a.i [type=int, outer=(2)] +================================================================================ +DecorrelateJoin + Cost: 2000.00 +================================================================================ + -semi-join-apply + +semi-join + ├── columns: x:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.x:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) weak(3,4) + ├── scan b + │ ├── columns: b.x:6(int!null) b.z:7(int) + │ └── keys: (6) + └── filters [type=bool, outer=(2,7)] + └── eq [type=bool, outer=(2,7)] + ├── variable: b.z [type=int, outer=(7)] + └── variable: a.i [type=int, outer=(2)] +-------------------------------------------------------------------------------- +GenerateIndexScans (no changes) +-------------------------------------------------------------------------------- +-------------------------------------------------------------------------------- +GenerateIndexScans (no changes) +-------------------------------------------------------------------------------- +================================================================================ +Final best expression + Cost: 2000.00 +================================================================================ + semi-join + ├── columns: x:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.x:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) weak(3,4) + ├── scan b + │ ├── columns: b.x:6(int!null) b.z:7(int) + │ └── keys: (6) + └── filters [type=bool, outer=(2,7)] + └── eq [type=bool, outer=(2,7)] + ├── variable: b.z [type=int, outer=(7)] + └── variable: a.i [type=int, outer=(2)] diff --git a/pkg/sql/opt/norm/testdata/join b/pkg/sql/opt/norm/testdata/join index e04ef73664fe..e10aacf59768 100644 --- a/pkg/sql/opt/norm/testdata/join +++ b/pkg/sql/opt/norm/testdata/join @@ -1,14 +1,14 @@ exec-ddl -CREATE TABLE a (x 
INT PRIMARY KEY, i INT, f FLOAT, s STRING, j JSON) +CREATE TABLE a (k INT PRIMARY KEY, i INT, f FLOAT, s STRING, j JSON) ---- TABLE a - ├── x int not null + ├── k int not null ├── i int ├── f float ├── s string ├── j jsonb └── INDEX primary - └── x int not null + └── k int not null exec-ddl CREATE TABLE t.b (x INT PRIMARY KEY, z INT) @@ -23,19 +23,19 @@ TABLE b # EnsureJoinFiltersAnd # -------------------------------------------------- opt -SELECT * FROM a INNER JOIN b ON a.x=b.x AND b.z10) AND b.z=1 AND a.s='foo' AND a.x=b.x +SELECT * FROM a RIGHT JOIN b ON (a.i<0 OR a.i>10) AND b.z=1 AND a.s='foo' AND a.k=b.x ---- right-join - ├── columns: x:1(int) i:2(int) f:3(float) s:4(string) j:5(jsonb) x:6(int!null) z:7(int) + ├── columns: k:1(int) i:2(int) f:3(float) s:4(string) j:5(jsonb) x:6(int!null) z:7(int) ├── select - │ ├── columns: a.x:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) │ ├── keys: (1) │ ├── scan a - │ │ ├── columns: a.x:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) │ │ └── keys: (1) │ └── filters [type=bool, outer=(2,4), constraints=(/4: [/'foo' - /'foo'])] │ ├── or [type=bool, outer=(2)] @@ -136,45 +136,89 @@ right-join │ ├── variable: b.z [type=int, outer=(7)] │ └── const: 1 [type=int] └── eq [type=bool, outer=(1,6)] - ├── variable: a.x [type=int, outer=(1)] + ├── variable: a.k [type=int, outer=(1)] └── variable: b.x [type=int, outer=(6)] # LEFT JOIN should not push down conditions to left side of join. 
opt -SELECT * FROM a LEFT JOIN b ON a.x=b.x AND a.i=1 +SELECT * FROM a LEFT JOIN b ON a.k=b.x AND a.i=1 ---- left-join - ├── columns: x:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) x:6(int) z:7(int) + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) x:6(int) z:7(int) ├── scan a - │ ├── columns: a.x:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) │ └── keys: (1) ├── scan b │ ├── columns: b.x:6(int!null) b.z:7(int) │ └── keys: (6) └── filters [type=bool, outer=(1,2,6), constraints=(/2: [/1 - /1])] ├── eq [type=bool, outer=(1,6)] - │ ├── variable: a.x [type=int, outer=(1)] + │ ├── variable: a.k [type=int, outer=(1)] │ └── variable: b.x [type=int, outer=(6)] └── eq [type=bool, outer=(2), constraints=(/2: [/1 - /1]; tight)] ├── variable: a.i [type=int, outer=(2)] └── const: 1 [type=int] +# Semi-join case. +opt +SELECT * FROM a WHERE EXISTS(SELECT * FROM b WHERE x=k AND s='foo') +---- +semi-join + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── select + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ ├── keys: (1) + │ ├── scan a + │ │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ │ └── keys: (1) + │ └── filters [type=bool, outer=(4), constraints=(/4: [/'foo' - /'foo']; tight)] + │ └── eq [type=bool, outer=(4), constraints=(/4: [/'foo' - /'foo']; tight)] + │ ├── variable: a.s [type=string, outer=(4)] + │ └── const: 'foo' [type=string] + ├── scan b + │ ├── columns: b.x:6(int!null) b.z:7(int) + │ └── keys: (6) + └── filters [type=bool, outer=(1,6)] + └── eq [type=bool, outer=(1,6)] + ├── variable: b.x [type=int, outer=(6)] + └── variable: a.k [type=int, outer=(1)] + +# Do not push anti-join conditions into left input. 
+opt +SELECT * FROM a WHERE NOT EXISTS(SELECT * FROM b WHERE x=k AND s='foo') +---- +anti-join + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) + ├── scan b + │ ├── columns: b.x:6(int!null) b.z:7(int) + │ └── keys: (6) + └── filters [type=bool, outer=(1,4,6), constraints=(/4: [/'foo' - /'foo'])] + ├── eq [type=bool, outer=(1,6)] + │ ├── variable: b.x [type=int, outer=(6)] + │ └── variable: a.k [type=int, outer=(1)] + └── eq [type=bool, outer=(4), constraints=(/4: [/'foo' - /'foo']; tight)] + ├── variable: a.s [type=string, outer=(4)] + └── const: 'foo' [type=string] + # -------------------------------------------------- # PushFilterIntoJoinRight # -------------------------------------------------- opt -SELECT * FROM b INNER JOIN a ON b.x=a.x AND a.s='foo' +SELECT * FROM b INNER JOIN a ON b.x=a.k AND a.s='foo' ---- inner-join - ├── columns: x:1(int!null) z:2(int) x:3(int!null) i:4(int) f:5(float) s:6(string) j:7(jsonb) + ├── columns: x:1(int!null) z:2(int) k:3(int!null) i:4(int) f:5(float) s:6(string) j:7(jsonb) ├── scan b │ ├── columns: b.x:1(int!null) b.z:2(int) │ └── keys: (1) ├── select - │ ├── columns: a.x:3(int!null) a.i:4(int) a.f:5(float) a.s:6(string) a.j:7(jsonb) + │ ├── columns: a.k:3(int!null) a.i:4(int) a.f:5(float) a.s:6(string) a.j:7(jsonb) │ ├── keys: (3) │ ├── scan a - │ │ ├── columns: a.x:3(int!null) a.i:4(int) a.f:5(float) a.s:6(string) a.j:7(jsonb) + │ │ ├── columns: a.k:3(int!null) a.i:4(int) a.f:5(float) a.s:6(string) a.j:7(jsonb) │ │ └── keys: (3) │ └── filters [type=bool, outer=(6), constraints=(/6: [/'foo' - /'foo']; tight)] │ └── eq [type=bool, outer=(6), constraints=(/6: [/'foo' - /'foo']; tight)] @@ -183,21 +227,21 @@ inner-join └── filters [type=bool, outer=(1,3)] └── eq [type=bool, outer=(1,3)] ├── variable: b.x [type=int, outer=(1)] - └── variable: a.x [type=int, outer=(3)] + └── variable: a.k 
[type=int, outer=(3)] opt -SELECT * FROM b LEFT JOIN a ON (a.i<0 OR a.i>10) AND b.z=1 AND a.s='foo' AND b.x=a.x +SELECT * FROM b LEFT JOIN a ON (a.i<0 OR a.i>10) AND b.z=1 AND a.s='foo' AND b.x=a.k ---- left-join - ├── columns: x:1(int!null) z:2(int) x:3(int) i:4(int) f:5(float) s:6(string) j:7(jsonb) + ├── columns: x:1(int!null) z:2(int) k:3(int) i:4(int) f:5(float) s:6(string) j:7(jsonb) ├── scan b │ ├── columns: b.x:1(int!null) b.z:2(int) │ └── keys: (1) ├── select - │ ├── columns: a.x:3(int!null) a.i:4(int) a.f:5(float) a.s:6(string) a.j:7(jsonb) + │ ├── columns: a.k:3(int!null) a.i:4(int) a.f:5(float) a.s:6(string) a.j:7(jsonb) │ ├── keys: (3) │ ├── scan a - │ │ ├── columns: a.x:3(int!null) a.i:4(int) a.f:5(float) a.s:6(string) a.j:7(jsonb) + │ │ ├── columns: a.k:3(int!null) a.i:4(int) a.f:5(float) a.s:6(string) a.j:7(jsonb) │ │ └── keys: (3) │ └── filters [type=bool, outer=(4,6), constraints=(/6: [/'foo' - /'foo'])] │ ├── or [type=bool, outer=(4)] @@ -216,42 +260,90 @@ left-join │ └── const: 1 [type=int] └── eq [type=bool, outer=(1,3)] ├── variable: b.x [type=int, outer=(1)] - └── variable: a.x [type=int, outer=(3)] + └── variable: a.k [type=int, outer=(3)] # RIGHT JOIN should not push down conditions to right side of join. 
opt -SELECT * FROM b RIGHT JOIN a ON b.x=a.x AND a.i=1 +SELECT * FROM b RIGHT JOIN a ON b.x=a.k AND a.i=1 ---- right-join - ├── columns: x:1(int) z:2(int) x:3(int!null) i:4(int) f:5(float) s:6(string) j:7(jsonb) + ├── columns: x:1(int) z:2(int) k:3(int!null) i:4(int) f:5(float) s:6(string) j:7(jsonb) ├── scan b │ ├── columns: b.x:1(int!null) b.z:2(int) │ └── keys: (1) ├── scan a - │ ├── columns: a.x:3(int!null) a.i:4(int) a.f:5(float) a.s:6(string) a.j:7(jsonb) + │ ├── columns: a.k:3(int!null) a.i:4(int) a.f:5(float) a.s:6(string) a.j:7(jsonb) │ └── keys: (3) └── filters [type=bool, outer=(1,3,4), constraints=(/4: [/1 - /1])] ├── eq [type=bool, outer=(1,3)] │ ├── variable: b.x [type=int, outer=(1)] - │ └── variable: a.x [type=int, outer=(3)] + │ └── variable: a.k [type=int, outer=(3)] └── eq [type=bool, outer=(4), constraints=(/4: [/1 - /1]; tight)] ├── variable: a.i [type=int, outer=(4)] └── const: 1 [type=int] +# Semi-join case. +opt +SELECT * FROM a WHERE EXISTS(SELECT * FROM b WHERE x=k AND z>10) +---- +semi-join + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) + ├── select + │ ├── columns: b.x:6(int!null) b.z:7(int) + │ ├── keys: (6) + │ ├── scan b + │ │ ├── columns: b.x:6(int!null) b.z:7(int) + │ │ └── keys: (6) + │ └── filters [type=bool, outer=(7), constraints=(/7: [/11 - ]; tight)] + │ └── gt [type=bool, outer=(7), constraints=(/7: [/11 - ]; tight)] + │ ├── variable: b.z [type=int, outer=(7)] + │ └── const: 10 [type=int] + └── filters [type=bool, outer=(1,6)] + └── eq [type=bool, outer=(1,6)] + ├── variable: b.x [type=int, outer=(6)] + └── variable: a.k [type=int, outer=(1)] + +# Anti-join case. 
+opt +SELECT * FROM a WHERE NOT EXISTS(SELECT * FROM b WHERE x=k AND z>10) +---- +anti-join + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) + ├── select + │ ├── columns: b.x:6(int!null) b.z:7(int) + │ ├── keys: (6) + │ ├── scan b + │ │ ├── columns: b.x:6(int!null) b.z:7(int) + │ │ └── keys: (6) + │ └── filters [type=bool, outer=(7), constraints=(/7: [/11 - ]; tight)] + │ └── gt [type=bool, outer=(7), constraints=(/7: [/11 - ]; tight)] + │ ├── variable: b.z [type=int, outer=(7)] + │ └── const: 10 [type=int] + └── filters [type=bool, outer=(1,6)] + └── eq [type=bool, outer=(1,6)] + ├── variable: b.x [type=int, outer=(6)] + └── variable: a.k [type=int, outer=(1)] + # -------------------------------------------------- # PushFilterIntoJoinLeft + PushFilterIntoJoinRight # -------------------------------------------------- opt -SELECT * FROM a INNER JOIN b ON a.x=b.x AND a.i=1 AND b.z=1 +SELECT * FROM a INNER JOIN b ON a.k=b.x AND a.i=1 AND b.z=1 ---- inner-join - ├── columns: x:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) x:6(int!null) z:7(int) + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) x:6(int!null) z:7(int) ├── select - │ ├── columns: a.x:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) │ ├── keys: (1) │ ├── scan a - │ │ ├── columns: a.x:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) │ │ └── keys: (1) │ └── filters [type=bool, outer=(2), constraints=(/2: [/1 - /1]; tight)] │ └── eq [type=bool, outer=(2), constraints=(/2: [/1 - /1]; tight)] @@ -269,24 +361,24 @@ inner-join │ └── const: 1 [type=int] └── filters [type=bool, outer=(1,6)] └── eq [type=bool, outer=(1,6)] - ├── variable: a.x [type=int, 
outer=(1)] + ├── variable: a.k [type=int, outer=(1)] └── variable: b.x [type=int, outer=(6)] # FULL JOIN should not push down conditions to either side of join. opt -SELECT * FROM a FULL JOIN b ON a.x=b.x AND a.i=1 AND b.z=1 +SELECT * FROM a FULL JOIN b ON a.k=b.x AND a.i=1 AND b.z=1 ---- full-join - ├── columns: x:1(int) i:2(int) f:3(float) s:4(string) j:5(jsonb) x:6(int) z:7(int) + ├── columns: k:1(int) i:2(int) f:3(float) s:4(string) j:5(jsonb) x:6(int) z:7(int) ├── scan a - │ ├── columns: a.x:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) │ └── keys: (1) ├── scan b │ ├── columns: b.x:6(int!null) b.z:7(int) │ └── keys: (6) └── filters [type=bool, outer=(1,2,6,7), constraints=(/2: [/1 - /1]; /7: [/1 - /1])] ├── eq [type=bool, outer=(1,6)] - │ ├── variable: a.x [type=int, outer=(1)] + │ ├── variable: a.k [type=int, outer=(1)] │ └── variable: b.x [type=int, outer=(6)] ├── eq [type=bool, outer=(2), constraints=(/2: [/1 - /1]; tight)] │ ├── variable: a.i [type=int, outer=(2)] @@ -294,3 +386,201 @@ full-join └── eq [type=bool, outer=(7), constraints=(/7: [/1 - /1]; tight)] ├── variable: b.z [type=int, outer=(7)] └── const: 1 [type=int] + +# Nested semi/anti-join case. 
+opt +SELECT * FROM b +WHERE EXISTS +( + SELECT * FROM a WHERE k=x AND s='foo' AND NOT EXISTS(SELECT * FROM a WHERE i=10 AND z>100) +) +---- +semi-join-apply + ├── columns: x:1(int!null) z:2(int) + ├── scan b + │ ├── columns: b.x:1(int!null) b.z:2(int) + │ └── keys: (1) + ├── anti-join + │ ├── columns: a.k:3(int!null) a.i:4(int) a.f:5(float) a.s:6(string) a.j:7(jsonb) + │ ├── outer: (1,2) + │ ├── select + │ │ ├── columns: a.k:3(int!null) a.i:4(int) a.f:5(float) a.s:6(string) a.j:7(jsonb) + │ │ ├── outer: (1) + │ │ ├── keys: (3) + │ │ ├── scan a + │ │ │ ├── columns: a.k:3(int!null) a.i:4(int) a.f:5(float) a.s:6(string) a.j:7(jsonb) + │ │ │ └── keys: (3) + │ │ └── filters [type=bool, outer=(1,3,6), constraints=(/6: [/'foo' - /'foo'])] + │ │ ├── eq [type=bool, outer=(1,3)] + │ │ │ ├── variable: a.k [type=int, outer=(3)] + │ │ │ └── variable: b.x [type=int, outer=(1)] + │ │ └── eq [type=bool, outer=(6), constraints=(/6: [/'foo' - /'foo']; tight)] + │ │ ├── variable: a.s [type=string, outer=(6)] + │ │ └── const: 'foo' [type=string] + │ ├── select + │ │ ├── columns: a.k:8(int!null) a.i:9(int) a.f:10(float) a.s:11(string) a.j:12(jsonb) + │ │ ├── outer: (2) + │ │ ├── keys: (8) + │ │ ├── scan a + │ │ │ ├── columns: a.k:8(int!null) a.i:9(int) a.f:10(float) a.s:11(string) a.j:12(jsonb) + │ │ │ └── keys: (8) + │ │ └── filters [type=bool, outer=(2,9), constraints=(/2: [/101 - ]; /9: [/10 - /10]; tight)] + │ │ ├── eq [type=bool, outer=(9), constraints=(/9: [/10 - /10]; tight)] + │ │ │ ├── variable: a.i [type=int, outer=(9)] + │ │ │ └── const: 10 [type=int] + │ │ └── gt [type=bool, outer=(2), constraints=(/2: [/101 - ]; tight)] + │ │ ├── variable: b.z [type=int, outer=(2)] + │ │ └── const: 100 [type=int] + │ └── true [type=bool] + └── true [type=bool] + +# -------------------------------------------------- +# DecorrelateJoin +# -------------------------------------------------- +opt +SELECT * FROM a WHERE EXISTS(SELECT * FROM b WHERE x=k) +---- +semi-join + ├── columns: 
k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) + ├── scan b + │ ├── columns: b.x:6(int!null) b.z:7(int) + │ └── keys: (6) + └── filters [type=bool, outer=(1,6)] + └── eq [type=bool, outer=(1,6)] + ├── variable: b.x [type=int, outer=(6)] + └── variable: a.k [type=int, outer=(1)] + +opt +SELECT * FROM a WHERE NOT EXISTS(SELECT * FROM b WHERE x=k) +---- +anti-join + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) + ├── scan b + │ ├── columns: b.x:6(int!null) b.z:7(int) + │ └── keys: (6) + └── filters [type=bool, outer=(1,6)] + └── eq [type=bool, outer=(1,6)] + ├── variable: b.x [type=int, outer=(6)] + └── variable: a.k [type=int, outer=(1)] + +# -------------------------------------------------- +# TryDecorrelateSelect +# TODO: Add test case for inner join once we hoist the Subquery operator. 
+# -------------------------------------------------- +opt +SELECT * FROM a WHERE EXISTS(SELECT * FROM (VALUES (k), (i)) WHERE column1=k) +---- +semi-join-apply + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) + ├── values + │ ├── columns: column1:6(int) + │ ├── outer: (1,2) + │ ├── tuple [type=tuple{int}, outer=(1)] + │ │ └── variable: a.k [type=int, outer=(1)] + │ └── tuple [type=tuple{int}, outer=(2)] + │ └── variable: a.i [type=int, outer=(2)] + └── filters [type=bool, outer=(1,6)] + └── eq [type=bool, outer=(1,6)] + ├── variable: column1 [type=int, outer=(6)] + └── variable: a.k [type=int, outer=(1)] + +opt +SELECT * FROM a WHERE NOT EXISTS(SELECT * FROM (VALUES (k), (i)) WHERE column1=k) +---- +anti-join-apply + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) + ├── values + │ ├── columns: column1:6(int) + │ ├── outer: (1,2) + │ ├── tuple [type=tuple{int}, outer=(1)] + │ │ └── variable: a.k [type=int, outer=(1)] + │ └── tuple [type=tuple{int}, outer=(2)] + │ └── variable: a.i [type=int, outer=(2)] + └── filters [type=bool, outer=(1,6)] + └── eq [type=bool, outer=(1,6)] + ├── variable: column1 [type=int, outer=(6)] + └── variable: a.k [type=int, outer=(1)] + +# Attempt to decorrelate query by pulling up outer select. But since limit query +# cannot be decorrelated, push the outer select back down again (and make sure +# potential rule cycle is detected and handled). 
+opt +SELECT * FROM a WHERE EXISTS(SELECT * FROM (SELECT * FROM b WHERE z=k LIMIT 1) WHERE z=10) +---- +semi-join-apply + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) + ├── select + │ ├── columns: b.x:6(int!null) b.z:7(int) + │ ├── outer: (1) + │ ├── keys: (6) + │ ├── limit + │ │ ├── columns: b.x:6(int!null) b.z:7(int) + │ │ ├── outer: (1) + │ │ ├── keys: (6) + │ │ ├── select + │ │ │ ├── columns: b.x:6(int!null) b.z:7(int) + │ │ │ ├── outer: (1) + │ │ │ ├── keys: (6) + │ │ │ ├── scan b + │ │ │ │ ├── columns: b.x:6(int!null) b.z:7(int) + │ │ │ │ └── keys: (6) + │ │ │ └── filters [type=bool, outer=(1,7)] + │ │ │ └── eq [type=bool, outer=(1,7)] + │ │ │ ├── variable: b.z [type=int, outer=(7)] + │ │ │ └── variable: a.k [type=int, outer=(1)] + │ │ └── const: 1 [type=int] + │ └── filters [type=bool, outer=(7), constraints=(/7: [/10 - /10]; tight)] + │ └── eq [type=bool, outer=(7), constraints=(/7: [/10 - /10]; tight)] + │ ├── variable: b.z [type=int, outer=(7)] + │ └── const: 10 [type=int] + └── true [type=bool] + +# Same as previous, but using anti-join. 
+opt +SELECT * FROM a WHERE NOT EXISTS(SELECT * FROM (SELECT * FROM b WHERE z=k LIMIT 1) WHERE z=10) +---- +anti-join-apply + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) + ├── select + │ ├── columns: b.x:6(int!null) b.z:7(int) + │ ├── outer: (1) + │ ├── keys: (6) + │ ├── limit + │ │ ├── columns: b.x:6(int!null) b.z:7(int) + │ │ ├── outer: (1) + │ │ ├── keys: (6) + │ │ ├── select + │ │ │ ├── columns: b.x:6(int!null) b.z:7(int) + │ │ │ ├── outer: (1) + │ │ │ ├── keys: (6) + │ │ │ ├── scan b + │ │ │ │ ├── columns: b.x:6(int!null) b.z:7(int) + │ │ │ │ └── keys: (6) + │ │ │ └── filters [type=bool, outer=(1,7)] + │ │ │ └── eq [type=bool, outer=(1,7)] + │ │ │ ├── variable: b.z [type=int, outer=(7)] + │ │ │ └── variable: a.k [type=int, outer=(1)] + │ │ └── const: 1 [type=int] + │ └── filters [type=bool, outer=(7), constraints=(/7: [/10 - /10]; tight)] + │ └── eq [type=bool, outer=(7), constraints=(/7: [/10 - /10]; tight)] + │ ├── variable: b.z [type=int, outer=(7)] + │ └── const: 10 [type=int] + └── true [type=bool] diff --git a/pkg/sql/opt/norm/testdata/scalar b/pkg/sql/opt/norm/testdata/scalar index 1c9f1330355b..cf58c7700eed 100644 --- a/pkg/sql/opt/norm/testdata/scalar +++ b/pkg/sql/opt/norm/testdata/scalar @@ -424,3 +424,64 @@ project └── cast: string [type=string, outer=(4)] └── function: length [type=int, outer=(4)] └── variable: a.s [type=string, outer=(4)] + +# -------------------------------------------------- +# EliminateExistsProject +# -------------------------------------------------- +opt +SELECT * FROM a WHERE EXISTS(SELECT i+1, i*k FROM a) +---- +select + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) arr:6(int[]) + ├── keys: (1) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) a.arr:6(int[]) + │ └── keys: (1) + └── filters [type=bool] + └── exists 
[type=bool] + └── scan a + ├── columns: a.k:7(int!null) a.i:8(int) + └── keys: (7) + +# -------------------------------------------------- +# EliminateExistsGroupBy +# -------------------------------------------------- + +# Scalar group by shouldn't get eliminated. +opt +SELECT * FROM a WHERE EXISTS(SELECT MAX(s) FROM a WHERE False) +---- +select + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) arr:6(int[]) + ├── keys: (1) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) a.arr:6(int[]) + │ └── keys: (1) + └── filters [type=bool] + └── exists [type=bool] + └── group-by + ├── columns: column13:13(string) + ├── keys: () + ├── scan a + │ ├── columns: a.s:10(string) + │ └── constraint: /7: contradiction + └── aggregations [outer=(10)] + └── function: max [type=string, outer=(10)] + └── variable: a.s [type=string, outer=(10)] + +# -------------------------------------------------- +# EliminateExistsGroupBy + EliminateExistsProject +# -------------------------------------------------- +opt +SELECT * FROM a WHERE EXISTS(SELECT MAX(s) FROM a GROUP BY i) +---- +select + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) arr:6(int[]) + ├── keys: (1) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) a.arr:6(int[]) + │ └── keys: (1) + └── filters [type=bool] + └── exists [type=bool] + └── scan a + └── columns: a.i:8(int) a.s:10(string) diff --git a/pkg/sql/opt/norm/testdata/select b/pkg/sql/opt/norm/testdata/select index 536dcad31917..daad0c850ae9 100644 --- a/pkg/sql/opt/norm/testdata/select +++ b/pkg/sql/opt/norm/testdata/select @@ -19,6 +19,15 @@ TABLE b └── INDEX primary └── x int not null +exec-ddl +CREATE TABLE t.c (u INT PRIMARY KEY, v INT) +---- +TABLE c + ├── u int not null + ├── v int + └── INDEX primary + └── u int not null + # -------------------------------------------------- # EnsureSelectFiltersAnd # 
-------------------------------------------------- @@ -789,3 +798,209 @@ select └── eq [type=bool, outer=(6), constraints=(/6: [/0 - /0]; tight)] ├── variable: c [type=int, outer=(6)] └── const: 0 [type=int] + +# -------------------------------------------------- +# HoistSelectExists +# -------------------------------------------------- +opt +SELECT * FROM a WHERE EXISTS(SELECT * FROM b WHERE x=k) +---- +semi-join + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) + ├── scan b + │ ├── columns: b.x:6(int!null) b.y:7(int) + │ └── keys: (6) + └── filters [type=bool, outer=(1,6)] + └── eq [type=bool, outer=(1,6)] + ├── variable: b.x [type=int, outer=(6)] + └── variable: a.k [type=int, outer=(1)] + +# Ensure that EXISTS is hoisted even when it is one of several conjuncts. +opt +SELECT * FROM a WHERE s='foo' AND EXISTS(SELECT * FROM b WHERE x=k) AND i>1 +---- +semi-join + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── select + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ ├── keys: (1) + │ ├── scan a + │ │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ │ └── keys: (1) + │ └── filters [type=bool, outer=(2,4), constraints=(/2: [/2 - ]; /4: [/'foo' - /'foo']; tight)] + │ ├── eq [type=bool, outer=(4), constraints=(/4: [/'foo' - /'foo']; tight)] + │ │ ├── variable: a.s [type=string, outer=(4)] + │ │ └── const: 'foo' [type=string] + │ └── gt [type=bool, outer=(2), constraints=(/2: [/2 - ]; tight)] + │ ├── variable: a.i [type=int, outer=(2)] + │ └── const: 1 [type=int] + ├── scan b + │ ├── columns: b.x:6(int!null) b.y:7(int) + │ └── keys: (6) + └── filters [type=bool, outer=(1,6)] + └── eq [type=bool, outer=(1,6)] + ├── variable: b.x [type=int, outer=(6)] + └── variable: a.k [type=int, outer=(1)] + +# Multiple Exists operators in same 
Select list. +opt +SELECT * FROM a WHERE EXISTS(SELECT * FROM b WHERE x=k) AND EXISTS(SELECT * FROM b WHERE x=i) +---- +semi-join + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── semi-join + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ ├── scan a + │ │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ │ └── keys: (1) + │ ├── scan b + │ │ ├── columns: b.x:8(int!null) b.y:9(int) + │ │ └── keys: (8) + │ └── filters [type=bool, outer=(2,8)] + │ └── eq [type=bool, outer=(2,8)] + │ ├── variable: b.x [type=int, outer=(8)] + │ └── variable: a.i [type=int, outer=(2)] + ├── scan b + │ ├── columns: b.x:6(int!null) b.y:7(int) + │ └── keys: (6) + └── filters [type=bool, outer=(1,6)] + └── eq [type=bool, outer=(1,6)] + ├── variable: b.x [type=int, outer=(6)] + └── variable: a.k [type=int, outer=(1)] + +# Don't hoist uncorrelated subqueries. +opt +SELECT * FROM a WHERE EXISTS(SELECT * FROM b) +---- +select + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── keys: (1) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) + └── filters [type=bool] + └── exists [type=bool] + └── scan b + ├── columns: b.x:6(int!null) b.y:7(int) + └── keys: (6) + +# Hoist nested EXISTS. 
+opt +SELECT * FROM a WHERE EXISTS(SELECT * FROM b WHERE EXISTS (SELECT * FROM c WHERE x=u) AND x=k) +---- +semi-join-apply + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) + ├── semi-join + │ ├── columns: b.x:6(int!null) b.y:7(int) + │ ├── outer: (1) + │ ├── scan b + │ │ ├── columns: b.x:6(int!null) b.y:7(int) + │ │ └── keys: (6) + │ ├── scan c + │ │ ├── columns: c.u:8(int!null) c.v:9(int) + │ │ └── keys: (8) + │ └── filters [type=bool, outer=(1,6,8)] + │ ├── eq [type=bool, outer=(1,6)] + │ │ ├── variable: b.x [type=int, outer=(6)] + │ │ └── variable: a.k [type=int, outer=(1)] + │ └── eq [type=bool, outer=(6,8)] + │ ├── variable: b.x [type=int, outer=(6)] + │ └── variable: c.u [type=int, outer=(8)] + └── true [type=bool] + +# -------------------------------------------------- +# HoistSelectNotExists +# -------------------------------------------------- +opt +SELECT * FROM a WHERE NOT EXISTS(SELECT * FROM b WHERE x=k) +---- +anti-join + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) + ├── scan b + │ ├── columns: b.x:6(int!null) b.y:7(int) + │ └── keys: (6) + └── filters [type=bool, outer=(1,6)] + └── eq [type=bool, outer=(1,6)] + ├── variable: b.x [type=int, outer=(6)] + └── variable: a.k [type=int, outer=(1)] + +# Ensure that NOT EXISTS is hoisted even when one of several conjuncts. 
+opt +SELECT * FROM a WHERE s='foo' AND NOT EXISTS(SELECT * FROM b WHERE x=k) AND i>1 +---- +anti-join + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── select + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ ├── keys: (1) + │ ├── scan a + │ │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ │ └── keys: (1) + │ └── filters [type=bool, outer=(2,4), constraints=(/2: [/2 - ]; /4: [/'foo' - /'foo']; tight)] + │ ├── eq [type=bool, outer=(4), constraints=(/4: [/'foo' - /'foo']; tight)] + │ │ ├── variable: a.s [type=string, outer=(4)] + │ │ └── const: 'foo' [type=string] + │ └── gt [type=bool, outer=(2), constraints=(/2: [/2 - ]; tight)] + │ ├── variable: a.i [type=int, outer=(2)] + │ └── const: 1 [type=int] + ├── scan b + │ ├── columns: b.x:6(int!null) b.y:7(int) + │ └── keys: (6) + └── filters [type=bool, outer=(1,6)] + └── eq [type=bool, outer=(1,6)] + ├── variable: b.x [type=int, outer=(6)] + └── variable: a.k [type=int, outer=(1)] + +# Multiple Not Exists operators in same Select list. 
+opt +SELECT * +FROM a +WHERE NOT EXISTS(SELECT * FROM b WHERE x=k) AND NOT EXISTS(SELECT * FROM b WHERE x=i) +---- +anti-join + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── anti-join + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ ├── scan a + │ │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ │ └── keys: (1) + │ ├── scan b + │ │ ├── columns: b.x:8(int!null) b.y:9(int) + │ │ └── keys: (8) + │ └── filters [type=bool, outer=(2,8)] + │ └── eq [type=bool, outer=(2,8)] + │ ├── variable: b.x [type=int, outer=(8)] + │ └── variable: a.i [type=int, outer=(2)] + ├── scan b + │ ├── columns: b.x:6(int!null) b.y:7(int) + │ └── keys: (6) + └── filters [type=bool, outer=(1,6)] + └── eq [type=bool, outer=(1,6)] + ├── variable: b.x [type=int, outer=(6)] + └── variable: a.k [type=int, outer=(1)] + +# Don't hoist uncorrelated subqueries. +opt +SELECT * FROM a WHERE NOT EXISTS(SELECT * FROM b) +---- +select + ├── columns: k:1(int!null) i:2(int) f:3(float) s:4(string) j:5(jsonb) + ├── keys: (1) + ├── scan a + │ ├── columns: a.k:1(int!null) a.i:2(int) a.f:3(float) a.s:4(string) a.j:5(jsonb) + │ └── keys: (1) + └── filters [type=bool] + └── not [type=bool] + └── exists [type=bool] + └── scan b + ├── columns: b.x:6(int!null) b.y:7(int) + └── keys: (6) diff --git a/pkg/sql/opt/ops/scalar.opt b/pkg/sql/opt/ops/scalar.opt index 442d606773e8..2b65017b50a0 100644 --- a/pkg/sql/opt/ops/scalar.opt +++ b/pkg/sql/opt/ops/scalar.opt @@ -7,48 +7,62 @@ # Scalar - All operators in this file are marked with the Scalar tag, so they # can be easily distinguished from Relational and Enforcer operators. -# Subquery is a subquery in a single-row context such as -# `SELECT 1 = (SELECT 1)` or `SELECT (1, 'a') = (SELECT 1, 'a')`. -# In a single-row context, the outer query is only valid if the subquery -# returns at most one row. +# Subquery is a subquery in a single-row context. 
Here are some examples: # -# Subqueries in a multi-row context such as -# `SELECT 1 IN (SELECT c FROM t)` or `SELECT (1, 'a') IN (SELECT 1, 'a')` -# can be transformed to a single row context using the Any operator. (Note that -# this is different from the SQL ANY operator. See the comment above the Any -# operator for more details.) +# SELECT 1 = (SELECT 1) +# SELECT (1, 'a') = (SELECT 1, 'a') # -# We use the following transformations: -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -# `<var> IN (<subquery>)` -# ==> `Any(SELECT <var> = x FROM (<subquery>) AS q(x))` -# -# `<var> NOT IN (<subquery>)` -# ==> `NOT Any(SELECT <var> = x FROM (<subquery>) AS q(x))` -# -# `<var> <comp> {SOME|ANY}(<subquery>)` -# ==> `Any(SELECT <var> <comp> x FROM (<subquery>) AS q(x))` -# -# `<var> <comp> ALL(<subquery>)` -# ==> `NOT Any(SELECT NOT(<var> <comp> x) FROM (<subquery>) AS q(x))` -# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +# In a single-row context, the outer query is only valid if the subquery returns +# at most one row. Subqueries in a multi-row context can be transformed to a +# single-row context using the Any operator. See the comment above the Any +# operator for more details. # # The Input field contains the subquery itself, which should be wrapped in a # Max1Row operator to enforce that the subquery can return at most one row # (Max1Row may be removed by the optimizer later if it can determine statically -# that the subquery will always return at most one row). The Projection field -# contains a single column representing the output of the subquery. For -# example, `(SELECT 1, 'a')` would be represented by the following structure: +# that the subquery will always return at most one row). In addition, the +# subquery must project exactly one output column. If the subquery returns one +# row, then that column is bound to the single column value in that row. If the +# subquery returns zero rows, then that column is bound to NULL. +# +# The Projection field contains a single expression that represents the output +# of the Subquery operator.
The expression can be of arbitrary complexity, and +# can depend on the output column of the Input expression by using a Variable +# operator. For example, `(SELECT 1, 'a')` would be represented by the following +# structure: +# +# (Subquery +# (Max1Row +# (Project (Values (Tuple)) (Projections (Tuple (Const 1) (Const 'a')))) +# ) +# (Variable 3) +# ) +# +# Here Variable 3 refers to the projection from the Input expression, which is +# (Tuple (Const 1) (Const 'a')). Here is an example with a more complex +# Projection field, in which the Subquery evaluates to non-NULL even though its +# Input expression returns zero rows: +# +# (Subquery +# (Select (Scan a) (False)) +# (IsNull (Variable 1)) +# ) +# +# Since the subquery returns zero rows, (Variable 1) is bound to NULL, and so +# the IsNull operator returns True, which then becomes the final output of the +# Subquery operator. It is equivalent to this formulation: # -# (Subquery -# (Max1Row -# (Project (Values (Tuple)) (Projections (Tuple (Const 1) (Const 'a')))) +# (IsNull +# (Subquery +# (Select (Scan a) (False)) +# (Variable 1) +# ) # ) -# (Variable 3) -# ) # -# Here Variable 3 refers to the projection from the Input, -# (Tuple (Const 1) (Const 'a')). +# These behaviors may seem unnecessary or arbitrary at first glance, but they're +# designed to allow transformation rules to easily "bubble up" a subquery to the +# root of a scalar expression tree, so that it can then be turned into one of +# the JoinApply operators. [Scalar] define Subquery { Input Expr @@ -58,7 +72,23 @@ define Subquery { # Any is a special operator that does not exist in SQL. 
However, it is very # similar to the SQL ANY, and can be converted to the SQL ANY operator using # the following transformation: -# `Any(<subquery>)` ==> `True = ANY(<subquery>)` +# +# Any(<subquery>) ==> True = ANY(<subquery>) +# +# The following transformations translate from various SQL operators into the +# Any operator: +# +# <var> IN (<subquery>) +# ==> Any(SELECT <var> = x FROM (<subquery>) AS q(x)) +# +# <var> NOT IN (<subquery>) +# ==> NOT Any(SELECT <var> = x FROM (<subquery>) AS q(x)) +# +# <var> <comp> {SOME|ANY}(<subquery>) +# ==> Any(SELECT <var> <comp> x FROM (<subquery>) AS q(x)) +# +# <var> <comp> ALL(<subquery>) +# ==> NOT Any(SELECT NOT(<var> <comp> x) FROM (<subquery>) AS q(x)) # # Any expects the subquery to return a single boolean column. The semantics # are equivalent to the SQL ANY expression above on the right: Any returns true diff --git a/pkg/sql/opt/rule_name_string.go b/pkg/sql/opt/rule_name_string.go index bf6a99b80692..d1b0bd80371c 100644 --- a/pkg/sql/opt/rule_name_string.go +++ b/pkg/sql/opt/rule_name_string.go @@ -4,9 +4,9 @@ package opt import "strconv" -const _RuleName_name = "InvalidRuleNameNumManualRuleNamesEliminateEmptyAndEliminateEmptyOrEliminateSingletonAndOrSimplifyAndSimplifyOrSimplifyFiltersFoldNullAndOrNegateComparisonEliminateNotNegateAndNegateOrCommuteVarInequalityCommuteConstInequalityNormalizeCmpPlusConstNormalizeCmpMinusConstNormalizeCmpConstMinusNormalizeTupleEqualityFoldNullComparisonLeftFoldNullComparisonRightFoldIsNullFoldNonNullIsNullFoldIsNotNullFoldNonNullIsNotNullCommuteNullIsEliminateDistinctEnsureJoinFiltersAndEnsureJoinFiltersPushFilterIntoJoinLeftPushFilterIntoJoinRightPushLimitIntoProjectPushOffsetIntoProjectFoldPlusZeroFoldZeroPlusFoldMinusZeroFoldMultOneFoldOneMultFoldDivOneInvertMinusEliminateUnaryMinusEliminateProjectEliminateProjectProjectFilterUnusedProjectColsFilterUnusedScanColsFilterUnusedSelectColsFilterUnusedLimitColsFilterUnusedOffsetColsFilterUnusedJoinLeftColsFilterUnusedJoinRightColsFilterUnusedAggColsFilterUnusedGroupByColsFilterUnusedValueColsCommuteVarCommuteConstEliminateCoalesceSimplifyCoalesceEliminateCastFoldNullCastFoldNullUnaryFoldNullBinaryLeftFoldNullBinaryRightFoldNul
lInNonEmptyFoldNullInEmptyFoldNullNotInEmptyNormalizeInConstFoldInNullEnsureSelectFiltersAndEnsureSelectFiltersEliminateSelectMergeSelectsPushSelectIntoProjectPushSelectIntoJoinLeftPushSelectIntoJoinRightMergeSelectInnerJoinPushSelectIntoGroupByPushLimitIntoScanGenerateIndexScansConstrainScanNumRuleNames" +const _RuleName_name = "InvalidRuleNameNumManualRuleNamesEliminateEmptyAndEliminateEmptyOrEliminateSingletonAndOrSimplifyAndSimplifyOrSimplifyFiltersFoldNullAndOrNegateComparisonEliminateNotNegateAndNegateOrCommuteVarInequalityCommuteConstInequalityNormalizeCmpPlusConstNormalizeCmpMinusConstNormalizeCmpConstMinusNormalizeTupleEqualityFoldNullComparisonLeftFoldNullComparisonRightFoldIsNullFoldNonNullIsNullFoldIsNotNullFoldNonNullIsNotNullCommuteNullIsEliminateDistinctEnsureJoinFiltersAndEnsureJoinFiltersPushFilterIntoJoinLeftPushFilterIntoJoinRightDecorrelateJoinTryDecorrelateSelectPushLimitIntoProjectPushOffsetIntoProjectFoldPlusZeroFoldZeroPlusFoldMinusZeroFoldMultOneFoldOneMultFoldDivOneInvertMinusEliminateUnaryMinusEliminateProjectEliminateProjectProjectFilterUnusedProjectColsFilterUnusedScanColsFilterUnusedSelectColsFilterUnusedLimitColsFilterUnusedOffsetColsFilterUnusedJoinLeftColsFilterUnusedJoinRightColsFilterUnusedAggColsFilterUnusedGroupByColsFilterUnusedValueColsCommuteVarCommuteConstEliminateCoalesceSimplifyCoalesceEliminateCastFoldNullCastFoldNullUnaryFoldNullBinaryLeftFoldNullBinaryRightFoldNullInNonEmptyFoldNullInEmptyFoldNullNotInEmptyNormalizeInConstFoldInNullEliminateExistsProjectEliminateExistsGroupByEnsureSelectFiltersAndEnsureSelectFiltersEliminateSelectMergeSelectsPushSelectIntoProjectPushSelectIntoJoinLeftPushSelectIntoJoinRightMergeSelectInnerJoinPushSelectIntoGroupByHoistSelectExistsHoistSelectNotExistsPushLimitIntoScanGenerateIndexScansConstrainScanNumRuleNames" -var _RuleName_index = [...]uint16{0, 15, 33, 50, 66, 89, 100, 110, 125, 138, 154, 166, 175, 183, 203, 225, 246, 268, 290, 312, 334, 357, 367, 384, 397, 417, 430, 447, 467, 484, 
506, 529, 549, 570, 582, 594, 607, 618, 629, 639, 650, 669, 685, 708, 731, 751, 773, 794, 816, 840, 865, 884, 907, 928, 938, 950, 967, 983, 996, 1008, 1021, 1039, 1058, 1076, 1091, 1109, 1125, 1135, 1157, 1176, 1191, 1203, 1224, 1246, 1269, 1289, 1310, 1327, 1345, 1358, 1370} +var _RuleName_index = [...]uint16{0, 15, 33, 50, 66, 89, 100, 110, 125, 138, 154, 166, 175, 183, 203, 225, 246, 268, 290, 312, 334, 357, 367, 384, 397, 417, 430, 447, 467, 484, 506, 529, 544, 564, 584, 605, 617, 629, 642, 653, 664, 674, 685, 704, 720, 743, 766, 786, 808, 829, 851, 875, 900, 919, 942, 963, 973, 985, 1002, 1018, 1031, 1043, 1056, 1074, 1093, 1111, 1126, 1144, 1160, 1170, 1192, 1214, 1236, 1255, 1270, 1282, 1303, 1325, 1348, 1368, 1389, 1406, 1426, 1443, 1461, 1474, 1486} func (i RuleName) String() string { if i >= RuleName(len(_RuleName_index)-1) {