Skip to content

Commit

Permalink
opt: fix functional deps and stats for WithScanExpr
Browse files Browse the repository at this point in the history
This commit fixes the functional dependencies and stats calculation
for WithScanExpr so they use the output columns rather than the input
columns.

Fixes #40296

Release note: None
  • Loading branch information
rytaft committed Sep 9, 2019
1 parent 746d213 commit e82388f
Show file tree
Hide file tree
Showing 10 changed files with 263 additions and 119 deletions.
12 changes: 7 additions & 5 deletions pkg/sql/opt/memo/logical_props_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -743,9 +743,7 @@ func (b *logicalPropsBuilder) buildWithProps(with *WithExpr, rel *props.Relation

// Statistics
// ----------
if !b.disableStats {
b.sb.statsFromChild(with, 1)
}
// Passed through from the call above to b.buildProps.
}

func (b *logicalPropsBuilder) buildWithScanProps(ref *WithScanExpr, rel *props.Relational) {
Expand Down Expand Up @@ -780,16 +778,20 @@ func (b *logicalPropsBuilder) buildWithScanProps(ref *WithScanExpr, rel *props.R
rel.FuncDeps = props.FuncDepSet{}
rel.FuncDeps.CopyFrom(&ref.BindingProps.FuncDeps)
for i := range ref.InCols {
rel.FuncDeps.AddSynthesizedCol(opt.MakeColSet(ref.InCols[i]), ref.OutCols[i])
rel.FuncDeps.AddEquivalency(ref.InCols[i], ref.OutCols[i])
}
rel.FuncDeps.ProjectCols(ref.OutCols.ToSet())

// Cardinality
// -----------
// Copied from the referenced expression.

// Statistics
// ----------
// Copied from the referenced expression.
rel.Stats = props.Statistics{}
if !b.disableStats {
b.sb.buildWithScan(ref, rel)
}
}

func (b *logicalPropsBuilder) buildExplainProps(explain *ExplainExpr, rel *props.Relational) {
Expand Down
49 changes: 42 additions & 7 deletions pkg/sql/opt/memo/statistics_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,9 @@ func (sb *statisticsBuilder) availabilityFromInput(e RelExpr) bool {
case *ZigzagJoinExpr:
ensureZigzagJoinInputProps(t, sb)
return t.leftProps.Stats.Available

case *WithScanExpr:
return t.BindingProps.Stats.Available
}

available := true
Expand Down Expand Up @@ -366,6 +369,9 @@ func (sb *statisticsBuilder) colStat(colSet opt.ColSet, e RelExpr) *props.Column
case opt.ProjectSetOp:
return sb.colStatProjectSet(colSet, e.(*ProjectSetExpr))

case opt.WithScanOp:
return sb.colStatWithScan(colSet, e.(*WithScanExpr))

case opt.InsertOp, opt.UpdateOp, opt.UpsertOp, opt.DeleteOp:
return sb.colStatMutation(colSet, e)

Expand All @@ -379,13 +385,6 @@ func (sb *statisticsBuilder) colStat(colSet opt.ColSet, e RelExpr) *props.Column
case opt.WithOp:
return sb.colStat(colSet, e.Child(1).(RelExpr))

case opt.WithScanOp:
// This is tricky, since if we deferred to the expression being referenced,
// the computation of stats for a WithScan would depend on something
// outside of the expression itself. Just call it unknown for now.
// TODO(justin): find a real solution for this.
return sb.colStatUnknown(colSet, e.Relational())

case opt.FakeRelOp:
panic(errors.AssertionFailedf("FakeRelOp does not contain col stat for %v", colSet))
}
Expand Down Expand Up @@ -2102,6 +2101,42 @@ func (sb *statisticsBuilder) colStatProjectSet(
return colStat
}

// +----------+
// | WithScan |
// +----------+

// buildWithScan populates the statistics of a WithScan expression. The row
// count is taken from the statistics stored in the expression's BindingProps,
// and stats availability is determined by availabilityFromInput.
func (sb *statisticsBuilder) buildWithScan(withScan *WithScanExpr, relProps *props.Relational) {
	stats := &relProps.Stats
	if stats.Init(relProps) {
		// Init reported zero cardinality, so there is nothing left to compute.
		return
	}

	stats.Available = sb.availabilityFromInput(withScan)

	// The row count is carried over unchanged from the binding's statistics.
	stats.RowCount = withScan.BindingProps.Stats.RowCount
	sb.finalizeFromCardinality(relProps)
}

// colStatWithScan returns the column statistic for colSet on a WithScan
// expression. The requested columns are translated from the WithScan's OutCols
// to its InCols, a statistic for the translated set is derived from the
// BindingProps, and its distinct and null counts are copied into a new
// statistic registered under colSet in the WithScan's own stats.
func (sb *statisticsBuilder) colStatWithScan(
	colSet opt.ColSet, withScan *WithScanExpr,
) *props.ColumnStatistic {
	outStats := &withScan.Relational().Stats
	bindingProps := withScan.BindingProps

	// Express the requested column set in terms of the binding's columns.
	bindingCols := translateColSet(colSet, withScan.OutCols, withScan.InCols)

	// TODO(rytaft): This would be more accurate if we could access the WithExpr
	// itself.
	bindingStat := sb.colStatLeaf(
		bindingCols,
		&bindingProps.Stats,
		&bindingProps.FuncDeps,
		bindingProps.NotNullCols,
	)

	// Register the statistic under the output columns and copy the counts over.
	outStat, _ := outStats.ColStats.Add(colSet)
	outStat.DistinctCount = bindingStat.DistinctCount
	outStat.NullCount = bindingStat.NullCount
	sb.finalizeFromRowCount(outStat, outStats.RowCount)
	return outStat
}

// +--------------------------------+
// | Insert, Update, Upsert, Delete |
// +--------------------------------+
Expand Down
20 changes: 10 additions & 10 deletions pkg/sql/opt/memo/testdata/logprops/with
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ WITH foo AS (SELECT * FROM xy) SELECT * FROM foo
----
with &1 (foo)
├── columns: x:3(int!null) y:4(int)
├── key: (1)
├── fd: (1)-->(2,3), (2)-->(4)
├── key: (3)
├── fd: (3)-->(4)
├── scan xy
│ ├── columns: xy.x:1(int!null) xy.y:2(int)
│ ├── key: (1)
Expand All @@ -20,8 +20,8 @@ with &1 (foo)
├── mapping:
│ ├── xy.x:1(int) => x:3(int)
│ └── xy.y:2(int) => y:4(int)
├── key: (1)
└── fd: (1)-->(2,3), (2)-->(4)
├── key: (3)
└── fd: (3)-->(4)

# Side effects should be propagated up to the top-level from the Binding side
# of a WITH.
Expand All @@ -33,7 +33,7 @@ with &1 (foo)
├── cardinality: [1 - 1]
├── side-effects
├── key: ()
├── fd: ()-->(1), (1)-->(2)
├── fd: ()-->(2)
├── project
│ ├── columns: "?column?":1(decimal)
│ ├── cardinality: [1 - 1]
Expand All @@ -55,7 +55,7 @@ with &1 (foo)
│ └── "?column?":1(decimal) => "?column?":2(decimal)
├── cardinality: [1 - 1]
├── key: ()
└── fd: ()-->(1), (1)-->(2)
└── fd: ()-->(2)

# Side effects should be propagated up to the top-level from the Input side of
# a With.
Expand Down Expand Up @@ -93,7 +93,7 @@ with &1 (foo)
│ │ └── "?column?":1(int) => "?column?":2(int)
│ ├── cardinality: [1 - 1]
│ ├── key: ()
│ └── fd: ()-->(1), (1)-->(2)
│ └── fd: ()-->(2)
└── projections
└── div [type=decimal, side-effects]
├── const: 1 [type=int]
Expand Down Expand Up @@ -134,7 +134,7 @@ with &1 (foo)
│ │ └── int8:1(int) => int8:2(int)
│ ├── cardinality: [1 - 1]
│ ├── key: ()
│ └── fd: ()-->(1), (1)-->(2)
│ └── fd: ()-->(2)
└── projections
└── const: 1 [type=int]

Expand Down Expand Up @@ -162,7 +162,7 @@ inner-join-apply
│ ├── outer: (1)
│ ├── cardinality: [1 - 1]
│ ├── key: ()
│ ├── fd: ()-->(2), (2)-->(3)
│ ├── fd: ()-->(3)
│ ├── project
│ │ ├── columns: "?column?":2(int)
│ │ ├── outer: (1)
Expand All @@ -184,5 +184,5 @@ inner-join-apply
│ │ └── "?column?":2(int) => "?column?":3(int)
│ ├── cardinality: [1 - 1]
│ ├── key: ()
│ └── fd: ()-->(2), (2)-->(3)
│ └── fd: ()-->(3)
└── filters (true)
15 changes: 7 additions & 8 deletions pkg/sql/opt/memo/testdata/stats/groupby
Original file line number Diff line number Diff line change
Expand Up @@ -468,46 +468,45 @@ GROUP BY q.b
with &1 (q)
├── columns: "?column?":6(int!null)
├── cardinality: [0 - 3]
├── stats: [rows=1]
├── stats: [rows=0.27]
├── fd: ()-->(6)
├── values
│ ├── columns: column1:1(bool!null) column2:2(int)
│ ├── cardinality: [3 - 3]
│ ├── stats: [rows=3]
│ ├── stats: [rows=3, distinct(1)=0.3, null(1)=0, distinct(2)=0.3, null(2)=0.03]
│ ├── (true, NULL) [type=tuple{bool, int}]
│ ├── (false, NULL) [type=tuple{bool, int}]
│ └── (true, 5) [type=tuple{bool, int}]
└── project
├── columns: "?column?":6(int!null)
├── cardinality: [0 - 3]
├── stats: [rows=1]
├── stats: [rows=0.27]
├── fd: ()-->(6)
├── select
│ ├── columns: b:4(int) bool_or:5(bool!null)
│ ├── cardinality: [0 - 3]
│ ├── stats: [rows=1, distinct(5)=1, null(5)=0]
│ ├── stats: [rows=0.27, distinct(5)=0.27, null(5)=0]
│ ├── key: (4)
│ ├── fd: ()-->(5)
│ ├── group-by
│ │ ├── columns: b:4(int) bool_or:5(bool)
│ │ ├── grouping columns: b:4(int)
│ │ ├── cardinality: [0 - 3]
│ │ ├── stats: [rows=1, distinct(4)=1, null(4)=0, distinct(5)=1, null(5)=0]
│ │ ├── stats: [rows=0.3, distinct(4)=0.3, null(4)=0.03, distinct(5)=0.3, null(5)=0.03]
│ │ ├── key: (4)
│ │ ├── fd: (4)-->(5)
│ │ ├── select
│ │ │ ├── columns: a:3(bool!null) b:4(int)
│ │ │ ├── cardinality: [0 - 3]
│ │ │ ├── stats: [rows=1, distinct(3)=1, null(3)=0, distinct(4)=1, null(4)=0]
│ │ │ ├── stats: [rows=3, distinct(3)=0.3, null(3)=0, distinct(4)=0.3, null(4)=0.03]
│ │ │ ├── fd: ()-->(3)
│ │ │ ├── with-scan &1 (q)
│ │ │ │ ├── columns: a:3(bool!null) b:4(int)
│ │ │ │ ├── mapping:
│ │ │ │ │ ├── column1:1(bool) => a:3(bool)
│ │ │ │ │ └── column2:2(int) => b:4(int)
│ │ │ │ ├── cardinality: [3 - 3]
│ │ │ │ ├── stats: [rows=3]
│ │ │ │ └── fd: (1)-->(3), (2)-->(4)
│ │ │ │ └── stats: [rows=3, distinct(3)=0.3, null(3)=0, distinct(4)=0.3, null(4)=0.03]
│ │ │ └── filters
│ │ │ └── variable: a [type=bool, outer=(3), constraints=(/3: [/true - /true]; tight), fd=()-->(3)]
│ │ └── aggregations
Expand Down
11 changes: 5 additions & 6 deletions pkg/sql/opt/memo/testdata/stats/scan
Original file line number Diff line number Diff line change
Expand Up @@ -542,11 +542,11 @@ WHERE
----
with &1 (subq)
├── columns: "?column?":26(int!null)
├── stats: [rows=1]
├── stats: [rows=165000]
├── fd: ()-->(26)
├── project
│ ├── columns: col1:23(bool) tab1.g:18(int4!null)
│ ├── stats: [rows=333333.333]
│ ├── stats: [rows=333333.333, distinct(18)=33333.3333, null(18)=0, distinct(23)=2, null(23)=3333.33333]
│ ├── inner-join (hash)
│ │ ├── columns: tab0.e:5(varchar) tab0.f:6("char") tab0.h:8(varchar) tab0.j:10(float!null) tab1.e:16(varchar) tab1.f:17("char") tab1.g:18(int4!null) tab1.j:21(float!null)
│ │ ├── stats: [rows=333333.333, distinct(10)=100, null(10)=0, distinct(18)=100, null(18)=0, distinct(21)=100, null(21)=0]
Expand All @@ -562,19 +562,18 @@ with &1 (subq)
│ └── CASE WHEN ilike_escape(regexp_replace(tab0.h, tab1.e, tab0.f, tab0.e::STRING), tab1.f, '') THEN true ELSE false END [type=bool, outer=(5,6,8,16,17)]
└── project
├── columns: "?column?":26(int!null)
├── stats: [rows=1]
├── stats: [rows=165000]
├── fd: ()-->(26)
├── select
│ ├── columns: col0:24(int4!null) col1:25(bool!null)
│ ├── stats: [rows=1, distinct(24)=1, null(24)=0, distinct(25)=1, null(25)=0]
│ ├── stats: [rows=165000, distinct(24)=33300.7812, null(24)=0, distinct(25)=1, null(25)=0]
│ ├── fd: ()-->(25)
│ ├── with-scan &1 (subq)
│ │ ├── columns: col0:24(int4!null) col1:25(bool)
│ │ ├── mapping:
│ │ │ ├── tab1.g:18(int4) => col0:24(int4)
│ │ │ └── col1:23(bool) => col1:25(bool)
│ │ ├── stats: [rows=333333.333]
│ │ └── fd: (18)-->(24), (23)-->(25)
│ │ └── stats: [rows=333333.333, distinct(24)=33333.3333, null(24)=0, distinct(25)=2, null(25)=3333.33333]
│ └── filters
│ └── variable: col1 [type=bool, outer=(25), constraints=(/25: [/true - /true]; tight), fd=()-->(25)]
└── projections
Expand Down
9 changes: 4 additions & 5 deletions pkg/sql/opt/memo/testdata/stats/select
Original file line number Diff line number Diff line change
Expand Up @@ -1376,11 +1376,11 @@ SELECT x FROM t WHERE x
----
with &1 (t)
├── columns: x:6(bool!null)
├── stats: [rows=1, distinct(6)=1, null(6)=0]
├── stats: [rows=1.98e+20, distinct(6)=1, null(6)=0]
├── fd: ()-->(6)
├── project
│ ├── columns: x:5(bool)
│ ├── stats: [rows=4e+20]
│ ├── stats: [rows=4e+20, distinct(5)=2, null(5)=4e+18]
│ ├── left-join (hash)
│ │ ├── columns: t1.x:1(bool) t2.x:3(bool)
│ │ ├── stats: [rows=4e+20]
Expand All @@ -1395,14 +1395,13 @@ with &1 (t)
│ └── (t1.x::INT8 << 5533)::BOOL OR t2.x [type=bool, outer=(1,3)]
└── select
├── columns: x:6(bool!null)
├── stats: [rows=1, distinct(6)=1, null(6)=0]
├── stats: [rows=1.98e+20, distinct(6)=1, null(6)=0]
├── fd: ()-->(6)
├── with-scan &1 (t)
│ ├── columns: x:6(bool)
│ ├── mapping:
│ │ └── x:5(bool) => x:6(bool)
│ ├── stats: [rows=4e+20]
│ └── fd: (5)-->(6)
│ └── stats: [rows=4e+20, distinct(6)=2, null(6)=4e+18]
└── filters
└── variable: x [type=bool, outer=(6), constraints=(/6: [/true - /true]; tight), fd=()-->(6)]

Expand Down
Loading

0 comments on commit e82388f

Please sign in to comment.