From 55ac430700da1c925d678338822d09db993da368 Mon Sep 17 00:00:00 2001 From: Justin Jaffray Date: Wed, 13 Jun 2018 17:07:34 -0400 Subject: [PATCH] opt: generate spans for IN filters This code is largely based off of makeSpansForTupleIn, though I wasn't able to reuse much of it since the context is slightly different. Also fix up a CREATE STATISTICS statement and modify a TPCC query. Release note: None --- .../exec/execbuilder/testdata/select_index | 26 ++-- pkg/sql/opt/memo/constraint_builder.go | 104 +++++++++++++- .../opt/memo/testdata/logprops/constraints | 129 ++++++++++++++++++ pkg/sql/opt/optbuilder/testdata/where | 8 +- pkg/sql/opt/xform/testdata/external/tpcc | 68 +++++---- 5 files changed, 276 insertions(+), 59 deletions(-) diff --git a/pkg/sql/opt/exec/execbuilder/testdata/select_index b/pkg/sql/opt/exec/execbuilder/testdata/select_index index 03678030ee13..acdea2f5cd14 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/select_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/select_index @@ -467,16 +467,14 @@ render · · (b) · · table abcd@abcd · · · spans /1/4-/1/5 · · -# TODO(radu): in this case, we're not preferring abcd, unlike 2.0 query TTTTT EXPLAIN (VERBOSE) SELECT b FROM abcd WHERE (a, b) IN ((1, 4), (2, 9)) ---- -render · · (b) · - │ render 0 b · · - └── scan · · (a, b) · -· table abcd@adb · · -· spans /1-/3 · · -· filter (a, b) IN ((1, 4), (2, 9)) · · +render · · (b) · + │ render 0 b · · + └── scan · · (a, b) · +· table abcd@abcd · · +· spans /1/4-/1/5 /2/9-/2/10 · · statement ok CREATE TABLE ab ( @@ -885,17 +883,9 @@ scan · · (x, y) · query TTTTT EXPLAIN (VERBOSE) SELECT * FROM xy WHERE (x, y) IN ((NULL, NULL), (1, NULL), (NULL, 1), (1, 1), (1, 2)) ---- -render · · (x, y) · - │ render 0 x · · - │ render 1 y · · - └── filter · · (x, y, rowid[hidden]) · - │ filter (x, y) IN ((NULL, NULL), (1, NULL), (NULL, 1), (1, 1), (1, 2)) · · - └── index-join · · (x, y, rowid[hidden]) · - ├── scan · · (y, rowid[hidden]) · - │ table xy@xy_y_idx · · - │ spans 
/1-/3 · ·
- └── scan · · (x, y, rowid[hidden]) ·
-· table xy@primary · ·
+scan · · (x, y) ·
+· table xy@xy_idx · ·
+· spans /1/1-/1/3 · ·
 
 # ------------------------------------------------------------------------------
 # Non-covering index
diff --git a/pkg/sql/opt/memo/constraint_builder.go b/pkg/sql/opt/memo/constraint_builder.go
index 9f0443f3235d..d87492d152c1 100644
--- a/pkg/sql/opt/memo/constraint_builder.go
+++ b/pkg/sql/opt/memo/constraint_builder.go
@@ -165,6 +165,116 @@ func (cb *constraintsBuilder) buildSingleColumnConstraintConst(
 	return unconstrained, false
 }
 
+// buildConstraintForTupleIn handles the case where we have a tuple IN another
+// tuple, for instance:
+//
+//   (a, b, c) IN ((1, 2, 3), (4, 5, 6))
+//
+// This function is a less powerful version of makeSpansForTupleIn, since it
+// does not operate on a particular index. Only LHS entries whose private value
+// is a column ID and which are compared exclusively against constant values
+// are constrained; if there are none, the result is unconstrained. The return
+// value indicates if the spans are exactly equivalent to the expression (and
+// not weaker). Assumes that ev is an InOp and both children are TupleOps.
+func (cb *constraintsBuilder) buildConstraintForTupleIn(
+	ev ExprView,
+) (_ *constraint.Set, tight bool) {
+	lhs, rhs := ev.Child(0), ev.Child(1)
+	numTuples := rhs.ChildCount()
+
+	// We can only constrain here if every element of rhs is a TupleOp.
+	for i := 0; i < numTuples; i++ {
+		if rhs.Child(i).Operator() != opt.TupleOp {
+			return unconstrained, false
+		}
+	}
+
+	constrainedCols := make([]opt.OrderingColumn, 0, lhs.ChildCount())
+	colIdxsInLHS := make([]int, 0, lhs.ChildCount())
+	for i, n := 0, lhs.ChildCount(); i < n; i++ {
+		colID, ok := lhs.Child(i).Private().(opt.ColumnID)
+		if !ok {
+			continue
+		}
+
+		// We can't constrain a column if it's compared to anything besides a
+		// constant.
+		allConstant := true
+		for j := 0; j < numTuples; j++ {
+			if !rhs.Child(j).Child(i).IsConstValue() {
+				allConstant = false
+				break
+			}
+		}
+
+		if allConstant {
+			constrainedCols = append(
+				constrainedCols,
+				opt.MakeOrderingColumn(colID, false /* descending */),
+			)
+			colIdxsInLHS = append(colIdxsInLHS, i)
+		}
+	}
+
+	// With no constrained columns there is nothing to build a key from, so
+	// report the expression as unconstrained rather than initializing the key
+	// context with an empty column list.
+	if len(constrainedCols) == 0 {
+		return unconstrained, false
+	}
+
+	// If any of the LHS entries are not constrained then our constraints are not
+	// tight.
+	tight = (len(constrainedCols) == lhs.ChildCount())
+
+	keyCtx := constraint.KeyContext{EvalCtx: cb.evalCtx}
+	keyCtx.Columns.Init(constrainedCols)
+	var sp constraint.Span
+	var spans constraint.Spans
+	// At most one span is appended per RHS tuple.
+	spans.Alloc(numTuples)
+	for i := 0; i < numTuples; i++ {
+		val := rhs.Child(i)
+
+		// Use a fresh slice for every tuple so that the key built from it can
+		// never share a backing array with the keys of other spans.
+		vals := make(tree.Datums, len(colIdxsInLHS))
+		hasNull := false
+		for j, colIdx := range colIdxsInLHS {
+			datum := ExtractConstDatum(val.Child(colIdx))
+			if datum == tree.DNull {
+				hasNull = true
+				break
+			}
+			vals[j] = datum
+		}
+
+		// Nothing can match a tuple containing a NULL, so it introduces no
+		// constraints.
+		if hasNull {
+			// TODO(justin): consider redefining "tight" so that this is included in
+			// it. The spans are not "exactly equivalent" in the presence of NULLs,
+			// because of examples like the following:
+			//   (x, y) IN ((1, 2), (NULL, 4))
+			// is not the same as
+			//   (x, y) IN ((1, 2)),
+			// because the former is NULL (not false) on (3,4).
+			tight = false
+			continue
+		}
+
+		key := constraint.MakeCompositeKey(vals...)
+		sp.Init(key, constraint.IncludeBoundary, key, constraint.IncludeBoundary)
+		spans.Append(&sp)
+	}
+
+	spans.SortAndMerge(&keyCtx)
+
+	var c constraint.Constraint
+	c.Init(&keyCtx, &spans)
+	return constraint.SingleConstraint(&c), tight
+}
+
 func (cb *constraintsBuilder) buildConstraintForTupleInequality(
 	ev ExprView,
 ) (_ *constraint.Set, tight bool) {
@@ -301,7 +411,8 @@ func (cb *constraintsBuilder) buildConstraints(ev ExprView) (_ *constraint.Set,
 				// Tuple inequality.
 				return cb.buildConstraintForTupleInequality(ev)
 
-				//TODO(radu): case opt.InOp:
+			case opt.InOp:
+				return cb.buildConstraintForTupleIn(ev)
 			}
 		}
 		if child0.Operator() == opt.VariableOp {
diff --git a/pkg/sql/opt/memo/testdata/logprops/constraints b/pkg/sql/opt/memo/testdata/logprops/constraints
index e4ae79bee89b..fc482eac60a2 100644
--- a/pkg/sql/opt/memo/testdata/logprops/constraints
+++ b/pkg/sql/opt/memo/testdata/logprops/constraints
@@ -629,3 +629,132 @@ select
 └── tuple [type=tuple{int, int}]
 ├── const: 1 [type=int]
 └── const: 2 [type=int]
+
+exec-ddl
+CREATE TABLE c
+(
+ k INT PRIMARY KEY,
+ u INT,
+ v INT,
+ INDEX v (v, u)
+)
+----
+TABLE c
+ ├── k int not null
+ ├── u int
+ ├── v int
+ ├── INDEX primary
+ │ └── k int not null
+ └── INDEX v
+ ├── v int
+ ├── u int
+ └── k int not null
+
+opt
+SELECT * FROM c WHERE (v, u) IN ((1, 2), (3, 50), (5, 100))
+----
+scan c@v
+ ├── columns: k:1(int!null) u:2(int) v:3(int!null)
+ ├── constraint: /3/2/1: [/1/2 - /1/2] [/3/50 - /3/50] [/5/100 - /5/100]
+ ├── stats: [rows=4, distinct(3)=3]
+ └── keys: (1)
+
+# A tuple with NULL in it can't match anything, so it should be excluded from the constraints.
+opt
+SELECT * FROM c WHERE (v, u) IN ((1, 2), (3, 50), (5, NULL))
+----
+scan c@v
+ ├── columns: k:1(int!null) u:2(int) v:3(int!null)
+ ├── constraint: /3/2/1: [/1/2 - /1/2] [/3/50 - /3/50]
+ ├── stats: [rows=2, distinct(3)=2]
+ └── keys: (1)
+
+# TODO(justin): ideally we would be normalizing away the 2 on the LHS here to
+# get v = 1 and tight spans.
+opt +SELECT * FROM c WHERE (v, 2) IN ((1, 2), (3, 50), (5, 100)) +---- +select + ├── columns: k:1(int!null) u:2(int) v:3(int!null) + ├── stats: [rows=4, distinct(3)=3] + ├── keys: (1) + ├── scan c@v + │ ├── columns: k:1(int!null) u:2(int) v:3(int) + │ ├── constraint: /3/2/1: [/1 - /1] [/3 - /3] [/5 - /5] + │ ├── stats: [rows=4, distinct(3)=3] + │ └── keys: (1) + └── filters [type=bool, outer=(3), constraints=(/3: [/1 - /1] [/3 - /3] [/5 - /5])] + └── in [type=bool, outer=(3), constraints=(/3: [/1 - /1] [/3 - /3] [/5 - /5])] + ├── tuple [type=tuple{int, int}, outer=(3)] + │ ├── variable: c.v [type=int, outer=(3)] + │ └── const: 2 [type=int] + └── tuple [type=tuple{tuple{int, int}, tuple{int, int}, tuple{int, int}}] + ├── tuple [type=tuple{int, int}] + │ ├── const: 1 [type=int] + │ └── const: 2 [type=int] + ├── tuple [type=tuple{int, int}] + │ ├── const: 3 [type=int] + │ └── const: 50 [type=int] + └── tuple [type=tuple{int, int}] + ├── const: 5 [type=int] + └── const: 100 [type=int] + +# TODO(justin): in a perfect world we would be able to somehow transform this +# filter to (v, u) IN ((1, 1), (3, 47), (5, 95)) in order to get tight spans. +# This could be achieved via row-reduction. 
+opt +SELECT * FROM c WHERE (v, u + v) IN ((1, 2), (3, 50), (5, 100)) +---- +select + ├── columns: k:1(int!null) u:2(int) v:3(int!null) + ├── stats: [rows=4, distinct(3)=3] + ├── keys: (1) + ├── scan c@v + │ ├── columns: k:1(int!null) u:2(int) v:3(int) + │ ├── constraint: /3/2/1: [/1 - /1] [/3 - /3] [/5 - /5] + │ ├── stats: [rows=4, distinct(3)=3] + │ └── keys: (1) + └── filters [type=bool, outer=(2,3), constraints=(/3: [/1 - /1] [/3 - /3] [/5 - /5])] + └── in [type=bool, outer=(2,3), constraints=(/3: [/1 - /1] [/3 - /3] [/5 - /5])] + ├── tuple [type=tuple{int, int}, outer=(2,3)] + │ ├── variable: c.v [type=int, outer=(3)] + │ └── plus [type=int, outer=(2,3)] + │ ├── variable: c.u [type=int, outer=(2)] + │ └── variable: c.v [type=int, outer=(3)] + └── tuple [type=tuple{tuple{int, int}, tuple{int, int}, tuple{int, int}}] + ├── tuple [type=tuple{int, int}] + │ ├── const: 1 [type=int] + │ └── const: 2 [type=int] + ├── tuple [type=tuple{int, int}] + │ ├── const: 3 [type=int] + │ └── const: 50 [type=int] + └── tuple [type=tuple{int, int}] + ├── const: 5 [type=int] + └── const: 100 [type=int] + +opt +SELECT * FROM c WHERE (v, u) IN ((1, 2), (k, 50), (5, 100)) +---- +select + ├── columns: k:1(int!null) u:2(int!null) v:3(int) + ├── stats: [rows=4, distinct(2)=3] + ├── keys: (1) + ├── scan c + │ ├── columns: k:1(int!null) u:2(int) v:3(int) + │ ├── stats: [rows=1000, distinct(2)=700] + │ └── keys: (1) + └── filters [type=bool, outer=(1-3), constraints=(/2: [/2 - /2] [/50 - /50] [/100 - /100])] + └── in [type=bool, outer=(1-3), constraints=(/2: [/2 - /2] [/50 - /50] [/100 - /100])] + ├── tuple [type=tuple{int, int}, outer=(2,3)] + │ ├── variable: c.v [type=int, outer=(3)] + │ └── variable: c.u [type=int, outer=(2)] + └── tuple [type=tuple{tuple{int, int}, tuple{int, int}, tuple{int, int}}, outer=(1)] + ├── tuple [type=tuple{int, int}] + │ ├── const: 1 [type=int] + │ └── const: 2 [type=int] + ├── tuple [type=tuple{int, int}, outer=(1)] + │ ├── variable: c.k [type=int, 
outer=(1)] + │ └── const: 50 [type=int] + └── tuple [type=tuple{int, int}] + ├── const: 5 [type=int] + └── const: 100 [type=int] diff --git a/pkg/sql/opt/optbuilder/testdata/where b/pkg/sql/opt/optbuilder/testdata/where index 8a09db880a8f..006914e5c48c 100644 --- a/pkg/sql/opt/optbuilder/testdata/where +++ b/pkg/sql/opt/optbuilder/testdata/where @@ -225,9 +225,9 @@ build SELECT * FROM ab WHERE (a, b) IN ((1, 10), (3, 30), (4, 40)) ---- project - ├── columns: a:1(int) b:2(int) + ├── columns: a:1(int!null) b:2(int) └── select - ├── columns: a:1(int) b:2(int) rowid:3(int!null) + ├── columns: a:1(int!null) b:2(int) rowid:3(int!null) ├── scan ab │ └── columns: a:1(int) b:2(int) rowid:3(int!null) └── filters [type=bool] @@ -250,9 +250,9 @@ build SELECT * FROM ab WHERE (a, b) IN ((1, 10), (4, NULL), (NULL, 50)) ---- project - ├── columns: a:1(int) b:2(int) + ├── columns: a:1(int!null) b:2(int) └── select - ├── columns: a:1(int) b:2(int) rowid:3(int!null) + ├── columns: a:1(int!null) b:2(int) rowid:3(int!null) ├── scan ab │ └── columns: a:1(int) b:2(int) rowid:3(int!null) └── filters [type=bool] diff --git a/pkg/sql/opt/xform/testdata/external/tpcc b/pkg/sql/opt/xform/testdata/external/tpcc index 7734fe4dc536..2d91ab5cdf69 100644 --- a/pkg/sql/opt/xform/testdata/external/tpcc +++ b/pkg/sql/opt/xform/testdata/external/tpcc @@ -71,9 +71,9 @@ TABLE district └── d_id int not null exec-ddl -ALTER TABLE warehouse INJECT STATISTICS '[ +ALTER TABLE district INJECT STATISTICS '[ { - "columns": ["w_id"], + "columns": ["d_w_id"], "created_at": "2018-01-01 1:00:00.00000+00:00", "row_count": 100, "distinct_count": 10 @@ -544,14 +544,14 @@ SELECT w_tax FROM warehouse WHERE w_id = 10 ---- project ├── columns: w_tax:8(decimal) - ├── stats: [rows=10] - ├── cost: 10.00 + ├── stats: [rows=1] + ├── cost: 1.00 ├── prune: (8) └── scan warehouse ├── columns: warehouse.w_id:1(int!null) warehouse.w_tax:8(decimal) ├── constraint: /1: [/10 - /10] - ├── stats: [rows=10, distinct(1)=1] - ├── cost: 
10.00 + ├── stats: [rows=1, distinct(1)=1] + ├── cost: 1.00 ├── keys: (1) └── prune: (8) @@ -591,31 +591,27 @@ scan item opt format=show-all SELECT s_quantity, s_ytd, s_order_cnt, s_remote_cnt, s_data, s_dist_05 FROM stock -WHERE (s_i_id, s_w_id) IN ((1000, 10), (900, 8), (1100, 6), (1500, 4), (1400, 8)) +WHERE (s_i_id, s_w_id) IN ((1000, 4), (900, 4), (1100, 4), (1500, 4), (1400, 4)) ORDER BY s_i_id ---- -project +sort ├── columns: s_quantity:3(int) s_ytd:14(int) s_order_cnt:15(int) s_remote_cnt:16(int) s_data:17(string) s_dist_05:8(string) - ├── stats: [rows=333333] - ├── cost: 250.50 + ├── stats: [rows=50] + ├── cost: 52.82 ├── ordering: +1 ├── prune: (1,3,8,14-17) - └── lookup-join stock - ├── columns: stock.s_i_id:1(int!null) stock.s_w_id:2(int!null) stock.s_quantity:3(int) stock.s_dist_05:8(string) stock.s_ytd:14(int) stock.s_order_cnt:15(int) stock.s_remote_cnt:16(int) stock.s_data:17(string) - ├── key columns: [2 1] - ├── stats: [rows=333333] - ├── cost: 250.50 - ├── keys: (1,2) - ├── ordering: +1 - ├── prune: (3,8,14-17) - └── scan stock@secondary - ├── columns: stock.s_i_id:1(int!null) stock.s_w_id:2(int!null) - ├── constraint: /1/2: [/900/8 - /900/8] [/1000/10 - /1000/10] [/1100/6 - /1100/6] [/1400/8 - /1400/8] [/1500/4 - /1500/4] + └── project + ├── columns: stock.s_i_id:1(int!null) stock.s_quantity:3(int) stock.s_dist_05:8(string) stock.s_ytd:14(int) stock.s_order_cnt:15(int) stock.s_remote_cnt:16(int) stock.s_data:17(string) + ├── stats: [rows=50] + ├── cost: 50.00 + ├── prune: (1,3,8,14-17) + └── scan stock + ├── columns: stock.s_i_id:1(int!null) stock.s_w_id:2(int!null) stock.s_quantity:3(int) stock.s_dist_05:8(string) stock.s_ytd:14(int) stock.s_order_cnt:15(int) stock.s_remote_cnt:16(int) stock.s_data:17(string) + ├── constraint: /2/1: [/4/900 - /4/900] [/4/1000 - /4/1000] [/4/1100 - /4/1100] [/4/1400 - /4/1400] [/4/1500 - /4/1500] ├── stats: [rows=50, distinct(1)=5] ├── cost: 50.00 ├── keys: (1,2) - ├── ordering: +1 - └── prune: (1,2) + └── 
prune: (3,8,14-17) # -------------------------------------------------- # 2.5 The Payment Transaction @@ -912,34 +908,34 @@ group-by ├── columns: count:22(int) ├── cardinality: [1 - 1] ├── stats: [rows=1] - ├── cost: 1178.00 + ├── cost: 110.30 ├── prune: (22) ├── select │ ├── columns: warehouse.w_id:1(int) warehouse.w_ytd:9(decimal!null) district.d_w_id:11(int) sum_d_ytd:21(decimal!null) - │ ├── stats: [rows=777] - │ ├── cost: 1178.00 + │ ├── stats: [rows=1] + │ ├── cost: 110.30 │ ├── full-join │ │ ├── columns: warehouse.w_id:1(int) warehouse.w_ytd:9(decimal) district.d_w_id:11(int) sum_d_ytd:21(decimal) - │ │ ├── stats: [rows=7000] - │ │ ├── cost: 1108.00 + │ │ ├── stats: [rows=10] + │ │ ├── cost: 110.20 │ │ ├── prune: (9,21) │ │ ├── scan warehouse │ │ │ ├── columns: warehouse.w_id:1(int!null) warehouse.w_ytd:9(decimal) - │ │ │ ├── stats: [rows=100] - │ │ │ ├── cost: 100.00 + │ │ │ ├── stats: [rows=10] + │ │ │ ├── cost: 10.00 │ │ │ ├── keys: (1) │ │ │ └── prune: (1,9) │ │ ├── group-by │ │ │ ├── columns: district.d_w_id:11(int!null) sum_d_ytd:21(decimal) │ │ │ ├── grouping columns: district.d_w_id:11(int!null) - │ │ │ ├── stats: [rows=700, distinct(11)=700] - │ │ │ ├── cost: 1000.00 + │ │ │ ├── stats: [rows=10, distinct(11)=10] + │ │ │ ├── cost: 100.00 │ │ │ ├── keys: (11) │ │ │ ├── prune: (21) │ │ │ ├── scan district │ │ │ │ ├── columns: district.d_w_id:11(int!null) district.d_ytd:19(decimal) - │ │ │ │ ├── stats: [rows=1000, distinct(11)=700] - │ │ │ │ ├── cost: 1000.00 + │ │ │ │ ├── stats: [rows=100, distinct(11)=10] + │ │ │ │ ├── cost: 100.00 │ │ │ │ └── prune: (11,19) │ │ │ └── aggregations [outer=(19)] │ │ │ └── sum [type=decimal, outer=(19)] @@ -962,8 +958,8 @@ ORDER BY d_w_id, d_id ---- scan district ├── columns: d_next_o_id:11(int) - ├── stats: [rows=1000] - ├── cost: 1000.00 + ├── stats: [rows=100] + ├── cost: 100.00 ├── keys: (1,2) ├── ordering: +2,+1 └── prune: (1,2,11)