diff --git a/pkg/sql/opt/optgen/exprgen/testdata/join b/pkg/sql/opt/optgen/exprgen/testdata/join index 72a44b63f45c..94d66d22cac6 100644 --- a/pkg/sql/opt/optgen/exprgen/testdata/join +++ b/pkg/sql/opt/optgen/exprgen/testdata/join @@ -66,7 +66,7 @@ left-join (lookup abc@ab) ├── columns: t.public.abc.a:5(int) t.public.abc.b:6(int) ├── key columns: [5] = [5] ├── stats: [rows=333333.333] - ├── cost: 355060.02 + ├── cost: 358393.353 ├── scan t.public.def │ ├── columns: t.public.def.d:1(int) t.public.def.e:2(int) │ ├── stats: [rows=1000] diff --git a/pkg/sql/opt/xform/coster.go b/pkg/sql/opt/xform/coster.go index 95a316922567..ebf5e9722ec1 100644 --- a/pkg/sql/opt/xform/coster.go +++ b/pkg/sql/opt/xform/coster.go @@ -363,6 +363,33 @@ func (c *coster) computeLookupJoinCost(join *memo.LookupJoinExpr) memo.Cost { // cost of emitting the rows. numLookupCols := join.Cols.Difference(join.Input.Relational().OutputCols).Len() perRowCost := seqIOCostFactor + c.rowScanCost(join.Table, join.Index, numLookupCols) + + // Add a cost if we have to evaluate an ON condition on every row. The more + // leftover conditions, the more expensive it should be. We want to + // differentiate between two lookup joins where one uses only a subset of the + // columns. For example: + // abc JOIN xyz ON a=x AND b=y + // We could have a lookup join using an index on y (and left-over condition + // a=x), and another lookup join on an index on x,y. The latter is definitely + // preferable (the former could generate a lot of internal results that are + // then discarded). + // + // TODO(radu): we should take into account that the "internal" row count is + // higher, according to the selectivities of the conditions. Unfortunately + // this is very tricky, in particular because of left-over conditions that are + // not selective. + // For example: + // ab JOIN xy ON a=x AND x=10 + // becomes (during normalization): + // ab JOIN xy ON a=x AND a=10 AND x=10 + // which can become a lookup join with left-over condition x=10 which doesn't + // actually filter anything. + // + // TODO(radu): this should be extended to all join types. It's tricky for hash + // joins where we don't have the equality and leftover filters readily + // available. + perRowCost += cpuCostFactor * memo.Cost(len(join.On)) + cost += memo.Cost(join.Relational().Stats.RowCount) * perRowCost return cost } diff --git a/pkg/sql/opt/xform/testdata/coster/join b/pkg/sql/opt/xform/testdata/coster/join index 09417b25aa97..7e47f37927eb 100644 --- a/pkg/sql/opt/xform/testdata/coster/join +++ b/pkg/sql/opt/xform/testdata/coster/join @@ -166,3 +166,158 @@ index-join abc ├── cost: 10.306 ├── key: (1) └── fd: ()-->(3) + +# Regression test for #34810: make sure we pick the lookup join that uses +# all equality columns. + +exec-ddl +CREATE TABLE abcde ( + a TEXT NOT NULL, + b UUID NOT NULL, + c UUID NOT NULL, + d VARCHAR(255) NOT NULL, + e TEXT NOT NULL, + CONSTRAINT "primary" PRIMARY KEY (a, b, c), + UNIQUE INDEX idx_abd (a, b, d), + UNIQUE INDEX idx_abcd (a, b, c, d) +) +---- +TABLE abcde + ├── a string not null + ├── b uuid not null + ├── c uuid not null + ├── d string not null + ├── e string not null + ├── INDEX primary + │ ├── a string not null + │ ├── b uuid not null + │ └── c uuid not null + ├── INDEX idx_abd + │ ├── a string not null + │ ├── b uuid not null + │ ├── d string not null + │ └── c uuid not null (storing) + └── INDEX idx_abcd + ├── a string not null + ├── b uuid not null + ├── c uuid not null + └── d string not null + +exec-ddl +ALTER TABLE abcde INJECT STATISTICS '[ + { + "columns": ["a"], + "created_at": "2019-02-08 04:10:40.001179+00:00", + "row_count": 250000, + "distinct_count": 1 + }, + { + "columns": ["b"], + "created_at": "2019-02-08 04:10:40.119954+00:00", + "row_count": 250000, + "distinct_count": 2 + }, + { + "columns": ["d"], + "created_at": "2019-02-08 04:10:40.119954+00:00", + "row_count": 250000, + "distinct_count": 125000 + } +]' +---- + +exec-ddl +CREATE TABLE wxyz ( + w TEXT NOT NULL, + x UUID NOT NULL, + y UUID NOT NULL, + z TEXT NOT NULL, + CONSTRAINT "primary" PRIMARY KEY (w, x, y), + CONSTRAINT "foreign" FOREIGN KEY (w, x, y) REFERENCES abcde (a, b, c) +) +---- +TABLE wxyz + ├── w string not null + ├── x uuid not null + ├── y uuid not null + ├── z string not null + ├── INDEX primary + │ ├── w string not null + │ ├── x uuid not null + │ └── y uuid not null + └── FOREIGN KEY (w, x, y) REFERENCES t.public.abcde (a, b, c) + +exec-ddl +ALTER TABLE wxyz INJECT STATISTICS '[ + { + "columns": ["w"], + "created_at": "2019-02-08 04:10:40.001179+00:00", + "row_count": 10000, + "distinct_count": 1 + }, + { + "columns": ["x"], + "created_at": "2019-02-08 04:10:40.119954+00:00", + "row_count": 10000, + "distinct_count": 1 + }, + { + "columns": ["y"], + "created_at": "2019-02-08 04:10:40.119954+00:00", + "row_count": 10000, + "distinct_count": 2500 + } +]' +---- + +opt +SELECT w, x, y, z +FROM wxyz +INNER JOIN abcde +ON w = a AND x = b AND y = c +WHERE w = 'foo' AND x = '2AB23800-06B1-4E19-A3BB-DF3768B808D2' +ORDER BY d +LIMIT 10 +---- +project + ├── columns: w:1(string!null) x:2(uuid!null) y:3(uuid!null) z:4(string!null) [hidden: d:8(string!null)] + ├── cardinality: [0 - 10] + ├── stats: [rows=10] + ├── cost: 122481.301 + ├── key: (8) + ├── fd: ()-->(1,2), (3)-->(4,8), (8)-->(3,4) + ├── ordering: +8 opt(1,2) [actual: +8] + └── limit + ├── columns: w:1(string!null) x:2(uuid!null) y:3(uuid!null) z:4(string!null) a:5(string!null) b:6(uuid!null) c:7(uuid!null) d:8(string!null) + ├── internal-ordering: +8 opt(1,2,5,6) + ├── cardinality: [0 - 10] + ├── stats: [rows=10] + ├── cost: 122481.191 + ├── key: (7) + ├── fd: ()-->(1,2,5,6), (3)-->(4), (7)-->(8), (8)-->(7), (1)==(5), (5)==(1), (2)==(6), (6)==(2), (3)==(7), (7)==(3) + ├── ordering: +8 opt(1,2,5,6) [actual: +8] + ├── sort + │ ├── columns: w:1(string!null) x:2(uuid!null) y:3(uuid!null) z:4(string!null) a:5(string!null) b:6(uuid!null) c:7(uuid!null) d:8(string!null) + │ ├── stats: [rows=50048.8759, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=2500, null(3)=0, distinct(4)=1000, null(4)=0, distinct(5)=1, null(5)=0, distinct(6)=1, null(6)=0, distinct(7)=2500, null(7)=0, distinct(8)=38781.1698, null(8)=0] + │ ├── cost: 122481.081 + │ ├── key: (7) + │ ├── fd: ()-->(1,2,5,6), (3)-->(4), (7)-->(8), (8)-->(7), (1)==(5), (5)==(1), (2)==(6), (6)==(2), (3)==(7), (7)==(3) + │ ├── ordering: +8 opt(1,2,5,6) [actual: +8] + │ └── inner-join (lookup abcde@idx_abcd) + │ ├── columns: w:1(string!null) x:2(uuid!null) y:3(uuid!null) z:4(string!null) a:5(string!null) b:6(uuid!null) c:7(uuid!null) d:8(string!null) + │ ├── key columns: [1 2 3] = [5 6 7] + │ ├── stats: [rows=50048.8759, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=2500, null(3)=0, distinct(4)=1000, null(4)=0, distinct(5)=1, null(5)=0, distinct(6)=1, null(6)=0, distinct(7)=2500, null(7)=0, distinct(8)=38781.1698, null(8)=0] + │ ├── cost: 105853.783 + │ ├── key: (7) + │ ├── fd: ()-->(1,2,5,6), (3)-->(4), (7)-->(8), (8)-->(7), (1)==(5), (5)==(1), (2)==(6), (6)==(2), (3)==(7), (7)==(3) + │ ├── scan wxyz + │ │ ├── columns: w:1(string!null) x:2(uuid!null) y:3(uuid!null) z:4(string!null) + │ │ ├── constraint: /1/2/3: [/'foo'/'2ab23800-06b1-4e19-a3bb-df3768b808d2' - /'foo'/'2ab23800-06b1-4e19-a3bb-df3768b808d2'] + │ │ ├── stats: [rows=10000, distinct(1)=1, null(1)=0, distinct(2)=1, null(2)=0, distinct(3)=2500, null(3)=0, distinct(4)=1000, null(4)=0] + │ │ ├── cost: 10800.01 + │ │ ├── key: (3) + │ │ └── fd: ()-->(1,2), (3)-->(4) + │ └── filters + │ ├── a = 'foo' [type=bool, outer=(5), constraints=(/5: [/'foo' - /'foo']; tight), fd=()-->(5)] + │ └── b = '2ab23800-06b1-4e19-a3bb-df3768b808d2' [type=bool, outer=(6), constraints=(/6: [/'2ab23800-06b1-4e19-a3bb-df3768b808d2' - /'2ab23800-06b1-4e19-a3bb-df3768b808d2']; tight), fd=()-->(6)] + └── const: 10 [type=int] diff --git a/pkg/sql/opt/xform/testdata/coster/zone b/pkg/sql/opt/xform/testdata/coster/zone index 26432a72067a..3eb9e44ce7bc 100644 --- a/pkg/sql/opt/xform/testdata/coster/zone +++ b/pkg/sql/opt/xform/testdata/coster/zone @@ -317,7 +317,7 @@ inner-join (lookup xy@y2) ├── flags: no-merge-join;no-hash-join ├── key columns: [2] = [5] ├── stats: [rows=98.01, distinct(1)=9.9, null(1)=0, distinct(2)=1, null(2)=0, distinct(4)=9.9, null(4)=0, distinct(5)=1, null(5)=0] - ├── cost: 152.0444 + ├── cost: 153.0245 ├── key: (1,4) ├── fd: ()-->(2,5), (1)-->(3), (2,3)~~>(1), (2)==(5), (5)==(2) ├── prune: (1,3,4) @@ -359,7 +359,7 @@ inner-join (lookup xy@y1) ├── flags: no-merge-join;no-hash-join ├── key columns: [2] = [5] ├── stats: [rows=98.01, distinct(1)=9.9, null(1)=0, distinct(2)=1, null(2)=0, distinct(4)=9.9, null(4)=0, distinct(5)=1, null(5)=0] - ├── cost: 152.0444 + ├── cost: 153.0245 ├── key: (1,4) ├── fd: ()-->(2,5), (1)-->(3), (2,3)~~>(1), (2)==(5), (5)==(2) ├── prune: (1,3,4) diff --git a/pkg/sql/opt/xform/testdata/external/tpcc b/pkg/sql/opt/xform/testdata/external/tpcc index 00feeb0304c9..a58eb257b0ed 100644 --- a/pkg/sql/opt/xform/testdata/external/tpcc +++ b/pkg/sql/opt/xform/testdata/external/tpcc @@ -904,7 +904,7 @@ scalar-group-by ├── columns: count:28(int) ├── cardinality: [1 - 1] ├── stats: [rows=1] - ├── cost: 1.82111111 + ├── cost: 1.84111111 ├── key: () ├── fd: ()-->(28) ├── prune: (28) @@ -912,7 +912,7 @@ scalar-group-by │ ├── columns: ol_o_id:1(int!null) ol_d_id:2(int!null) ol_w_id:3(int!null) ol_i_id:5(int!null) s_i_id:11(int!null) s_w_id:12(int!null) s_quantity:13(int!null) │ ├── key columns: [3 5] = [12 11] │ ├── stats: [rows=1, distinct(1)=0.11109736, null(1)=0, distinct(2)=0.111097416, null(2)=0, distinct(3)=0.111111111, null(3)=0, distinct(5)=0.111111056, null(5)=0, distinct(11)=0.111111056, null(11)=0, distinct(12)=0.111111111, null(12)=0, distinct(13)=1, null(13)=0] - │ ├── cost: 1.79111111 + │ ├── cost: 1.81111111 │ ├── fd: ()-->(2,3,12), (11)-->(13), (5)==(11), (11)==(5), (3)==(12), (12)==(3) │ ├── interesting orderings: (+3,+2,-1) │ ├── scan order_line diff --git a/pkg/sql/opt/xform/testdata/external/tpcc-no-stats b/pkg/sql/opt/xform/testdata/external/tpcc-no-stats index 636cd49d3b7a..e9a6331c7d78 100644 --- a/pkg/sql/opt/xform/testdata/external/tpcc-no-stats +++ b/pkg/sql/opt/xform/testdata/external/tpcc-no-stats @@ -709,7 +709,7 @@ scalar-group-by ├── columns: count:28(int) ├── cardinality: [1 - 1] ├── stats: [rows=1] - ├── cost: 0.141477778 + ├── cost: 0.142211111 ├── key: () ├── fd: ()-->(28) ├── prune: (28) @@ -717,7 +717,7 @@ scalar-group-by │ ├── columns: ol_o_id:1(int!null) ol_d_id:2(int!null) ol_w_id:3(int!null) ol_i_id:5(int!null) s_i_id:11(int!null) s_w_id:12(int!null) s_quantity:13(int!null) │ ├── key columns: [3 5] = [12 11] │ ├── stats: [rows=0.0366666667, distinct(1)=0.0111105556, null(1)=0, distinct(2)=0.0111111111, null(2)=0, distinct(3)=0.0111111111, null(3)=0, distinct(5)=0.0111105556, null(5)=0, distinct(11)=0.0111105556, null(11)=0, distinct(12)=0.0111111111, null(12)=0, distinct(13)=0.0366666667, null(13)=0] - │ ├── cost: 0.121111111 + │ ├── cost: 0.121844444 │ ├── fd: ()-->(2,3,12), (11)-->(13), (5)==(11), (11)==(5), (3)==(12), (12)==(3) │ ├── interesting orderings: (+3,+2,-1) │ ├── scan order_line @@ -762,7 +762,7 @@ scalar-group-by ├── columns: count:22(int) ├── cardinality: [1 - 1] ├── stats: [rows=1] - ├── cost: 1588.01 + ├── cost: 1588.34 ├── key: () ├── fd: ()-->(22) ├── prune: (22) @@ -770,7 +770,7 @@ scalar-group-by │ ├── columns: w_id:1(int!null) w_ytd:9(decimal!null) d_w_id:11(int!null) sum:21(decimal!null) │ ├── key columns: [11] = [1] │ ├── stats: [rows=33, distinct(1)=33, null(1)=0, distinct(9)=28.3508504, null(9)=0, distinct(11)=33, null(11)=0, distinct(21)=28.3508504, null(21)=0] - │ ├── cost: 1587.66 + │ ├── cost: 1587.99 │ ├── key: (11) │ ├── fd: (1)-->(9), (11)-->(21), (1)==(11), (11)==(1) │ ├── interesting orderings: (+11) diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index dae0ca231323..00d22c265e4e 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -1879,11 +1879,11 @@ memo (optimized, ~11KB, required=[presentation: a:1]) ├── G1: (project G2 G3 a) │ └── [presentation: a:1] │ ├── best: (project G2 G3 a) - │ └── cost: 87.93 + │ └── cost: 88.05 ├── G2: (select G4 G5) (lookup-join G6 G5 t5,keyCols=[1],outCols=(1,2)) (select G7 G5) │ └── [] │ ├── best: (lookup-join G6 G5 t5,keyCols=[1],outCols=(1,2)) - │ └── cost: 87.80 + │ └── cost: 87.92 ├── G3: (projections) ├── G4: (scan t5,cols=(1,2)) │ └── []