From 25d515d63c77742c2b815af1b22462166c7e1bf1 Mon Sep 17 00:00:00 2001 From: Rebecca Taft Date: Fri, 15 May 2020 11:28:41 -0500 Subject: [PATCH] opt: fix distinct count estimates for index constraint columns Prior to this commit, the statistics_builder was incorrectly estimating the distinct count of columns that were only slightly constrained as part of an index constraint. For example, it was estimating based on constraints such as /a/b: [/1 - /5/6] or /a/b: [ - /5/6] that the distinct count of column b should be reduced by 2/3. However, in reality, we cannot assume anything about the distinct count of column b based on those two constraints. This commit fixes the estimate by only reducing the distinct count for columns that are part of the prefix of the constraint (columns for which all the spans have the same start and end values) or the first column after. Release note (performance improvement): Fixed the optimizer's distinct count estimate for columns constrained by an index constraint, which was too low in some cases. The fix improves the optimizer's cardinality estimates, which can lead to better query plan selection. --- pkg/sql/opt/memo/statistics_builder.go | 15 +++++++- pkg/sql/opt/memo/testdata/stats/scan | 29 ++++++--------- pkg/sql/opt/memo/testdata/stats/select | 2 +- pkg/sql/opt/xform/testdata/external/trading | 35 +++++++++++-------- .../xform/testdata/external/trading-mutation | 35 +++++++++++-------- pkg/sql/opt/xform/testdata/rules/select | 6 ++-- 6 files changed, 69 insertions(+), 53 deletions(-) diff --git a/pkg/sql/opt/memo/statistics_builder.go b/pkg/sql/opt/memo/statistics_builder.go index bad38a5ff531..5ae01d2f54f3 100644 --- a/pkg/sql/opt/memo/statistics_builder.go +++ b/pkg/sql/opt/memo/statistics_builder.go @@ -2628,7 +2628,20 @@ func (sb *statisticsBuilder) applyIndexConstraint( // Calculate distinct counts. applied, lastColMinDistinct := sb.updateDistinctCountsFromConstraint(c, e, relProps) - for i, n := 0, c.ConstrainedColumns(sb.evalCtx); i < n; i++ { + + // Collect the set of constrained columns for which we were able to estimate + // a distinct count, including the first column after the constraint prefix + // (if applicable, add a distinct count estimate for that column using the + // function updateDistinctCountFromUnappliedConjuncts). + // + // Note that the resulting set might not include *all* constrained columns. + // For example, we cannot make any assumptions about the distinct count of + // column b based on the constraints /a/b: [/1 - /5/6] or /a/b: [ - /5/6]. + // TODO(rytaft): Consider treating remaining constrained columns as + // "unapplied conjuncts" and account for their selectivity in + // selectivityFromUnappliedConjuncts. + prefix := c.Prefix(sb.evalCtx) + for i, n := 0, c.ConstrainedColumns(sb.evalCtx); i < n && i <= prefix; i++ { col := c.Columns.Get(i).ID() constrainedCols.Add(col) if i < applied { diff --git a/pkg/sql/opt/memo/testdata/stats/scan b/pkg/sql/opt/memo/testdata/stats/scan index 60426d6e8443..da894004aac9 100644 --- a/pkg/sql/opt/memo/testdata/stats/scan +++ b/pkg/sql/opt/memo/testdata/stats/scan @@ -211,26 +211,19 @@ select opt SELECT * FROM a WHERE ((s >= 'bar' AND s <= 'foo') OR (s >= 'foobar')) AND d <= 5.0 AND s IS NOT NULL ---- -index-join a +select ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) b:5(bool) ├── stats: [rows=500, distinct(3)=1, null(3)=0, distinct(4)=100, null(4)=0] ├── key: (1) ├── fd: (1)-->(2-5), (3,4)-->(1,2,5) - └── select - ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null) - ├── stats: [rows=166.666667, distinct(4)=33.3333333, null(4)=0] - ├── key: (1) - ├── fd: (1)-->(3,4), (3,4)-->(1) - ├── scan a@secondary - │ ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null) - │ ├── constraint: /-3/4 - │ │ ├── [ - /'foobar'/5.0] - │ │ └── [/'foo' - /'bar'/5.0] - │ ├── stats: [rows=500, distinct(1)=478.548451, null(1)=0, distinct(3)=1, null(3)=0, distinct(4)=100, null(4)=0] - │ ├── key: (1) - │ └── fd: (1)-->(3,4), (3,4)-->(1) - └── filters - └── d:4 <= 5.0 [type=bool, outer=(4), constraints=(/4: (/NULL - /5.0]; tight)] + ├── scan a + │ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) b:5(bool) + │ ├── stats: [rows=3000, distinct(1)=2000, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=300, null(4)=0] + │ ├── key: (1) + │ └── fd: (1)-->(2-5), (3,4)~~>(1,2,5) + └── filters + ├── (((s:3 >= 'bar') AND (s:3 <= 'foo')) OR (s:3 >= 'foobar')) AND (s:3 IS NOT NULL) [type=bool, outer=(3), constraints=(/3: [/'bar' - /'foo'] [/'foobar' - ]; tight)] + └── d:4 <= 5.0 [type=bool, outer=(4), constraints=(/4: (/NULL - /5.0]; tight)] # Bump up null counts. @@ -365,7 +358,7 @@ index-join a ├── fd: (1)-->(2-5), (3,4)-->(1,2,5) └── select ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null) - ├── stats: [rows=111.111111, distinct(4)=33.3333333, null(4)=0] + ├── stats: [rows=333.333333, distinct(4)=98.265847, null(4)=0] ├── key: (1) ├── fd: (1)-->(3,4), (3,4)-->(1) ├── scan a@secondary @@ -373,7 +366,7 @@ index-join a │ ├── constraint: /-3/4 │ │ ├── [ - /'foobar'/5.0] │ │ └── [/'foo' - /'bar'/5.0] - │ ├── stats: [rows=333.333333, distinct(1)=323.895037, null(1)=0, distinct(3)=1, null(3)=0, distinct(4)=100, null(4)=0] + │ ├── stats: [rows=1000, distinct(1)=911.337892, null(1)=0, distinct(3)=1, null(3)=0, distinct(4)=294.797541, null(4)=0] │ ├── key: (1) │ └── fd: (1)-->(3,4), (3,4)-->(1) └── filters diff --git a/pkg/sql/opt/memo/testdata/stats/select b/pkg/sql/opt/memo/testdata/stats/select index 86377d7d30a9..965424def2cf 100644 --- a/pkg/sql/opt/memo/testdata/stats/select +++ b/pkg/sql/opt/memo/testdata/stats/select @@ -185,7 +185,7 @@ project ├── scan idx@yz │ ├── columns: y:2(int!null) z:3(int) │ ├── constraint: /-2/3/1: (/4/NULL - /NULL) - │ └── stats: [rows=333.333333, distinct(2)=33.3333333, null(2)=0, distinct(3)=100, null(3)=10] + │ └── stats: [rows=333.333333, distinct(2)=33.3333333, null(2)=0] └── filters └── z:3 < 10 [type=bool, outer=(3), constraints=(/3: (/NULL - /9]; tight)] diff --git a/pkg/sql/opt/xform/testdata/external/trading b/pkg/sql/opt/xform/testdata/external/trading index 18745af617af..c480f89e1b89 100644 --- a/pkg/sql/opt/xform/testdata/external/trading +++ b/pkg/sql/opt/xform/testdata/external/trading @@ -827,30 +827,35 @@ project │ │ │ │ ├── stats: [rows=1000000, distinct(16)=1, null(16)=0, distinct(17)=1, null(17)=0, distinct(18)=1000000, null(18)=0, distinct(19)=56999.9987, null(19)=0] │ │ │ │ ├── key: (18-20) │ │ │ │ └── fd: ()-->(16,17) - │ │ │ ├── inner-join (hash) + │ │ │ ├── inner-join (merge) │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null cardsinfo.dealerid:7!null cardsinfo.cardid:8!null cardsinfo.buyprice:9!null cardsinfo.sellprice:10!null discount:11!null desiredinventory:12!null actualinventory:13!null maxinventory:14!null cardsinfo.version:15!null + │ │ │ │ ├── left ordering: +1 + │ │ │ │ ├── right ordering: +8 │ │ │ │ ├── stats: [rows=29618.4611, distinct(1)=19000, null(1)=0, distinct(2)=11668.1409, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5572.85686, null(6)=0, distinct(7)=1, null(7)=0, distinct(8)=19000, null(8)=0, distinct(9)=21037.9959, null(9)=0, distinct(10)=21037.9959, null(10)=0, distinct(11)=21037.9959, null(11)=0, distinct(12)=21037.9959, null(12)=0, distinct(13)=21037.9959, null(13)=0, distinct(14)=21037.9959, null(14)=0, distinct(15)=23225.5851, null(15)=0] │ │ │ │ ├── key: (8) │ │ │ │ ├── fd: ()-->(7), (1)-->(2-6), (2,4,5)~~>(1,3,6), (8)-->(9-15), (15)-->(8-14), (1)==(8), (8)==(1) + │ │ │ │ ├── select + │ │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null + │ │ │ │ │ ├── stats: [rows=19000, distinct(1)=19000, null(1)=0, distinct(2)=13000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5601.15328, null(6)=0] + │ │ │ │ │ ├── key: (1) + │ │ │ │ │ ├── fd: (1)-->(2-6), (2,4,5)~~>(1,3,6) + │ │ │ │ │ ├── ordering: +1 + │ │ │ │ │ ├── scan cards + │ │ │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null + │ │ │ │ │ │ ├── stats: [rows=57000, distinct(1)=57000, null(1)=0, distinct(2)=39000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5700, null(6)=0] + │ │ │ │ │ │ ├── key: (1) + │ │ │ │ │ │ ├── fd: (1)-->(2-6), (2,4,5)~~>(1,3,6) + │ │ │ │ │ │ └── ordering: +1 + │ │ │ │ │ └── filters + │ │ │ │ │ └── (name:2, setname:4, number:5) > ('Shock', '7E', 248) [outer=(2,4,5), constraints=(/2/4/5: [/'Shock'/'7E'/249 - ]; tight)] │ │ │ │ ├── scan cardsinfo │ │ │ │ │ ├── columns: cardsinfo.dealerid:7!null cardsinfo.cardid:8!null cardsinfo.buyprice:9!null cardsinfo.sellprice:10!null discount:11!null desiredinventory:12!null actualinventory:13!null maxinventory:14!null cardsinfo.version:15!null │ │ │ │ │ ├── constraint: /7/8: [/1 - /1] │ │ │ │ │ ├── stats: [rows=58333.3333, distinct(7)=1, null(7)=0, distinct(8)=37420.3552, null(8)=0, distinct(9)=40676.7278, null(9)=0, distinct(10)=40676.7278, null(10)=0, distinct(11)=40676.7278, null(11)=0, distinct(12)=40676.7278, null(12)=0, distinct(13)=40676.7278, null(13)=0, distinct(14)=40676.7278, null(14)=0, distinct(15)=58333.3333, null(15)=0] │ │ │ │ │ ├── key: (8) - │ │ │ │ │ └── fd: ()-->(7), (8)-->(9-15), (15)-->(8-14) - │ │ │ │ ├── index-join cards - │ │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null - │ │ │ │ │ ├── stats: [rows=19000, distinct(1)=19000, null(1)=0, distinct(2)=13000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5601.15328, null(6)=0] - │ │ │ │ │ ├── key: (1) - │ │ │ │ │ ├── fd: (1)-->(2-6), (2,4,5)~~>(1,3,6) - │ │ │ │ │ └── scan cards@cardsnamesetnumber - │ │ │ │ │ ├── columns: id:1!null name:2!null setname:4 number:5!null - │ │ │ │ │ ├── constraint: /2/4/5: [/'Shock'/'7E'/249 - ] - │ │ │ │ │ ├── stats: [rows=2111.11111, distinct(2)=2111.11111, null(2)=0, distinct(4)=54, null(4)=0, distinct(5)=276.333333, null(5)=0] - │ │ │ │ │ ├── key: (1) - │ │ │ │ │ └── fd: (1)-->(2,4,5), (2,4,5)~~>(1) - │ │ │ │ └── filters - │ │ │ │ └── id:1 = cardsinfo.cardid:8 [outer=(1,8), constraints=(/1: (/NULL - ]; /8: (/NULL - ]), fd=(1)==(8), (8)==(1)] + │ │ │ │ │ ├── fd: ()-->(7), (8)-->(9-15), (15)-->(8-14) + │ │ │ │ │ └── ordering: +8 opt(7) [actual: +8] + │ │ │ │ └── filters (true) │ │ │ └── filters │ │ │ └── transactiondetails.cardid:19 = id:1 [outer=(1,19), constraints=(/1: (/NULL - ]; /19: (/NULL - ]), fd=(1)==(19), (19)==(1)] │ │ └── aggregations diff --git a/pkg/sql/opt/xform/testdata/external/trading-mutation b/pkg/sql/opt/xform/testdata/external/trading-mutation index b6829150ec9e..3fd201410472 100644 --- a/pkg/sql/opt/xform/testdata/external/trading-mutation +++ b/pkg/sql/opt/xform/testdata/external/trading-mutation @@ -831,30 +831,35 @@ project │ │ │ │ ├── stats: [rows=1000000, distinct(20)=1, null(20)=0, distinct(21)=1, null(21)=0, distinct(22)=1000000, null(22)=0, distinct(23)=56999.9987, null(23)=0] │ │ │ │ ├── key: (22-24) │ │ │ │ └── fd: ()-->(20,21) - │ │ │ ├── inner-join (hash) + │ │ │ ├── inner-join (merge) │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null cardsinfo.dealerid:7!null cardsinfo.cardid:8!null cardsinfo.buyprice:9!null cardsinfo.sellprice:10!null cardsinfo.discount:11!null desiredinventory:12!null actualinventory:13!null maxinventory:14!null cardsinfo.version:15!null + │ │ │ │ ├── left ordering: +1 + │ │ │ │ ├── right ordering: +8 │ │ │ │ ├── stats: [rows=29618.4611, distinct(1)=19000, null(1)=0, distinct(2)=11668.1409, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5572.85686, null(6)=0, distinct(7)=1, null(7)=0, distinct(8)=19000, null(8)=0, distinct(9)=21037.9959, null(9)=0, distinct(10)=21037.9959, null(10)=0, distinct(11)=21037.9959, null(11)=0, distinct(12)=21037.9959, null(12)=0, distinct(13)=21037.9959, null(13)=0, distinct(14)=21037.9959, null(14)=0, distinct(15)=23225.5851, null(15)=0] │ │ │ │ ├── key: (8) │ │ │ │ ├── fd: ()-->(7), (1)-->(2-6), (2,4,5)~~>(1,3,6), (8)-->(9-15), (15)-->(8-14), (1)==(8), (8)==(1) + │ │ │ │ ├── select + │ │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null + │ │ │ │ │ ├── stats: [rows=19000, distinct(1)=19000, null(1)=0, distinct(2)=13000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5601.15328, null(6)=0] + │ │ │ │ │ ├── key: (1) + │ │ │ │ │ ├── fd: (1)-->(2-6), (2,4,5)~~>(1,3,6) + │ │ │ │ │ ├── ordering: +1 + │ │ │ │ │ ├── scan cards + │ │ │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null + │ │ │ │ │ │ ├── stats: [rows=57000, distinct(1)=57000, null(1)=0, distinct(2)=39000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5700, null(6)=0] + │ │ │ │ │ │ ├── key: (1) + │ │ │ │ │ │ ├── fd: (1)-->(2-6), (2,4,5)~~>(1,3,6) + │ │ │ │ │ │ └── ordering: +1 + │ │ │ │ │ └── filters + │ │ │ │ │ └── (name:2, setname:4, number:5) > ('Shock', '7E', 248) [outer=(2,4,5), constraints=(/2/4/5: [/'Shock'/'7E'/249 - ]; tight)] │ │ │ │ ├── scan cardsinfo │ │ │ │ │ ├── columns: cardsinfo.dealerid:7!null cardsinfo.cardid:8!null cardsinfo.buyprice:9!null cardsinfo.sellprice:10!null cardsinfo.discount:11!null desiredinventory:12!null actualinventory:13!null maxinventory:14!null cardsinfo.version:15!null │ │ │ │ │ ├── constraint: /7/8: [/1 - /1] │ │ │ │ │ ├── stats: [rows=58333.3333, distinct(7)=1, null(7)=0, distinct(8)=37420.3552, null(8)=0, distinct(9)=40676.7278, null(9)=0, distinct(10)=40676.7278, null(10)=0, distinct(11)=40676.7278, null(11)=0, distinct(12)=40676.7278, null(12)=0, distinct(13)=40676.7278, null(13)=0, distinct(14)=40676.7278, null(14)=0, distinct(15)=58333.3333, null(15)=0] │ │ │ │ │ ├── key: (8) - │ │ │ │ │ └── fd: ()-->(7), (8)-->(9-15), (15)-->(8-14) - │ │ │ │ ├── index-join cards - │ │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null - │ │ │ │ │ ├── stats: [rows=19000, distinct(1)=19000, null(1)=0, distinct(2)=13000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5601.15328, null(6)=0] - │ │ │ │ │ ├── key: (1) - │ │ │ │ │ ├── fd: (1)-->(2-6), (2,4,5)~~>(1,3,6) - │ │ │ │ │ └── scan cards@cardsnamesetnumber - │ │ │ │ │ ├── columns: id:1!null name:2!null setname:4 number:5!null - │ │ │ │ │ ├── constraint: /2/4/5: [/'Shock'/'7E'/249 - ] - │ │ │ │ │ ├── stats: [rows=2111.11111, distinct(2)=2111.11111, null(2)=0, distinct(4)=54, null(4)=0, distinct(5)=276.333333, null(5)=0] - │ │ │ │ │ ├── key: (1) - │ │ │ │ │ └── fd: (1)-->(2,4,5), (2,4,5)~~>(1) - │ │ │ │ └── filters - │ │ │ │ └── id:1 = cardsinfo.cardid:8 [outer=(1,8), constraints=(/1: (/NULL - ]; /8: (/NULL - ]), fd=(1)==(8), (8)==(1)] + │ │ │ │ │ ├── fd: ()-->(7), (8)-->(9-15), (15)-->(8-14) + │ │ │ │ │ └── ordering: +8 opt(7) [actual: +8] + │ │ │ │ └── filters (true) │ │ │ └── filters │ │ │ └── transactiondetails.cardid:23 = id:1 [outer=(1,23), constraints=(/1: (/NULL - ]; /23: (/NULL - ]), fd=(1)==(23), (23)==(1)] │ │ └── aggregations diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index aaf92aa3c638..d69f57a5bfbd 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -647,7 +647,7 @@ memo (optimized, ~5KB, required=[presentation: k:1,u:2,v:3,j:4]) ├── G1: (select G2 G3) (select G4 G3) │ └── [presentation: k:1,u:2,v:3,j:4] │ ├── best: (select G4 G3) - │ └── cost: 45.72 + │ └── cost: 411.23 ├── G2: (scan b) │ └── [] │ ├── best: (scan b) @@ -656,13 +656,13 @@ memo (optimized, ~5KB, required=[presentation: k:1,u:2,v:3,j:4]) ├── G4: (index-join G7 b,cols=(1-4)) │ └── [] │ ├── best: (index-join G7 b,cols=(1-4)) - │ └── cost: 45.62 + │ └── cost: 410.42 ├── G5: (gt G8 G9) ├── G6: (lt G8 G10) ├── G7: (scan b@u,cols=(1,2),constrained) │ └── [] │ ├── best: (scan b@u,cols=(1,2),constrained) - │ └── cost: 9.25 + │ └── cost: 83.21 ├── G8: (tuple G11) ├── G9: (tuple G12) ├── G10: (tuple G13)