Skip to content

Commit

Permalink
opt: fix distinct count estimates for index constraint columns
Browse files Browse the repository at this point in the history
Prior to this commit, the statistics_builder was incorrectly estimating
the distinct count of columns that were only slightly constrained as part
of an index constraint. For example, given constraints such as
/a/b: [/1 - /5/6] or /a/b: [ - /5/6], it estimated that the distinct
count of column b should be reduced by 2/3. However, in reality, we cannot
assume anything about the distinct count of column b based on those two
constraints.

This commit fixes the estimate by only reducing the distinct count for
columns that are part of the prefix of the constraint (columns for which
all the spans have the same start and end values) or the first column after
the prefix.

Release note (performance improvement): Fixed the optimizer's distinct count
estimate for columns constrained by an index constraint, which was too low
in some cases. The fix improves the optimizer's cardinality estimates, which
can lead to better query plan selection.
  • Loading branch information
rytaft committed May 15, 2020
1 parent 1b17389 commit 25d515d
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 53 deletions.
15 changes: 14 additions & 1 deletion pkg/sql/opt/memo/statistics_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2628,7 +2628,20 @@ func (sb *statisticsBuilder) applyIndexConstraint(

// Calculate distinct counts.
applied, lastColMinDistinct := sb.updateDistinctCountsFromConstraint(c, e, relProps)
for i, n := 0, c.ConstrainedColumns(sb.evalCtx); i < n; i++ {

// Collect the set of constrained columns for which we were able to estimate
// a distinct count, including the first column after the constraint prefix
// (if applicable, add a distinct count estimate for that column using the
// function updateDistinctCountFromUnappliedConjuncts).
//
// Note that the resulting set might not include *all* constrained columns.
// For example, we cannot make any assumptions about the distinct count of
// column b based on the constraints /a/b: [/1 - /5/6] or /a/b: [ - /5/6].
// TODO(rytaft): Consider treating remaining constrained columns as
// "unapplied conjuncts" and account for their selectivity in
// selectivityFromUnappliedConjuncts.
prefix := c.Prefix(sb.evalCtx)
for i, n := 0, c.ConstrainedColumns(sb.evalCtx); i < n && i <= prefix; i++ {
col := c.Columns.Get(i).ID()
constrainedCols.Add(col)
if i < applied {
Expand Down
29 changes: 11 additions & 18 deletions pkg/sql/opt/memo/testdata/stats/scan
Original file line number Diff line number Diff line change
Expand Up @@ -211,26 +211,19 @@ select
opt
SELECT * FROM a WHERE ((s >= 'bar' AND s <= 'foo') OR (s >= 'foobar')) AND d <= 5.0 AND s IS NOT NULL
----
index-join a
select
├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) b:5(bool)
├── stats: [rows=500, distinct(3)=1, null(3)=0, distinct(4)=100, null(4)=0]
├── key: (1)
├── fd: (1)-->(2-5), (3,4)-->(1,2,5)
└── select
├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
├── stats: [rows=166.666667, distinct(4)=33.3333333, null(4)=0]
├── key: (1)
├── fd: (1)-->(3,4), (3,4)-->(1)
├── scan a@secondary
│ ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
│ ├── constraint: /-3/4
│ │ ├── [ - /'foobar'/5.0]
│ │ └── [/'foo' - /'bar'/5.0]
│ ├── stats: [rows=500, distinct(1)=478.548451, null(1)=0, distinct(3)=1, null(3)=0, distinct(4)=100, null(4)=0]
│ ├── key: (1)
│ └── fd: (1)-->(3,4), (3,4)-->(1)
└── filters
└── d:4 <= 5.0 [type=bool, outer=(4), constraints=(/4: (/NULL - /5.0]; tight)]
├── scan a
│ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null) b:5(bool)
│ ├── stats: [rows=3000, distinct(1)=2000, null(1)=0, distinct(3)=2, null(3)=0, distinct(4)=300, null(4)=0]
│ ├── key: (1)
│ └── fd: (1)-->(2-5), (3,4)~~>(1,2,5)
└── filters
├── (((s:3 >= 'bar') AND (s:3 <= 'foo')) OR (s:3 >= 'foobar')) AND (s:3 IS NOT NULL) [type=bool, outer=(3), constraints=(/3: [/'bar' - /'foo'] [/'foobar' - ]; tight)]
└── d:4 <= 5.0 [type=bool, outer=(4), constraints=(/4: (/NULL - /5.0]; tight)]

# Bump up null counts.

Expand Down Expand Up @@ -365,15 +358,15 @@ index-join a
├── fd: (1)-->(2-5), (3,4)-->(1,2,5)
└── select
├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
├── stats: [rows=111.111111, distinct(4)=33.3333333, null(4)=0]
├── stats: [rows=333.333333, distinct(4)=98.265847, null(4)=0]
├── key: (1)
├── fd: (1)-->(3,4), (3,4)-->(1)
├── scan a@secondary
│ ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
│ ├── constraint: /-3/4
│ │ ├── [ - /'foobar'/5.0]
│ │ └── [/'foo' - /'bar'/5.0]
│ ├── stats: [rows=333.333333, distinct(1)=323.895037, null(1)=0, distinct(3)=1, null(3)=0, distinct(4)=100, null(4)=0]
│ ├── stats: [rows=1000, distinct(1)=911.337892, null(1)=0, distinct(3)=1, null(3)=0, distinct(4)=294.797541, null(4)=0]
│ ├── key: (1)
│ └── fd: (1)-->(3,4), (3,4)-->(1)
└── filters
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/memo/testdata/stats/select
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ project
├── scan idx@yz
│ ├── columns: y:2(int!null) z:3(int)
│ ├── constraint: /-2/3/1: (/4/NULL - /NULL)
│ └── stats: [rows=333.333333, distinct(2)=33.3333333, null(2)=0, distinct(3)=100, null(3)=10]
│ └── stats: [rows=333.333333, distinct(2)=33.3333333, null(2)=0]
└── filters
└── z:3 < 10 [type=bool, outer=(3), constraints=(/3: (/NULL - /9]; tight)]

Expand Down
35 changes: 20 additions & 15 deletions pkg/sql/opt/xform/testdata/external/trading
Original file line number Diff line number Diff line change
Expand Up @@ -827,30 +827,35 @@ project
│ │ │ │ ├── stats: [rows=1000000, distinct(16)=1, null(16)=0, distinct(17)=1, null(17)=0, distinct(18)=1000000, null(18)=0, distinct(19)=56999.9987, null(19)=0]
│ │ │ │ ├── key: (18-20)
│ │ │ │ └── fd: ()-->(16,17)
│ │ │ ├── inner-join (hash)
│ │ │ ├── inner-join (merge)
│ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null cardsinfo.dealerid:7!null cardsinfo.cardid:8!null cardsinfo.buyprice:9!null cardsinfo.sellprice:10!null discount:11!null desiredinventory:12!null actualinventory:13!null maxinventory:14!null cardsinfo.version:15!null
│ │ │ │ ├── left ordering: +1
│ │ │ │ ├── right ordering: +8
│ │ │ │ ├── stats: [rows=29618.4611, distinct(1)=19000, null(1)=0, distinct(2)=11668.1409, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5572.85686, null(6)=0, distinct(7)=1, null(7)=0, distinct(8)=19000, null(8)=0, distinct(9)=21037.9959, null(9)=0, distinct(10)=21037.9959, null(10)=0, distinct(11)=21037.9959, null(11)=0, distinct(12)=21037.9959, null(12)=0, distinct(13)=21037.9959, null(13)=0, distinct(14)=21037.9959, null(14)=0, distinct(15)=23225.5851, null(15)=0]
│ │ │ │ ├── key: (8)
│ │ │ │ ├── fd: ()-->(7), (1)-->(2-6), (2,4,5)~~>(1,3,6), (8)-->(9-15), (15)-->(8-14), (1)==(8), (8)==(1)
│ │ │ │ ├── select
│ │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null
│ │ │ │ │ ├── stats: [rows=19000, distinct(1)=19000, null(1)=0, distinct(2)=13000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5601.15328, null(6)=0]
│ │ │ │ │ ├── key: (1)
│ │ │ │ │ ├── fd: (1)-->(2-6), (2,4,5)~~>(1,3,6)
│ │ │ │ │ ├── ordering: +1
│ │ │ │ │ ├── scan cards
│ │ │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null
│ │ │ │ │ │ ├── stats: [rows=57000, distinct(1)=57000, null(1)=0, distinct(2)=39000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5700, null(6)=0]
│ │ │ │ │ │ ├── key: (1)
│ │ │ │ │ │ ├── fd: (1)-->(2-6), (2,4,5)~~>(1,3,6)
│ │ │ │ │ │ └── ordering: +1
│ │ │ │ │ └── filters
│ │ │ │ │ └── (name:2, setname:4, number:5) > ('Shock', '7E', 248) [outer=(2,4,5), constraints=(/2/4/5: [/'Shock'/'7E'/249 - ]; tight)]
│ │ │ │ ├── scan cardsinfo
│ │ │ │ │ ├── columns: cardsinfo.dealerid:7!null cardsinfo.cardid:8!null cardsinfo.buyprice:9!null cardsinfo.sellprice:10!null discount:11!null desiredinventory:12!null actualinventory:13!null maxinventory:14!null cardsinfo.version:15!null
│ │ │ │ │ ├── constraint: /7/8: [/1 - /1]
│ │ │ │ │ ├── stats: [rows=58333.3333, distinct(7)=1, null(7)=0, distinct(8)=37420.3552, null(8)=0, distinct(9)=40676.7278, null(9)=0, distinct(10)=40676.7278, null(10)=0, distinct(11)=40676.7278, null(11)=0, distinct(12)=40676.7278, null(12)=0, distinct(13)=40676.7278, null(13)=0, distinct(14)=40676.7278, null(14)=0, distinct(15)=58333.3333, null(15)=0]
│ │ │ │ │ ├── key: (8)
│ │ │ │ │ └── fd: ()-->(7), (8)-->(9-15), (15)-->(8-14)
│ │ │ │ ├── index-join cards
│ │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null
│ │ │ │ │ ├── stats: [rows=19000, distinct(1)=19000, null(1)=0, distinct(2)=13000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5601.15328, null(6)=0]
│ │ │ │ │ ├── key: (1)
│ │ │ │ │ ├── fd: (1)-->(2-6), (2,4,5)~~>(1,3,6)
│ │ │ │ │ └── scan cards@cardsnamesetnumber
│ │ │ │ │ ├── columns: id:1!null name:2!null setname:4 number:5!null
│ │ │ │ │ ├── constraint: /2/4/5: [/'Shock'/'7E'/249 - ]
│ │ │ │ │ ├── stats: [rows=2111.11111, distinct(2)=2111.11111, null(2)=0, distinct(4)=54, null(4)=0, distinct(5)=276.333333, null(5)=0]
│ │ │ │ │ ├── key: (1)
│ │ │ │ │ └── fd: (1)-->(2,4,5), (2,4,5)~~>(1)
│ │ │ │ └── filters
│ │ │ │ └── id:1 = cardsinfo.cardid:8 [outer=(1,8), constraints=(/1: (/NULL - ]; /8: (/NULL - ]), fd=(1)==(8), (8)==(1)]
│ │ │ │ │ ├── fd: ()-->(7), (8)-->(9-15), (15)-->(8-14)
│ │ │ │ │ └── ordering: +8 opt(7) [actual: +8]
│ │ │ │ └── filters (true)
│ │ │ └── filters
│ │ │ └── transactiondetails.cardid:19 = id:1 [outer=(1,19), constraints=(/1: (/NULL - ]; /19: (/NULL - ]), fd=(1)==(19), (19)==(1)]
│ │ └── aggregations
Expand Down
35 changes: 20 additions & 15 deletions pkg/sql/opt/xform/testdata/external/trading-mutation
Original file line number Diff line number Diff line change
Expand Up @@ -831,30 +831,35 @@ project
│ │ │ │ ├── stats: [rows=1000000, distinct(20)=1, null(20)=0, distinct(21)=1, null(21)=0, distinct(22)=1000000, null(22)=0, distinct(23)=56999.9987, null(23)=0]
│ │ │ │ ├── key: (22-24)
│ │ │ │ └── fd: ()-->(20,21)
│ │ │ ├── inner-join (hash)
│ │ │ ├── inner-join (merge)
│ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null cardsinfo.dealerid:7!null cardsinfo.cardid:8!null cardsinfo.buyprice:9!null cardsinfo.sellprice:10!null cardsinfo.discount:11!null desiredinventory:12!null actualinventory:13!null maxinventory:14!null cardsinfo.version:15!null
│ │ │ │ ├── left ordering: +1
│ │ │ │ ├── right ordering: +8
│ │ │ │ ├── stats: [rows=29618.4611, distinct(1)=19000, null(1)=0, distinct(2)=11668.1409, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5572.85686, null(6)=0, distinct(7)=1, null(7)=0, distinct(8)=19000, null(8)=0, distinct(9)=21037.9959, null(9)=0, distinct(10)=21037.9959, null(10)=0, distinct(11)=21037.9959, null(11)=0, distinct(12)=21037.9959, null(12)=0, distinct(13)=21037.9959, null(13)=0, distinct(14)=21037.9959, null(14)=0, distinct(15)=23225.5851, null(15)=0]
│ │ │ │ ├── key: (8)
│ │ │ │ ├── fd: ()-->(7), (1)-->(2-6), (2,4,5)~~>(1,3,6), (8)-->(9-15), (15)-->(8-14), (1)==(8), (8)==(1)
│ │ │ │ ├── select
│ │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null
│ │ │ │ │ ├── stats: [rows=19000, distinct(1)=19000, null(1)=0, distinct(2)=13000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5601.15328, null(6)=0]
│ │ │ │ │ ├── key: (1)
│ │ │ │ │ ├── fd: (1)-->(2-6), (2,4,5)~~>(1,3,6)
│ │ │ │ │ ├── ordering: +1
│ │ │ │ │ ├── scan cards
│ │ │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null
│ │ │ │ │ │ ├── stats: [rows=57000, distinct(1)=57000, null(1)=0, distinct(2)=39000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5700, null(6)=0]
│ │ │ │ │ │ ├── key: (1)
│ │ │ │ │ │ ├── fd: (1)-->(2-6), (2,4,5)~~>(1,3,6)
│ │ │ │ │ │ └── ordering: +1
│ │ │ │ │ └── filters
│ │ │ │ │ └── (name:2, setname:4, number:5) > ('Shock', '7E', 248) [outer=(2,4,5), constraints=(/2/4/5: [/'Shock'/'7E'/249 - ]; tight)]
│ │ │ │ ├── scan cardsinfo
│ │ │ │ │ ├── columns: cardsinfo.dealerid:7!null cardsinfo.cardid:8!null cardsinfo.buyprice:9!null cardsinfo.sellprice:10!null cardsinfo.discount:11!null desiredinventory:12!null actualinventory:13!null maxinventory:14!null cardsinfo.version:15!null
│ │ │ │ │ ├── constraint: /7/8: [/1 - /1]
│ │ │ │ │ ├── stats: [rows=58333.3333, distinct(7)=1, null(7)=0, distinct(8)=37420.3552, null(8)=0, distinct(9)=40676.7278, null(9)=0, distinct(10)=40676.7278, null(10)=0, distinct(11)=40676.7278, null(11)=0, distinct(12)=40676.7278, null(12)=0, distinct(13)=40676.7278, null(13)=0, distinct(14)=40676.7278, null(14)=0, distinct(15)=58333.3333, null(15)=0]
│ │ │ │ │ ├── key: (8)
│ │ │ │ │ └── fd: ()-->(7), (8)-->(9-15), (15)-->(8-14)
│ │ │ │ ├── index-join cards
│ │ │ │ │ ├── columns: id:1!null name:2!null rarity:3 setname:4 number:5!null isfoil:6!null
│ │ │ │ │ ├── stats: [rows=19000, distinct(1)=19000, null(1)=0, distinct(2)=13000, null(2)=0, distinct(5)=829, null(5)=0, distinct(6)=5601.15328, null(6)=0]
│ │ │ │ │ ├── key: (1)
│ │ │ │ │ ├── fd: (1)-->(2-6), (2,4,5)~~>(1,3,6)
│ │ │ │ │ └── scan cards@cardsnamesetnumber
│ │ │ │ │ ├── columns: id:1!null name:2!null setname:4 number:5!null
│ │ │ │ │ ├── constraint: /2/4/5: [/'Shock'/'7E'/249 - ]
│ │ │ │ │ ├── stats: [rows=2111.11111, distinct(2)=2111.11111, null(2)=0, distinct(4)=54, null(4)=0, distinct(5)=276.333333, null(5)=0]
│ │ │ │ │ ├── key: (1)
│ │ │ │ │ └── fd: (1)-->(2,4,5), (2,4,5)~~>(1)
│ │ │ │ └── filters
│ │ │ │ └── id:1 = cardsinfo.cardid:8 [outer=(1,8), constraints=(/1: (/NULL - ]; /8: (/NULL - ]), fd=(1)==(8), (8)==(1)]
│ │ │ │ │ ├── fd: ()-->(7), (8)-->(9-15), (15)-->(8-14)
│ │ │ │ │ └── ordering: +8 opt(7) [actual: +8]
│ │ │ │ └── filters (true)
│ │ │ └── filters
│ │ │ └── transactiondetails.cardid:23 = id:1 [outer=(1,23), constraints=(/1: (/NULL - ]; /23: (/NULL - ]), fd=(1)==(23), (23)==(1)]
│ │ └── aggregations
Expand Down
6 changes: 3 additions & 3 deletions pkg/sql/opt/xform/testdata/rules/select
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ memo (optimized, ~5KB, required=[presentation: k:1,u:2,v:3,j:4])
├── G1: (select G2 G3) (select G4 G3)
│ └── [presentation: k:1,u:2,v:3,j:4]
│ ├── best: (select G4 G3)
│ └── cost: 45.72
│ └── cost: 411.23
├── G2: (scan b)
│ └── []
│ ├── best: (scan b)
Expand All @@ -656,13 +656,13 @@ memo (optimized, ~5KB, required=[presentation: k:1,u:2,v:3,j:4])
├── G4: (index-join G7 b,cols=(1-4))
│ └── []
│ ├── best: (index-join G7 b,cols=(1-4))
│ └── cost: 45.62
│ └── cost: 410.42
├── G5: (gt G8 G9)
├── G6: (lt G8 G10)
├── G7: (scan b@u,cols=(1,2),constrained)
│ └── []
│ ├── best: (scan b@u,cols=(1,2),constrained)
│ └── cost: 9.25
│ └── cost: 83.21
├── G8: (tuple G11)
├── G9: (tuple G12)
├── G10: (tuple G13)
Expand Down

0 comments on commit 25d515d

Please sign in to comment.