Skip to content

Commit

Permalink
opt: create a special type for selectivity to clean up stats code
Browse files Browse the repository at this point in the history
Previously, the selectivity of a filter was represented as a plain float64.
Several places in the statistics code performed range checks to ensure that
selectivity stayed within the range (0, 1].

This change cleans up the statistics code and creates a Selectivity type with
custom methods to replace direct numerical operations, incorporating the range
check to ensure all operations on selectivity return a valid value.

As a result of these changes, some expected values in the test files change
slightly. The query plans and performance remain mostly the same.

Resolves: #53860

Release note: None
  • Loading branch information
angelazxu committed Feb 3, 2021
1 parent 4adc7d0 commit 1c31782
Show file tree
Hide file tree
Showing 23 changed files with 364 additions and 189 deletions.
8 changes: 4 additions & 4 deletions pkg/sql/opt/exec/execbuilder/testdata/stats
Original file line number Diff line number Diff line change
Expand Up @@ -317,20 +317,20 @@ limit
├── cardinality: [0 - 1]
├── immutable
├── stats: [rows=1]
├── cost: 226.06
├── cost: 226.059998
├── key: ()
├── fd: ()-->(1)
├── select
│ ├── columns: j:1
│ ├── immutable
│ ├── stats: [rows=10, distinct(1)=1, null(1)=10]
│ ├── cost: 226.04
│ ├── stats: [rows=10.0000001, distinct(1)=1, null(1)=10]
│ ├── cost: 226.039998
│ ├── fd: ()-->(1)
│ ├── limit hint: 1.00
│ ├── scan tj
│ │ ├── columns: j:1
│ │ ├── stats: [rows=1000, distinct(1)=100, null(1)=10]
│ │ ├── cost: 216.02
│ │ ├── cost: 216.019998
│ │ ├── limit hint: 100.00
│ │ └── prune: (1)
│ └── filters
Expand Down
162 changes: 72 additions & 90 deletions pkg/sql/opt/memo/statistics_builder.go

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions pkg/sql/opt/memo/testdata/stats/index-join
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ SELECT * FROM a WHERE s = 'foo' AND x + y = 10
select
├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
├── immutable
├── stats: [rows=33.3333333, distinct(1)=33.3333333, null(1)=0, distinct(2)=28.5927601, null(2)=16.6666667, distinct(3)=1, null(3)=0, distinct(4)=30.9412676, null(4)=0, distinct(1-3)=33.3333333, null(1-3)=0]
├── stats: [rows=33.3333334, distinct(1)=33.3333334, null(1)=0, distinct(2)=28.5927601, null(2)=16.6666667, distinct(3)=1, null(3)=0, distinct(4)=30.9412677, null(4)=0, distinct(1-3)=33.3333334, null(1-3)=0]
├── key: (1)
├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2)
├── index-join a
Expand All @@ -207,7 +207,7 @@ SELECT * FROM a WHERE s = 'foo'
----
index-join a
├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=64.4232893, null(2)=50, distinct(3)=1, null(3)=0, distinct(1-3)=100, null(1-3)=0]
├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=64.4232894, null(2)=50, distinct(3)=1, null(3)=0, distinct(1-3)=100, null(1-3)=0]
├── key: (1)
├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2)
└── scan a@secondary
Expand All @@ -231,7 +231,7 @@ SELECT * FROM a WHERE (s = 'foo' OR s = 'bar') AND s IS NOT NULL
----
index-join a
├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
├── stats: [rows=200, distinct(1)=200, null(1)=0, distinct(2)=88.4618791, null(2)=100, distinct(3)=2, null(3)=0, distinct(2,3)=176.923758, null(2,3)=0, distinct(1-3)=200, null(1-3)=0]
├── stats: [rows=200, distinct(1)=200, null(1)=0, distinct(2)=88.4618792, null(2)=100, distinct(3)=2, null(3)=0, distinct(2,3)=176.923758, null(2,3)=0, distinct(1-3)=200, null(1-3)=0]
├── key: (1)
├── fd: (1)-->(2-4), (3,4)-->(1,2)
└── scan a@secondary
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/opt/memo/testdata/stats/inverted-geo
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ project
│ │ ├── columns: rowid:3(int!null) g_inverted_key:5(geometry!null)
│ │ ├── inverted constraint: /5/3
│ │ │ └── spans: ["B\xfd\xff\xff\xff\xff\xff\xff\xff\xff", "B\xfd\xff\xff\xff\xff\xff\xff\xff\xff"]
│ │ ├── stats: [rows=7e-07, distinct(3)=1.99999931e-07, null(3)=0, distinct(5)=7e-07, null(5)=0]
│ │ ├── stats: [rows=7e-07, distinct(3)=7e-07, null(3)=0, distinct(5)=7e-07, null(5)=0]
│ │ │ histogram(5)=
│ │ ├── key: (3)
│ │ └── fd: (3)-->(5)
Expand Down Expand Up @@ -402,7 +402,7 @@ project
│ │ ├── columns: rowid:3(int!null) g_inverted_key:5(geometry!null)
│ │ ├── inverted constraint: /5/3
│ │ │ └── spans: ["B\xfd\xff\xff\xff\xff\xff\xff\xff\xff", "B\xfd\xff\xff\xff\xff\xff\xff\xff\xff"]
│ │ ├── stats: [rows=7e-07, distinct(3)=1.99999931e-07, null(3)=0, distinct(5)=7e-07, null(5)=0]
│ │ ├── stats: [rows=7e-07, distinct(3)=7e-07, null(3)=0, distinct(5)=7e-07, null(5)=0]
│ │ │ histogram(5)=
│ │ ├── key: (3)
│ │ └── fd: (3)-->(5)
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/opt/memo/testdata/stats/inverted-geo-multi-column
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ project
│ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00")
│ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"]
│ ├── flags: force-index=m
│ ├── stats: [rows=153.552632, distinct(1)=43.8721805, null(1)=0, distinct(3)=1, null(3)=0, distinct(6)=3, null(6)=0, distinct(3,6)=3, null(3,6)=0]
│ ├── stats: [rows=153.552632, distinct(1)=43.8721804, null(1)=0, distinct(3)=1, null(3)=0, distinct(6)=3, null(6)=0, distinct(3,6)=3, null(3,6)=0]
│ │ histogram(3)= 0 100
│ │ <--- 'banana'
│ │ histogram(6)= 0 0 25.592 0 25.592 51.184 25.592 0 0 0 25.592 0
Expand Down Expand Up @@ -158,7 +158,7 @@ project
│ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00")
│ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"]
│ ├── flags: force-index=p
│ ├── stats: [rows=153.552632, distinct(1)=43.8721805, null(1)=0, distinct(3)=1, null(3)=0, distinct(7)=3, null(7)=0, distinct(3,7)=3, null(3,7)=0]
│ ├── stats: [rows=153.552632, distinct(1)=43.8721804, null(1)=0, distinct(3)=1, null(3)=0, distinct(7)=3, null(7)=0, distinct(3,7)=3, null(3,7)=0]
│ │ histogram(3)= 0 100
│ │ <--- 'banana'
│ │ histogram(7)= 0 0 25.592 0 25.592 51.184 25.592 0 0 0 25.592 0
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/opt/memo/testdata/stats/limit
Original file line number Diff line number Diff line change
Expand Up @@ -214,13 +214,13 @@ limit
├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2)
├── project
│ ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
│ ├── stats: [rows=100, distinct(2)=92.7652197, null(2)=50, distinct(3)=1, null(3)=0, distinct(2,3)=92.7652197, null(2,3)=0]
│ ├── stats: [rows=100, distinct(2)=92.7652198, null(2)=50, distinct(3)=1, null(3)=0, distinct(2,3)=92.7652198, null(2,3)=0]
│ ├── key: (1)
│ ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2)
│ ├── limit hint: 5.00
│ └── select
│ ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) crdb_internal_mvcc_timestamp:5(decimal)
│ ├── stats: [rows=100, distinct(2)=92.7652197, null(2)=50, distinct(3)=1, null(3)=0, distinct(2,3)=92.7652197, null(2,3)=0]
│ ├── stats: [rows=100, distinct(2)=92.7652198, null(2)=50, distinct(3)=1, null(3)=0, distinct(2,3)=92.7652198, null(2,3)=0]
│ ├── key: (1)
│ ├── fd: ()-->(3), (1)-->(2,4,5), (4)-->(1,2,5)
│ ├── limit hint: 5.00
Expand Down
6 changes: 3 additions & 3 deletions pkg/sql/opt/memo/testdata/stats/lookup-join
Original file line number Diff line number Diff line change
Expand Up @@ -245,13 +245,13 @@ left-join (lookup def)
├── columns: a:1(int!null) b:2(int) c:3(int!null) d:5(int) e:6(int) f:7(int) g:8(float)
├── key columns: [7 6] = [7 6]
├── lookup columns are key
├── stats: [rows=100, distinct(5)=10, null(5)=90, distinct(8)=9.95021575, null(8)=91]
├── stats: [rows=100, distinct(5)=10.0000001, null(5)=89.9999999, distinct(8)=9.95021585, null(8)=90.9999999]
├── key: (1,3,6,7)
├── fd: (1,3)-->(2), (6,7)-->(5,8)
├── left-join (lookup def@d_idx)
│ ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:5(int) e:6(int) f:7(int)
│ ├── key columns: [1] = [5]
│ ├── stats: [rows=100, distinct(5)=10, null(5)=90]
│ ├── stats: [rows=100, distinct(5)=10.0000001, null(5)=89.9999999]
│ ├── key: (1,3,6,7)
│ ├── fd: (1,3)-->(2), (6,7)-->(5)
│ ├── scan abc
Expand All @@ -270,7 +270,7 @@ SELECT * FROM abc LEFT JOIN DEF ON a = e AND b = 3
----
right-join (hash)
├── columns: a:1(int!null) b:2(int) c:3(int!null) d:5(int) e:6(int) f:7(int) g:8(float)
├── stats: [rows=100, distinct(6)=100, null(6)=0, distinct(8)=95.1671064, null(8)=1]
├── stats: [rows=100.000001, distinct(6)=100, null(6)=0, distinct(8)=95.1671073, null(8)=1.00000001]
├── key: (1,3,6,7)
├── fd: (1,3)-->(2), (6,7)-->(5,8)
├── scan def
Expand Down
24 changes: 12 additions & 12 deletions pkg/sql/opt/memo/testdata/stats/partial-index-scan
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ SELECT * FROM a WHERE s = 'foo'
----
index-join a
├── columns: k:1(int!null) i:2(int) s:3(string!null) t:4(string)
├── stats: [rows=96.4285714, distinct(3)=1, null(3)=0]
├── stats: [rows=96.4285715, distinct(3)=1, null(3)=0]
├── key: (1)
├── fd: ()-->(3), (1)-->(2,4)
└── scan a@idx,partial
├── columns: k:1(int!null) i:2(int)
├── stats: [rows=96.4285714, distinct(3)=1, null(3)=0]
├── stats: [rows=96.4285715, distinct(3)=1, null(3)=0]
├── key: (1)
└── fd: (1)-->(2)

Expand Down Expand Up @@ -1281,17 +1281,17 @@ SELECT k FROM spatial WHERE st_intersects('LINESTRING(0.5 0.5, 0.7 0.7)', g) AND
project
├── columns: k:1(int!null)
├── immutable
├── stats: [rows=8.54700855]
├── stats: [rows=8.54700856]
├── key: (1)
└── select
├── columns: k:1(int!null) g:2(geometry!null) s:3(string!null)
├── immutable
├── stats: [rows=8.54700855, distinct(2)=7, null(2)=0, distinct(3)=3, null(3)=0]
├── stats: [rows=8.54700856, distinct(2)=7, null(2)=0, distinct(3)=3, null(3)=0]
├── key: (1)
├── fd: (1)-->(2,3)
├── index-join spatial
│ ├── columns: k:1(int!null) g:2(geometry) s:3(string)
│ ├── stats: [rows=8.54700855]
│ ├── stats: [rows=8.54700856]
│ ├── key: (1)
│ ├── fd: (1)-->(2,3)
│ └── inverted-filter
Expand All @@ -1304,7 +1304,7 @@ project
│ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"]
│ ├── pre-filterer expression
│ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool]
│ ├── stats: [rows=8.54700855]
│ ├── stats: [rows=8.54700856]
│ ├── key: (1)
│ └── scan spatial@p,partial
│ ├── columns: k:1(int!null) g_inverted_key:6(geometry!null)
Expand All @@ -1313,7 +1313,7 @@ project
│ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x10\x00\x00\x00\x00\x00\x00\x00"]
│ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00")
│ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"]
│ ├── stats: [rows=8.54700855, distinct(1)=8.54700855, null(1)=0, distinct(3)=3, null(3)=0, distinct(6)=8.54700855, null(6)=0]
│ ├── stats: [rows=8.54700856, distinct(1)=8.54700856, null(1)=0, distinct(3)=3, null(3)=0, distinct(6)=8.54700856, null(6)=0]
│ ├── key: (1)
│ └── fd: (1)-->(6)
└── filters
Expand All @@ -1325,17 +1325,17 @@ SELECT k FROM spatial WHERE st_intersects('LINESTRING(0.5 0.5, 0.7 0.7)', g) AND
project
├── columns: k:1(int!null)
├── immutable
├── stats: [rows=2.84900285]
├── stats: [rows=2.84900286]
├── key: (1)
└── select
├── columns: k:1(int!null) g:2(geometry!null) s:3(string!null)
├── immutable
├── stats: [rows=2.84900285, distinct(2)=2.84900285, null(2)=0, distinct(3)=1, null(3)=0]
├── stats: [rows=2.84900286, distinct(2)=2.84900286, null(2)=0, distinct(3)=1, null(3)=0]
├── key: (1)
├── fd: ()-->(3), (1)-->(2)
├── index-join spatial
│ ├── columns: k:1(int!null) g:2(geometry) s:3(string)
│ ├── stats: [rows=8.54700855]
│ ├── stats: [rows=8.54700856]
│ ├── key: (1)
│ ├── fd: (1)-->(2,3)
│ └── inverted-filter
Expand All @@ -1348,7 +1348,7 @@ project
│ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"]
│ ├── pre-filterer expression
│ │ └── st_intersects('010200000002000000000000000000E03F000000000000E03F666666666666E63F666666666666E63F', g:2) [type=bool]
│ ├── stats: [rows=8.54700855]
│ ├── stats: [rows=8.54700856]
│ ├── key: (1)
│ └── scan spatial@p,partial
│ ├── columns: k:1(int!null) g_inverted_key:6(geometry!null)
Expand All @@ -1357,7 +1357,7 @@ project
│ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x10\x00\x00\x00\x00\x00\x00\x00"]
│ │ ├── ["B\xfd\x10\x00\x00\x00\x00\x00\x00\x01", "B\xfd\x12\x00\x00\x00\x00\x00\x00\x00")
│ │ └── ["B\xfd\x14\x00\x00\x00\x00\x00\x00\x00", "B\xfd\x14\x00\x00\x00\x00\x00\x00\x00"]
│ ├── stats: [rows=8.54700855, distinct(1)=8.54700855, null(1)=0, distinct(3)=3, null(3)=0, distinct(6)=8.54700855, null(6)=0]
│ ├── stats: [rows=8.54700856, distinct(1)=8.54700856, null(1)=0, distinct(3)=3, null(3)=0, distinct(6)=8.54700856, null(6)=0]
│ ├── key: (1)
│ └── fd: (1)-->(6)
└── filters
Expand Down
Loading

0 comments on commit 1c31782

Please sign in to comment.