Skip to content

Commit

Permalink
opt: add cost penalty for scans with large cardinality
Browse files Browse the repository at this point in the history
This commit adds a new cost function, largeCardinalityRowCountPenalty,
which calculates a penalty that should be added to the row count of scans.
It is non-zero for expressions with unbounded maximum cardinality or with
maximum cardinality exceeding the row count estimate. Adding a few rows'
worth of cost helps prevent surprising plans for very small tables or
when stats are stale.

Fixes cockroachdb#64570

Release note (performance improvement): When choosing between index
scans that are estimated to have the same number of rows, the optimizer
now prefers indexes for which it has higher certainty about the maximum
number of rows over indexes for which there is more uncertainty in the
estimated row count. This helps to avoid choosing suboptimal plans for
small tables or if the statistics are stale.
  • Loading branch information
rytaft committed Jun 28, 2021
1 parent b1fa526 commit 83e6497
Show file tree
Hide file tree
Showing 44 changed files with 1,354 additions and 1,117 deletions.
4 changes: 2 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/alter_primary_key
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ SELECT * FROM parent
4 5

query T
SELECT * FROM [EXPLAIN SELECT * FROM child WHERE x >= 1 AND x < 5 AND y >= 2 AND y <= 6] OFFSET 2
SELECT * FROM [EXPLAIN SELECT * FROM child@primary WHERE x >= 1 AND x < 5 AND y >= 2 AND y <= 6] OFFSET 2
----
·
• filter
Expand All @@ -130,7 +130,7 @@ SELECT * FROM [EXPLAIN SELECT * FROM child WHERE x >= 1 AND x < 5 AND y >= 2 AND
spans: [/1/2 - /4/6]

query III rowsort
SELECT * FROM child WHERE x >= 1 AND x < 5 AND y >= 2 AND y <= 6
SELECT * FROM child@primary WHERE x >= 1 AND x < 5 AND y >= 2 AND y <= 6
----
1 2 3
4 5 6
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/prepare
Original file line number Diff line number Diff line change
Expand Up @@ -1171,14 +1171,14 @@ select
├── columns: k:1 str:2
├── immutable
├── stats: [rows=333.333333]
├── cost: 1064.43
├── cost: 1074.83
├── key: (1)
├── fd: (1)-->(2)
├── prune: (2)
├── scan t2
│ ├── columns: k:1 str:2
│ ├── stats: [rows=1000]
│ ├── cost: 1054.41
│ ├── cost: 1064.81
│ ├── key: (1)
│ ├── fd: (1)-->(2)
│ ├── prune: (1,2)
Expand Down
10 changes: 5 additions & 5 deletions pkg/sql/logictest/testdata/logic_test/vectorize_local
Original file line number Diff line number Diff line change
Expand Up @@ -104,35 +104,35 @@ EXPLAIN (OPT, VERBOSE) SELECT c.a FROM c INNER MERGE JOIN d ON c.a = d.b
project
├── columns: a:1
├── stats: [rows=10]
├── cost: 1100.77
├── cost: 1122.17
├── prune: (1)
└── inner-join (merge)
├── columns: c.a:1 d.b:8
├── flags: force merge join
├── left ordering: +1
├── right ordering: +8
├── stats: [rows=10, distinct(1)=1, null(1)=0, distinct(8)=1, null(8)=0]
├── cost: 1100.66
├── cost: 1122.06
├── fd: (1)==(8), (8)==(1)
├── sort
│ ├── columns: c.a:1
│ ├── stats: [rows=1, distinct(1)=1, null(1)=0]
│ ├── cost: 15.93
│ ├── cost: 26.73
│ ├── ordering: +1
│ ├── prune: (1)
│ ├── interesting orderings: (+1)
│ ├── unfiltered-cols: (1-6)
│ └── scan c@sec
│ ├── columns: c.a:1
│ ├── stats: [rows=1, distinct(1)=1, null(1)=0]
│ ├── cost: 15.89
│ ├── cost: 26.69
│ ├── prune: (1)
│ ├── interesting orderings: (+1)
│ └── unfiltered-cols: (1-6)
├── scan d
│ ├── columns: d.b:8
│ ├── stats: [rows=1000, distinct(8)=100, null(8)=0]
│ ├── cost: 1074.61
│ ├── cost: 1085.21
│ ├── ordering: +8
│ ├── prune: (8)
│ ├── interesting orderings: (+8)
Expand Down
8 changes: 5 additions & 3 deletions pkg/sql/opt/exec/execbuilder/testdata/aggregate
Original file line number Diff line number Diff line change
Expand Up @@ -670,7 +670,7 @@ vectorized: true
• group (scalar)
│ columns: (min int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: min(x)
│ aggregate 0: any_not_null(x)
└── • project
│ columns: (x int)
Expand All @@ -680,6 +680,7 @@ vectorized: true
estimated row count: 1 (missing stats)
table: xyz@zyx
spans: /3/2-/3/3
limit: 1

statement ok
SET tracing = on,kv,results; SELECT min(x) FROM xyz WHERE (y, z) = (2, 3.0); SET tracing = off
Expand All @@ -701,16 +702,17 @@ vectorized: true
• group (scalar)
│ columns: (max int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: max(x)
│ aggregate 0: any_not_null(x)
└── • project
│ columns: (x int)
└── • scan
└── • revscan
columns: (x int, y int, z float)
estimated row count: 1 (missing stats)
table: xyz@zyx
spans: /3/2-/3/3
limit: 1

# VARIANCE/STDDEV

Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/opt/exec/execbuilder/testdata/distsql_agg
Original file line number Diff line number Diff line change
Expand Up @@ -963,15 +963,15 @@ group-by
├── grouping columns: b:2
├── internal-ordering: +2 opt(1)
├── stats: [rows=9.5617925, distinct(2)=9.5617925, null(2)=0]
├── cost: 15.1256179
├── cost: 25.9256179
├── key: (2)
├── fd: (2)-->(5)
├── prune: (5)
├── scan data2
│ ├── columns: a:1 b:2
│ ├── constraint: /1/2: [/1 - /1]
│ ├── stats: [rows=10, distinct(1)=1, null(1)=0, distinct(2)=9.5617925, null(2)=0]
│ ├── cost: 14.81
│ ├── cost: 25.61
│ ├── key: (2)
│ ├── fd: ()-->(1)
│ ├── ordering: +2 opt(1) [actual: +2]
Expand Down
34 changes: 17 additions & 17 deletions pkg/sql/opt/exec/execbuilder/testdata/explain
Original file line number Diff line number Diff line change
Expand Up @@ -1097,21 +1097,21 @@ EXPLAIN (OPT,VERBOSE) SELECT * FROM tc WHERE a = 10 ORDER BY b
sort
├── columns: a:1 b:2
├── stats: [rows=10, distinct(1)=1, null(1)=0]
├── cost: 76.7943856
├── cost: 87.5943856
├── fd: ()-->(1)
├── ordering: +2 opt(1) [actual: +2]
├── prune: (2)
└── index-join tc
├── columns: a:1 b:2
├── stats: [rows=10, distinct(1)=1, null(1)=0]
├── cost: 75.72
├── cost: 86.52
├── fd: ()-->(1)
├── prune: (2)
└── scan tc@c
├── columns: a:1 rowid:3
├── constraint: /1/3: [/10 - /10]
├── stats: [rows=10, distinct(1)=1, null(1)=0]
├── cost: 14.81
├── cost: 25.61
├── key: (3)
└── fd: ()-->(1)

Expand All @@ -1121,21 +1121,21 @@ EXPLAIN (OPT,TYPES) SELECT * FROM tc WHERE a = 10 ORDER BY b
sort
├── columns: a:1(int!null) b:2(int)
├── stats: [rows=10, distinct(1)=1, null(1)=0]
├── cost: 76.7943856
├── cost: 87.5943856
├── fd: ()-->(1)
├── ordering: +2 opt(1) [actual: +2]
├── prune: (2)
└── index-join tc
├── columns: a:1(int!null) b:2(int)
├── stats: [rows=10, distinct(1)=1, null(1)=0]
├── cost: 75.72
├── cost: 86.52
├── fd: ()-->(1)
├── prune: (2)
└── scan tc@c
├── columns: a:1(int!null) rowid:3(int!null)
├── constraint: /1/3: [/10 - /10]
├── stats: [rows=10, distinct(1)=1, null(1)=0]
├── cost: 14.81
├── cost: 25.61
├── key: (3)
└── fd: ()-->(1)

Expand Down Expand Up @@ -1183,20 +1183,20 @@ inner-join (hash)
├── columns: a:1 b:2 k:6 v:7
├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-more)
├── stats: [rows=990, distinct(1)=99, null(1)=0, distinct(6)=99, null(6)=0]
├── cost: 2249.87625
├── cost: 2271.67625
├── fd: (6)-->(7), (1)==(6), (6)==(1)
├── prune: (2,7)
├── scan tc
│ ├── columns: a:1 b:2
│ ├── stats: [rows=1000, distinct(1)=100, null(1)=10]
│ ├── cost: 1115.01
│ ├── cost: 1126.01
│ ├── prune: (1,2)
│ ├── interesting orderings: (+1)
│ └── unfiltered-cols: (1-5)
├── scan t
│ ├── columns: k:6 v:7
│ ├── stats: [rows=1000, distinct(6)=1000, null(6)=0]
│ ├── cost: 1094.81
│ ├── cost: 1105.61
│ ├── key: (6)
│ ├── fd: (6)-->(7)
│ ├── prune: (6,7)
Expand Down Expand Up @@ -1224,7 +1224,7 @@ sort
├── columns: a:1 b:2 [hidden: column6:6]
├── immutable
├── stats: [rows=333.333333]
├── cost: 1204.26951
├── cost: 1215.26951
├── fd: (1,2)-->(6)
├── ordering: +6
├── prune: (1,2,6)
Expand All @@ -1233,20 +1233,20 @@ sort
├── columns: column6:6 a:1 b:2
├── immutable
├── stats: [rows=333.333333]
├── cost: 1131.70667
├── cost: 1142.70667
├── fd: (1,2)-->(6)
├── prune: (1,2,6)
├── interesting orderings: (+1)
├── select
│ ├── columns: a:1 b:2
│ ├── immutable
│ ├── stats: [rows=333.333333]
│ ├── cost: 1125.03
│ ├── cost: 1136.03
│ ├── interesting orderings: (+1)
│ ├── scan tc
│ │ ├── columns: a:1 b:2
│ │ ├── stats: [rows=1000]
│ │ ├── cost: 1115.01
│ │ ├── cost: 1126.01
│ │ ├── prune: (1,2)
│ │ └── interesting orderings: (+1)
│ └── filters
Expand All @@ -1261,7 +1261,7 @@ sort
├── columns: a:1(int) b:2(int) [hidden: column6:6(int)]
├── immutable
├── stats: [rows=333.333333]
├── cost: 1204.26951
├── cost: 1215.26951
├── fd: (1,2)-->(6)
├── ordering: +6
├── prune: (1,2,6)
Expand All @@ -1270,20 +1270,20 @@ sort
├── columns: column6:6(int) a:1(int) b:2(int)
├── immutable
├── stats: [rows=333.333333]
├── cost: 1131.70667
├── cost: 1142.70667
├── fd: (1,2)-->(6)
├── prune: (1,2,6)
├── interesting orderings: (+1)
├── select
│ ├── columns: a:1(int) b:2(int)
│ ├── immutable
│ ├── stats: [rows=333.333333]
│ ├── cost: 1125.03
│ ├── cost: 1136.03
│ ├── interesting orderings: (+1)
│ ├── scan tc
│ │ ├── columns: a:1(int) b:2(int)
│ │ ├── stats: [rows=1000]
│ │ ├── cost: 1115.01
│ │ ├── cost: 1126.01
│ │ ├── prune: (1,2)
│ │ └── interesting orderings: (+1)
│ └── filters
Expand Down
24 changes: 12 additions & 12 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -1468,7 +1468,7 @@ inner-join (lookup geo_table)
├── lookup columns are key
├── immutable
├── stats: [rows=9801]
├── cost: 112694.84
├── cost: 112705.64
├── key: (1,5)
├── fd: (1)-->(2), (5)-->(6)
├── prune: (1,5)
Expand All @@ -1477,13 +1477,13 @@ inner-join (lookup geo_table)
│ ├── inverted-expr
│ │ └── st_intersects(geo_table2.geom:2, geo_table.geom:12)
│ ├── stats: [rows=10000, distinct(1)=999.956829, null(1)=0, distinct(11)=999.956829, null(11)=0]
│ ├── cost: 41794.82
│ ├── cost: 41805.62
│ ├── key: (1,11)
│ ├── fd: (1)-->(2)
│ ├── scan geo_table2
│ │ ├── columns: geo_table2.k:1 geo_table2.geom:2
│ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=100, null(2)=10]
│ │ ├── cost: 1094.81
│ │ ├── cost: 1105.61
│ │ ├── key: (1)
│ │ ├── fd: (1)-->(2)
│ │ ├── prune: (1,2)
Expand Down Expand Up @@ -1558,7 +1558,7 @@ left-join (lookup geo_table)
├── second join in paired joiner
├── immutable
├── stats: [rows=10000]
├── cost: 112894.84
├── cost: 112905.64
├── key: (1,5)
├── fd: (1)-->(2), (5)-->(6)
├── prune: (1,5)
Expand All @@ -1568,13 +1568,13 @@ left-join (lookup geo_table)
│ ├── inverted-expr
│ │ └── st_intersects(geo_table2.geom:2, geo_table.geom:12)
│ ├── stats: [rows=10000, distinct(1)=1000, null(1)=0, distinct(11)=999.956829, null(11)=0]
│ ├── cost: 41994.82
│ ├── cost: 42005.62
│ ├── key: (1,11)
│ ├── fd: (1)-->(2), (11)-->(16)
│ ├── scan geo_table2
│ │ ├── columns: geo_table2.k:1 geo_table2.geom:2
│ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0]
│ │ ├── cost: 1094.81
│ │ ├── cost: 1105.61
│ │ ├── key: (1)
│ │ ├── fd: (1)-->(2)
│ │ ├── prune: (1,2)
Expand All @@ -1594,7 +1594,7 @@ semi-join (lookup geo_table)
├── second join in paired joiner
├── immutable
├── stats: [rows=10]
├── cost: 112694.84
├── cost: 112705.64
├── key: (1)
├── fd: (1)-->(2)
├── prune: (1)
Expand All @@ -1604,13 +1604,13 @@ semi-join (lookup geo_table)
│ ├── inverted-expr
│ │ └── st_intersects(geo_table2.geom:2, geo_table.geom:12)
│ ├── stats: [rows=10000, distinct(1)=999.956829, null(1)=0, distinct(11)=999.956829, null(11)=0]
│ ├── cost: 41994.82
│ ├── cost: 42005.62
│ ├── key: (1,11)
│ ├── fd: (1)-->(2), (11)-->(16)
│ ├── scan geo_table2
│ │ ├── columns: geo_table2.k:1 geo_table2.geom:2
│ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0, distinct(2)=100, null(2)=10]
│ │ ├── cost: 1094.81
│ │ ├── cost: 1105.61
│ │ ├── key: (1)
│ │ ├── fd: (1)-->(2)
│ │ ├── prune: (1,2)
Expand All @@ -1631,7 +1631,7 @@ anti-join (lookup geo_table)
├── second join in paired joiner
├── immutable
├── stats: [rows=990]
├── cost: 112694.84
├── cost: 112705.64
├── key: (1)
├── fd: (1)-->(2)
├── prune: (1)
Expand All @@ -1641,13 +1641,13 @@ anti-join (lookup geo_table)
│ ├── inverted-expr
│ │ └── st_intersects(geo_table2.geom:2, geo_table.geom:12)
│ ├── stats: [rows=10000, distinct(1)=1000, null(1)=0, distinct(11)=999.956829, null(11)=0]
│ ├── cost: 41994.82
│ ├── cost: 42005.62
│ ├── key: (1,11)
│ ├── fd: (1)-->(2), (11)-->(16)
│ ├── scan geo_table2
│ │ ├── columns: geo_table2.k:1 geo_table2.geom:2
│ │ ├── stats: [rows=1000, distinct(1)=1000, null(1)=0]
│ │ ├── cost: 1094.81
│ │ ├── cost: 1105.61
│ │ ├── key: (1)
│ │ ├── fd: (1)-->(2)
│ │ ├── prune: (1,2)
Expand Down
Loading

0 comments on commit 83e6497

Please sign in to comment.