forked from cockroachdb/cockroach
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
opt: use histograms for inverted JSON/ARRAY scan statistics
Fixes cockroachdb#56870. Release note (performance improvement): The optimizer now uses collected histogram statistics to better estimate the cost of JSON and ARRAY inverted index scans, which may lead to more efficient query plans.
- Loading branch information
Showing
8 changed files
with
756 additions
and
321 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
exec-ddl | ||
CREATE TABLE t ( | ||
k INT PRIMARY KEY, | ||
a INT[], | ||
INVERTED INDEX a_idx (a) | ||
) | ||
---- | ||
|
||
# Histogram boundaries are for ARRAY values `{}`, `{1}`, `{2}`, `{3}`. The | ||
# row_count is lower than the sum of the histogram's num_eq values because some | ||
# rows can have multiple inverted index entries, for example `{1, 2}`. There | ||
# are: | ||
# | ||
# - 1000 rows total | ||
# - 10 empty arrays | ||
# - 990 arrays encoded into 1010 index entries | ||
# | ||
exec-ddl | ||
ALTER TABLE t INJECT STATISTICS '[ | ||
{ | ||
"columns": ["a"], | ||
"created_at": "2018-01-01 1:00:00.00000+00:00", | ||
"row_count": 1000, | ||
"distinct_count": 3, | ||
"null_count": 0, | ||
"histo_col_type": "BYTES", | ||
"histo_buckets": [ | ||
{ | ||
"distinct_range": 0, | ||
"num_eq": 10, | ||
"num_range": 0, | ||
"upper_bound": "\\x43" | ||
}, | ||
{ | ||
"distinct_range": 0, | ||
"num_eq": 990, | ||
"num_range": 0, | ||
"upper_bound": "\\x89" | ||
}, | ||
{ | ||
"distinct_range": 0, | ||
"num_eq": 10, | ||
"num_range": 0, | ||
"upper_bound": "\\x8a" | ||
}, | ||
{ | ||
"distinct_range": 0, | ||
"num_eq": 10, | ||
"num_range": 0, | ||
"upper_bound": "\\x8b" | ||
} | ||
] | ||
} | ||
]' | ||
---- | ||
|
||
# Containment of an empty array requires a scan over all array entries. | ||
opt | ||
SELECT * FROM t@a_idx WHERE a @> '{}' | ||
---- | ||
index-join t | ||
├── columns: k:1(int!null) a:2(int[]!null) | ||
├── immutable | ||
├── stats: [rows=333.333333] | ||
├── key: (1) | ||
├── fd: (1)-->(2) | ||
└── inverted-filter | ||
├── columns: k:1(int!null) | ||
├── inverted expression: /4 | ||
│ ├── tight: true, unique: false | ||
│ └── union spans: ["", ""] | ||
├── stats: [rows=1020] | ||
├── key: (1) | ||
└── scan t@a_idx | ||
├── columns: k:1(int!null) a_inverted_key:4(int[]!null) | ||
├── inverted constraint: /4/1 | ||
│ └── spans: ["", ""] | ||
├── flags: force-index=a_idx | ||
├── stats: [rows=1020, distinct(1)=1000, null(1)=0, distinct(4)=4, null(4)=0] | ||
│ histogram(4)= 0 10 0 990 0 10 0 10 | ||
│ <--- '\x43' --- '\x89' --- '\x8a' --- '\x8b' | ||
├── key: (1) | ||
└── fd: (1)-->(4) | ||
|
||
# An inverted index scan is preferred for a more selective filter. | ||
opt | ||
SELECT * FROM t WHERE a @> '{2}' | ||
---- | ||
index-join t | ||
├── columns: k:1(int!null) a:2(int[]!null) | ||
├── immutable | ||
├── stats: [rows=111.111111] | ||
├── key: (1) | ||
├── fd: (1)-->(2) | ||
└── scan t@a_idx | ||
├── columns: k:1(int!null) | ||
├── inverted constraint: /4/1 | ||
│ └── spans: ["\x8a", "\x8a"] | ||
├── stats: [rows=10, distinct(4)=1, null(4)=0] | ||
│ histogram(4)= 0 10 0 0 | ||
│ <--- '\x8a' --- '\x8b' | ||
└── key: (1) | ||
|
||
# A disjunction requires scanning all entries that match either the left or the | ||
# right. | ||
opt | ||
SELECT * FROM t WHERE a @> '{2}' OR a @> '{3}' | ||
---- | ||
index-join t | ||
├── columns: k:1(int!null) a:2(int[]!null) | ||
├── immutable | ||
├── stats: [rows=333.333333, distinct(2)=3, null(2)=0] | ||
├── key: (1) | ||
├── fd: (1)-->(2) | ||
└── inverted-filter | ||
├── columns: k:1(int!null) | ||
├── inverted expression: /4 | ||
│ ├── tight: true, unique: false | ||
│ └── union spans: ["\x8a", "\x8c") | ||
├── stats: [rows=20] | ||
├── key: (1) | ||
└── scan t@a_idx | ||
├── columns: k:1(int!null) a_inverted_key:4(int[]!null) | ||
├── inverted constraint: /4/1 | ||
│ └── spans: ["\x8a", "\x8c") | ||
├── stats: [rows=20, distinct(1)=19.6078431, null(1)=0, distinct(4)=2, null(4)=0] | ||
│ histogram(4)= 0 10 0 10 | ||
│ <--- '\x8a' --- '\x8b' | ||
├── key: (1) | ||
└── fd: (1)-->(4) |
Oops, something went wrong.