-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
59326: opt: use histograms for inverted JSON/ARRAY scan statistics r=rytaft a=mgartner #### memo: simplify statisticsBuilder.constrainScan A `ScanExpr`, which includes an `InvertedConstraint`, is always passed to `statisticsBuilder.constrainScan`, therefore there is no need to pass the `InvertedConstraint` separately. Release note: None #### memo: rename multi-column-inverted-geo test file to inverted-geo-multi-column Release note: None #### opt: use histograms for inverted JSON/ARRAY scan statistics Fixes #56870 Release note (performance improvement): The optimizer now uses collected histograms statistics to better estimate the cost of JSON and ARRAY inverted index scans, which may lead to more efficient query plans. Co-authored-by: Marcus Gartner <[email protected]>
- Loading branch information
Showing
9 changed files
with
765 additions
and
332 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
exec-ddl | ||
CREATE TABLE t ( | ||
k INT PRIMARY KEY, | ||
a INT[], | ||
INVERTED INDEX a_idx (a) | ||
) | ||
---- | ||
|
||
# Histogram boundaries are for ARRAY values `{}`, `{1}`, `{2}`, `{3}`. The | ||
# row_count (1000) is lower than the sum of the histogram buckets' num_eq | ||
# values (1020) because some rows can have multiple inverted index entries, | ||
# for example `{1, 2}`. There are: | ||
# | ||
# - 1000 rows total | ||
# - 10 empty arrays | ||
# - 990 arrays encoded into 1010 index entries | ||
# | ||
exec-ddl | ||
ALTER TABLE t INJECT STATISTICS '[ | ||
{ | ||
"columns": ["a"], | ||
"created_at": "2018-01-01 1:00:00.00000+00:00", | ||
"row_count": 1000, | ||
"distinct_count": 3, | ||
"null_count": 0, | ||
"histo_col_type": "BYTES", | ||
"histo_buckets": [ | ||
{ | ||
"distinct_range": 0, | ||
"num_eq": 10, | ||
"num_range": 0, | ||
"upper_bound": "\\x43" | ||
}, | ||
{ | ||
"distinct_range": 0, | ||
"num_eq": 990, | ||
"num_range": 0, | ||
"upper_bound": "\\x89" | ||
}, | ||
{ | ||
"distinct_range": 0, | ||
"num_eq": 10, | ||
"num_range": 0, | ||
"upper_bound": "\\x8a" | ||
}, | ||
{ | ||
"distinct_range": 0, | ||
"num_eq": 10, | ||
"num_range": 0, | ||
"upper_bound": "\\x8b" | ||
} | ||
] | ||
} | ||
]' | ||
---- | ||
|
||
# Containment of an empty array requires a scan over all array entries. | ||
opt | ||
SELECT * FROM t@a_idx WHERE a @> '{}' | ||
---- | ||
index-join t | ||
├── columns: k:1(int!null) a:2(int[]!null) | ||
├── immutable | ||
├── stats: [rows=333.333333] | ||
├── key: (1) | ||
├── fd: (1)-->(2) | ||
└── inverted-filter | ||
├── columns: k:1(int!null) | ||
├── inverted expression: /4 | ||
│ ├── tight: true, unique: false | ||
│ └── union spans: ["", ""] | ||
├── stats: [rows=1020] | ||
├── key: (1) | ||
└── scan t@a_idx | ||
├── columns: k:1(int!null) a_inverted_key:4(int[]!null) | ||
├── inverted constraint: /4/1 | ||
│ └── spans: ["", ""] | ||
├── flags: force-index=a_idx | ||
├── stats: [rows=1020, distinct(1)=1000, null(1)=0, distinct(4)=4, null(4)=0] | ||
│ histogram(4)= 0 10 0 990 0 10 0 10 | ||
│ <--- '\x43' --- '\x89' --- '\x8a' --- '\x8b' | ||
├── key: (1) | ||
└── fd: (1)-->(4) | ||
|
||
# An inverted index scan is preferred for a more selective filter. | ||
opt | ||
SELECT * FROM t WHERE a @> '{2}' | ||
---- | ||
index-join t | ||
├── columns: k:1(int!null) a:2(int[]!null) | ||
├── immutable | ||
├── stats: [rows=111.111111] | ||
├── key: (1) | ||
├── fd: (1)-->(2) | ||
└── scan t@a_idx | ||
├── columns: k:1(int!null) | ||
├── inverted constraint: /4/1 | ||
│ └── spans: ["\x8a", "\x8a"] | ||
├── stats: [rows=10, distinct(4)=1, null(4)=0] | ||
│ histogram(4)= 0 10 0 0 | ||
│ <--- '\x8a' --- '\x8b' | ||
└── key: (1) | ||
|
||
# A disjunction requires scanning all entries that match either the left or the | ||
# right. | ||
opt | ||
SELECT * FROM t WHERE a @> '{2}' OR a @> '{3}' | ||
---- | ||
index-join t | ||
├── columns: k:1(int!null) a:2(int[]!null) | ||
├── immutable | ||
├── stats: [rows=333.333333, distinct(2)=3, null(2)=0] | ||
├── key: (1) | ||
├── fd: (1)-->(2) | ||
└── inverted-filter | ||
├── columns: k:1(int!null) | ||
├── inverted expression: /4 | ||
│ ├── tight: true, unique: false | ||
│ └── union spans: ["\x8a", "\x8c") | ||
├── stats: [rows=20] | ||
├── key: (1) | ||
└── scan t@a_idx | ||
├── columns: k:1(int!null) a_inverted_key:4(int[]!null) | ||
├── inverted constraint: /4/1 | ||
│ └── spans: ["\x8a", "\x8c") | ||
├── stats: [rows=20, distinct(1)=19.6078431, null(1)=0, distinct(4)=2, null(4)=0] | ||
│ histogram(4)= 0 10 0 10 | ||
│ <--- '\x8a' --- '\x8b' | ||
├── key: (1) | ||
└── fd: (1)-->(4) |
File renamed without changes.
Oops, something went wrong.