Skip to content

Commit

Permalink
opt: inverted-index accelerate filters of the form j->0 @> '{"b": "c"}
Browse files Browse the repository at this point in the history
Previously, the optimizer did not plan inverted index scans for filters
having an integer as the index for the fetch value in a filter alongside
the "contains" or the "contained by" operator.

To address this, we now build JSON arrays from fetch value expressions
with integer indexes. From these JSON arrays, inverted spans are built
for constraining scans over inverted indexes. With these changes chains
of both integer and string fetch value operators are now supported
alongside the "contains" and the "contained by" operators.
(e.g., j->0 @> '{"b": "c"}' and j->0 <@ '{"b": "c"}').

Epic: CRDB-3301
Fixes: cockroachdb#94667

Release note (performance improvement): The optimizer now plans
inverted index scans for queries that filter by JSON fetch value
operators (->) with integer indices alongside the "contains" or
the "contained by" operators, e.g, json_col->0 @> '{"b": "c"}'
or json_col->0 <@ '{"b": "c"}'
  • Loading branch information
Shivs11 committed Jan 30, 2023
1 parent 10ef5d9 commit 208fba2
Show file tree
Hide file tree
Showing 4 changed files with 516 additions and 49 deletions.
92 changes: 91 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,96 @@ INSERT INTO f VALUES
(36, '[[]]'),
(37, '[{"a": [0, "b"]}, null, 1]'),
(38, '[[0, 1, 2], {"b": "c"}]'),
(39, '[[0, [1, 2]]]')
(39, '[[0, [1, 2]]]'),
(40, '[[0, 1, 2]]'),
(41, '[[{"a": {"b": []}}]]')

query T
SELECT j FROM f@i WHERE j->0 @> '[0, 1, 2, 3]'
----

query T
SELECT j FROM f@i WHERE j->0 @> '[0]'
----
[[0, 1, 2], {"b": "c"}]
[[0, [1, 2]]]
[[0, 1, 2]]

query T
SELECT j FROM f@i WHERE j->0->1 @> '[1, 2, 3]'
----

query T
SELECT j FROM f@i WHERE j->0->1 @> '[1, 2]'
----
[[0, [1, 2]]]

query T
SELECT j FROM f@i WHERE j->0 @> '{"a": {}}'
----
[{"a": {"b": "c"}}, "d", "e"]

query T
SELECT j FROM f@i WHERE j->0 @> '{"a": {"b": "c"}}'
----
[{"a": {"b": "c"}}, "d", "e"]

query T
SELECT j FROM f@i WHERE j->0->1 @> '{"a": {"b": []}}'
----
[[{"a": {"b": []}}]]

query T
SELECT j FROM f@i WHERE j->'a'->0 @> '1'
----
{"a": [1, 2]}
{"a": [1, 2, null]}

query T
SELECT j FROM f@i WHERE j->0->'a' @> '{"b": "c"}'
----
[{"a": {"b": "c"}}, "d", "e"]

query T
SELECT j FROM f@i WHERE j->0 <@ '[1, 2, 3]'
----
[1, 2, {"b": "c"}]
[[]]

query T
SELECT j FROM f@i WHERE j->1 <@ '[1, 2, 3]'
----
[1, 2, {"b": "c"}]

query T
SELECT j FROM f@i WHERE j->0->0 <@ '[1, 2, 3]'
----

query T
SELECT j FROM f@i WHERE j->2 <@ '["d", "e"]'
----
[{"a": {"b": "c"}}, "d", "e"]

query T
SELECT j FROM f@i WHERE j->0 <@ '{"a": {"b": "c"}}'
----
[{"a": {"b": "c"}}, "d", "e"]

query T
SELECT j FROM f@i WHERE j->0 <@ '["a", "b"]'
----
["a"]
[[]]

query T
SELECT j FROM f@i WHERE j->0 <@ '"a"'
----
["a"]

query T
SELECT j FROM f@i WHERE j->0 <@ '1'
----
[1, 2, {"b": "c"}]

query T
SELECT j FROM f@i WHERE j->0 = '[]' ORDER BY k
Expand Down Expand Up @@ -873,6 +962,7 @@ SELECT j FROM f@i WHERE j->0->0 = '0' ORDER BY k
----
[[0, 1, 2], {"b": "c"}]
[[0, [1, 2]]]
[[0, 1, 2]]

query T
SELECT j FROM f@i WHERE j->0->1 = '[1, 2]' ORDER BY k
Expand Down
194 changes: 181 additions & 13 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -1484,26 +1484,27 @@ vectorized: true
right columns: (a, b_inverted_key)
right fixed values: 1 column

# Inverted indices won't be used for queries of the form
# Inverted indices will be used for queries of the form
# b->0 @> '{"b": "c"}'
query T
EXPLAIN (VERBOSE) SELECT b FROM d WHERE b->0 @> '{"b": "c"}'
----
distribution: local
vectorized: true
·
filter
index join
│ columns: (b)
│ estimated row count: 111 (missing stats)
│ filter: (b->0) @> '{"b": "c"}'
│ table: d@d_pkey
│ key columns: a
└── • scan
columns: (b)
estimated row count: 1,000 (missing stats)
table: d@d_pkey
spans: FULL SCAN
columns: (a)
estimated row count: 111 (missing stats)
table: d@foo_inv
spans: /Arr/"b"/"c"-/Arr/"b"/"c"/PrefixEnd

# Inverted indices won't be used for queries of the form
# Inverted indices will be used for queries of the form
# b->0 <@ '{"b": "c"}'
query T
EXPLAIN (VERBOSE) SELECT b FROM d WHERE b->0 <@ '{"b": "c"}'
Expand All @@ -1516,11 +1517,178 @@ vectorized: true
│ estimated row count: 333 (missing stats)
│ filter: (b->0) <@ '{"b": "c"}'
└── • scan
columns: (b)
estimated row count: 1,000 (missing stats)
table: d@d_pkey
spans: FULL SCAN
└── • index join
│ columns: (b)
│ estimated row count: 111 (missing stats)
│ table: d@d_pkey
│ key columns: a
└── • project
│ columns: (a)
└── • inverted filter
│ columns: (a, b_inverted_key)
│ estimated row count: 111 (missing stats)
│ inverted column: b_inverted_key
│ num spans: 3
└── • scan
columns: (a, b_inverted_key)
estimated row count: 111 (missing stats)
table: d@foo_inv
spans: /[]-/{} /Arr/{}-/Arr/{}/PrefixEnd /Arr/"b"/"c"-/Arr/"b"/"c"/PrefixEnd

query T
EXPLAIN (VERBOSE) SELECT b FROM d WHERE b->0 <@ '1'
----
distribution: local
vectorized: true
·
• filter
│ columns: (b)
│ estimated row count: 333 (missing stats)
│ filter: (b->0) <@ '1'
└── • index join
│ columns: (b)
│ estimated row count: 111 (missing stats)
│ table: d@d_pkey
│ key columns: a
└── • project
│ columns: (a)
└── • inverted filter
│ columns: (a, b_inverted_key)
│ estimated row count: 111 (missing stats)
│ inverted column: b_inverted_key
│ num spans: 3
└── • scan
columns: (a, b_inverted_key)
estimated row count: 111 (missing stats)
table: d@foo_inv
spans: /1-/1/PrefixEnd /[]-/{} /Arr/1-/Arr/1/PrefixEnd

query T
EXPLAIN (VERBOSE) SELECT b FROM d WHERE b->0 @> '1'
----
distribution: local
vectorized: true
·
• index join
│ columns: (b)
│ estimated row count: 111 (missing stats)
│ table: d@d_pkey
│ key columns: a
└── • project
│ columns: (a)
└── • inverted filter
│ columns: (a, b_inverted_key)
│ estimated row count: 111 (missing stats)
│ inverted column: b_inverted_key
│ num spans: 2
└── • scan
columns: (a, b_inverted_key)
estimated row count: 111 (missing stats)
table: d@foo_inv
spans: /Arr/1-/Arr/1/PrefixEnd /Arr/Arr/1-/Arr/Arr/1/PrefixEnd

query T
EXPLAIN (VERBOSE) SELECT b FROM d WHERE b->0 @> '[1, 2]'
----
distribution: local
vectorized: true
·
• project
│ columns: (b)
└── • lookup join (inner)
│ columns: (a, b)
│ estimated row count: 12 (missing stats)
│ table: d@d_pkey
│ equality: (a) = (a)
│ equality cols are key
│ pred: (b->0) @> '[1, 2]'
└── • project
│ columns: (a)
└── • zigzag join
columns: (a, b_inverted_key, a, b_inverted_key)
estimated row count: 12 (missing stats)
left table: d@foo_inv
left columns: (a, b_inverted_key)
left fixed values: 1 column
right table: d@foo_inv
right columns: (a, b_inverted_key)
right fixed values: 1 column

query T
EXPLAIN (VERBOSE) SELECT b FROM d WHERE b->0 <@ '[1, 2]'
----
distribution: local
vectorized: true
·
• filter
│ columns: (b)
│ estimated row count: 333 (missing stats)
│ filter: (b->0) <@ '[1, 2]'
└── • index join
│ columns: (b)
│ estimated row count: 111 (missing stats)
│ table: d@d_pkey
│ key columns: a
└── • project
│ columns: (a)
└── • inverted filter
│ columns: (a, b_inverted_key)
│ estimated row count: 111 (missing stats)
│ inverted column: b_inverted_key
│ num spans: 8
└── • scan
columns: (a, b_inverted_key)
estimated row count: 111 (missing stats)
table: d@foo_inv
spans: /1-/1/PrefixEnd /2-/2/PrefixEnd /[]-/{} /Arr/1-/Arr/1/PrefixEnd /Arr/2-/Arr/2/PrefixEnd /Arr/[]-/Arr/{} /Arr/Arr/1-/Arr/Arr/1/PrefixEnd /Arr/Arr/2-/Arr/Arr/2/PrefixEnd

query T
EXPLAIN (VERBOSE) SELECT b FROM d WHERE (b->0 @> '[1, 2]') AND (b->1 <@ '[1]')
----
distribution: local
vectorized: true
·
• project
│ columns: (b)
└── • lookup join (inner)
│ columns: (a, b)
│ estimated row count: 4 (missing stats)
│ table: d@d_pkey
│ equality: (a) = (a)
│ equality cols are key
│ pred: ((b->0) @> '[1, 2]') AND ((b->1) <@ '[1]')
└── • project
│ columns: (a)
└── • zigzag join
columns: (a, b_inverted_key, a, b_inverted_key)
estimated row count: 12 (missing stats)
left table: d@foo_inv
left columns: (a, b_inverted_key)
left fixed values: 1 column
right table: d@foo_inv
right columns: (a, b_inverted_key)
right fixed values: 1 column


# Stats reflect the following, with some histogram buckets removed:
# insert into d select g, '[1,2]' from generate_series(1,1000) g(g);
Expand Down
8 changes: 0 additions & 8 deletions pkg/sql/opt/invertedidx/json_array.go
Original file line number Diff line number Diff line change
Expand Up @@ -612,14 +612,6 @@ func (j *jsonOrArrayFilterPlanner) extractJSONFetchValContainsCondition(
return inverted.NonInvertedColExpression{}
}

// Not using inverted indices, yet, for filters of the form
// j->0 @> '{"b": "c"}' or j->0 <@ '{"b": "c"}'
for i := range keys {
if _, ok := keys[i].(*tree.DString); !ok {
return inverted.NonInvertedColExpression{}
}
}

// Build a new JSON object with the collected keys and val.
obj := buildObject(keys, val.JSON)

Expand Down
Loading

0 comments on commit 208fba2

Please sign in to comment.