Skip to content

Commit

Permalink
opt: inverted-index accelerate queries with filters in the form json-…
Browse files Browse the repository at this point in the history
…>'field' IN (...)

Previously, the optimizer did not plan inverted index scans for queries consisting
of the IN operator.

To address this, we now build JSON arrays from fetch value expressions consisting
of both integers and strings. Each entity present inside of the tuple following the
IN operator has it's own seperate JSON array being created. From these JSON arrays,
inverted spans are built. Each inverted span generated is OR'ed together to replicate
the functionality of the IN operator. In other words, a filter of the form:
j->0 IN ('1', '2') is thought of as (j->0 = '1') OR (j->0 = '2') and the generated
inverted span replicates this behaviour.

Epic: CRDB-3301
Fixes: cockroachdb#87856

Release note (performance improvement): The optimizer now plans
inverted index scans for queries that filter by JSON fetch value
operators (->), consisting of both string and integer index values,
alongside the IN operator, e.g, json_col->0 IN ('1', '2') and
json_col->'a' IN ('1', '2')
  • Loading branch information
Shivs11 committed Feb 2, 2023
1 parent 10ef5d9 commit dc441a6
Show file tree
Hide file tree
Showing 4 changed files with 780 additions and 1 deletion.
81 changes: 80 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,8 @@ INSERT INTO f VALUES
(36, '[[]]'),
(37, '[{"a": [0, "b"]}, null, 1]'),
(38, '[[0, 1, 2], {"b": "c"}]'),
(39, '[[0, [1, 2]]]')
(39, '[[0, [1, 2]]]'),
(40, '{"a": "a"}')

query T
SELECT j FROM f@i WHERE j->0 = '[]' ORDER BY k
Expand Down Expand Up @@ -1197,6 +1198,84 @@ SELECT j FROM f@i WHERE j->'a'->'b' <@ '{"c": [1, 2], "d": 2}' OR j->'a'->'b' <@
{"a": {"b": "c"}, "d": "e"}
{"a": {"b": []}}

#Testing the IN operator
query T
SELECT j FROM f@i WHERE j->'a' IN ('1', '2', '3') ORDER BY k
----
{"a": 1}
{"a": 1, "b": 2}
{"a": 1, "c": 3}

query T
SELECT j FROM f@i WHERE j->'a' IN ('"a"') ORDER BY k
----
{"a": "a"}

query T
SELECT j FROM f@i WHERE j->'a' IN ('"a"', 'null') ORDER BY k
----
{"a": null}
{"a": "a"}

query T
SELECT j FROM f@i WHERE j->'a' IN ('{"b": "c"}', '{"c": "d"}') ORDER BY k
----
{"a": {"b": "c"}}
{"a": {"b": "c"}, "d": "e"}

query T
SELECT j FROM f@i WHERE j->'a' IN ('{"b": "c"}', '"a"') ORDER BY k
----
{"a": {"b": "c"}}
{"a": {"b": "c"}, "d": "e"}
{"a": "a"}

query T
SELECT j FROM f@i WHERE j->'a' IN ('[1, 2]', '"a"') ORDER BY k
----
{"a": [1, 2]}
{"a": "a"}

query T
SELECT j FROM f@i WHERE j->0 IN ('1', '2', '3') ORDER BY k
----
[1, 2, {"b": "c"}]

query T
SELECT j FROM f@i WHERE j->1 IN ('1', '2', '3') ORDER BY k
----
[1, 2, {"b": "c"}]

query T
SELECT j FROM f@i WHERE j->0 IN ('1', '2', '3', '"d"') AND j->1 IN
('"e"', '"d"') ORDER BY k
----

query T
SELECT j FROM f@i WHERE j->0 IN ('1', '2', '3', '"d"') OR j->1 IN
('"e"', '"d"') ORDER BY k
----
[1, 2, {"b": "c"}]
[{"a": {"b": "c"}}, "d", "e"]

query T
SELECT j FROM f@i WHERE j->0 IN ('{"a": {"b": "c"}}', '{"a": [0, "b"]}') ORDER BY k
----
[{"a": {"b": "c"}}, "d", "e"]
[{"a": [0, "b"]}, null, 1]

query T
SELECT j FROM f@i WHERE j->0 IN ('{"a": {"b": "c"}}', '[]') ORDER BY k
----
[{"a": {"b": "c"}}, "d", "e"]
[[]]

query T
SELECT j FROM f@i WHERE j->0 IN ('[0, [1, 2]]', '[0, 1, 2]') ORDER BY k
----
[[0, 1, 2], {"b": "c"}]
[[0, [1, 2]]]

# Regression test for #63180. This query should not cause an index out of range
# error in the inverted filterer.
query T
Expand Down
37 changes: 37 additions & 0 deletions pkg/sql/opt/invertedidx/json_array.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,10 @@ func (j *jsonOrArrayFilterPlanner) extractInvertedFilterConditionFromLeaf(
if fetch, ok := t.Left.(*memo.FetchValExpr); ok {
invertedExpr = j.extractJSONFetchValEqCondition(ctx, evalCtx, fetch, t.Right)
}
case *memo.InExpr:
if fetch, ok := t.Left.(*memo.FetchValExpr); ok {
invertedExpr = j.extractJSONInCondition(ctx, evalCtx, fetch, t.Right)
}
case *memo.OverlapsExpr:
invertedExpr = j.extractArrayOverlapsCondition(ctx, evalCtx, t.Left, t.Right)
}
Expand All @@ -412,6 +416,39 @@ func (j *jsonOrArrayFilterPlanner) extractInvertedFilterConditionFromLeaf(
return invertedExpr, remainingFilters, nil
}

// extractJSONInCondition extracts an InvertedExpression representing
// an inverted filter over the planner's inverted index, based on the
// give left and right expression arguments. If an
// InvertedExpression cannot be generated from the expression, an
// inverted.NonInvertedColExpression is returned.
func (j *jsonOrArrayFilterPlanner) extractJSONInCondition(
ctx context.Context, evalCtx *eval.Context, left *memo.FetchValExpr, right opt.ScalarExpr,
) inverted.Expression {
// The right side of the expression should be a constant JSON value.
if !memo.CanExtractConstDatum(right) {
return inverted.NonInvertedColExpression{}
}
var invertedExpr inverted.Expression
tuple := right.Child(0)
tupleLen := tuple.ChildCount()
for index := 0; index < tupleLen; index++ {
child := tuple.Child(index).(opt.ScalarExpr)
expr := j.extractJSONFetchValEqCondition(ctx, evalCtx, left, child)
if invertedExpr == nil {
invertedExpr = expr
}
invertedExpr = inverted.Or(invertedExpr, expr)
}

if invertedExpr == nil {
// An inverted expression could not be extracted.
return inverted.NonInvertedColExpression{}
}

return invertedExpr

}

// extractArrayOverlapsCondition extracts an InvertedExpression
// representing an inverted filter over the planner's inverted index, based
// on the given left and right expression arguments. Returns an empty
Expand Down
82 changes: 82 additions & 0 deletions pkg/sql/opt/invertedidx/json_array_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -885,6 +885,88 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) {
unique: true,
remainingFilters: "str <@ '{hello}' AND str @> '{hello}'",
},
{
// Testing the IN operator with the fetch value as a string
filters: "j->'a' IN ('1', '2', '3')",
indexOrd: jsonOrd,
ok: true,
tight: true,
unique: false,
remainingFilters: "",
},
{
filters: `j->'a' IN ('"a"', '"b"')`,
indexOrd: jsonOrd,
ok: true,
tight: true,
unique: false,
remainingFilters: "",
},
{
filters: `j->'a' IN ('{"a": "b"}', '"a"')`,
indexOrd: jsonOrd,
ok: true,
tight: false,
unique: false,
remainingFilters: `j->'a' IN ('{"a": "b"}', '"a"')`,
},
{
filters: `j->'a' IN ('[1,2,3]', '[1]')`,
indexOrd: jsonOrd,
ok: true,
tight: false,
unique: false,
remainingFilters: `j->'a' IN ('[1,2,3]', '[1]')`,
},
{
filters: `j->'a' IN ('[1,2,3]', '{"a": "b"}', '"a"', 'null')`,
indexOrd: jsonOrd,
ok: true,
tight: false,
unique: false,
remainingFilters: `j->'a' IN ('[1,2,3]', '{"a": "b"}', '"a"', 'null')`,
},
{
// Testing the IN operator with the fetch value as an integer
filters: "j->0->1 IN ('1', '2', '3')",
indexOrd: jsonOrd,
ok: true,
tight: false,
unique: false,
remainingFilters: "j->0->1 IN ('1', '2', '3')",
},
{
filters: `j->0 IN ('"a"', '"b"')`,
indexOrd: jsonOrd,
ok: true,
tight: false,
unique: false,
remainingFilters: `j->0 IN ('"a"', '"b"')`,
},
{
filters: `j->0->'a' IN ('{"a": "b"}', '"a"')`,
indexOrd: jsonOrd,
ok: true,
tight: false,
unique: false,
remainingFilters: `j->0->'a' IN ('{"a": "b"}', '"a"')`,
},
{
filters: `j->'a'->0 IN ('[1,2,3]', '[1]')`,
indexOrd: jsonOrd,
ok: true,
tight: false,
unique: false,
remainingFilters: `j->'a'->0 IN ('[1,2,3]', '[1]')`,
},
{
filters: `j->0 IN ('[1,2,3]', '{"a": "b"}', '"a"', 'null')`,
indexOrd: jsonOrd,
ok: true,
tight: false,
unique: false,
remainingFilters: `j->0 IN ('[1,2,3]', '{"a": "b"}', '"a"', 'null')`,
},
}

for _, tc := range testCases {
Expand Down
Loading

0 comments on commit dc441a6

Please sign in to comment.