Skip to content

Commit

Permalink
Merge #59494
Browse files Browse the repository at this point in the history
59494: opt: index accelerate chained fetch value operators r=rytaft a=mgartner

#### opt: add test for JSON fetch val inverse

Release note: None

#### opt: index accelerate chained fetch value operators

Prior to #55316, the optimizer generated inverted index scans on indexed
JSON columns when queries had filters with chained fetch value
operators, for example `j->'a'->'b' = '1'`. The logic that made this
possible was found to create query plans not equivalent to the query, so
it was removed. This commit restores the ability to index accelerate
chained -> operators.

Fixes #55317

Release note (performance improvement): A bug fix included in 20.2.1 for
for the JSON fetch value operator, `->`, resulted in chained `->`
operators in query filters not being index accelerated, e.g.,
`j->'a'->'b' = '1'`. Chained `->` are now index accelerated.


Co-authored-by: Marcus Gartner <[email protected]>
  • Loading branch information
craig[bot] and mgartner committed Jan 29, 2021
2 parents 22a46b1 + 0bbcced commit 3f7283a
Show file tree
Hide file tree
Showing 5 changed files with 174 additions and 41 deletions.
22 changes: 21 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,15 @@ INSERT INTO f VALUES
(2, '{"b": 2}'),
(3, '{"b": 2, "a": 1}'),
(4, '{"a": 1, "c": 3}'),
(5, '{"a": [1, 2]}')
(5, '{"a": [1, 2]}'),
(6, '{"a": {"b": 1}}'),
(7, '{"a": {"b": 1, "d": 2}}'),
(8, '{"a": {"d": 2}}'),
(9, '{"a": {"b": [1, 2]}}'),
(10, '{"a": {"b": {"c": 1}}}'),
(11, '{"a": {"b": {"c": 1, "d": 2}}}}'),
(12, '{"a": {"b": {"d": 2}}}}'),
(13, '{"a": {"b": {"c": [1, 2]}}}')

query T
SELECT j FROM f@i WHERE j->'a' = '1' ORDER BY k
Expand All @@ -769,6 +777,18 @@ SELECT j FROM f@i WHERE j->'a' = '1' OR j @> '{"b": 2}' ORDER BY k
{"a": 1, "b": 2}
{"a": 1, "c": 3}

query T
SELECT j FROM f@i WHERE j->'a'->'b' = '1' ORDER BY k
----
{"a": {"b": 1}}
{"a": {"b": 1, "d": 2}}

query T
SELECT j FROM f@i WHERE j->'a'->'b'->'c' = '1' ORDER BY k
----
{"a": {"b": {"c": 1}}}
{"a": {"b": {"c": 1, "d": 2}}}

subtest arrays

statement ok
Expand Down
17 changes: 8 additions & 9 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -350,24 +350,23 @@ vectorized: true
table: d@foo_inv
spans: /"a"/"b"-/"a"/"b"/PrefixEnd

# TODO(mgartner): Add support for building inverted index constraints for chained JSON
# fetch operators.
query T
EXPLAIN (VERBOSE) SELECT * from d where b->'a'->'c' = '"b"'
EXPLAIN (VERBOSE) SELECT * from d@foo_inv where b->'a'->'c' = '"b"'
----
distribution: local
vectorized: true
·
filter
index join
│ columns: (a, b)
│ estimated row count: 111 (missing stats)
│ filter: ((b->'a')->'c') = '"b"'
│ table: d@primary
│ key columns: a
└── • scan
columns: (a, b)
estimated row count: 1,000 (missing stats)
table: d@primary
spans: FULL SCAN
columns: (a)
estimated row count: 111 (missing stats)
table: d@foo_inv
spans: /"a"/"c"/"b"-/"a"/"c"/"b"/PrefixEnd

query T
EXPLAIN (VERBOSE) SELECT * from d where b->(NULL::STRING) = '"b"'
Expand Down
111 changes: 80 additions & 31 deletions pkg/sql/opt/invertedidx/json_array.go
Original file line number Diff line number Diff line change
Expand Up @@ -334,35 +334,18 @@ func (j *jsonOrArrayFilterPlanner) extractJSONOrArrayContainsCondition(

// extractJSONFetchValEqCondition extracts an InvertedExpression representing an
// inverted filter over the planner's inverted index, based on equality between
// a fetch val expression and a right scalar expression. If the following criteria
// are not met, an empty InvertedExpression is returned.
// a chain of fetch val expressions and a right scalar expression. If an
// InvertedExpression cannot be generated from the expression, an
// inverted.NonInvertedColExpression is returned.
//
// 1. The fetch value operator's left expression must be a variable
// referencing the inverted column in the index.
// 2. The fetch value operator's right expression must be a constant string.
// 3. The right expression in the equality expression must be a constant JSON
// value that is not an object or an array.
//
// TODO(mgartner): Support chained fetch val operators, e.g., j->'a'->'b' = '1'.
// In order to generate an InvertedExpression, left must be a fetch val
// expression in the form [col]->[index0]->[index1]->...->[indexN] where col is
// a variable or expression referencing the inverted column in the inverted
// index and each index is a constant string. The right expression must be a
// constant JSON value that is not an object or an array.
func (j *jsonOrArrayFilterPlanner) extractJSONFetchValEqCondition(
evalCtx *tree.EvalContext, fetch *memo.FetchValExpr, right opt.ScalarExpr,
evalCtx *tree.EvalContext, left *memo.FetchValExpr, right opt.ScalarExpr,
) inverted.Expression {
// The left side of the fetch val expression, the Json field, should be a
// variable corresponding to the index column.
if !isIndexColumn(j.tabID, j.index, fetch.Json, j.computedColumns) {
return inverted.NonInvertedColExpression{}
}

// The right side of the fetch val expression, the Index field, should be a
// constant string.
if !memo.CanExtractConstDatum(fetch.Index) {
return inverted.NonInvertedColExpression{}
}
key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString)
if !ok {
return inverted.NonInvertedColExpression{}
}

// The right side of the equals expression should be a constant JSON value
// that is not an object or array.
if !memo.CanExtractConstDatum(right) {
Expand All @@ -377,10 +360,76 @@ func (j *jsonOrArrayFilterPlanner) extractJSONFetchValEqCondition(
return inverted.NonInvertedColExpression{}
}

// Build a new JSON object of the form: {<key>: <right>}.
b := json.NewObjectBuilder(1)
b.Add(string(*key), val.JSON)
obj := tree.NewDJSON(b.Build())
// Recursively traverse fetch val expressions and collect keys with which to
// build the InvertedExpression. If it is not possible to build an inverted
// expression from the tree of fetch val expressions, collectKeys returns
// early and foundKeys remains false. If successful, foundKeys is set to
// true and JSON fetch value indexes are collected in keys. The keys are
// ordered by the outer-most fetch val index first. The outer-most fetch val
// index is the right-most in the -> chain, for example (j->'a'->'b') is
// equivalent to ((j->'a')->'b') and 'b' is the outer-most fetch val index.
//
// Later on, we iterate forward through these keys to build a JSON object
// from the inside-out with the inner-most value being the JSON scalar
// extracted above from the right ScalarExpr function argument. In the
// resulting JSON object, the outer-most JSON fetch value indexes are the
// inner most JSON object keys.
//
// As an example, when left is (j->'a'->'b') and right is ('1'), the keys
// {"b", "a"} are collected and the JSON object {"a": {"b": 1}} is built.
foundKeys := false
var keys []string
var collectKeys func(fetch *memo.FetchValExpr)
collectKeys = func(fetch *memo.FetchValExpr) {
// The right side of the fetch val expression, the Index field, must be
// a constant string. If not, then we cannot build an inverted
// expression.
if !memo.CanExtractConstDatum(fetch.Index) {
return
}
key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString)
if !ok {
return
}

// Append the key to the list of keys.
keys = append(keys, string(*key))

// If the left side of the fetch val expression, the Json field, is a
// variable or expression corresponding to the index column, then we
// have found a valid list of keys to build an inverted expression.
if isIndexColumn(j.tabID, j.index, fetch.Json, j.computedColumns) {
foundKeys = true
return
}

// If the left side of the fetch val expression is another fetch val
// expression, recursively collect its keys.
if innerFetch, ok := fetch.Json.(*memo.FetchValExpr); ok {
collectKeys(innerFetch)
}

// Otherwise, we cannot build an inverted expression.
}
collectKeys(left)
if !foundKeys {
return inverted.NonInvertedColExpression{}
}

// Build a new JSON object of the form:
// {<keyN>: ... {<key1>: {key0: <val>}}}
// Note that key0 is the outer-most fetch val index, so the expression
// j->'a'->'b' = 1 results in {"a": {"b": 1}}.
var obj json.JSON
for i := 0; i < len(keys); i++ {
b := json.NewObjectBuilder(1)
if i == 0 {
b.Add(keys[i], val.JSON)
} else {
b.Add(keys[i], obj)
}
obj = b.Build()
}

return getInvertedExprForJSONOrArrayIndex(evalCtx, obj)
return getInvertedExprForJSONOrArrayIndex(evalCtx, tree.NewDJSON(obj))
}
27 changes: 27 additions & 0 deletions pkg/sql/opt/invertedidx/json_array_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,33 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) {
indexOrd: arrayOrd,
ok: false,
},
{
filters: "j->'a'->'b' = '1'",
indexOrd: jsonOrd,
ok: true,
tight: true,
unique: true,
},
{
filters: "j->'a'->'b'->'c' = '1'",
indexOrd: jsonOrd,
ok: true,
tight: true,
unique: true,
},
{
// Integer indexes are not yet supported.
filters: "j->0->'b' = '1'",
indexOrd: jsonOrd,
ok: false,
},
{
// The inner most expression is not a fetch val expression with an
// indexed column on the left.
filters: "(j-'c')->'a'->'b' = '1'",
indexOrd: jsonOrd,
ok: false,
},
{
filters: "j->'a' = '1' AND j->'b' = '2'",
indexOrd: jsonOrd,
Expand Down
38 changes: 38 additions & 0 deletions pkg/sql/opt/xform/testdata/rules/select
Original file line number Diff line number Diff line change
Expand Up @@ -1951,6 +1951,44 @@ project
│ └── spans: ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"]
└── key: (1)

# Chained fetch val operators.
opt expect=GenerateInvertedIndexScans
SELECT k FROM b WHERE j->'a'->'b' = '"c"'
----
project
├── columns: k:1!null
├── immutable
├── key: (1)
└── scan b@j_inv_idx
├── columns: k:1!null
├── inverted constraint: /6/1
│ └── spans: ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"]
└── key: (1)

# Do not generate an inverted scan when the fetch val and equality operators are
# wrapped in a NOT operator. The -> operator returns NULL if the key does not
# exist in the JSON object, so (NOT j->'a' = '"b"') is not equivalent to the
# inverse of the existence of the key/value pair {"a": "b"} in the inverted
# index. See #49143 and #55316.
opt expect-not=GenerateInvertedIndexScans
SELECT k FROM b WHERE NOT j->'a' = '"b"'
----
project
├── columns: k:1!null
├── immutable
├── key: (1)
└── select
├── columns: k:1!null j:4
├── immutable
├── key: (1)
├── fd: (1)-->(4)
├── scan b
│ ├── columns: k:1!null j:4
│ ├── key: (1)
│ └── fd: (1)-->(4)
└── filters
└── (j:4->'a') != '"b"' [outer=(4), immutable]

# Do not generate an inverted scan when the index of the fetch val operator is
# not a string.
opt expect-not=GenerateInvertedIndexScans
Expand Down

0 comments on commit 3f7283a

Please sign in to comment.