diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_index b/pkg/sql/logictest/testdata/logic_test/inverted_index index dadfd2f6107d..a96fbadae9c7 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_index +++ b/pkg/sql/logictest/testdata/logic_test/inverted_index @@ -767,7 +767,13 @@ INSERT INTO f VALUES (25, '{"a": {"b": "c", "d": "e"}}'), (26, '{"a": {"b": "c"}, "d": "e"}'), (27, '[1, 2, {"b": "c"}]'), - (28, '[{"a": {"b": "c"}}, "d", "e"]') + (28, '[{"a": {"b": "c"}}, "d", "e"]'), + (29, '{"a": null}'), + (30, '{"a": [1, 2, null]}'), + (31, 'null'), + (32, '{}'), + (33, '[]'), + (34, '{"a": {"b": []}}') query T SELECT j FROM f@i WHERE j->'a' = '1' ORDER BY k @@ -880,6 +886,204 @@ SELECT j FROM f@i WHERE j->'a' = '"b"' AND j->'c' = '[{"d": 1}, {"e": 2}]' ORDER ---- {"a": "b", "c": [{"d": 1}, {"e": 2}]} +# Expressions with fetch val and containment operators use the inverted index. +query T +SELECT j FROM f@i WHERE j->'a' @> '"b"' ORDER BY k +---- +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} +{"a": "b", "x": ["c", "d", "e"]} +{"a": "b", "c": [{"d": 1}, {"e": 2}]} + +query T +SELECT j FROM f@i WHERE j->'a' <@ '"b"' ORDER BY k +---- +{"a": "b", "x": ["c", "d", "e"]} +{"a": "b", "c": [{"d": 1}, {"e": 2}]} + +query T +SELECT j FROM f@i WHERE j->'a' @> 'null' ORDER BY k +---- +{"a": null} +{"a": [1, 2, null]} + +query T +SELECT j FROM f@i WHERE j->'a' <@ 'null' ORDER BY k +---- +{"a": null} + +query T +SELECT j FROM f@i WHERE j->'a' <@ '[]' ORDER BY k +---- +{"a": []} + +query T +SELECT j FROM f@i WHERE j->'a' <@ '{}' ORDER BY k +---- +{"a": {}} + +query T +SELECT j FROM f@i WHERE j->'a' @> '[]' ORDER BY k +---- +{"a": [1, 2]} +{"a": []} +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} +{"a": [1, 2, null]} + +query T +SELECT j FROM f@i WHERE j->'a' @> '{}' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} +{"a": {"d": 2}} +{"a": {"b": [1, 2]}} +{"a": {"b": {"c": 1}}} +{"a": {"b": {"c": 1, "d": 2}}} +{"a": {"b": {"d": 2}}} 
+{"a": {"b": {"c": [1, 2]}}} +{"a": {"b": {"c": [1, 2, 3]}}} +{"a": {}} +{"a": {"b": "c"}} +{"a": {"b": ["c", "d", "e"]}} +{"a": {"b": "c", "d": "e"}} +{"a": {"b": "c"}, "d": "e"} +{"a": {"b": []}} + +query T +SELECT j FROM f@i WHERE j->'a' <@ '{"b": [1, 2]}' ORDER BY k +---- +{"a": {"b": [1, 2]}} +{"a": {}} +{"a": {"b": []}} + +query T +SELECT j FROM f@i WHERE j->'a' <@ '{"b": {"c": [1, 2]}}' ORDER BY k +---- +{"a": {"b": {"c": [1, 2]}}} +{"a": {}} + +query T +SELECT j FROM f@i WHERE j->'a' @> '{"b": ["c"]}' ORDER BY k +---- +{"a": {"b": ["c", "d", "e"]}} + +query T +SELECT j FROM f@i WHERE j->'c' @> '[{"d": 1}]' ORDER BY k +---- +{"a": "b", "c": [{"d": 1}, {"e": 2}]} + +# Expressions with chained fetch val and containment operators use the inverted +# index. +query T +SELECT j FROM f@i WHERE j->'a'->'b' <@ '1' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} + +query T +SELECT j FROM f@i WHERE j->'a'->'b' @> '1' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} +{"a": {"b": [1, 2]}} + +query T +SELECT j FROM f@i WHERE j->'a'->'b' @> '[1, 2]' ORDER BY k +---- +{"a": {"b": [1, 2]}} + +query T +SELECT j FROM f@i WHERE j->'a'->'b' <@ '[1, 2]' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} +{"a": {"b": [1, 2]}} +{"a": {"b": []}} + +query T +SELECT j FROM f@i WHERE j->'a'->'b' @> '"c"' ORDER BY k +---- +{"a": {"b": "c"}} +{"a": {"b": ["c", "d", "e"]}} +{"a": {"b": "c", "d": "e"}} +{"a": {"b": "c"}, "d": "e"} + +# Expressions with fetch val on the right side should use the inverted index. 
+query T +SELECT j FROM f@i WHERE '"b"' <@ j->'a' ORDER BY k +---- +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} +{"a": "b", "x": ["c", "d", "e"]} +{"a": "b", "c": [{"d": 1}, {"e": 2}]} + +query T +SELECT j FROM f@i WHERE '[1, 2]' <@ j->'a'->'b' ORDER BY k +---- +{"a": {"b": [1, 2]}} + +query T +SELECT j FROM f@i WHERE '{"b": {"c": [1, 2]}}' <@ j->'a' ORDER BY k +---- +{"a": {"b": {"c": [1, 2]}}} +{"a": {"b": {"c": [1, 2, 3]}}} + +# Conjunctions of fetch val and containment expressions use the inverted index. +query T +SELECT j FROM f@i WHERE j->'a' @> '"b"' AND '["c"]' <@ j->'a' ORDER BY k +---- +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} + +#TODO(angelazxu): Uncomment these tests once #63180 is fixed. +# query T +# SELECT j FROM f@i WHERE j->'a' <@ '{"b": [1, 2]}' AND j->'a'->'b' @> '[1]' ORDER BY k +# ---- + +# query T +# SELECT j FROM f@i WHERE j->'a' @> '"b"' AND j->'a' <@ '["b", "c", "d", "e"]' ORDER BY k +# ---- + +query T +SELECT j FROM f@i WHERE j->'a' @> '{"d": 2}' AND '[1, 2]' @> j->'a'->'b' ORDER BY k +---- +{"a": {"b": 1, "d": 2}} + +# Disjunctions of fetch val and containment expressions use the inverted index. 
+query T +SELECT j FROM f@i WHERE j->'a' @> '[1, 2]' OR j->'a'->'b' @> '[1, 2]' ORDER BY k +---- +{"a": [1, 2]} +{"a": {"b": [1, 2]}} +{"a": [1, 2, null]} + +query T +SELECT j FROM f@i WHERE j->'a' @> '"b"' OR j->'a'->'b' <@ '[1, 2]' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} +{"a": {"b": [1, 2]}} +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} +{"a": "b", "x": ["c", "d", "e"]} +{"a": "b", "c": [{"d": 1}, {"e": 2}]} +{"a": {"b": []}} + +query T +SELECT j FROM f@i WHERE j->'a'->'b' <@ '{"c": [1, 2], "d": 2}' OR j->'a'->'b' <@ '["c", "d", "e", 1, 2, 3]' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} +{"a": {"b": [1, 2]}} +{"a": {"b": {"d": 2}}} +{"a": {"b": {"c": [1, 2]}}} +{"a": {"b": "c"}} +{"a": {"b": ["c", "d", "e"]}} +{"a": {"b": "c", "d": "e"}} +{"a": {"b": "c"}, "d": "e"} +{"a": {"b": []}} + subtest arrays statement ok diff --git a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index index c3be2181789e..2c41014d9b9b 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index @@ -377,6 +377,281 @@ vectorized: true • norows columns: (a, b) +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' @> '"b"' +---- +distribution: local +vectorized: true +· +• index join +│ columns: (a, b) +│ estimated row count: 111 (missing stats) +│ table: d@primary +│ key columns: a +│ +└── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 2 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /"a"/"b"-/"a"/"b"/PrefixEnd /"a"/Arr/"b"-/"a"/Arr/"b"/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d@foo_inv where b->'a'->'c' @> '"b"' +---- +distribution: local +vectorized: true +· +• index join +│ columns: (a, b) +│ estimated 
row count: 111 (missing stats) +│ table: d@primary +│ key columns: a +│ +└── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 2 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /"a"/"c"/"b"-/"a"/"c"/"b"/PrefixEnd /"a"/"c"/Arr/"b"-/"a"/"c"/Arr/"b"/PrefixEnd + +# TODO(angelazxu): The {} span does not need to be scanned here, but is +# included when finding spans contained by {"a": "b"} (see #63184). +query T +EXPLAIN (VERBOSE) SELECT * from d@foo_inv where b->'a' <@ '"b"' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: (b->'a') <@ '"b"' +│ +└── • index join + │ columns: (a, b) + │ estimated row count: 111 (missing stats) + │ table: d@primary + │ key columns: a + │ + └── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 2 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /{}-/{}/PrefixEnd /"a"/"b"-/"a"/"b"/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d@foo_inv where b->'a'->'c' <@ '"b"' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: ((b->'a')->'c') <@ '"b"' +│ +└── • index join + │ columns: (a, b) + │ estimated row count: 111 (missing stats) + │ table: d@primary + │ key columns: a + │ + └── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 3 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: 
/{}-/{}/PrefixEnd /"a"/{}-/"a"/{}/PrefixEnd /"a"/"c"/"b"-/"a"/"c"/"b"/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' @> '[1, 2]' +---- +distribution: local +vectorized: true +· +• lookup join (inner) +│ columns: (a, b) +│ estimated row count: 12 (missing stats) +│ table: d@primary +│ equality: (a) = (a) +│ equality cols are key +│ pred: (b->'a') @> '[1, 2]' +│ +└── • zigzag join + columns: (a) + estimated row count: 12 (missing stats) + left table: d@foo_inv + left columns: (a) + left fixed values: 1 column + right table: d@foo_inv + right columns: () + right fixed values: 1 column + +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' <@ '[1, 2]' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: (b->'a') <@ '[1, 2]' +│ +└── • index join + │ columns: (a, b) + │ estimated row count: 111 (missing stats) + │ table: d@primary + │ key columns: a + │ + └── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 6 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /{}-/{}/PrefixEnd /"a"/1-/"a"/1/PrefixEnd /"a"/2-/"a"/2/PrefixEnd /"a"/[]-/"a"/{} /"a"/Arr/1-/"a"/Arr/1/PrefixEnd /"a"/Arr/2-/"a"/Arr/2/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' @> '{"d": 2}' +---- +distribution: local +vectorized: true +· +• index join +│ columns: (a, b) +│ estimated row count: 111 (missing stats) +│ table: d@primary +│ key columns: a +│ +└── • scan + columns: (a) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /"a"/"d"/2-/"a"/"d"/2/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' <@ '{"d": 2}' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: 
(b->'a') <@ '{"d": 2}' +│ +└── • index join + │ columns: (a, b) + │ estimated row count: 111 (missing stats) + │ table: d@primary + │ key columns: a + │ + └── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 3 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /{}-/{}/PrefixEnd /"a"/{}-/"a"/{}/PrefixEnd /"a"/"d"/2-/"a"/"d"/2/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d where '"b"' <@ b->'a' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: '"b"' <@ (b->'a') +│ +└── • scan + columns: (a, b) + estimated row count: 1,000 (missing stats) + table: d@primary + spans: FULL SCAN + +query T +EXPLAIN (VERBOSE) SELECT * from d where '[1, 2]' @> b->'a' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: '[1, 2]' @> (b->'a') +│ +└── • index join + │ columns: (a, b) + │ estimated row count: 111 (missing stats) + │ table: d@primary + │ key columns: a + │ + └── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 6 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /{}-/{}/PrefixEnd /"a"/1-/"a"/1/PrefixEnd /"a"/2-/"a"/2/PrefixEnd /"a"/[]-/"a"/{} /"a"/Arr/1-/"a"/Arr/1/PrefixEnd /"a"/Arr/2-/"a"/Arr/2/PrefixEnd + query T EXPLAIN (VERBOSE) SELECT * from d where '"b"' = b->'a' ---- diff --git a/pkg/sql/opt/exec/execbuilder/testdata/virtual_columns b/pkg/sql/opt/exec/execbuilder/testdata/virtual_columns index 9fd5b963ce86..b5ec1d806c91 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/virtual_columns +++ 
b/pkg/sql/opt/exec/execbuilder/testdata/virtual_columns @@ -1417,7 +1417,10 @@ vectorized: true table: inv@iv_j_idx spans: /10/"a"/"b"-/10/"a"/"b"/PrefixEnd /20/"a"/"b"-/20/"a"/"b"/PrefixEnd /30/"a"/"b"-/30/"a"/"b"/PrefixEnd -# Verify that we use iv_jv_idx. +statement ok +DROP INDEX inv@iv_j_idx + +# Verify that we use iv_jv_idx query T EXPLAIN (VERBOSE) SELECT k FROM inv WHERE iv IN (10, 20, 30) AND jv @> '{"a": "b"}' ---- diff --git a/pkg/sql/opt/invertedidx/json_array.go b/pkg/sql/opt/invertedidx/json_array.go index d761fa0660e3..f5c236228d33 100644 --- a/pkg/sql/opt/invertedidx/json_array.go +++ b/pkg/sql/opt/invertedidx/json_array.go @@ -383,7 +383,17 @@ func (j *jsonOrArrayFilterPlanner) extractJSONOrArrayContainsCondition( indexColumn, constantVal = right, left containedBy = !containedBy } else { - // If neither condition is met, we cannot create an InvertedExpression. + if fetch, ok := left.(*memo.FetchValExpr); ok { + // When the expression has a JSON fetch operator on the left, it is + // handled in extractJSONFetchValContainsCondition. + return j.extractJSONFetchValContainsCondition(evalCtx, fetch, right, containedBy) + } else if fetch, ok := right.(*memo.FetchValExpr); ok { + // When the expression has a JSON fetch operator on the right, it is + // handled in extractJSONFetchValContainsCondition as an equivalent + // expression with right and left swapped. + return j.extractJSONFetchValContainsCondition(evalCtx, fetch, left, !containedBy) + } + // If none of the conditions are met, we cannot create an InvertedExpression. return inverted.NonInvertedColExpression{} } d := memo.ExtractConstDatum(constantVal) @@ -403,7 +413,7 @@ func (j *jsonOrArrayFilterPlanner) extractJSONOrArrayContainsCondition( // extractJSONFetchValEqCondition extracts an InvertedExpression representing an // inverted filter over the planner's inverted index, based on equality between -// a chain of fetch val expressions and a right scalar expression. 
If an +// a chain of fetch val expressions and a scalar expression. If an // InvertedExpression cannot be generated from the expression, an // inverted.NonInvertedColExpression is returned. // @@ -415,7 +425,7 @@ func (j *jsonOrArrayFilterPlanner) extractJSONOrArrayContainsCondition( func (j *jsonOrArrayFilterPlanner) extractJSONFetchValEqCondition( evalCtx *tree.EvalContext, left *memo.FetchValExpr, right opt.ScalarExpr, ) inverted.Expression { - // The right side of the equals expression should be a constant JSON value. + // The right side of the expression should be a constant JSON value. if !memo.CanExtractConstDatum(right) { return inverted.NonInvertedColExpression{} } @@ -424,86 +434,228 @@ func (j *jsonOrArrayFilterPlanner) extractJSONFetchValEqCondition( return inverted.NonInvertedColExpression{} } - // Recursively traverse fetch val expressions and collect keys with which to - // build the InvertedExpression. If it is not possible to build an inverted - // expression from the tree of fetch val expressions, collectKeys returns - // early and foundKeys remains false. If successful, foundKeys is set to - // true and JSON fetch value indexes are collected in keys. The keys are - // ordered by the outer-most fetch val index first. The outer-most fetch val - // index is the right-most in the -> chain, for example (j->'a'->'b') is - // equivalent to ((j->'a')->'b') and 'b' is the outer-most fetch val index. - // - // Later on, we iterate forward through these keys to build a JSON object - // from the inside-out with the inner-most value being the JSON scalar - // extracted above from the right ScalarExpr function argument. In the - // resulting JSON object, the outer-most JSON fetch value indexes are the - // inner most JSON object keys. - // - // As an example, when left is (j->'a'->'b') and right is ('1'), the keys - // {"b", "a"} are collected and the JSON object {"a": {"b": 1}} is built. 
- foundKeys := false + // Collect a slice of keys from the fetch val expression. var keys []string - var collectKeys func(fetch *memo.FetchValExpr) - collectKeys = func(fetch *memo.FetchValExpr) { - // The right side of the fetch val expression, the Index field, must be - // a constant string. If not, then we cannot build an inverted - // expression. - if !memo.CanExtractConstDatum(fetch.Index) { - return + keys = j.collectKeys(keys, left) + if len(keys) == 0 { + return inverted.NonInvertedColExpression{} + } + + // Build a new JSON object with the collected keys and val. + obj := buildObject(keys, val.JSON) + + var invertedExpr inverted.Expression + // For Equals expressions, we will generate the inverted expression for the + // single object built from the keys and val. + invertedExpr = getInvertedExprForJSONOrArrayIndexForContaining(evalCtx, tree.NewDJSON(obj)) + + // When the right side is an array or object, the InvertedExpression + // generated is not tight. We must indicate it is non-tight so an additional + // filter is added. + typ := val.JSON.Type() + if typ == json.ArrayJSONType || typ == json.ObjectJSONType { + invertedExpr.SetNotTight() + } + return invertedExpr +} + +// extractJSONFetchValContainsCondition extracts an InvertedExpression +// representing an inverted filter over the planner's inverted index, based on +// containment between a chain of fetch val expressions and a scalar +// expression. If an InvertedExpression cannot be generated from the +// expression, an inverted.NonInvertedColExpression is returned. +// +// In order to generate an InvertedExpression, left must be a fetch val +// expression in the form [col]->[index0]->[index1]->...->[indexN] where col is +// a variable or expression referencing the inverted column in the inverted +// index and each index is a constant string. The right expression must be a +// constant JSON value. 
For expressions with a left constant value and a right +// fetch val expression, the arguments will be swapped when passed in. +// +// The type of operator is indicated by the containedBy parameter, which is +// true for <@ and false for @>. +func (j *jsonOrArrayFilterPlanner) extractJSONFetchValContainsCondition( + evalCtx *tree.EvalContext, left *memo.FetchValExpr, right opt.ScalarExpr, containedBy bool, +) inverted.Expression { + // The right side of the expression should be a constant JSON value. + if !memo.CanExtractConstDatum(right) { + return inverted.NonInvertedColExpression{} + } + val, ok := memo.ExtractConstDatum(right).(*tree.DJSON) + if !ok { + return inverted.NonInvertedColExpression{} + } + + // Collect a slice of keys from the fetch val expression. + var keys []string + keys = j.collectKeys(keys, left) + if len(keys) == 0 { + return inverted.NonInvertedColExpression{} + } + + // Build a new JSON object with the collected keys and val. + obj := buildObject(keys, val.JSON) + + var invertedExpr inverted.Expression + + // For Contains and ContainedBy expressions, we may need to build additional + // objects to cover all possibilities. + objs, err := buildFetchContainmentObjects(keys, val.JSON, containedBy) + if err != nil { + return inverted.NonInvertedColExpression{} + } + objs = append(objs, obj) + // We get an inverted expression for each object constructed, and union + // these expressions. + for i := range objs { + var expr inverted.Expression + if containedBy { + expr = getInvertedExprForJSONOrArrayIndexForContainedBy(evalCtx, tree.NewDJSON(objs[i])) + } else { + expr = getInvertedExprForJSONOrArrayIndexForContaining(evalCtx, tree.NewDJSON(objs[i])) } - key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString) - if !ok { - return + if invertedExpr == nil { + invertedExpr = expr + } else { + invertedExpr = inverted.Or(invertedExpr, expr) } + } + return invertedExpr +} - // Append the key to the list of keys. 
- keys = append(keys, string(*key)) +// collectKeys is called on fetch val expressions to find the corresponding +// keys used to build a JSON object. It recursively traverses the fetch val +// expressions and collects keys with which to build the InvertedExpression. +// If it is not possible to build an inverted expression from the tree of fetch +// val expressions, collectKeys returns nil for keys. If successful, the JSON +// fetch value indexes are collected in keys. The keys are ordered by the +// outer-most fetch val index first. The outer-most fetch val index is the +// right-most in the -> chain, for example (j->'a'->'b') is equivalent to +// ((j->'a')->'b') and 'b' is the outer-most fetch val index. +// +// Callers of this function should iterate forward through these keys to build +// a JSON object from the inside-out with the inner-most value being the JSON +// scalar extracted above from the right ScalarExpr function argument. In the +// resulting JSON object, the outer-most JSON fetch value indexes are the +// inner-most JSON object keys. +// +// As an example, when left is (j->'a'->'b') and right is ('1'), the keys +// {"b", "a"} are collected and the JSON object {"a": {"b": 1}} is built. +func (j *jsonOrArrayFilterPlanner) collectKeys( + currKeys []string, fetch *memo.FetchValExpr, +) (keys []string) { + // The right side of the fetch val expression, the Index field, must be + // a constant string. If not, then we cannot build an inverted + // expression. + if !memo.CanExtractConstDatum(fetch.Index) { + return nil + } + key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString) + if !ok { + return nil + } - // If the left side of the fetch val expression, the Json field, is a - // variable or expression corresponding to the index column, then we - // have found a valid list of keys to build an inverted expression. 
- if isIndexColumn(j.tabID, j.index, fetch.Json, j.computedColumns) { - foundKeys = true - return - } + // Append the key to the list of keys. + keys = append(currKeys, string(*key)) - // If the left side of the fetch val expression is another fetch val - // expression, recursively collect its keys. - if innerFetch, ok := fetch.Json.(*memo.FetchValExpr); ok { - collectKeys(innerFetch) - } + // If the left side of the fetch val expression, the Json field, is a + // variable or expression corresponding to the index column, then we + // have found a valid list of keys to build an inverted expression. + if isIndexColumn(j.tabID, j.index, fetch.Json, j.computedColumns) { + return keys + } - // Otherwise, we cannot build an inverted expression. + // If the left side of the fetch val expression is another fetch val + // expression, recursively collect its keys. + if innerFetch, ok := fetch.Json.(*memo.FetchValExpr); ok { + return j.collectKeys(keys, innerFetch) } - collectKeys(left) - if !foundKeys { - return inverted.NonInvertedColExpression{} + // Otherwise, we cannot build an inverted expression. + return nil +} + +// buildFetchContainmentObjects constructs new JSON objects with given keys and val. +// The keys and val are extracted from a fetch val containment expression, and +// the objects constructed depend on the value type and whether the expression +// uses <@ or @>. For example, the expression j->'a'->'b' @> "c" would have +// {"b", "a"} as keys, "c" as val, and construct {"a": {"b": ["c"]}}. +// An array of the constructed JSONs is returned. +func buildFetchContainmentObjects( + keys []string, val json.JSON, containedBy bool, +) ([]json.JSON, error) { + var objs []json.JSON + typ := val.Type() + switch typ { + case json.ArrayJSONType: + // For arrays in ContainedBy expressions, we must create a scalar value + // object, because getInvertedExprForJSONOrArrayIndexForContainedBy will + // not include the scalar value spans. 
+ + // Array value examples: + // j->'a' @> '[1]', no new object required, we already have '{"a": [1]}' + // j->'a' <@ '[1]', build '{"a": 1}', we already have '{"a": [1]}' + // j->'a' <@ '[1, [2], 3]', build '{"a": 1}', '{"a": 3}', we already have '{"a": [1, [2], 3]}' + if containedBy { + for i := 0; i < val.Len(); i++ { + v, err := val.FetchValIdx(i) + if err != nil { + return nil, err + } + t := v.Type() + if t == json.ArrayJSONType || t == json.ObjectJSONType { + // Scalar value objects are only built for scalar elements; skip nested arrays and objects. + continue + } + newObj := buildObject(keys, v) + objs = append(objs, newObj) + } + } + + case json.ObjectJSONType: + // For objects in ContainedBy expressions, we do not need to generate the + // empty object value for each level of nesting, because the spans will be + // added for us in getInvertedExprForJSONOrArrayIndexForContainedBy. + // For objects in Contains expressions, no additional spans are required + // outside of the given object's spans. + + // Object value examples: + // j->'a' @> '{"b": 2}', we already have '{"a": {"b": 2}}' + // j->'a' <@ '{"b": 2}', we already have '{"a": {"b": 2}}' + return nil, nil + + default: + // For scalars in Contains expressions, we construct an array value + // containing the scalar. + + // Scalar value examples: + // j->'a' @> '1', build '{"a": [1]}', we already have '{"a": 1}' + // j->'a' <@ '1', we already have '{"a": 1}' + if !containedBy { + arr := json.NewArrayBuilder(1) + arr.Add(val) + v := arr.Build() + newObj := buildObject(keys, v) + objs = append(objs, newObj) + } } + return objs, nil +} - // Build a new JSON object of the form: - // {<keyN>: ... {<key1>: {key0: <val>}}} - // Note that key0 is the outer-most fetch val index, so the expression - // j->'a'->'b' = 1 results in {"a": {"b": 1}}. +// buildObject constructs a new JSON object of the form: +// {<keyN>: ... {<key1>: {key0: <val>}}} +// Where the keys and val are extracted from a fetch val expression by the +// caller. 
Note that key0 is the outer-most fetch val index, so the expression +// j->'a'->'b' = 1 results in {"a": {"b": 1}}. +func buildObject(keys []string, val json.JSON) json.JSON { var obj json.JSON for i := 0; i < len(keys); i++ { b := json.NewObjectBuilder(1) if i == 0 { - b.Add(keys[i], val.JSON) + b.Add(keys[i], val) } else { b.Add(keys[i], obj) } obj = b.Build() } - - invertedExpr := getInvertedExprForJSONOrArrayIndexForContaining(evalCtx, tree.NewDJSON(obj)) - - // When the right side is an array or object, the InvertedExpression - // generated is not tight. We must indicate it is non-tight so an additional - // filter is added. - typ := val.JSON.Type() - if typ == json.ArrayJSONType || typ == json.ObjectJSONType { - invertedExpr.SetNotTight() - } - - return invertedExpr + return obj } diff --git a/pkg/sql/opt/invertedidx/json_array_test.go b/pkg/sql/opt/invertedidx/json_array_test.go index 169a1a07048c..6ffc634c7ed8 100644 --- a/pkg/sql/opt/invertedidx/json_array_test.go +++ b/pkg/sql/opt/invertedidx/json_array_test.go @@ -548,6 +548,160 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) { unique: true, remainingFilters: "j @> '[[1, 2]]'", }, + { + // Contains is supported with a fetch val operator on the left. + filters: `j->'a' @> '1'`, + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: false, + remainingFilters: "", + }, + { + // Contains is supported with chained fetch val operators on the left. + filters: `j->'a'->'b' @> '1'`, + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: false, + remainingFilters: "", + }, + { + // Contains with a fetch val is supported for JSON arrays. 
+ filters: `j->'a'->'b' @> '[1, 2]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: true, + remainingFilters: "j->'a'->'b' @> '[1, 2]'", + }, + { + filters: `j->'a'->'b' @> '[[1, 2]]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: true, + remainingFilters: "j->'a'->'b' @> '[[1, 2]]'", + }, + { + // Contains with a fetch val is supported for JSON objects. + filters: `j->'a'->'b' @> '{"c": 1}'`, + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: true, + remainingFilters: "", + }, + { + filters: `j->'a'->'b' @> '{"c": {"d": "e"}}'`, + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: true, + remainingFilters: "", + }, + { + filters: `j->'a'->'b' @> '[{"c": 1, "d": "2"}]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: true, + remainingFilters: "j->'a'->'b' @> '[{\"c\": 1, \"d\": \"2\"}]'", + }, + { + filters: `j->'a'->'b' @> '{"c": [1, 2], "d": "2"}'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: true, + remainingFilters: "j->'a'->'b' @> '{\"c\": [1, 2], \"d\": \"2\"}'", + }, + { + // ContainedBy is supported with a fetch val operator on the left. + filters: `j->'a' <@ '1'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a' <@ '1'", + }, + { + // ContainedBy is supported with chained fetch val operators on the left. + filters: `j->'a'->'b' <@ '1'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '1'", + }, + { + // ContainedBy with a fetch val is supported for JSON arrays. + filters: `j->'a'->'b' <@ '[1, 2]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '[1, 2]'", + }, + { + filters: `j->'a'->'b' <@ '[[1, 2]]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '[[1, 2]]'", + }, + { + // ContainedBy with a fetch val is supported for JSON objects. 
+ filters: `j->'a'->'b' <@ '{"c": 1}'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '{\"c\": 1}'", + }, + { + filters: `j->'a'->'b' <@ '{"c": {"d": "e"}}'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '{\"c\": {\"d\": \"e\"}}'", + }, + { + filters: `j->'a'->'b' <@ '[{"c": 1, "d": "2"}]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '[{\"c\": 1, \"d\": \"2\"}]'", + }, + { + filters: `j->'a'->'b' <@ '{"c": [1, 2], "d": "2"}'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '{\"c\": [1, 2], \"d\": \"2\"}'", + }, + { + // Contains is supported with a fetch val operator on the right. + filters: `'1' @> j->'a'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "'1' @> j->'a'", + }, + { + // ContainedBy is supported with a fetch val operator on the right. + filters: `'1' <@ j->'a'`, + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: false, + remainingFilters: "", + }, } for _, tc := range testCases { diff --git a/pkg/sql/opt/memo/testdata/stats/inverted-json b/pkg/sql/opt/memo/testdata/stats/inverted-json index 0c1215f96716..f81a016187ee 100644 --- a/pkg/sql/opt/memo/testdata/stats/inverted-json +++ b/pkg/sql/opt/memo/testdata/stats/inverted-json @@ -984,3 +984,373 @@ select │ └── fd: (1)-->(4) └── filters └── (j:2->'a') = '{}' [type=bool, outer=(2), immutable] + +# A query with fetch val and contains operators uses the inverted index. 
+opt +SELECT * FROM t WHERE j->'a' @> '1' +---- +index-join t + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=222.222222] + ├── key: (1) + ├── fd: (1)-->(2) + └── inverted-filter + ├── columns: k:1(int!null) + ├── inverted expression: /4 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ └── ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + ├── stats: [rows=2e-07] + ├── key: (1) + └── scan t@j_idx + ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + ├── inverted constraint: /4/1 + │ └── spans + │ ├── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ └── ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + ├── stats: [rows=2e-07, distinct(1)=2e-07, null(1)=0, distinct(4)=2e-07, null(4)=0] + │ histogram(4)= + ├── key: (1) + └── fd: (1)-->(4) + +# A query with fetch val and contained by operators uses the inverted index, +# and the expression is not tight. 
+opt +SELECT * FROM t WHERE j->'a' <@ '1' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=100] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ └── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ ├── stats: [rows=100] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ └── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(4)=1, null(4)=0] + │ │ histogram(4)= 0 100 + │ │ <--- '\x37000139' + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── (j:2->'a') <@ '1' [type=bool, outer=(2), immutable] + +# A query with chained fetch val and contains operators uses the inverted index. 
+opt +SELECT * FROM t WHERE j->'a'->'b' @> '"c"' +---- +index-join t + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=222.222222] + ├── key: (1) + ├── fd: (1)-->(2) + └── inverted-filter + ├── columns: k:1(int!null) + ├── inverted expression: /4 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── stats: [rows=2e-07] + ├── key: (1) + └── scan t@j_idx + ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + ├── inverted constraint: /4/1 + │ └── spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── stats: [rows=2e-07, distinct(1)=2e-07, null(1)=0, distinct(4)=2e-07, null(4)=0] + │ histogram(4)= + ├── key: (1) + └── fd: (1)-->(4) + +# A query with chained fetch val and contained by operators uses the inverted +# index, and the expression is not tight. 
+opt +SELECT * FROM t WHERE j->'a'->'b' <@ '"c"' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=100] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── stats: [rows=100] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(4)=1, null(4)=0] + │ │ histogram(4)= 0 100 + │ │ <--- '\x37000139' + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── ((j:2->'a')->'b') <@ '"c"' [type=bool, outer=(2), immutable] + +# A query with fetch val and contains operators uses the inverted index when an +# object is on the right side. +opt +SELECT * FROM t WHERE j->'a' @> '{"b": "c"}' +---- +index-join t + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=222.222222] + ├── key: (1) + ├── fd: (1)-->(2) + └── scan t@j_idx + ├── columns: k:1(int!null) + ├── inverted constraint: /4/1 + │ └── spans: ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + ├── stats: [rows=2e-07, distinct(4)=2e-07, null(4)=0] + │ histogram(4)= + └── key: (1) + +# A query with fetch val and contained by operators uses the inverted index +# when an object is on the right side, and the expression is not tight. 
+opt +SELECT * FROM t WHERE j->'a' <@ '{"b": "c"}' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=100] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── stats: [rows=100] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(4)=1, null(4)=0] + │ │ histogram(4)= 0 100 + │ │ <--- '\x37000139' + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── (j:2->'a') <@ '{"b": "c"}' [type=bool, outer=(2), immutable] + +# A query with fetch val and contains operators uses the inverted index when an +# array is on the right side, and the expression is not tight. 
+opt +SELECT * FROM t WHERE j->'a' @> '[1, 2]' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=24.691358] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=2e-07] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: true + │ │ ├── union spans: empty + │ │ └── INTERSECTION + │ │ ├── span expression + │ │ │ ├── tight: true, unique: true + │ │ │ └── union spans: ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── span expression + │ │ ├── tight: true, unique: true + │ │ └── union spans: ["7a\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=2e-07] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=2e-07, distinct(1)=2e-07, null(1)=0, distinct(4)=2e-07, null(4)=0] + │ │ histogram(4)= + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── (j:2->'a') @> '[1, 2]' [type=bool, outer=(2), immutable] + +# A query with fetch val and contained by operators uses the inverted index +# when an array is on the right side, and the expression is not tight. 
+opt +SELECT * FROM t WHERE j->'a' <@ '[1, 2]' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=100] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ │ ├── ["7a\x00\x01*\x04\x00", "7a\x00\x01*\x04\x00"] + │ │ ├── ["7a\x00\x018", "7a\x00\x018"] + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=100] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ │ ├── ["7a\x00\x01*\x04\x00", "7a\x00\x01*\x04\x00"] + │ │ ├── ["7a\x00\x018", "7a\x00\x018"] + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(4)=1, null(4)=0] + │ │ histogram(4)= 0 100 + │ │ <--- '\x37000139' + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── (j:2->'a') <@ '[1, 2]' [type=bool, outer=(2), immutable] + +# A query with fetch val and contained by operators uses the inverted index +# when the fetch val is on the right side. 
+opt +SELECT * FROM t WHERE '"c"' <@ j->'a'->'b' +---- +index-join t + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + └── inverted-filter + ├── columns: k:1(int!null) + ├── inverted expression: /4 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── stats: [rows=2e-07] + ├── key: (1) + └── scan t@j_idx + ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + ├── inverted constraint: /4/1 + │ └── spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── stats: [rows=2e-07, distinct(1)=2e-07, null(1)=0, distinct(4)=2e-07, null(4)=0] + │ histogram(4)= + ├── key: (1) + └── fd: (1)-->(4) + +# A query with fetch val and contains operators uses the inverted index when +# the fetch val is on the right side. 
+opt +SELECT * FROM t WHERE '[1, 2]' @> j->'a'->'b' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=100] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ ├── ["7a\x00\x02b\x00\x01*\x02\x00", "7a\x00\x02b\x00\x01*\x02\x00"] + │ │ ├── ["7a\x00\x02b\x00\x01*\x04\x00", "7a\x00\x02b\x00\x01*\x04\x00"] + │ │ ├── ["7a\x00\x02b\x00\x018", "7a\x00\x02b\x00\x018"] + │ │ ├── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=100] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ ├── ["7a\x00\x02b\x00\x01*\x02\x00", "7a\x00\x02b\x00\x01*\x02\x00"] + │ │ ├── ["7a\x00\x02b\x00\x01*\x04\x00", "7a\x00\x02b\x00\x01*\x04\x00"] + │ │ ├── ["7a\x00\x02b\x00\x018", "7a\x00\x02b\x00\x018"] + │ │ ├── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(4)=1, null(4)=0] + │ │ histogram(4)= 0 100 + │ │ <--- '\x37000139' + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── '[1, 2]' @> ((j:2->'a')->'b') [type=bool, outer=(2), immutable] diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index 
ff1858c6dd5a..a08bee2bb606 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -2830,6 +2830,277 @@ project ├── key: (1) └── fd: (1)-->(6) +# Query using the fetch val and containment operators. +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' @> '"b"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inverted-filter + ├── columns: k:1!null + ├── inverted expression: /6 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ └── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + ├── key: (1) + └── scan b@j_inv_idx + ├── columns: k:1!null j_inverted_key:6!null + ├── inverted constraint: /6/1 + │ └── spans + │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ └── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + ├── key: (1) + └── fd: (1)-->(6) + +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' <@ '"b"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /6 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ └── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:6!null + │ ├── inverted constraint: /6/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ └── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ ├── key: (1) + │ └── fd: (1)-->(6) + └── filters + └── (j:4->'a') <@ '"b"' [outer=(4), immutable] + +# Chained fetch val operators and containment operator. 
+opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a'->'b' @> '"c"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inverted-filter + ├── columns: k:1!null + ├── inverted expression: /6 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── key: (1) + └── scan b@j_inv_idx + ├── columns: k:1!null j_inverted_key:6!null + ├── inverted constraint: /6/1 + │ └── spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── key: (1) + └── fd: (1)-->(6) + +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a'->'b' <@ '"c"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /6 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:6!null + │ ├── inverted constraint: /6/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── key: (1) + │ └── fd: (1)-->(6) + └── filters + └── ((j:4->'a')->'b') <@ '"c"' [outer=(4), immutable] + +# Query using the fetch val and contains operators in a disjunction. 
+opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' @> '"b"' OR j->'c' @> '"d"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inverted-filter + ├── columns: k:1!null + ├── inverted expression: /6 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ ├── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ └── ["7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01", "7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01"] + ├── key: (1) + └── scan b@j_inv_idx + ├── columns: k:1!null j_inverted_key:6!null + ├── inverted constraint: /6/1 + │ └── spans + │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ ├── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ └── ["7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01", "7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01"] + ├── key: (1) + └── fd: (1)-->(6) + +# Query using the fetch val and contains operators in a disjunction with a +# contained by operator. 
+opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' @> '["b"]' OR j <@ '{"c": "d"}' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /6 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ │ └── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:6!null + │ ├── inverted constraint: /6/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ │ └── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ ├── key: (1) + │ └── fd: (1)-->(6) + └── filters + └── ((j:4->'a') @> '["b"]') OR (j:4 <@ '{"c": "d"}') [outer=(4), immutable] + +# Query using the fetch val and contains operators in a conjunction. 
+opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' @> '"b"' AND j->'c' @> '"d"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inverted-filter + ├── columns: k:1!null + ├── inverted expression: /6 + │ ├── tight: true, unique: false + │ ├── union spans: empty + │ └── INTERSECTION + │ ├── span expression + │ │ ├── tight: true, unique: false + │ │ └── union spans + │ │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ │ └── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ └── span expression + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ └── ["7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01", "7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01"] + ├── key: (1) + └── scan b@j_inv_idx + ├── columns: k:1!null j_inverted_key:6!null + ├── inverted constraint: /6/1 + │ └── spans + │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ ├── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ └── ["7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01", "7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01"] + ├── key: (1) + └── fd: (1)-->(6) + +# Query using the fetch val and contains operators in conjunction with a +# contained by operator. 
+opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' @> '["b"]' AND j <@ '{"c": "d"}' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /6 + │ │ ├── tight: false, unique: false + │ │ ├── union spans: empty + │ │ └── INTERSECTION + │ │ ├── span expression + │ │ │ ├── tight: true, unique: true + │ │ │ └── union spans: ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ │ └── span expression + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ └── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:6!null + │ ├── inverted constraint: /6/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ │ └── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ ├── key: (1) + │ └── fd: (1)-->(6) + └── filters + └── j:4 <@ '{"c": "d"}' [outer=(4), immutable] + # GenerateInvertedIndexScans propagates row-level locking information. opt expect=GenerateInvertedIndexScans SELECT k FROM b WHERE j @> '{"a": "b"}' FOR UPDATE