From 6021d7bf909362fd9845369ccac19ac87e1c7bcb Mon Sep 17 00:00:00 2001 From: Angela Xu Date: Fri, 2 Apr 2021 17:04:19 -0700 Subject: [PATCH] opt: index accelerate JSON filters in the forms j->'a' @> '1' and j->'a'<@'1' We previously did not have inverted index support for expressions with a JSON fetch val operator on the left side of @> (contains) or <@ (contained by) expressions. This commit adds support to use the inverted index for query filters with JSON fetch val and containment operators. These include any contains or contained by expressions with fetch val or chained fetch val operators on the left side, and a constant value on the right side, including booleans, strings, numbers, nulls, arrays, and objects. Fixes #61430 Release note (performance improvement): Expressions with the -> (fetch val) operator on the left side of either <@ (contained by) or @> (contains) now support index-acceleration. --- .../testdata/logic_test/inverted_index | 206 +++++++++- .../exec/execbuilder/testdata/inverted_index | 275 +++++++++++++ .../exec/execbuilder/testdata/virtual_columns | 5 +- pkg/sql/opt/invertedidx/json_array.go | 283 ++++++++++---- pkg/sql/opt/invertedidx/json_array_test.go | 154 ++++++++ pkg/sql/opt/memo/testdata/stats/inverted-json | 370 ++++++++++++++++++ pkg/sql/opt/xform/testdata/rules/select | 271 +++++++++++++ 7 files changed, 1496 insertions(+), 68 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_index b/pkg/sql/logictest/testdata/logic_test/inverted_index index dadfd2f6107d..a96fbadae9c7 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_index +++ b/pkg/sql/logictest/testdata/logic_test/inverted_index @@ -767,7 +767,13 @@ INSERT INTO f VALUES (25, '{"a": {"b": "c", "d": "e"}}'), (26, '{"a": {"b": "c"}, "d": "e"}'), (27, '[1, 2, {"b": "c"}]'), - (28, '[{"a": {"b": "c"}}, "d", "e"]') + (28, '[{"a": {"b": "c"}}, "d", "e"]'), + (29, '{"a": null}'), + (30, '{"a": [1, 2, null]}'), + (31, 'null'), + (32, '{}'), + (33, '[]'), + 
(34, '{"a": {"b": []}}') query T SELECT j FROM f@i WHERE j->'a' = '1' ORDER BY k @@ -880,6 +886,204 @@ SELECT j FROM f@i WHERE j->'a' = '"b"' AND j->'c' = '[{"d": 1}, {"e": 2}]' ORDER ---- {"a": "b", "c": [{"d": 1}, {"e": 2}]} +# Expressions with fetch val and containment operators use the inverted index. +query T +SELECT j FROM f@i WHERE j->'a' @> '"b"' ORDER BY k +---- +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} +{"a": "b", "x": ["c", "d", "e"]} +{"a": "b", "c": [{"d": 1}, {"e": 2}]} + +query T +SELECT j FROM f@i WHERE j->'a' <@ '"b"' ORDER BY k +---- +{"a": "b", "x": ["c", "d", "e"]} +{"a": "b", "c": [{"d": 1}, {"e": 2}]} + +query T +SELECT j FROM f@i WHERE j->'a' @> 'null' ORDER BY k +---- +{"a": null} +{"a": [1, 2, null]} + +query T +SELECT j FROM f@i WHERE j->'a' <@ 'null' ORDER BY k +---- +{"a": null} + +query T +SELECT j FROM f@i WHERE j->'a' <@ '[]' ORDER BY k +---- +{"a": []} + +query T +SELECT j FROM f@i WHERE j->'a' <@ '{}' ORDER BY k +---- +{"a": {}} + +query T +SELECT j FROM f@i WHERE j->'a' @> '[]' ORDER BY k +---- +{"a": [1, 2]} +{"a": []} +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} +{"a": [1, 2, null]} + +query T +SELECT j FROM f@i WHERE j->'a' @> '{}' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} +{"a": {"d": 2}} +{"a": {"b": [1, 2]}} +{"a": {"b": {"c": 1}}} +{"a": {"b": {"c": 1, "d": 2}}} +{"a": {"b": {"d": 2}}} +{"a": {"b": {"c": [1, 2]}}} +{"a": {"b": {"c": [1, 2, 3]}}} +{"a": {}} +{"a": {"b": "c"}} +{"a": {"b": ["c", "d", "e"]}} +{"a": {"b": "c", "d": "e"}} +{"a": {"b": "c"}, "d": "e"} +{"a": {"b": []}} + +query T +SELECT j FROM f@i WHERE j->'a' <@ '{"b": [1, 2]}' ORDER BY k +---- +{"a": {"b": [1, 2]}} +{"a": {}} +{"a": {"b": []}} + +query T +SELECT j FROM f@i WHERE j->'a' <@ '{"b": {"c": [1, 2]}}' ORDER BY k +---- +{"a": {"b": {"c": [1, 2]}}} +{"a": {}} + +query T +SELECT j FROM f@i WHERE j->'a' @> '{"b": ["c"]}' ORDER BY k +---- +{"a": {"b": ["c", "d", "e"]}} + +query T +SELECT j FROM f@i WHERE j->'c' 
@> '[{"d": 1}]' ORDER BY k +---- +{"a": "b", "c": [{"d": 1}, {"e": 2}]} + +# Expressions with chained fetch val and containment operators use the inverted +# index. +query T +SELECT j FROM f@i WHERE j->'a'->'b' <@ '1' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} + +query T +SELECT j FROM f@i WHERE j->'a'->'b' @> '1' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} +{"a": {"b": [1, 2]}} + +query T +SELECT j FROM f@i WHERE j->'a'->'b' @> '[1, 2]' ORDER BY k +---- +{"a": {"b": [1, 2]}} + +query T +SELECT j FROM f@i WHERE j->'a'->'b' <@ '[1, 2]' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} +{"a": {"b": [1, 2]}} +{"a": {"b": []}} + +query T +SELECT j FROM f@i WHERE j->'a'->'b' @> '"c"' ORDER BY k +---- +{"a": {"b": "c"}} +{"a": {"b": ["c", "d", "e"]}} +{"a": {"b": "c", "d": "e"}} +{"a": {"b": "c"}, "d": "e"} + +# Expressions with fetch val on the right side should use the inverted index. +query T +SELECT j FROM f@i WHERE '"b"' <@ j->'a' ORDER BY k +---- +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} +{"a": "b", "x": ["c", "d", "e"]} +{"a": "b", "c": [{"d": 1}, {"e": 2}]} + +query T +SELECT j FROM f@i WHERE '[1, 2]' <@ j->'a'->'b' ORDER BY k +---- +{"a": {"b": [1, 2]}} + +query T +SELECT j FROM f@i WHERE '{"b": {"c": [1, 2]}}' <@ j->'a' ORDER BY k +---- +{"a": {"b": {"c": [1, 2]}}} +{"a": {"b": {"c": [1, 2, 3]}}} + +# Conjunctions of fetch val and containment expressions use the inverted index. +query T +SELECT j FROM f@i WHERE j->'a' @> '"b"' AND '["c"]' <@ j->'a' ORDER BY k +---- +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} + +#TODO(angelazxu): Uncomment these tests once #63180 is fixed. 
+# query T +# SELECT j FROM f@i WHERE j->'a' <@ '{"b": [1, 2]}' AND j->'a'->'b' @> '[1]' ORDER BY k +# ---- + +# query T +# SELECT j FROM f@i WHERE j->'a' @> '"b"' AND j->'a' <@ '["b", "c", "d", "e"]' ORDER BY k +# ---- + +query T +SELECT j FROM f@i WHERE j->'a' @> '{"d": 2}' AND '[1, 2]' @> j->'a'->'b' ORDER BY k +---- +{"a": {"b": 1, "d": 2}} + +# Disjunctions of fetch val and containment expressions use the inverted index. +query T +SELECT j FROM f@i WHERE j->'a' @> '[1, 2]' OR j->'a'->'b' @> '[1, 2]' ORDER BY k +---- +{"a": [1, 2]} +{"a": {"b": [1, 2]}} +{"a": [1, 2, null]} + +query T +SELECT j FROM f@i WHERE j->'a' @> '"b"' OR j->'a'->'b' <@ '[1, 2]' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} +{"a": {"b": [1, 2]}} +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} +{"a": "b", "x": ["c", "d", "e"]} +{"a": "b", "c": [{"d": 1}, {"e": 2}]} +{"a": {"b": []}} + +query T +SELECT j FROM f@i WHERE j->'a'->'b' <@ '{"c": [1, 2], "d": 2}' OR j->'a'->'b' <@ '["c", "d", "e", 1, 2, 3]' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} +{"a": {"b": [1, 2]}} +{"a": {"b": {"d": 2}}} +{"a": {"b": {"c": [1, 2]}}} +{"a": {"b": "c"}} +{"a": {"b": ["c", "d", "e"]}} +{"a": {"b": "c", "d": "e"}} +{"a": {"b": "c"}, "d": "e"} +{"a": {"b": []}} + subtest arrays statement ok diff --git a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index index c3be2181789e..2c41014d9b9b 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index @@ -377,6 +377,281 @@ vectorized: true • norows columns: (a, b) +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' @> '"b"' +---- +distribution: local +vectorized: true +· +• index join +│ columns: (a, b) +│ estimated row count: 111 (missing stats) +│ table: d@primary +│ key columns: a +│ +└── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: 
(a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 2 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /"a"/"b"-/"a"/"b"/PrefixEnd /"a"/Arr/"b"-/"a"/Arr/"b"/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d@foo_inv where b->'a'->'c' @> '"b"' +---- +distribution: local +vectorized: true +· +• index join +│ columns: (a, b) +│ estimated row count: 111 (missing stats) +│ table: d@primary +│ key columns: a +│ +└── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 2 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /"a"/"c"/"b"-/"a"/"c"/"b"/PrefixEnd /"a"/"c"/Arr/"b"-/"a"/"c"/Arr/"b"/PrefixEnd + +# TODO(angelazxu): The {} span does not need to be scanned here, but is +# included when finding spans contained by {"a": "b"} (see #63184). 
+query T +EXPLAIN (VERBOSE) SELECT * from d@foo_inv where b->'a' <@ '"b"' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: (b->'a') <@ '"b"' +│ +└── • index join + │ columns: (a, b) + │ estimated row count: 111 (missing stats) + │ table: d@primary + │ key columns: a + │ + └── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 2 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /{}-/{}/PrefixEnd /"a"/"b"-/"a"/"b"/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d@foo_inv where b->'a'->'c' <@ '"b"' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: ((b->'a')->'c') <@ '"b"' +│ +└── • index join + │ columns: (a, b) + │ estimated row count: 111 (missing stats) + │ table: d@primary + │ key columns: a + │ + └── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 3 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /{}-/{}/PrefixEnd /"a"/{}-/"a"/{}/PrefixEnd /"a"/"c"/"b"-/"a"/"c"/"b"/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' @> '[1, 2]' +---- +distribution: local +vectorized: true +· +• lookup join (inner) +│ columns: (a, b) +│ estimated row count: 12 (missing stats) +│ table: d@primary +│ equality: (a) = (a) +│ equality cols are key +│ pred: (b->'a') @> '[1, 2]' +│ +└── • zigzag join + columns: (a) + estimated row count: 12 (missing stats) + left table: d@foo_inv + left columns: (a) + left fixed values: 1 column + right table: d@foo_inv + right columns: () + right fixed values: 1 
column + +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' <@ '[1, 2]' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: (b->'a') <@ '[1, 2]' +│ +└── • index join + │ columns: (a, b) + │ estimated row count: 111 (missing stats) + │ table: d@primary + │ key columns: a + │ + └── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 6 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /{}-/{}/PrefixEnd /"a"/1-/"a"/1/PrefixEnd /"a"/2-/"a"/2/PrefixEnd /"a"/[]-/"a"/{} /"a"/Arr/1-/"a"/Arr/1/PrefixEnd /"a"/Arr/2-/"a"/Arr/2/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' @> '{"d": 2}' +---- +distribution: local +vectorized: true +· +• index join +│ columns: (a, b) +│ estimated row count: 111 (missing stats) +│ table: d@primary +│ key columns: a +│ +└── • scan + columns: (a) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /"a"/"d"/2-/"a"/"d"/2/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' <@ '{"d": 2}' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: (b->'a') <@ '{"d": 2}' +│ +└── • index join + │ columns: (a, b) + │ estimated row count: 111 (missing stats) + │ table: d@primary + │ key columns: a + │ + └── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 3 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /{}-/{}/PrefixEnd /"a"/{}-/"a"/{}/PrefixEnd /"a"/"d"/2-/"a"/"d"/2/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d where '"b"' <@ b->'a' 
+---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: '"b"' <@ (b->'a') +│ +└── • scan + columns: (a, b) + estimated row count: 1,000 (missing stats) + table: d@primary + spans: FULL SCAN + +query T +EXPLAIN (VERBOSE) SELECT * from d where '[1, 2]' @> b->'a' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: '[1, 2]' @> (b->'a') +│ +└── • index join + │ columns: (a, b) + │ estimated row count: 111 (missing stats) + │ table: d@primary + │ key columns: a + │ + └── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 6 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /{}-/{}/PrefixEnd /"a"/1-/"a"/1/PrefixEnd /"a"/2-/"a"/2/PrefixEnd /"a"/[]-/"a"/{} /"a"/Arr/1-/"a"/Arr/1/PrefixEnd /"a"/Arr/2-/"a"/Arr/2/PrefixEnd + query T EXPLAIN (VERBOSE) SELECT * from d where '"b"' = b->'a' ---- diff --git a/pkg/sql/opt/exec/execbuilder/testdata/virtual_columns b/pkg/sql/opt/exec/execbuilder/testdata/virtual_columns index 9fd5b963ce86..b5ec1d806c91 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/virtual_columns +++ b/pkg/sql/opt/exec/execbuilder/testdata/virtual_columns @@ -1417,7 +1417,10 @@ vectorized: true table: inv@iv_j_idx spans: /10/"a"/"b"-/10/"a"/"b"/PrefixEnd /20/"a"/"b"-/20/"a"/"b"/PrefixEnd /30/"a"/"b"-/30/"a"/"b"/PrefixEnd -# Verify that we use iv_jv_idx. 
+statement ok +DROP INDEX inv@iv_j_idx + +# Verify that we use iv_jv_idx query T EXPLAIN (VERBOSE) SELECT k FROM inv WHERE iv IN (10, 20, 30) AND jv @> '{"a": "b"}' ---- diff --git a/pkg/sql/opt/invertedidx/json_array.go b/pkg/sql/opt/invertedidx/json_array.go index d761fa0660e3..75310f1f7988 100644 --- a/pkg/sql/opt/invertedidx/json_array.go +++ b/pkg/sql/opt/invertedidx/json_array.go @@ -383,7 +383,17 @@ func (j *jsonOrArrayFilterPlanner) extractJSONOrArrayContainsCondition( indexColumn, constantVal = right, left containedBy = !containedBy } else { - // If neither condition is met, we cannot create an InvertedExpression. + if fetch, ok := left.(*memo.FetchValExpr); ok { + // When the expression has a JSON fetch operator on the left, it is + // handled in extractJSONFetchValContainsCondition. + return j.extractJSONFetchValContainsCondition(evalCtx, fetch, right, containedBy) + } else if fetch, ok := right.(*memo.FetchValExpr); ok { + // When the expression has a JSON fetch operator on the right, it is + // handled in extractJSONFetchValContainsCondition as an equivalent + // expression with right and left swapped. + return j.extractJSONFetchValContainsCondition(evalCtx, fetch, left, !containedBy) + } + // If none of the conditions are met, we cannot create an InvertedExpression. return inverted.NonInvertedColExpression{} } d := memo.ExtractConstDatum(constantVal) @@ -403,7 +413,7 @@ func (j *jsonOrArrayFilterPlanner) extractJSONOrArrayContainsCondition( // extractJSONFetchValEqCondition extracts an InvertedExpression representing an // inverted filter over the planner's inverted index, based on equality between -// a chain of fetch val expressions and a right scalar expression. If an +// a chain of fetch val expressions and a scalar expression. If an // InvertedExpression cannot be generated from the expression, an // inverted.NonInvertedColExpression is returned. 
// @@ -415,7 +425,58 @@ func (j *jsonOrArrayFilterPlanner) extractJSONOrArrayContainsCondition( func (j *jsonOrArrayFilterPlanner) extractJSONFetchValEqCondition( evalCtx *tree.EvalContext, left *memo.FetchValExpr, right opt.ScalarExpr, ) inverted.Expression { - // The right side of the equals expression should be a constant JSON value. + // The right side of the expression should be a constant JSON value. + if !memo.CanExtractConstDatum(right) { + return inverted.NonInvertedColExpression{} + } + val, ok := memo.ExtractConstDatum(right).(*tree.DJSON) + if !ok { + return inverted.NonInvertedColExpression{} + } + + // Collect a slice of keys from the fetch val expression. + var keys []string + keys = j.collectKeys(keys, left) + if len(keys) == 0 { + return inverted.NonInvertedColExpression{} + } + + // Build a new JSON object with the collected keys and val. + obj := buildObject(keys, val.JSON) + + // For Equals expressions, we will generate the inverted expression for the + // single object built from the keys and val. + invertedExpr := getInvertedExprForJSONOrArrayIndexForContaining(evalCtx, tree.NewDJSON(obj)) + + // When the right side is an array or object, the InvertedExpression + // generated is not tight. We must indicate it is non-tight so an additional + // filter is added. + typ := val.JSON.Type() + if typ == json.ArrayJSONType || typ == json.ObjectJSONType { + invertedExpr.SetNotTight() + } + return invertedExpr +} + +// extractJSONFetchValContainsCondition extracts an InvertedExpression +// representing an inverted filter over the planner's inverted index, based on +// containment between a chain of fetch val expressions and a scalar +// expression. If an InvertedExpression cannot be generated from the +// expression, an inverted.NonInvertedColExpression is returned. 
+// +// In order to generate an InvertedExpression, left must be a fetch val +// expression in the form [col]->[index0]->[index1]->...->[indexN] where col is +// a variable or expression referencing the inverted column in the inverted +// index and each index is a constant string. The right expression must be a +// constant JSON value. For expressions with a left constant value and a right +// fetch val expression, the arguments will be swapped when passed in. +// +// The type of operator is indicated by the containedBy parameter, which is +// true for <@ and false for @>. +func (j *jsonOrArrayFilterPlanner) extractJSONFetchValContainsCondition( + evalCtx *tree.EvalContext, left *memo.FetchValExpr, right opt.ScalarExpr, containedBy bool, +) inverted.Expression { + // The right side of the expression should be a constant JSON value. if !memo.CanExtractConstDatum(right) { return inverted.NonInvertedColExpression{} } @@ -424,86 +485,176 @@ func (j *jsonOrArrayFilterPlanner) extractJSONFetchValEqCondition( return inverted.NonInvertedColExpression{} } - // Recursively traverse fetch val expressions and collect keys with which to - // build the InvertedExpression. If it is not possible to build an inverted - // expression from the tree of fetch val expressions, collectKeys returns - // early and foundKeys remains false. If successful, foundKeys is set to - // true and JSON fetch value indexes are collected in keys. The keys are - // ordered by the outer-most fetch val index first. The outer-most fetch val - // index is the right-most in the -> chain, for example (j->'a'->'b') is - // equivalent to ((j->'a')->'b') and 'b' is the outer-most fetch val index. - // - // Later on, we iterate forward through these keys to build a JSON object - // from the inside-out with the inner-most value being the JSON scalar - // extracted above from the right ScalarExpr function argument. 
In the - // resulting JSON object, the outer-most JSON fetch value indexes are the - // inner most JSON object keys. - // - // As an example, when left is (j->'a'->'b') and right is ('1'), the keys - // {"b", "a"} are collected and the JSON object {"a": {"b": 1}} is built. - foundKeys := false + // Collect a slice of keys from the fetch val expression. var keys []string - var collectKeys func(fetch *memo.FetchValExpr) - collectKeys = func(fetch *memo.FetchValExpr) { - // The right side of the fetch val expression, the Index field, must be - // a constant string. If not, then we cannot build an inverted - // expression. - if !memo.CanExtractConstDatum(fetch.Index) { - return + keys = j.collectKeys(keys, left) + if len(keys) == 0 { + return inverted.NonInvertedColExpression{} + } + + // Build a new JSON object with the collected keys and val. + obj := buildObject(keys, val.JSON) + + var invertedExpr inverted.Expression + + // For Contains and ContainedBy expressions, we may need to build additional + // objects to cover all possibilities. + objs, err := buildFetchContainmentObjects(keys, val.JSON, containedBy) + if err != nil { + return inverted.NonInvertedColExpression{} + } + objs = append(objs, obj) + // We get an inverted expression for each object constructed, and union + // these expressions. + for i := range objs { + var expr inverted.Expression + if containedBy { + expr = getInvertedExprForJSONOrArrayIndexForContainedBy(evalCtx, tree.NewDJSON(objs[i])) + } else { + expr = getInvertedExprForJSONOrArrayIndexForContaining(evalCtx, tree.NewDJSON(objs[i])) } - key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString) - if !ok { - return + if invertedExpr == nil { + invertedExpr = expr + } else { + invertedExpr = inverted.Or(invertedExpr, expr) } + } + return invertedExpr +} - // Append the key to the list of keys. 
- keys = append(keys, string(*key)) +// collectKeys is called on fetch val expressions to find the corresponding +// keys used to build a JSON object. It recursively traverses the fetch val +// expressions and collects keys with which to build the InvertedExpression. +// If it is not possible to build an inverted expression from the tree of fetch +// val expressions, collectKeys returns nil for keys. If successful, the JSON +// fetch value indexes are collected in keys. The keys are ordered by the +// outer-most fetch val index first. The outer-most fetch val index is the +// right-most in the -> chain, for example (j->'a'->'b') is equivalent to +// ((j->'a')->'b') and 'b' is the outer-most fetch val index. +// +// Callers of this function should iterate forward through these keys to build +// a JSON object from the inside-out with the inner-most value being the JSON +// scalar extracted above from the right ScalarExpr function argument. In the +// resulting JSON object, the outer-most JSON fetch value indexes are the +// inner most JSON object keys. +// +// As an example, when left is (j->'a'->'b') and right is ('1'), the keys +// {"b", "a"} are collected and the JSON object {"a": {"b": 1}} is built. +func (j *jsonOrArrayFilterPlanner) collectKeys( + currKeys []string, fetch *memo.FetchValExpr, +) (keys []string) { + // The right side of the fetch val expression, the Index field, must be + // a constant string. If not, then we cannot build an inverted + // expression. + if !memo.CanExtractConstDatum(fetch.Index) { + return nil + } + key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString) + if !ok { + return nil + } - // If the left side of the fetch val expression, the Json field, is a - // variable or expression corresponding to the index column, then we - // have found a valid list of keys to build an inverted expression. 
- if isIndexColumn(j.tabID, j.index, fetch.Json, j.computedColumns) { - foundKeys = true - return - } + // Append the key to the list of keys. + keys = append(currKeys, string(*key)) - // If the left side of the fetch val expression is another fetch val - // expression, recursively collect its keys. - if innerFetch, ok := fetch.Json.(*memo.FetchValExpr); ok { - collectKeys(innerFetch) - } + // If the left side of the fetch val expression, the Json field, is a + // variable or expression corresponding to the index column, then we + // have found a valid list of keys to build an inverted expression. + if isIndexColumn(j.tabID, j.index, fetch.Json, j.computedColumns) { + return keys + } - // Otherwise, we cannot build an inverted expression. + // If the left side of the fetch val expression is another fetch val + // expression, recursively collect its keys. + if innerFetch, ok := fetch.Json.(*memo.FetchValExpr); ok { + return j.collectKeys(keys, innerFetch) } - collectKeys(left) - if !foundKeys { - return inverted.NonInvertedColExpression{} + // Otherwise, we cannot build an inverted expression. + return nil +} + +// buildFetchContainmentObjects constructs new JSON objects with given keys and val. +// The keys and val are extracted from a fetch val containment expression, and +// the objects constructed depend on the value type and whether the expression +// uses <@ or @>. For example, the expression j->'a'->'b' @> '"c"' would have +// {"b", "a"} as keys, "c" as val, and construct {"a": {"b": ["c"]}}. +// An array of the constructed JSONs is returned. +func buildFetchContainmentObjects( + keys []string, val json.JSON, containedBy bool, +) ([]json.JSON, error) { + var objs []json.JSON + typ := val.Type() + switch typ { + case json.ArrayJSONType: + // For arrays in ContainedBy expressions, we must create a scalar value + // object, because getInvertedExprForJSONOrArrayIndexForContainedBy will + // not include the scalar value spans. 
+ + // Array value examples: + // j->'a' @> '[1]', no new object required, we already have '{"a": [1]}' + // j->'a' <@ '[1]', build '{"a": 1}', we already have '{"a": [1]}' + // j->'a' <@ '[1, [2], 3]', build '{"a": 1}', '{"a": 3}', we already have '{"a": [1, [2], 3]}' + if containedBy { + for i := 0; i < val.Len(); i++ { + v, err := val.FetchValIdx(i) + if err != nil { + return nil, err + } + t := v.Type() + if t == json.ArrayJSONType || t == json.ObjectJSONType { + // The scalar value is only needed for non-nested arrays and objects. + continue + } + newObj := buildObject(keys, v) + objs = append(objs, newObj) + } + } + + case json.ObjectJSONType: + // For objects in ContainedBy expressions, we do not need to generate the + // empty object value for each level of nesting, because the spans will be + // added for us in getInvertedExprForJSONOrArrayIndexForContainedBy. + // For objects in Contains expressions, no additional spans are required + // outside of the given object's spans. + + // Object value examples: + // j->'a' @> '{"b": 2}', we already have '{"a": {"b": 2}}' + // j->'a' <@ '{"b": 2}', we already have '{"a": {"b": 2}}' + return nil, nil + + default: + // For scalars in Contains expressions, we construct an array value + // containing the scalar. + + // Scalar value examples: + // j->'a' @> '1', build '{"a": [1]}', we already have '{"a": 1}' + // j->'a' <@ '1', we already have '{"a": 1}' + if !containedBy { + arr := json.NewArrayBuilder(1) + arr.Add(val) + v := arr.Build() + newObj := buildObject(keys, v) + objs = append(objs, newObj) + } } + return objs, nil +} - // Build a new JSON object of the form: - // {: ... {: {key0: }}} - // Note that key0 is the outer-most fetch val index, so the expression - // j->'a'->'b' = 1 results in {"a": {"b": 1}}. +// buildObject constructs a new JSON object of the form: +// {: ... {: {key0: }}} +// Where the keys and val are extracted from a fetch val expression by the +// caller. 
Note that key0 is the outer-most fetch val index, so the expression +// j->'a'->'b' = 1 results in {"a": {"b": 1}}. +func buildObject(keys []string, val json.JSON) json.JSON { var obj json.JSON for i := 0; i < len(keys); i++ { b := json.NewObjectBuilder(1) if i == 0 { - b.Add(keys[i], val.JSON) + b.Add(keys[i], val) } else { b.Add(keys[i], obj) } obj = b.Build() } - - invertedExpr := getInvertedExprForJSONOrArrayIndexForContaining(evalCtx, tree.NewDJSON(obj)) - - // When the right side is an array or object, the InvertedExpression - // generated is not tight. We must indicate it is non-tight so an additional - // filter is added. - typ := val.JSON.Type() - if typ == json.ArrayJSONType || typ == json.ObjectJSONType { - invertedExpr.SetNotTight() - } - - return invertedExpr + return obj } diff --git a/pkg/sql/opt/invertedidx/json_array_test.go b/pkg/sql/opt/invertedidx/json_array_test.go index 169a1a07048c..6ffc634c7ed8 100644 --- a/pkg/sql/opt/invertedidx/json_array_test.go +++ b/pkg/sql/opt/invertedidx/json_array_test.go @@ -548,6 +548,160 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) { unique: true, remainingFilters: "j @> '[[1, 2]]'", }, + { + // Contains is supported with a fetch val operator on the left. + filters: `j->'a' @> '1'`, + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: false, + remainingFilters: "", + }, + { + // Contains is supported with chained fetch val operators on the left. + filters: `j->'a'->'b' @> '1'`, + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: false, + remainingFilters: "", + }, + { + // Contains with a fetch val is supported for JSON arrays. 
+ filters: `j->'a'->'b' @> '[1, 2]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: true, + remainingFilters: "j->'a'->'b' @> '[1, 2]'", + }, + { + filters: `j->'a'->'b' @> '[[1, 2]]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: true, + remainingFilters: "j->'a'->'b' @> '[[1, 2]]'", + }, + { + // Contains with a fetch val is supported for JSON objects. + filters: `j->'a'->'b' @> '{"c": 1}'`, + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: true, + remainingFilters: "", + }, + { + filters: `j->'a'->'b' @> '{"c": {"d": "e"}}'`, + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: true, + remainingFilters: "", + }, + { + filters: `j->'a'->'b' @> '[{"c": 1, "d": "2"}]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: true, + remainingFilters: "j->'a'->'b' @> '[{\"c\": 1, \"d\": \"2\"}]'", + }, + { + filters: `j->'a'->'b' @> '{"c": [1, 2], "d": "2"}'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: true, + remainingFilters: "j->'a'->'b' @> '{\"c\": [1, 2], \"d\": \"2\"}'", + }, + { + // ContainedBy is supported with a fetch val operator on the left. + filters: `j->'a' <@ '1'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a' <@ '1'", + }, + { + // ContainedBy is supported with chained fetch val operators on the left. + filters: `j->'a'->'b' <@ '1'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '1'", + }, + { + // ContainedBy with a fetch val is supported for JSON arrays. + filters: `j->'a'->'b' <@ '[1, 2]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '[1, 2]'", + }, + { + filters: `j->'a'->'b' <@ '[[1, 2]]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '[[1, 2]]'", + }, + { + // ContainedBy with a fetch val is supported for JSON objects. 
+ filters: `j->'a'->'b' <@ '{"c": 1}'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '{\"c\": 1}'", + }, + { + filters: `j->'a'->'b' <@ '{"c": {"d": "e"}}'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '{\"c\": {\"d\": \"e\"}}'", + }, + { + filters: `j->'a'->'b' <@ '[{"c": 1, "d": "2"}]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '[{\"c\": 1, \"d\": \"2\"}]'", + }, + { + filters: `j->'a'->'b' <@ '{"c": [1, 2], "d": "2"}'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '{\"c\": [1, 2], \"d\": \"2\"}'", + }, + { + // Contains is supported with a fetch val operator on the right. + filters: `'1' @> j->'a'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "'1' @> j->'a'", + }, + { + // ContainedBy is supported with a fetch val operator on the right. + filters: `'1' <@ j->'a'`, + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: false, + remainingFilters: "", + }, } for _, tc := range testCases { diff --git a/pkg/sql/opt/memo/testdata/stats/inverted-json b/pkg/sql/opt/memo/testdata/stats/inverted-json index 0c1215f96716..f81a016187ee 100644 --- a/pkg/sql/opt/memo/testdata/stats/inverted-json +++ b/pkg/sql/opt/memo/testdata/stats/inverted-json @@ -984,3 +984,373 @@ select │ └── fd: (1)-->(4) └── filters └── (j:2->'a') = '{}' [type=bool, outer=(2), immutable] + +# A query with fetch val and contains operators uses the inverted index. 
+opt +SELECT * FROM t WHERE j->'a' @> '1' +---- +index-join t + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=222.222222] + ├── key: (1) + ├── fd: (1)-->(2) + └── inverted-filter + ├── columns: k:1(int!null) + ├── inverted expression: /4 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ └── ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + ├── stats: [rows=2e-07] + ├── key: (1) + └── scan t@j_idx + ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + ├── inverted constraint: /4/1 + │ └── spans + │ ├── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ └── ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + ├── stats: [rows=2e-07, distinct(1)=2e-07, null(1)=0, distinct(4)=2e-07, null(4)=0] + │ histogram(4)= + ├── key: (1) + └── fd: (1)-->(4) + +# A query with fetch val and contained by operators uses the inverted index, +# and the expression is not tight. 
+opt +SELECT * FROM t WHERE j->'a' <@ '1' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=100] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ └── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ ├── stats: [rows=100] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ └── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(4)=1, null(4)=0] + │ │ histogram(4)= 0 100 + │ │ <--- '\x37000139' + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── (j:2->'a') <@ '1' [type=bool, outer=(2), immutable] + +# A query with chained fetch val and contains operators uses the inverted index. 
+opt +SELECT * FROM t WHERE j->'a'->'b' @> '"c"' +---- +index-join t + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=222.222222] + ├── key: (1) + ├── fd: (1)-->(2) + └── inverted-filter + ├── columns: k:1(int!null) + ├── inverted expression: /4 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── stats: [rows=2e-07] + ├── key: (1) + └── scan t@j_idx + ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + ├── inverted constraint: /4/1 + │ └── spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── stats: [rows=2e-07, distinct(1)=2e-07, null(1)=0, distinct(4)=2e-07, null(4)=0] + │ histogram(4)= + ├── key: (1) + └── fd: (1)-->(4) + +# A query with chained fetch val and contained by operators uses the inverted +# index, and the expression is not tight. 
+opt +SELECT * FROM t WHERE j->'a'->'b' <@ '"c"' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=100] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── stats: [rows=100] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(4)=1, null(4)=0] + │ │ histogram(4)= 0 100 + │ │ <--- '\x37000139' + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── ((j:2->'a')->'b') <@ '"c"' [type=bool, outer=(2), immutable] + +# A query with fetch val and contains operators uses the inverted index when an +# object is on the right side. +opt +SELECT * FROM t WHERE j->'a' @> '{"b": "c"}' +---- +index-join t + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=222.222222] + ├── key: (1) + ├── fd: (1)-->(2) + └── scan t@j_idx + ├── columns: k:1(int!null) + ├── inverted constraint: /4/1 + │ └── spans: ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + ├── stats: [rows=2e-07, distinct(4)=2e-07, null(4)=0] + │ histogram(4)= + └── key: (1) + +# A query with fetch val and contained by operators uses the inverted index +# when an object is on the right side, and the expression is not tight. 
+opt +SELECT * FROM t WHERE j->'a' <@ '{"b": "c"}' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=100] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── stats: [rows=100] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(4)=1, null(4)=0] + │ │ histogram(4)= 0 100 + │ │ <--- '\x37000139' + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── (j:2->'a') <@ '{"b": "c"}' [type=bool, outer=(2), immutable] + +# A query with fetch val and contains operators uses the inverted index when an +# array is on the right side, and the expression is not tight. 
+opt +SELECT * FROM t WHERE j->'a' @> '[1, 2]' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=24.691358] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=2e-07] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: true + │ │ ├── union spans: empty + │ │ └── INTERSECTION + │ │ ├── span expression + │ │ │ ├── tight: true, unique: true + │ │ │ └── union spans: ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── span expression + │ │ ├── tight: true, unique: true + │ │ └── union spans: ["7a\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=2e-07] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=2e-07, distinct(1)=2e-07, null(1)=0, distinct(4)=2e-07, null(4)=0] + │ │ histogram(4)= + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── (j:2->'a') @> '[1, 2]' [type=bool, outer=(2), immutable] + +# A query with fetch val and contained by operators uses the inverted index +# when an array is on the right side, and the expression is not tight. 
+opt +SELECT * FROM t WHERE j->'a' <@ '[1, 2]' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=100] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ │ ├── ["7a\x00\x01*\x04\x00", "7a\x00\x01*\x04\x00"] + │ │ ├── ["7a\x00\x018", "7a\x00\x018"] + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=100] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ │ ├── ["7a\x00\x01*\x04\x00", "7a\x00\x01*\x04\x00"] + │ │ ├── ["7a\x00\x018", "7a\x00\x018"] + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(4)=1, null(4)=0] + │ │ histogram(4)= 0 100 + │ │ <--- '\x37000139' + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── (j:2->'a') <@ '[1, 2]' [type=bool, outer=(2), immutable] + +# A query with fetch val and contained by operators uses the inverted index +# when the fetch val is on the right side. 
+opt +SELECT * FROM t WHERE '"c"' <@ j->'a'->'b' +---- +index-join t + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + └── inverted-filter + ├── columns: k:1(int!null) + ├── inverted expression: /4 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── stats: [rows=2e-07] + ├── key: (1) + └── scan t@j_idx + ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + ├── inverted constraint: /4/1 + │ └── spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── stats: [rows=2e-07, distinct(1)=2e-07, null(1)=0, distinct(4)=2e-07, null(4)=0] + │ histogram(4)= + ├── key: (1) + └── fd: (1)-->(4) + +# A query with fetch val and contains operators uses the inverted index when +# the fetch val is on the right side. 
+opt +SELECT * FROM t WHERE '[1, 2]' @> j->'a'->'b' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=100] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ ├── ["7a\x00\x02b\x00\x01*\x02\x00", "7a\x00\x02b\x00\x01*\x02\x00"] + │ │ ├── ["7a\x00\x02b\x00\x01*\x04\x00", "7a\x00\x02b\x00\x01*\x04\x00"] + │ │ ├── ["7a\x00\x02b\x00\x018", "7a\x00\x02b\x00\x018"] + │ │ ├── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=100] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ ├── ["7a\x00\x02b\x00\x01*\x02\x00", "7a\x00\x02b\x00\x01*\x02\x00"] + │ │ ├── ["7a\x00\x02b\x00\x01*\x04\x00", "7a\x00\x02b\x00\x01*\x04\x00"] + │ │ ├── ["7a\x00\x02b\x00\x018", "7a\x00\x02b\x00\x018"] + │ │ ├── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(4)=1, null(4)=0] + │ │ histogram(4)= 0 100 + │ │ <--- '\x37000139' + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── '[1, 2]' @> ((j:2->'a')->'b') [type=bool, outer=(2), immutable] diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index 
ff1858c6dd5a..a08bee2bb606 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -2830,6 +2830,277 @@ project ├── key: (1) └── fd: (1)-->(6) +# Query using the fetch val and containment operators. +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' @> '"b"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inverted-filter + ├── columns: k:1!null + ├── inverted expression: /6 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ └── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + ├── key: (1) + └── scan b@j_inv_idx + ├── columns: k:1!null j_inverted_key:6!null + ├── inverted constraint: /6/1 + │ └── spans + │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ └── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + ├── key: (1) + └── fd: (1)-->(6) + +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' <@ '"b"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /6 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ └── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:6!null + │ ├── inverted constraint: /6/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ └── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ ├── key: (1) + │ └── fd: (1)-->(6) + └── filters + └── (j:4->'a') <@ '"b"' [outer=(4), immutable] + +# Chained fetch val operators and containment operator. 
+opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a'->'b' @> '"c"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inverted-filter + ├── columns: k:1!null + ├── inverted expression: /6 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── key: (1) + └── scan b@j_inv_idx + ├── columns: k:1!null j_inverted_key:6!null + ├── inverted constraint: /6/1 + │ └── spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── key: (1) + └── fd: (1)-->(6) + +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a'->'b' <@ '"c"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /6 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:6!null + │ ├── inverted constraint: /6/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── key: (1) + │ └── fd: (1)-->(6) + └── filters + └── ((j:4->'a')->'b') <@ '"c"' [outer=(4), immutable] + +# Query using the fetch val and contains operators in a disjunction. 
+opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' @> '"b"' OR j->'c' @> '"d"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inverted-filter + ├── columns: k:1!null + ├── inverted expression: /6 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ ├── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ └── ["7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01", "7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01"] + ├── key: (1) + └── scan b@j_inv_idx + ├── columns: k:1!null j_inverted_key:6!null + ├── inverted constraint: /6/1 + │ └── spans + │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ ├── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ └── ["7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01", "7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01"] + ├── key: (1) + └── fd: (1)-->(6) + +# Query using the fetch val and contains operators in a disjunction with a +# contained by operator. 
+opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' @> '["b"]' OR j <@ '{"c": "d"}' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /6 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ │ └── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:6!null + │ ├── inverted constraint: /6/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ │ └── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ ├── key: (1) + │ └── fd: (1)-->(6) + └── filters + └── ((j:4->'a') @> '["b"]') OR (j:4 <@ '{"c": "d"}') [outer=(4), immutable] + +# Query using the fetch val and contains operators in a conjunction. 
+opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' @> '"b"' AND j->'c' @> '"d"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inverted-filter + ├── columns: k:1!null + ├── inverted expression: /6 + │ ├── tight: true, unique: false + │ ├── union spans: empty + │ └── INTERSECTION + │ ├── span expression + │ │ ├── tight: true, unique: false + │ │ └── union spans + │ │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ │ └── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ └── span expression + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ └── ["7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01", "7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01"] + ├── key: (1) + └── scan b@j_inv_idx + ├── columns: k:1!null j_inverted_key:6!null + ├── inverted constraint: /6/1 + │ └── spans + │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ ├── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ └── ["7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01", "7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01"] + ├── key: (1) + └── fd: (1)-->(6) + +# Query using the fetch val and contains operators in conjunction with a +# contained by operator. 
+opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' @> '["b"]' AND j <@ '{"c": "d"}' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /6 + │ │ ├── tight: false, unique: false + │ │ ├── union spans: empty + │ │ └── INTERSECTION + │ │ ├── span expression + │ │ │ ├── tight: true, unique: true + │ │ │ └── union spans: ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ │ └── span expression + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ └── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:6!null + │ ├── inverted constraint: /6/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ │ └── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ ├── key: (1) + │ └── fd: (1)-->(6) + └── filters + └── j:4 <@ '{"c": "d"}' [outer=(4), immutable] + # GenerateInvertedIndexScans propagates row-level locking information. opt expect=GenerateInvertedIndexScans SELECT k FROM b WHERE j @> '{"a": "b"}' FOR UPDATE