From eae73f26228760e8abf65767aed6305fab89cb2f Mon Sep 17 00:00:00 2001 From: Angela Xu Date: Fri, 2 Apr 2021 17:04:19 -0700 Subject: [PATCH] opt: index accelerate JSON filters in the forms j->'a' @> '1' and j->'a'<@'1' We previously did not have inverted index support for expressions with a JSON fetch val operator on the left side of @> (contains) or <@ (contained by) expressions. This commit adds support to use the inverted index for query filters with JSON fetch val and containment operators. These include any contains or contained by expressions with fetch val or chained fetch val operators on the left side, and a constant value on the right side, including booleans, strings, numbers, nulls, arrays, and objects. Fixes #61430 Release note (performance improvement): Expressions with the -> (fetch val) operator on the left side of either <@ (contained by) or @> (contains) now support index-acceleration. --- .../testdata/logic_test/inverted_index | 199 ++++++++- .../exec/execbuilder/testdata/inverted_index | 275 +++++++++++++ .../exec/execbuilder/testdata/virtual_columns | 9 +- pkg/sql/opt/invertedidx/json_array.go | 286 ++++++++++--- pkg/sql/opt/invertedidx/json_array_test.go | 154 +++++++ pkg/sql/opt/memo/testdata/stats/inverted-json | 380 ++++++++++++++++++ pkg/sql/opt/xform/testdata/rules/select | 273 +++++++++++++ 7 files changed, 1506 insertions(+), 70 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_index b/pkg/sql/logictest/testdata/logic_test/inverted_index index dadfd2f6107d..30d9949bcf64 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_index +++ b/pkg/sql/logictest/testdata/logic_test/inverted_index @@ -767,7 +767,12 @@ INSERT INTO f VALUES (25, '{"a": {"b": "c", "d": "e"}}'), (26, '{"a": {"b": "c"}, "d": "e"}'), (27, '[1, 2, {"b": "c"}]'), - (28, '[{"a": {"b": "c"}}, "d", "e"]') + (28, '[{"a": {"b": "c"}}, "d", "e"]'), + (29, '{"a": null}'), + (30, '{"a": [1, 2, null]}'), + (31, 'null'), + (32, '{}'), + (33, '[]') query 
T SELECT j FROM f@i WHERE j->'a' = '1' ORDER BY k @@ -880,6 +885,198 @@ SELECT j FROM f@i WHERE j->'a' = '"b"' AND j->'c' = '[{"d": 1}, {"e": 2}]' ORDER ---- {"a": "b", "c": [{"d": 1}, {"e": 2}]} +# Expressions with fetch val and containment operators use the inverted index. +query T +SELECT j FROM f@i WHERE j->'a' @> '"b"' ORDER BY k +---- +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} +{"a": "b", "x": ["c", "d", "e"]} +{"a": "b", "c": [{"d": 1}, {"e": 2}]} + +query T +SELECT j FROM f@i WHERE j->'a' <@ '"b"' ORDER BY k +---- +{"a": "b", "x": ["c", "d", "e"]} +{"a": "b", "c": [{"d": 1}, {"e": 2}]} + +query T +SELECT j FROM f@i WHERE j->'a' @> 'null' ORDER BY k +---- +{"a": null} +{"a": [1, 2, null]} + +query T +SELECT j FROM f@i WHERE j->'a' <@ 'null' ORDER BY k +---- +{"a": null} + +query T +SELECT j FROM f@i WHERE j->'a' <@ '[]' ORDER BY k +---- +{"a": []} + +query T +SELECT j FROM f@i WHERE j->'a' <@ '{}' ORDER BY k +---- +{"a": {}} + +query T +SELECT j FROM f@i WHERE j->'a' @> '[]' ORDER BY k +---- +{"a": [1, 2]} +{"a": []} +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} +{"a": [1, 2, null]} + +query T +SELECT j FROM f@i WHERE j->'a' @> '{}' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} +{"a": {"d": 2}} +{"a": {"b": [1, 2]}} +{"a": {"b": {"c": 1}}} +{"a": {"b": {"c": 1, "d": 2}}} +{"a": {"b": {"d": 2}}} +{"a": {"b": {"c": [1, 2]}}} +{"a": {"b": {"c": [1, 2, 3]}}} +{"a": {}} +{"a": {"b": "c"}} +{"a": {"b": ["c", "d", "e"]}} +{"a": {"b": "c", "d": "e"}} +{"a": {"b": "c"}, "d": "e"} + +query T +SELECT j FROM f@i WHERE j->'a' <@ '{"b": [1, 2]}' ORDER BY k +---- +{"a": {"b": [1, 2]}} +{"a": {}} + +query T +SELECT j FROM f@i WHERE j->'a' <@ '{"b": {"c": [1, 2]}}' ORDER BY k +---- +{"a": {"b": {"c": [1, 2]}}} +{"a": {}} + +query T +SELECT j FROM f@i WHERE j->'a' @> '{"b": ["c"]}' ORDER BY k +---- +{"a": {"b": ["c", "d", "e"]}} + +query T +SELECT j FROM f@i WHERE j->'c' @> '[{"d": 1}]' ORDER BY k +---- +{"a": "b", "c": [{"d": 1}, {"e": 
2}]} + +# Expressions with chained fetch val and containment operators use the inverted +# index. +query T +SELECT j FROM f@i WHERE j->'a'->'b' <@ '1' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} + +query T +SELECT j FROM f@i WHERE j->'a'->'b' @> '1' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} +{"a": {"b": [1, 2]}} + +query T +SELECT j FROM f@i WHERE j->'a'->'b' @> '[1, 2]' ORDER BY k +---- +{"a": {"b": [1, 2]}} + +query T +SELECT j FROM f@i WHERE j->'a'->'b' <@ '[1, 2]' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} +{"a": {"b": [1, 2]}} + +query T +SELECT j FROM f@i WHERE j->'a'->'b' @> '"c"' ORDER BY k +---- +{"a": {"b": "c"}} +{"a": {"b": ["c", "d", "e"]}} +{"a": {"b": "c", "d": "e"}} +{"a": {"b": "c"}, "d": "e"} + +# Expressions with fetch val on the right side should use the inverted index. +query T +SELECT j FROM f@i WHERE '"b"' <@ j->'a' ORDER BY k +---- +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} +{"a": "b", "x": ["c", "d", "e"]} +{"a": "b", "c": [{"d": 1}, {"e": 2}]} + +query T +SELECT j FROM f@i WHERE '[1, 2]' <@ j->'a'->'b' ORDER BY k +---- +{"a": {"b": [1, 2]}} + +query T +SELECT j FROM f@i WHERE '{"b": {"c": [1, 2]}}' <@ j->'a' ORDER BY k +---- +{"a": {"b": {"c": [1, 2]}}} +{"a": {"b": {"c": [1, 2, 3]}}} + +# Conjunctions of fetch val and containment expressions use the inverted index. +query T +SELECT j FROM f@i WHERE j->'a' @> '"b"' AND '["c"]' <@ j->'a' ORDER BY k +---- +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} + +# query T +# SELECT j FROM f@i WHERE j->'a' <@ '{"b": [1, 2]}' AND j->'a'->'b' @> '[1]' ORDER BY k +# ---- + +# query T +# SELECT j FROM f@i WHERE j->'a' @> '"b"' AND j->'a' <@ '["b", "c", "d", "e"]' ORDER BY k +# ---- + +query T +SELECT j FROM f@i WHERE j->'a' @> '{"d": 2}' AND '[1, 2]' @> j->'a'->'b' ORDER BY k +---- +{"a": {"b": 1, "d": 2}} + +# Disjunctions of fetch val and containment expressions use the inverted index. 
+query T +SELECT j FROM f@i WHERE j->'a' @> '[1, 2]' OR j->'a'->'b' @> '[1, 2]' ORDER BY k +---- +{"a": [1, 2]} +{"a": {"b": [1, 2]}} +{"a": [1, 2, null]} + +query T +SELECT j FROM f@i WHERE j->'a' @> '"b"' OR j->'a'->'b' <@ '[1, 2]' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} +{"a": {"b": [1, 2]}} +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} +{"a": "b", "x": ["c", "d", "e"]} +{"a": "b", "c": [{"d": 1}, {"e": 2}]} + +query T +SELECT j FROM f@i WHERE j->'a'->'b' <@ '{"c": [1, 2], "d": 2}' OR j->'a'->'b' <@ '["c", "d", "e", 1, 2, 3]' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} +{"a": {"b": [1, 2]}} +{"a": {"b": {"d": 2}}} +{"a": {"b": {"c": [1, 2]}}} +{"a": {"b": "c"}} +{"a": {"b": ["c", "d", "e"]}} +{"a": {"b": "c", "d": "e"}} +{"a": {"b": "c"}, "d": "e"} + subtest arrays statement ok diff --git a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index index 6d560e310ed2..b875f1604af1 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index @@ -377,6 +377,281 @@ vectorized: true • norows columns: (a, b) +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' @> '"b"' +---- +distribution: local +vectorized: true +· +• index join +│ columns: (a, b) +│ estimated row count: 111 (missing stats) +│ table: d@primary +│ key columns: a +│ +└── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 2 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /"a"/"b"-/"a"/"b"/PrefixEnd /"a"/Arr/"b"-/"a"/Arr/"b"/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d@foo_inv where b->'a'->'c' @> '"b"' +---- +distribution: local +vectorized: true +· +• index join +│ columns: (a, b) +│ estimated row count: 111 (missing stats) +│ 
table: d@primary +│ key columns: a +│ +└── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 2 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /"a"/"c"/"b"-/"a"/"c"/"b"/PrefixEnd /"a"/"c"/Arr/"b"-/"a"/"c"/Arr/"b"/PrefixEnd + +# TODO(angelazxu): The {} span does not need to be scanned here, but is + # included when finding spans contained by {"a": "b"} (see #63184). +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' <@ '"b"' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: (b->'a') <@ '"b"' +│ +└── • index join + │ columns: (a, b) + │ estimated row count: 111 (missing stats) + │ table: d@primary + │ key columns: a + │ + └── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 2 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /{}-/{}/PrefixEnd /"a"/"b"-/"a"/"b"/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d@foo_inv where b->'a'->'c' <@ '"b"' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: ((b->'a')->'c') <@ '"b"' +│ +└── • index join + │ columns: (a, b) + │ estimated row count: 111 (missing stats) + │ table: d@primary + │ key columns: a + │ + └── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 4 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /{}-/{}/PrefixEnd /"a"/{}-/"a"/{}/PrefixEnd 
/"a"/{}-/"a"/{}/PrefixEnd /"a"/"c"/"b"-/"a"/"c"/"b"/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' @> '[1, 2]' +---- +distribution: local +vectorized: true +· +• lookup join (inner) +│ columns: (a, b) +│ estimated row count: 12 (missing stats) +│ table: d@primary +│ equality: (a) = (a) +│ equality cols are key +│ pred: (b->'a') @> '[1, 2]' +│ +└── • zigzag join + columns: (a) + estimated row count: 12 (missing stats) + left table: d@foo_inv + left columns: (a) + left fixed values: 1 column + right table: d@foo_inv + right columns: () + right fixed values: 1 column + +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' <@ '[1, 2]' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: (b->'a') <@ '[1, 2]' +│ +└── • index join + │ columns: (a, b) + │ estimated row count: 111 (missing stats) + │ table: d@primary + │ key columns: a + │ + └── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 7 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /{}-/{}/PrefixEnd /"a"/1-/"a"/1/PrefixEnd /"a"/2-/"a"/2/PrefixEnd /"a"/[]-/"a"/{} /"a"/[]-/"a"/{} /"a"/Arr/1-/"a"/Arr/1/PrefixEnd /"a"/Arr/2-/"a"/Arr/2/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' @> '{"d": 2}' +---- +distribution: local +vectorized: true +· +• index join +│ columns: (a, b) +│ estimated row count: 111 (missing stats) +│ table: d@primary +│ key columns: a +│ +└── • scan + columns: (a) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /"a"/"d"/2-/"a"/"d"/2/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d where b->'a' <@ '{"d": 2}' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: 
(b->'a') <@ '{"d": 2}' +│ +└── • index join + │ columns: (a, b) + │ estimated row count: 111 (missing stats) + │ table: d@primary + │ key columns: a + │ + └── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 4 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /{}-/{}/PrefixEnd /"a"/{}-/"a"/{}/PrefixEnd /"a"/{}-/"a"/{}/PrefixEnd /"a"/"d"/2-/"a"/"d"/2/PrefixEnd + +query T +EXPLAIN (VERBOSE) SELECT * from d where '"b"' <@ b->'a' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: '"b"' <@ (b->'a') +│ +└── • scan + columns: (a, b) + estimated row count: 1,000 (missing stats) + table: d@primary + spans: FULL SCAN + +query T +EXPLAIN (VERBOSE) SELECT * from d where '[1, 2]' @> b->'a' +---- +distribution: local +vectorized: true +· +• filter +│ columns: (a, b) +│ estimated row count: 333 (missing stats) +│ filter: '[1, 2]' @> (b->'a') +│ +└── • index join + │ columns: (a, b) + │ estimated row count: 111 (missing stats) + │ table: d@primary + │ key columns: a + │ + └── • project + │ columns: (a) + │ estimated row count: 111 (missing stats) + │ + └── • inverted filter + │ columns: (a, b_inverted_key) + │ inverted column: b_inverted_key + │ num spans: 7 + │ + └── • scan + columns: (a, b_inverted_key) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /{}-/{}/PrefixEnd /"a"/1-/"a"/1/PrefixEnd /"a"/2-/"a"/2/PrefixEnd /"a"/[]-/"a"/{} /"a"/[]-/"a"/{} /"a"/Arr/1-/"a"/Arr/1/PrefixEnd /"a"/Arr/2-/"a"/Arr/2/PrefixEnd + query T EXPLAIN (VERBOSE) SELECT * from d where '"b"' = b->'a' ---- diff --git a/pkg/sql/opt/exec/execbuilder/testdata/virtual_columns b/pkg/sql/opt/exec/execbuilder/testdata/virtual_columns index 9fd5b963ce86..513679908f22 100644 --- 
a/pkg/sql/opt/exec/execbuilder/testdata/virtual_columns +++ b/pkg/sql/opt/exec/execbuilder/testdata/virtual_columns @@ -1417,7 +1417,10 @@ vectorized: true table: inv@iv_j_idx spans: /10/"a"/"b"-/10/"a"/"b"/PrefixEnd /20/"a"/"b"-/20/"a"/"b"/PrefixEnd /30/"a"/"b"-/30/"a"/"b"/PrefixEnd -# Verify that we use iv_jv_idx. +# TODO(angelazxu): We previously chose iv_jv_idx here but now use iv_j_idx. +# This was a result of implementing index-acceleration for expressions of the +# form j->'a' @> '1' (#63048). There may be some costing estimates that aren't +# accurate and need to be investigated here. query T EXPLAIN (VERBOSE) SELECT k FROM inv WHERE iv IN (10, 20, 30) AND jv @> '{"a": "b"}' ---- @@ -1427,5 +1430,5 @@ vectorized: true • scan columns: (k) estimated row count: 3 (missing stats) - table: inv@iv_jv_idx - spans: /10/"a"/"b"-/10/"a"/"b"/PrefixEnd /20/"a"/"b"-/20/"a"/"b"/PrefixEnd /30/"a"/"b"-/30/"a"/"b"/PrefixEnd + table: inv@iv_j_idx + spans: /10/"a"/"a"/"b"-/10/"a"/"a"/"b"/PrefixEnd /20/"a"/"a"/"b"-/20/"a"/"a"/"b"/PrefixEnd /30/"a"/"a"/"b"-/30/"a"/"a"/"b"/PrefixEnd diff --git a/pkg/sql/opt/invertedidx/json_array.go b/pkg/sql/opt/invertedidx/json_array.go index d761fa0660e3..989b821df082 100644 --- a/pkg/sql/opt/invertedidx/json_array.go +++ b/pkg/sql/opt/invertedidx/json_array.go @@ -383,7 +383,17 @@ func (j *jsonOrArrayFilterPlanner) extractJSONOrArrayContainsCondition( indexColumn, constantVal = right, left containedBy = !containedBy } else { - // If neither condition is met, we cannot create an InvertedExpression. + if fetch, ok := left.(*memo.FetchValExpr); ok { + // When the expression has a JSON fetch operator on the left, it is + // handled in extractJSONFetchValContainsCondition. 
+ return j.extractJSONFetchValContainsCondition(evalCtx, fetch, right, containedBy) + } else if fetch, ok := right.(*memo.FetchValExpr); ok { + // When the expression has a JSON fetch operator on the right, it is + // handled in extractJSONFetchValContainsCondition as an equivalent + // expression with right and left swapped. + return j.extractJSONFetchValContainsCondition(evalCtx, fetch, left, !containedBy) + } + // If none of the conditions are met, we cannot create an InvertedExpression. return inverted.NonInvertedColExpression{} } d := memo.ExtractConstDatum(constantVal) @@ -403,7 +413,7 @@ func (j *jsonOrArrayFilterPlanner) extractJSONOrArrayContainsCondition( // extractJSONFetchValEqCondition extracts an InvertedExpression representing an // inverted filter over the planner's inverted index, based on equality between -// a chain of fetch val expressions and a right scalar expression. If an +// a chain of fetch val expressions and a scalar expression. If an // InvertedExpression cannot be generated from the expression, an // inverted.NonInvertedColExpression is returned. // @@ -415,7 +425,7 @@ func (j *jsonOrArrayFilterPlanner) extractJSONOrArrayContainsCondition( func (j *jsonOrArrayFilterPlanner) extractJSONFetchValEqCondition( evalCtx *tree.EvalContext, left *memo.FetchValExpr, right opt.ScalarExpr, ) inverted.Expression { - // The right side of the equals expression should be a constant JSON value. + // The right side of the expression should be a constant JSON value. if !memo.CanExtractConstDatum(right) { return inverted.NonInvertedColExpression{} } @@ -424,86 +434,230 @@ func (j *jsonOrArrayFilterPlanner) extractJSONFetchValEqCondition( return inverted.NonInvertedColExpression{} } - // Recursively traverse fetch val expressions and collect keys with which to - // build the InvertedExpression. If it is not possible to build an inverted - // expression from the tree of fetch val expressions, collectKeys returns - // early and foundKeys remains false. 
If successful, foundKeys is set to - // true and JSON fetch value indexes are collected in keys. The keys are - // ordered by the outer-most fetch val index first. The outer-most fetch val - // index is the right-most in the -> chain, for example (j->'a'->'b') is - // equivalent to ((j->'a')->'b') and 'b' is the outer-most fetch val index. - // - // Later on, we iterate forward through these keys to build a JSON object - // from the inside-out with the inner-most value being the JSON scalar - // extracted above from the right ScalarExpr function argument. In the - // resulting JSON object, the outer-most JSON fetch value indexes are the - // inner most JSON object keys. - // - // As an example, when left is (j->'a'->'b') and right is ('1'), the keys - // {"b", "a"} are collected and the JSON object {"a": {"b": 1}} is built. - foundKeys := false + // Collect a slice of keys from the fetch val expression. var keys []string - var collectKeys func(fetch *memo.FetchValExpr) - collectKeys = func(fetch *memo.FetchValExpr) { - // The right side of the fetch val expression, the Index field, must be - // a constant string. If not, then we cannot build an inverted - // expression. - if !memo.CanExtractConstDatum(fetch.Index) { - return - } - key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString) - if !ok { - return - } + keys, foundKeys := j.collectKeys(keys, left) + if !foundKeys { + return inverted.NonInvertedColExpression{} + } - // Append the key to the list of keys. - keys = append(keys, string(*key)) + // Build a new JSON object with the collected keys and val. + obj := buildObject(keys, val.JSON) - // If the left side of the fetch val expression, the Json field, is a - // variable or expression corresponding to the index column, then we - // have found a valid list of keys to build an inverted expression. 
- if isIndexColumn(j.tabID, j.index, fetch.Json, j.computedColumns) { - foundKeys = true - return - } + var invertedExpr inverted.Expression + // For Equals expressions, we will generate the inverted expression for the + // single object built from the keys and val. + invertedExpr = getInvertedExprForJSONOrArrayIndexForContaining(evalCtx, tree.NewDJSON(obj)) - // If the left side of the fetch val expression is another fetch val - // expression, recursively collect its keys. - if innerFetch, ok := fetch.Json.(*memo.FetchValExpr); ok { - collectKeys(innerFetch) - } + // When the right side is an array or object, the InvertedExpression + // generated is not tight. We must indicate it is non-tight so an additional + // filter is added. + typ := val.JSON.Type() + if typ == json.ArrayJSONType || typ == json.ObjectJSONType { + invertedExpr.SetNotTight() + } + return invertedExpr +} - // Otherwise, we cannot build an inverted expression. +// extractJSONFetchValContainsCondition extracts an InvertedExpression +// representing an inverted filter over the planner's inverted index, based on +// containment between a chain of fetch val expressions and a scalar +// expression. If an InvertedExpression cannot be generated from the +// expression, an inverted.NonInvertedColExpression is returned. +// +// In order to generate an InvertedExpression, left must be a fetch val +// expression in the form [col]->[index0]->[index1]->...->[indexN] where col is +// a variable or expression referencing the inverted column in the inverted +// index and each index is a constant string. The right expression must be a +// constant JSON value. For expressions with a left constant value and a right +// fetch val expression, the arguments will be swapped when passed in. +// +// The type of operator is indicated by the containedBy parameter, which is +// true for <@ and false for @>. 
+func (j *jsonOrArrayFilterPlanner) extractJSONFetchValContainsCondition( + evalCtx *tree.EvalContext, left *memo.FetchValExpr, right opt.ScalarExpr, containedBy bool, +) inverted.Expression { + // The right side of the expression should be a constant JSON value. + if !memo.CanExtractConstDatum(right) { + return inverted.NonInvertedColExpression{} } - collectKeys(left) + val, ok := memo.ExtractConstDatum(right).(*tree.DJSON) + if !ok { + return inverted.NonInvertedColExpression{} + } + + // Collect a slice of keys from the fetch val expression. + var keys []string + keys, foundKeys := j.collectKeys(keys, left) if !foundKeys { return inverted.NonInvertedColExpression{} } - // Build a new JSON object of the form: - // {<keyN>: ... {<key1>: {key0: <val>}}} - // Note that key0 is the outer-most fetch val index, so the expression - // j->'a'->'b' = 1 results in {"a": {"b": 1}}. + // Build a new JSON object with the collected keys and val. + obj := buildObject(keys, val.JSON) + + var invertedExpr inverted.Expression + + // For Contains and ContainedBy expressions, we may need to build additional + // objects to cover all possibilities. + objs, err := buildFetchContainmentObjects(keys, val.JSON, containedBy) + if err != nil { + return inverted.NonInvertedColExpression{} + } + objs = append(objs, obj) + // We get an inverted expression for each object constructed, and union + // these expressions. + for i := range objs { + var expr inverted.Expression + if containedBy { + expr = getInvertedExprForJSONOrArrayIndexForContainedBy(evalCtx, tree.NewDJSON(objs[i])) + } else { + expr = getInvertedExprForJSONOrArrayIndexForContaining(evalCtx, tree.NewDJSON(objs[i])) + } + if invertedExpr == nil { + invertedExpr = expr + } else { + invertedExpr = inverted.Or(invertedExpr, expr) + } + } + return invertedExpr +} + +// collectKeys is called on fetch val expressions to find the corresponding +// keys used to build a JSON object. 
It recursively traverses the fetch val +// expressions and collects keys with which to build the InvertedExpression. +// If it is not possible to build an inverted expression from the tree of fetch +// val expressions, collectKeys returns nil for keys and foundKeys is false. +// If successful, foundKeys is set to true and JSON fetch value indexes are +// collected in keys. The keys are ordered by the outer-most fetch val index +// first. The outer-most fetch val index is the right-most in the -> chain, +// for example (j->'a'->'b') is equivalent to ((j->'a')->'b') and 'b' is the +// outer-most fetch val index. +// +// Later on, we iterate forward through these keys to build a JSON object +// from the inside-out with the inner-most value being the JSON scalar +// extracted above from the right ScalarExpr function argument. In the +// resulting JSON object, the outer-most JSON fetch value indexes are the +// inner most JSON object keys. +// +// As an example, when left is (j->'a'->'b') and right is ('1'), the keys +// {"b", "a"} are collected and the JSON object {"a": {"b": 1}} is built. +func (j *jsonOrArrayFilterPlanner) collectKeys( + currKeys []string, fetch *memo.FetchValExpr, +) (keys []string, foundKeys bool) { + // The right side of the fetch val expression, the Index field, must be + // a constant string. If not, then we cannot build an inverted + // expression. + if !memo.CanExtractConstDatum(fetch.Index) { + return + } + key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString) + if !ok { + return + } + + // Append the key to the list of keys. + keys = append(currKeys, string(*key)) + + // If the left side of the fetch val expression, the Json field, is a + // variable or expression corresponding to the index column, then we + // have found a valid list of keys to build an inverted expression. 
+ if isIndexColumn(j.tabID, j.index, fetch.Json, j.computedColumns) { + return keys, true + } + + // If the left side of the fetch val expression is another fetch val + // expression, recursively collect its keys. + if innerFetch, ok := fetch.Json.(*memo.FetchValExpr); ok { + keys, foundKeys := j.collectKeys(keys, innerFetch) + return keys, foundKeys + } + // Otherwise, we cannot build an inverted expression. + return nil, false +} + +// buildFetchContainmentObjects constructs new JSON objects with given keys and val. +// The keys and val are extracted from a fetch val containment expression, and +// the objects constructed depend on the value type and whether the expression +// uses <@ or @>. For example, the expression j->'a'->'b' @> "c" would have +// {"b", "a"} as keys, "c" as val, and construct {"a": {"b": ["c"]}}. +// An array of the constructed JSONs is returned. +func buildFetchContainmentObjects( + keys []string, val json.JSON, containedBy bool, +) ([]json.JSON, error) { + var objs []json.JSON + typ := val.Type() + switch typ { + case json.ArrayJSONType: + // For arrays in ContainedBy expressions, we must create a scalar value + // object, because getInvertedExprForJSONOrArrayIndexForContainedBy will + // not include the scalar value spans. + + // Array value examples: + // j->'a' @> '[1]', no new object required, we already have '{"a": [1]}' + // j->'a' <@ '[1]', build '{"a": 1}', we already have '{"a": [1]}' + // j->'a' <@ '[1, [2], 3]', build '{"a": 1}', '{"a": 3}', we already have '{"a": [1, [2], 3]}' + if containedBy { + for i := 0; i < val.Len(); i++ { + v, err := val.FetchValIdx(i) + if err != nil { + return nil, err + } + t := v.Type() + if t == json.ArrayJSONType || t == json.ObjectJSONType { + // The scalar value is only needed for non-nested arrays and objects. 
+ continue + } + newObj := buildObject(keys, v) + objs = append(objs, newObj) + } + } + + case json.ObjectJSONType: + // For objects in ContainedBy expressions, we do not need to generate the + // empty object value for each level of nesting, because the spans will be + // added for us in getInvertedExprForJSONOrArrayIndexForContainedBy. + // For objects in Contains expressions, no additional spans are required + // outside of the given object's spans. + + // Object value examples: + // j->'a' @> '{"b": 2}', we already have '{"a": {"b": 2}}' + // j->'a' <@ '{"b": 2}', we already have '{"a": {"b": 2}}' + return nil, nil + + default: + // For scalars in Contains expressions, we construct an array value + // containing the scalar. + + // Scalar value examples: + // j->'a' @> '1', build '{"a": [1]}', we already have '{"a": 1}' + // j->'a' <@ '1', we already have '{"a": 1}' + if !containedBy { + arr := json.NewArrayBuilder(1) + arr.Add(val) + v := arr.Build() + newObj := buildObject(keys, v) + objs = append(objs, newObj) + } + } + return objs, nil +} + +// buildObject constructs a new JSON object of the form: +// {<keyN>: ... {<key1>: {key0: <val>}}} +// Where the keys and val are extracted from a fetch val expression by the +// caller. Note that key0 is the outer-most fetch val index, so the expression +// j->'a'->'b' = 1 results in {"a": {"b": 1}}. +func buildObject(keys []string, val json.JSON) json.JSON { var obj json.JSON for i := 0; i < len(keys); i++ { b := json.NewObjectBuilder(1) if i == 0 { - b.Add(keys[i], val.JSON) + b.Add(keys[i], val) } else { b.Add(keys[i], obj) } obj = b.Build() } - - invertedExpr := getInvertedExprForJSONOrArrayIndexForContaining(evalCtx, tree.NewDJSON(obj)) - - // When the right side is an array or object, the InvertedExpression - // generated is not tight. We must indicate it is non-tight so an additional - // filter is added. 
- typ := val.JSON.Type() - if typ == json.ArrayJSONType || typ == json.ObjectJSONType { - invertedExpr.SetNotTight() - } - - return invertedExpr + return obj } diff --git a/pkg/sql/opt/invertedidx/json_array_test.go b/pkg/sql/opt/invertedidx/json_array_test.go index 169a1a07048c..6ffc634c7ed8 100644 --- a/pkg/sql/opt/invertedidx/json_array_test.go +++ b/pkg/sql/opt/invertedidx/json_array_test.go @@ -548,6 +548,160 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) { unique: true, remainingFilters: "j @> '[[1, 2]]'", }, + { + // Contains is supported with a fetch val operator on the left. + filters: `j->'a' @> '1'`, + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: false, + remainingFilters: "", + }, + { + // Contains is supported with chained fetch val operators on the left. + filters: `j->'a'->'b' @> '1'`, + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: false, + remainingFilters: "", + }, + { + // Contains with a fetch val is supported for JSON arrays. + filters: `j->'a'->'b' @> '[1, 2]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: true, + remainingFilters: "j->'a'->'b' @> '[1, 2]'", + }, + { + filters: `j->'a'->'b' @> '[[1, 2]]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: true, + remainingFilters: "j->'a'->'b' @> '[[1, 2]]'", + }, + { + // Contains with a fetch val is supported for JSON objects. 
+ filters: `j->'a'->'b' @> '{"c": 1}'`, + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: true, + remainingFilters: "", + }, + { + filters: `j->'a'->'b' @> '{"c": {"d": "e"}}'`, + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: true, + remainingFilters: "", + }, + { + filters: `j->'a'->'b' @> '[{"c": 1, "d": "2"}]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: true, + remainingFilters: "j->'a'->'b' @> '[{\"c\": 1, \"d\": \"2\"}]'", + }, + { + filters: `j->'a'->'b' @> '{"c": [1, 2], "d": "2"}'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: true, + remainingFilters: "j->'a'->'b' @> '{\"c\": [1, 2], \"d\": \"2\"}'", + }, + { + // ContainedBy is supported with a fetch val operator on the left. + filters: `j->'a' <@ '1'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a' <@ '1'", + }, + { + // ContainedBy is supported with chained fetch val operators on the left. + filters: `j->'a'->'b' <@ '1'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '1'", + }, + { + // ContainedBy with a fetch val is supported for JSON arrays. + filters: `j->'a'->'b' <@ '[1, 2]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '[1, 2]'", + }, + { + filters: `j->'a'->'b' <@ '[[1, 2]]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '[[1, 2]]'", + }, + { + // ContainedBy with a fetch val is supported for JSON objects. 
+ filters: `j->'a'->'b' <@ '{"c": 1}'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '{\"c\": 1}'", + }, + { + filters: `j->'a'->'b' <@ '{"c": {"d": "e"}}'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '{\"c\": {\"d\": \"e\"}}'", + }, + { + filters: `j->'a'->'b' <@ '[{"c": 1, "d": "2"}]'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '[{\"c\": 1, \"d\": \"2\"}]'", + }, + { + filters: `j->'a'->'b' <@ '{"c": [1, 2], "d": "2"}'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "j->'a'->'b' <@ '{\"c\": [1, 2], \"d\": \"2\"}'", + }, + { + // Contains is supported with a fetch val operator on the right. + filters: `'1' @> j->'a'`, + indexOrd: jsonOrd, + ok: true, + tight: false, + unique: false, + remainingFilters: "'1' @> j->'a'", + }, + { + // ContainedBy is supported with a fetch val operator on the right. + filters: `'1' <@ j->'a'`, + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: false, + remainingFilters: "", + }, } for _, tc := range testCases { diff --git a/pkg/sql/opt/memo/testdata/stats/inverted-json b/pkg/sql/opt/memo/testdata/stats/inverted-json index 0c1215f96716..46d2f5c46247 100644 --- a/pkg/sql/opt/memo/testdata/stats/inverted-json +++ b/pkg/sql/opt/memo/testdata/stats/inverted-json @@ -984,3 +984,383 @@ select │ └── fd: (1)-->(4) └── filters └── (j:2->'a') = '{}' [type=bool, outer=(2), immutable] + +# A query with fetch val and contains operators uses the inverted index. 
+opt +SELECT * FROM t WHERE j->'a' @> '1' +---- +index-join t + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=222.222222] + ├── key: (1) + ├── fd: (1)-->(2) + └── inverted-filter + ├── columns: k:1(int!null) + ├── inverted expression: /4 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ └── ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + ├── stats: [rows=2e-07] + ├── key: (1) + └── scan t@j_idx + ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + ├── inverted constraint: /4/1 + │ └── spans + │ ├── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ └── ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + ├── stats: [rows=2e-07, distinct(1)=2e-07, null(1)=0, distinct(4)=2e-07, null(4)=0] + │ histogram(4)= + ├── key: (1) + └── fd: (1)-->(4) + +# A query with fetch val and contained by operators uses the inverted index, +# and the expression is not tight. 
+opt +SELECT * FROM t WHERE j->'a' <@ '1' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=100] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ └── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ ├── stats: [rows=100] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ └── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(4)=1, null(4)=0] + │ │ histogram(4)= 0 100 + │ │ <--- '\x37000139' + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── (j:2->'a') <@ '1' [type=bool, outer=(2), immutable] + +# A query with chained fetch val and contains operators uses the inverted index. 
+opt +SELECT * FROM t WHERE j->'a'->'b' @> '"c"' +---- +index-join t + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=222.222222] + ├── key: (1) + ├── fd: (1)-->(2) + └── inverted-filter + ├── columns: k:1(int!null) + ├── inverted expression: /4 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── stats: [rows=2e-07] + ├── key: (1) + └── scan t@j_idx + ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + ├── inverted constraint: /4/1 + │ └── spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── stats: [rows=2e-07, distinct(1)=2e-07, null(1)=0, distinct(4)=2e-07, null(4)=0] + │ histogram(4)= + ├── key: (1) + └── fd: (1)-->(4) + +# A query with chained fetch val and contained by operators uses the inverted +# index, and the expression is not tight. 
+opt +SELECT * FROM t WHERE j->'a'->'b' <@ '"c"' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=100] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x019", "7a\x00\x02\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── stats: [rows=100] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x019", "7a\x00\x02\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(4)=1, null(4)=0] + │ │ histogram(4)= 0 100 + │ │ <--- '\x37000139' + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── ((j:2->'a')->'b') <@ '"c"' [type=bool, outer=(2), immutable] + +# A query with fetch val and contains operators uses the inverted index when an +# object is on the right side. 
+opt +SELECT * FROM t WHERE j->'a' @> '{"b": "c"}' +---- +index-join t + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=222.222222] + ├── key: (1) + ├── fd: (1)-->(2) + └── scan t@j_idx + ├── columns: k:1(int!null) + ├── inverted constraint: /4/1 + │ └── spans: ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + ├── stats: [rows=2e-07, distinct(4)=2e-07, null(4)=0] + │ histogram(4)= + └── key: (1) + +# A query with fetch val and contained by operators uses the inverted index +# when an object is on the right side, and the expression is not tight. +opt +SELECT * FROM t WHERE j->'a' <@ '{"b": "c"}' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=100] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x019", "7a\x00\x02\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── stats: [rows=100] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x019", "7a\x00\x02\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(4)=1, null(4)=0] + │ │ histogram(4)= 0 100 + │ │ <--- '\x37000139' + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── (j:2->'a') <@ '{"b": "c"}' [type=bool, outer=(2), immutable] + +# A query with fetch val and contains operators uses the inverted 
index when an +# array is on the right side, and the expression is not tight. +opt +SELECT * FROM t WHERE j->'a' @> '[1, 2]' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=24.691358] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=2e-07] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: true + │ │ ├── union spans: empty + │ │ └── INTERSECTION + │ │ ├── span expression + │ │ │ ├── tight: true, unique: true + │ │ │ └── union spans: ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── span expression + │ │ ├── tight: true, unique: true + │ │ └── union spans: ["7a\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=2e-07] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=2e-07, distinct(1)=2e-07, null(1)=0, distinct(4)=2e-07, null(4)=0] + │ │ histogram(4)= + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── (j:2->'a') @> '[1, 2]' [type=bool, outer=(2), immutable] + +# A query with fetch val and contained by operators uses the inverted index +# when an array is on the right side, and the expression is not tight. 
+opt +SELECT * FROM t WHERE j->'a' <@ '[1, 2]' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=100] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ │ ├── ["7a\x00\x01*\x04\x00", "7a\x00\x01*\x04\x00"] + │ │ ├── ["7a\x00\x018", "7a\x00\x018"] + │ │ ├── ["7a\x00\x02\x00\x018", "7a\x00\x02\x00\x018"] + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=100] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x01*\x02\x00", "7a\x00\x01*\x02\x00"] + │ │ ├── ["7a\x00\x01*\x04\x00", "7a\x00\x01*\x04\x00"] + │ │ ├── ["7a\x00\x018", "7a\x00\x018"] + │ │ ├── ["7a\x00\x02\x00\x018", "7a\x00\x02\x00\x018"] + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(4)=1, null(4)=0] + │ │ histogram(4)= 0 100 + │ │ <--- '\x37000139' + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── (j:2->'a') <@ '[1, 2]' [type=bool, outer=(2), immutable] + +# A query with fetch val and contained by operators uses the inverted index +# when the fetch val is on the right side. 
+opt +SELECT * FROM t WHERE '"c"' <@ j->'a'->'b' +---- +index-join t + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + └── inverted-filter + ├── columns: k:1(int!null) + ├── inverted expression: /4 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── stats: [rows=2e-07] + ├── key: (1) + └── scan t@j_idx + ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + ├── inverted constraint: /4/1 + │ └── spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── stats: [rows=2e-07, distinct(1)=2e-07, null(1)=0, distinct(4)=2e-07, null(4)=0] + │ histogram(4)= + ├── key: (1) + └── fd: (1)-->(4) + +# A query with fetch val and contains operators uses the inverted index when +# the fetch val is on the right side. 
+opt +SELECT * FROM t WHERE '[1, 2]' @> j->'a'->'b' +---- +select + ├── columns: k:1(int!null) j:2(jsonb) + ├── immutable + ├── stats: [rows=666.666667] + ├── key: (1) + ├── fd: (1)-->(2) + ├── index-join t + │ ├── columns: k:1(int!null) j:2(jsonb) + │ ├── stats: [rows=100] + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── inverted-filter + │ ├── columns: k:1(int!null) + │ ├── inverted expression: /4 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x019", "7a\x00\x02\x00\x019"] + │ │ ├── ["7a\x00\x02b\x00\x01*\x02\x00", "7a\x00\x02b\x00\x01*\x02\x00"] + │ │ ├── ["7a\x00\x02b\x00\x01*\x04\x00", "7a\x00\x02b\x00\x01*\x04\x00"] + │ │ ├── ["7a\x00\x02b\x00\x018", "7a\x00\x02b\x00\x018"] + │ │ ├── ["7a\x00\x02b\x00\x02\x00\x018", "7a\x00\x02b\x00\x02\x00\x018"] + │ │ ├── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=100] + │ ├── key: (1) + │ └── scan t@j_idx + │ ├── columns: k:1(int!null) j_inverted_key:4(jsonb!null) + │ ├── inverted constraint: /4/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x019", "7a\x00\x02\x00\x019"] + │ │ ├── ["7a\x00\x02b\x00\x01*\x02\x00", "7a\x00\x02b\x00\x01*\x02\x00"] + │ │ ├── ["7a\x00\x02b\x00\x01*\x04\x00", "7a\x00\x02b\x00\x01*\x04\x00"] + │ │ ├── ["7a\x00\x02b\x00\x018", "7a\x00\x02b\x00\x018"] + │ │ ├── ["7a\x00\x02b\x00\x02\x00\x018", "7a\x00\x02b\x00\x02\x00\x018"] + │ │ ├── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x02\x00", "7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x02\x00"] + │ │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x04\x00", "7a\x00\x02b\x00\x02\x00\x03\x00\x01*\x04\x00"] + │ ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(4)=1, null(4)=0] + │ │ 
histogram(4)= 0 100 + │ │ <--- '\x37000139' + │ ├── key: (1) + │ └── fd: (1)-->(4) + └── filters + └── '[1, 2]' @> ((j:2->'a')->'b') [type=bool, outer=(2), immutable] diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index 5de7717a8ee8..953c89193a06 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -2836,6 +2836,279 @@ project ├── key: (1) └── fd: (1)-->(6) +# Query using the fetch val and containment operators. +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' @> '"b"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inverted-filter + ├── columns: k:1!null + ├── inverted expression: /6 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ └── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + ├── key: (1) + └── scan b@j_inv_idx + ├── columns: k:1!null j_inverted_key:6!null + ├── inverted constraint: /6/1 + │ └── spans + │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ └── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + ├── key: (1) + └── fd: (1)-->(6) + +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' <@ '"b"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /6 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ └── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:6!null + │ ├── inverted constraint: /6/1 + │ │ └── spans + │ │ ├── 
["7\x00\x019", "7\x00\x019"] + │ │ └── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ ├── key: (1) + │ └── fd: (1)-->(6) + └── filters + └── (j:4->'a') <@ '"b"' [outer=(4), immutable] + +# Chained fetch val operators and containment operator. +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a'->'b' @> '"c"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inverted-filter + ├── columns: k:1!null + ├── inverted expression: /6 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── key: (1) + └── scan b@j_inv_idx + ├── columns: k:1!null j_inverted_key:6!null + ├── inverted constraint: /6/1 + │ └── spans + │ ├── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ └── ["7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x02\x00\x03\x00\x01\x12c\x00\x01"] + ├── key: (1) + └── fd: (1)-->(6) + +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a'->'b' <@ '"c"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /6 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", "7a\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x019", "7a\x00\x02\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:6!null + │ ├── inverted constraint: /6/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x019", 
"7a\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x019", "7a\x00\x02\x00\x019"] + │ │ └── ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + │ ├── key: (1) + │ └── fd: (1)-->(6) + └── filters + └── ((j:4->'a')->'b') <@ '"c"' [outer=(4), immutable] + +# Query using the fetch val and contains operators in a disjunction. +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' @> '"b"' OR j->'c' @> '"d"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inverted-filter + ├── columns: k:1!null + ├── inverted expression: /6 + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ ├── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ └── ["7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01", "7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01"] + ├── key: (1) + └── scan b@j_inv_idx + ├── columns: k:1!null j_inverted_key:6!null + ├── inverted constraint: /6/1 + │ └── spans + │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ ├── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ └── ["7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01", "7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01"] + ├── key: (1) + └── fd: (1)-->(6) + +# Query using the fetch val and contains operators in a disjunction with a +# contained by operator. 
+opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' @> '["b"]' OR j <@ '{"c": "d"}' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /6 + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ │ └── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:6!null + │ ├── inverted constraint: /6/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ │ └── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ ├── key: (1) + │ └── fd: (1)-->(6) + └── filters + └── ((j:4->'a') @> '["b"]') OR (j:4 <@ '{"c": "d"}') [outer=(4), immutable] + +# Query using the fetch val and contains operators in a conjunction. 
+opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' @> '"b"' AND j->'c' @> '"d"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── inverted-filter + ├── columns: k:1!null + ├── inverted expression: /6 + │ ├── tight: true, unique: false + │ ├── union spans: empty + │ └── INTERSECTION + │ ├── span expression + │ │ ├── tight: true, unique: false + │ │ └── union spans + │ │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ │ └── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ └── span expression + │ ├── tight: true, unique: false + │ └── union spans + │ ├── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ └── ["7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01", "7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01"] + ├── key: (1) + └── scan b@j_inv_idx + ├── columns: k:1!null j_inverted_key:6!null + ├── inverted constraint: /6/1 + │ └── spans + │ ├── ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] + │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ ├── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ └── ["7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01", "7c\x00\x02\x00\x03\x00\x01\x12d\x00\x01"] + ├── key: (1) + └── fd: (1)-->(6) + +# Query using the fetch val and contains operators in conjunction with a +# contained by operator. 
+opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a' @> '["b"]' AND j <@ '{"c": "d"}' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── select + ├── columns: k:1!null j:4 + ├── immutable + ├── key: (1) + ├── fd: (1)-->(4) + ├── index-join b + │ ├── columns: k:1!null j:4 + │ ├── key: (1) + │ ├── fd: (1)-->(4) + │ └── inverted-filter + │ ├── columns: k:1!null + │ ├── inverted expression: /6 + │ │ ├── tight: false, unique: false + │ │ ├── union spans: empty + │ │ └── INTERSECTION + │ │ ├── span expression + │ │ │ ├── tight: true, unique: true + │ │ │ └── union spans: ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ │ └── span expression + │ │ ├── tight: false, unique: false + │ │ └── union spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ └── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ ├── key: (1) + │ └── scan b@j_inv_idx + │ ├── columns: k:1!null j_inverted_key:6!null + │ ├── inverted constraint: /6/1 + │ │ └── spans + │ │ ├── ["7\x00\x019", "7\x00\x019"] + │ │ ├── ["7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01", "7a\x00\x02\x00\x03\x00\x01\x12b\x00\x01"] + │ │ └── ["7c\x00\x01\x12d\x00\x01", "7c\x00\x01\x12d\x00\x01"] + │ ├── key: (1) + │ └── fd: (1)-->(6) + └── filters + └── j:4 <@ '{"c": "d"}' [outer=(4), immutable] + # GenerateInvertedIndexScans propagates row-level locking information. opt expect=GenerateInvertedIndexScans SELECT k FROM b WHERE j @> '{"a": "b"}' FOR UPDATE