From b72da855c49cb2749cc0367659f0de88e398d05b Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Mon, 25 Jan 2021 18:12:17 -0800 Subject: [PATCH] opt: index accelerate chained fetch value operators Prior to #55316, the optimizer generated inverted index scans on indexed JSON columns when queries had filters with chained fetch value operators, for example `j->'a'->'b' = '1'`. The logic that made this possible was found to create query plans not equivalent to the query, so it was removed. This commit restores the ability to index accelerate chained -> operators. Fixes #55317 Release note (performance improvement): A bug fix included in 20.2.1 for for the JSON fetch value operator, `->`, resulted in chained `->` operators in query filters not being index accelerated, e.g., `j->'a'->'b' = '1'`. Chained `->` are now index accelerated. --- .../testdata/logic_test/inverted_index | 22 +++- .../exec/execbuilder/testdata/inverted_index | 17 ++- pkg/sql/opt/invertedidx/json_array.go | 110 +++++++++++++----- pkg/sql/opt/invertedidx/json_array_test.go | 27 +++++ pkg/sql/opt/xform/testdata/rules/select | 14 +++ 5 files changed, 149 insertions(+), 41 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_index b/pkg/sql/logictest/testdata/logic_test/inverted_index index 6a499bb2cd24..76cc4098476a 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_index +++ b/pkg/sql/logictest/testdata/logic_test/inverted_index @@ -744,7 +744,15 @@ INSERT INTO f VALUES (2, '{"b": 2}'), (3, '{"b": 2, "a": 1}'), (4, '{"a": 1, "c": 3}'), - (5, '{"a": [1, 2]}') + (5, '{"a": [1, 2]}'), + (6, '{"a": {"b": 1}}'), + (7, '{"a": {"b": 1, "d": 2}}'), + (8, '{"a": {"d": 2}}'), + (9, '{"a": {"b": [1, 2]}}'), + (10, '{"a": {"b": {"c": 1}}}'), + (11, '{"a": {"b": {"c": 1, "d": 2}}}}'), + (12, '{"a": {"b": {"d": 2}}}}'), + (13, '{"a": {"b": {"c": [1, 2]}}}') query T SELECT j FROM f@i WHERE j->'a' = '1' ORDER BY k @@ -769,6 +777,18 @@ SELECT j FROM f@i WHERE j->'a' = '1' OR j @> '{"b": 2}' ORDER BY k {"a": 1, "b": 2} {"a": 1, "c": 3} +query T +SELECT j FROM f@i WHERE j->'a'->'b' = '1' ORDER BY k +---- +{"a": {"b": 1}} +{"a": {"b": 1, "d": 2}} + +query T +SELECT j FROM f@i WHERE j->'a'->'b'->'c' = '1' ORDER BY k +---- +{"a": {"b": {"c": 1}}} +{"a": {"b": {"c": 1, "d": 2}}} + subtest arrays statement ok diff --git a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index index 5aad23dfada5..091ee62624c2 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index @@ -354,24 +354,23 @@ vectorized: true table: d@foo_inv spans: /"a"/"b"-/"a"/"b"/PrefixEnd -# TODO(mgartner): Add support for building inverted index constraints for chained JSON -# fetch operators. query T -EXPLAIN (VERBOSE) SELECT * from d where b->'a'->'c' = '"b"' +EXPLAIN (VERBOSE) SELECT * from d@foo_inv where b->'a'->'c' = '"b"' ---- distribution: local vectorized: true · -• filter +• index join │ columns: (a, b) │ estimated row count: 333 (missing stats) -│ filter: ((b->'a')->'c') = '"b"' +│ table: d@primary +│ key columns: a │ └── • scan - columns: (a, b) - estimated row count: 1,000 (missing stats) - table: d@primary - spans: FULL SCAN + columns: (a) + estimated row count: 111 (missing stats) + table: d@foo_inv + spans: /"a"/"c"/"b"-/"a"/"c"/"b"/PrefixEnd query T EXPLAIN (VERBOSE) SELECT * from d where b->(NULL::STRING) = '"b"' diff --git a/pkg/sql/opt/invertedidx/json_array.go b/pkg/sql/opt/invertedidx/json_array.go index 669a05518b31..4784c4c39aa0 100644 --- a/pkg/sql/opt/invertedidx/json_array.go +++ b/pkg/sql/opt/invertedidx/json_array.go @@ -334,35 +334,18 @@ func (j *jsonOrArrayFilterPlanner) extractJSONOrArrayContainsCondition( // extractJSONFetchValEqCondition extracts an InvertedExpression representing an // inverted filter over the planner's inverted index, based on equality between -// a fetch val expression and a right scalar expression. If the following criteria -// are not met, an empty InvertedExpression is returned. +// a chain of fetch val expressions and a right scalar expression. If an +// InvertedExpression cannot be generated from the expression, an +// inverted.NonInvertedColExpression is returned. // -// 1. The fetch value operator's left expression must be a variable -// referencing the inverted column in the index. -// 2. The fetch value operator's right expression must be a constant string. -// 3. The right expression in the equality expression must be a constant JSON -// value that is not an object or an array. -// -// TODO(mgartner): Support chained fetch val operators, e.g., j->'a'->'b' = '1'. +// In order to generate an InvertedExpression, left must be a fetch val +// expression in the form [col]->[index0]->[index1]->...->[indexN] where col is +// a variable or expression referencing the inverted column in the inverted +// index and each index is a constant string. The right expression must be a +// constant JSON value that is not an object or an array. func (j *jsonOrArrayFilterPlanner) extractJSONFetchValEqCondition( - evalCtx *tree.EvalContext, fetch *memo.FetchValExpr, right opt.ScalarExpr, + evalCtx *tree.EvalContext, left *memo.FetchValExpr, right opt.ScalarExpr, ) inverted.Expression { - // The left side of the fetch val expression, the Json field, should be a - // variable corresponding to the index column. - if !isIndexColumn(j.tabID, j.index, fetch.Json, j.computedColumns) { - return inverted.NonInvertedColExpression{} - } - - // The right side of the fetch val expression, the Index field, should be a - // constant string. - if !memo.CanExtractConstDatum(fetch.Index) { - return inverted.NonInvertedColExpression{} - } - key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString) - if !ok { - return inverted.NonInvertedColExpression{} - } - // The right side of the equals expression should be a constant JSON value // that is not an object or array. if !memo.CanExtractConstDatum(right) { @@ -377,10 +360,75 @@ func (j *jsonOrArrayFilterPlanner) extractJSONFetchValEqCondition( return inverted.NonInvertedColExpression{} } - // Build a new JSON object of the form: {: }. - b := json.NewObjectBuilder(1) - b.Add(string(*key), val.JSON) - obj := tree.NewDJSON(b.Build()) + // Recursively traverse fetch val expressions and collect keys with which to + // build the InvertedExpression. If it is not possible to build an inverted + // expression from the tree of fetch val expressions, collectKeys returns + // early and foundKeys remains false. If successful, foundKeys is set to + // true and JSON fetch value indexes are collected in keys. The keys are + // ordered by the outer-most fetch val index first. The outer-most fetch val + // index is the right-most in the -> chain, for example (j->'a'->'b') is + // equivalent to ((j->'a')->'b') and 'b' is the outer-most fetch val index. + // + // Later on, we iterate forward through these keys to build a JSON object + // from the inside-out with the inner-most value being the right scalar + // expression. In the resulting JSON object, the outer-most JSON fetch value + // indexes are the inner most JSON object keys. + // + // As an example, when left is (j->'a'->'b') and right is ('1'), the keys + // {"b", "a"} are collected and the JSON object {"a": {"b": 1}} is built. + foundKeys := false + var keys []string + var collectKeys func(fetch *memo.FetchValExpr) + collectKeys = func(fetch *memo.FetchValExpr) { + // The right side of the fetch val expression, the Index field, must be + // a constant string. If not, then we cannot build an inverted + // expression. + if !memo.CanExtractConstDatum(fetch.Index) { + return + } + key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString) + if !ok { + return + } + + // Append the key to the list of keys. + keys = append(keys, string(*key)) + + // If the left side of the fetch val expression, the Json field, is a + // variable or expression corresponding to the index column, then we + // have found a valid list of keys to build an inverted expression. + if isIndexColumn(j.tabID, j.index, fetch.Json, j.computedColumns) { + foundKeys = true + return + } + + // If the left side of the fetch val expression is another fetch val + // expression, recursively collect its keys. + if innerFetch, ok := fetch.Json.(*memo.FetchValExpr); ok { + collectKeys(innerFetch) + } + + // Otherwise, we cannot build an inverted expression. + } + collectKeys(left) + if !foundKeys { + return inverted.NonInvertedColExpression{} + } + + // Build a new JSON object of the form: + // {: ... {: {key0: }}} + // Note that key0 is the outer-most fetch val index, so the expression + // j->'a'->'b' = 1 results in {"a": {"b": 1}}. + var obj json.JSON + for i := 0; i < len(keys); i++ { + b := json.NewObjectBuilder(1) + if i == 0 { + b.Add(keys[i], val.JSON) + } else { + b.Add(keys[i], obj) + } + obj = b.Build() + } - return getInvertedExprForJSONOrArrayIndex(evalCtx, obj) + return getInvertedExprForJSONOrArrayIndex(evalCtx, tree.NewDJSON(obj)) } diff --git a/pkg/sql/opt/invertedidx/json_array_test.go b/pkg/sql/opt/invertedidx/json_array_test.go index 20c97a59d323..353fd43dd207 100644 --- a/pkg/sql/opt/invertedidx/json_array_test.go +++ b/pkg/sql/opt/invertedidx/json_array_test.go @@ -424,6 +424,33 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) { indexOrd: arrayOrd, ok: false, }, + { + filters: "j->'a'->'b' = '1'", + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: true, + }, + { + filters: "j->'a'->'b'->'c' = '1'", + indexOrd: jsonOrd, + ok: true, + tight: true, + unique: true, + }, + { + // Integer indexes are not yet supported. + filters: "j->0->'b' = '1'", + indexOrd: jsonOrd, + ok: false, + }, + { + // The inner most expression is not a fetch val expression with an + // indexed column on the left. + filters: "(j-'c')->'a'->'b' = '1'", + indexOrd: jsonOrd, + ok: false, + }, { filters: "j->'a' = '1' AND j->'b' = '2'", indexOrd: jsonOrd, diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index 1e6df6bc8db0..5fca3db693f5 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -1951,6 +1951,20 @@ project │ └── spans: ["7a\x00\x01\x12b\x00\x01", "7a\x00\x01\x12b\x00\x01"] └── key: (1) +# Chained fetch val operators. +opt expect=GenerateInvertedIndexScans +SELECT k FROM b WHERE j->'a'->'b' = '"c"' +---- +project + ├── columns: k:1!null + ├── immutable + ├── key: (1) + └── scan b@j_inv_idx + ├── columns: k:1!null + ├── inverted constraint: /6/1 + │ └── spans: ["7a\x00\x02b\x00\x01\x12c\x00\x01", "7a\x00\x02b\x00\x01\x12c\x00\x01"] + └── key: (1) + # Do not generate an inverted scan when the fetch val and equality operators are # wrapped in a NOT operator. The -> operator returns NULL if the key does not # exist in the JSON object, so (NOT j->'a' = '"b"') is not equivalent to the