From 104e5e2ed9ffefba490fc42db335d386c2845d1b Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Wed, 7 Oct 2020 19:04:29 -0700 Subject: [PATCH] opt: index-accelerate equalities with JSON fetch expressions This commit allows inverted indexes to be scanned to satisfy query filters in the form: `j->'a' = '1'`. The optimizer had previously supported this by normalizing these expressions into expressions with JSON containment operators, `@>`, but it lost this support when these normalization rules were found to produce inequivalent expressions. Note that query filters of several forms, which were previously accelerated via the incorrect normalization rules, are not accelerated as part of this commit, such as: j->'a' @> '{"x": "y"} j->'a'->'b' = '"c"' Release note: None --- .../testdata/logic_test/inverted_index | 25 ++++++ .../exec/execbuilder/testdata/inverted_index | 56 ++++++------ .../opt/idxconstraint/index_constraints.go | 89 ++++++++++++++++--- pkg/sql/opt/idxconstraint/testdata/inverted | 77 ++++++++++++++++ pkg/sql/opt/norm/scalar_funcs.go | 19 ---- 5 files changed, 210 insertions(+), 56 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_index b/pkg/sql/logictest/testdata/logic_test/inverted_index index f1b8b4318292..6e6b343d8df4 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_index +++ b/pkg/sql/logictest/testdata/logic_test/inverted_index @@ -705,6 +705,31 @@ DROP TABLE table_with_nulls statement ok DROP TABLE c +subtest json_fetch_val + +statement ok +CREATE TABLE f ( + k INT PRIMARY KEY, + j JSON, + INVERTED INDEX i (j) +) + +statement ok +INSERT INTO f VALUES + (0, '{"a": 1}'), + (1, '{"a": 10}'), + (2, '{"b": 2}'), + (3, '{"b": 2, "a": 1}'), + (4, '{"a": 1, "c": 3}'), + (5, '{"a": [1, 2]}') + +query T +SELECT j FROM f@i WHERE j->'a' = '1' ORDER BY k +---- +{"a": 1} +{"a": 1, "b": 2} +{"a": 1, "c": 3} + subtest arrays statement ok diff --git a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index index 269d4297141b..62ab59455524 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index @@ -273,23 +273,25 @@ filter · · (a, b) · · spans FULL SCAN · · -# TODO(mgartner): This query can scan the inverted index when the -> operator is -# supported by the inverted index constraint builder. +# TODO(mgartner): It should not be required to force the index scan. It is +# required until the statistics builder treats b->'a' = '"b"' similarly to the +# containment operator, @>. query TTTTT -EXPLAIN (VERBOSE) SELECT * from d where b->'a' = '"b"' +EXPLAIN (VERBOSE) SELECT * from d@foo_inv where b->'a' = '"b"' ---- -· distribution local · · -· vectorized true · · -filter · · (a, b) · - │ estimated row count 333 (missing stats) · · - │ filter (b->'a') = '"b"' · · - └── scan · · (a, b) · -· estimated row count 1000 (missing stats) · · -· table d@primary · · -· spans FULL SCAN · · +· distribution local · · +· vectorized true · · +index join · · (a, b) · + │ estimated row count 333 (missing stats) · · + │ table d@primary · · + │ key columns a · · + └── scan · · (a) · +· estimated row count 110 (missing stats) · · +· table d@foo_inv · · +· spans /"a"/"b"-/"a"/"b"/PrefixEnd · · -# TODO(mgartner): This query can scan the inverted index when the -> operator is -# supported by the inverted index constraint builder. +# TODO(mgartner): Add support for building inverted index constraints for chained JSON +# fetch operators. query TTTTT EXPLAIN (VERBOSE) SELECT * from d where b->'a'->'c' = '"b"' ---- @@ -310,20 +312,22 @@ EXPLAIN (VERBOSE) SELECT * from d where b->(NULL::STRING) = '"b"' · vectorized true · · norows · · (a, b) · -# TODO(mgartner): This query can scan the inverted index when the -> operator is -# supported by the inverted index constraint builder. +# TODO(mgartner): It should not be required to force the index scan. It is +# required until the statistics builder treats b->'a' = '"b"' similarly to the +# containment operator, @>. query TTTTT -EXPLAIN (VERBOSE) SELECT * from d where '"b"' = b->'a' +EXPLAIN (VERBOSE) SELECT * from d@foo_inv where '"b"' = b->'a' ---- -· distribution local · · -· vectorized true · · -filter · · (a, b) · - │ estimated row count 333 (missing stats) · · - │ filter (b->'a') = '"b"' · · - └── scan · · (a, b) · -· estimated row count 1000 (missing stats) · · -· table d@primary · · -· spans FULL SCAN · · +· distribution local · · +· vectorized true · · +index join · · (a, b) · + │ estimated row count 333 (missing stats) · · + │ table d@primary · · + │ key columns a · · + └── scan · · (a) · +· estimated row count 110 (missing stats) · · +· table d@foo_inv · · +· spans /"a"/"b"-/"a"/"b"/PrefixEnd · · # Make sure that querying for NULL equality doesn't use the inverted index. query TTTTT diff --git a/pkg/sql/opt/idxconstraint/index_constraints.go b/pkg/sql/opt/idxconstraint/index_constraints.go index 0dd05b3efdc6..bec604b92930 100644 --- a/pkg/sql/opt/idxconstraint/index_constraints.go +++ b/pkg/sql/opt/idxconstraint/index_constraints.go @@ -817,12 +817,12 @@ func (c *indexConstraintCtx) makeSpansForOr( return tightLeft && tightRight } -// makeInvertedIndexSpansForJSONExpr is the implementation of +// makeInvertedIndexSpansForJSONContainmentExpr is the implementation of // makeInvertedIndexSpans for JSON inverted indexes. The input datum is the JSON // to produce spans for. If allPaths is true, the slice is populated with // all constraints found. Otherwise, this function stops at the first // constraint. -func (c *indexConstraintCtx) makeInvertedIndexSpansForJSONExpr( +func (c *indexConstraintCtx) makeInvertedIndexSpansForJSONContainmentExpr( datum *tree.DJSON, constraints []*constraint.Constraint, allPaths bool, ) (bool, []*constraint.Constraint) { out := &constraint.Constraint{} @@ -902,12 +902,12 @@ func (c *indexConstraintCtx) makeInvertedIndexSpansForJSONExpr( return false, constraints } -// makeInvertedIndexSpansForArrayExpr is the implementation of +// makeInvertedIndexSpansForArrayContainmentExpr is the implementation of // makeInvertedIndexSpans for array inverted indexes. The input arr is the array // to produce spans for. If allPaths is true, the slice is populated with // all constraints found. Otherwise, this function stops at the first // constraint. -func (c *indexConstraintCtx) makeInvertedIndexSpansForArrayExpr( +func (c *indexConstraintCtx) makeInvertedIndexSpansForArrayContainmentExpr( arr *tree.DArray, constraints []*constraint.Constraint, allPaths bool, ) (bool, []*constraint.Constraint) { if len(arr.Array) == 0 { @@ -952,6 +952,70 @@ func (c *indexConstraintCtx) makeInvertedIndexSpansForArrayExpr( return len(arr.Array) == 1, constraints } +// makeInvertedIndexSpanForJSONFetchValEqExpr attempts to create an inverted +// index scan for filters of the form json->'key' = 'value'. An inverted index +// constraint is only generated when all of the following are true: +// +// 1. The fetch column is an index column. +// +// 2. The fetch key (RHS of ->) is a constant string. This is required because +// we build a JSON object that is used as the boundaries of the span. An +// integer fetch key fetches a value at an index in a JSON array, not a +// JSON object. +// +// 3. The RHS of the equality operator is a constant, scalar JSON value. +// Supporting non-scalar JSON values would require the filter to be +// re-applied to the JSON column after scanning the index. As an example of +// what would happen without re-applying the filter, a row with column j as +// {"a": [1, 2]} would be incorrectly returned from the query filter +// j->'a' = '[1]'. +// +func (c *indexConstraintCtx) makeInvertedIndexSpanForJSONFetchValEqExpr( + fetch *memo.FetchValExpr, rhs opt.Expr, constraints []*constraint.Constraint, +) (bool, []*constraint.Constraint) { + appendUnconstrained := func() []*constraint.Constraint { + out := &constraint.Constraint{} + c.unconstrained(0 /* offset */, out) + return append(constraints, out) + } + + if !c.isIndexColumn(fetch.Json, 0 /* index */) { + return false, appendUnconstrained() + } + + if !memo.CanExtractConstDatum(fetch.Index) { + return false, appendUnconstrained() + } + + key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString) + if !ok { + return false, appendUnconstrained() + } + + if !memo.CanExtractConstDatum(rhs) { + return false, appendUnconstrained() + } + + right, ok := memo.ExtractConstDatum(rhs).(*tree.DJSON) + if !ok { + return false, appendUnconstrained() + } + + if right.JSON.Type() == json.ObjectJSONType || right.JSON.Type() == json.ArrayJSONType { + return false, appendUnconstrained() + } + + // Build a new JSON object of the form: {: }. + b := json.NewObjectBuilder(1) + b.Add(string(*key), right.JSON) + obj := tree.NewDJSON(b.Build()) + + // Create an equality span with the JSON object as the value. + out := &constraint.Constraint{} + c.eqSpan(0 /* offset */, obj, out) + return true, append(constraints, out) +} + // makeInvertedIndexSpansForExpr is analogous to makeSpansForExpr, but it is // used for inverted indexes. If allPaths is true, the slice is populated with // all constraints found. Otherwise, this function stops at the first @@ -968,31 +1032,34 @@ func (c *indexConstraintCtx) makeInvertedIndexSpansForExpr( constrained := false switch nd.Op() { case opt.ContainsOp: - out := &constraint.Constraint{} lhs, rhs := nd.Child(0), nd.Child(1) if !c.isIndexColumn(lhs, 0 /* index */) || !opt.IsConstValueOp(rhs) { - c.unconstrained(0 /* offset */, out) - return false, append(constraints, out) + break } rightDatum := memo.ExtractConstDatum(rhs) if rightDatum == tree.DNull { - c.contradiction(0 /* offset */, out) - return false, append(constraints, out) + break } switch rightDatum.ResolvedType().Family() { case types.JsonFamily: - return c.makeInvertedIndexSpansForJSONExpr(rightDatum.(*tree.DJSON), constraints, allPaths) + return c.makeInvertedIndexSpansForJSONContainmentExpr(rightDatum.(*tree.DJSON), constraints, allPaths) case types.ArrayFamily: - return c.makeInvertedIndexSpansForArrayExpr(rightDatum.(*tree.DArray), constraints, allPaths) + return c.makeInvertedIndexSpansForArrayContainmentExpr(rightDatum.(*tree.DArray), constraints, allPaths) default: log.Errorf(context.TODO(), "unexpected type in inverted index: %s", rightDatum.ResolvedType()) } + case opt.EqOp: + lhs, rhs := nd.Child(0), nd.Child(1) + if fetch, ok := lhs.(*memo.FetchValExpr); ok { + return c.makeInvertedIndexSpanForJSONFetchValEqExpr(fetch, rhs, constraints) + } + case opt.AndOp, opt.FiltersOp: var out *constraint.Constraint for i, n := 0, nd.ChildCount(); i < n; i++ { diff --git a/pkg/sql/opt/idxconstraint/testdata/inverted b/pkg/sql/opt/idxconstraint/testdata/inverted index 033f06f311f3..d9e4c6a3756c 100644 --- a/pkg/sql/opt/idxconstraint/testdata/inverted +++ b/pkg/sql/opt/idxconstraint/testdata/inverted @@ -96,3 +96,80 @@ index-constraints vars=(int[]) inverted-index=@1 ---- [ - ] Remaining filter: NULL + +index-constraints vars=(jsonb) inverted-index=@1 +@1->'a' = 'true' +---- +[/'{"a": true}' - /'{"a": true}'] + +index-constraints vars=(jsonb) inverted-index=@1 +@1->'a' = 'false' +---- +[/'{"a": false}' - /'{"a": false}'] + +index-constraints vars=(jsonb) inverted-index=@1 +@1->'a' = 'null' +---- +[/'{"a": null}' - /'{"a": null}'] + +index-constraints vars=(jsonb) inverted-index=@1 +@1->'a' = '1' +---- +[/'{"a": 1}' - /'{"a": 1}'] + +index-constraints vars=(jsonb) inverted-index=@1 +@1->'a' = '"b"' +---- +[/'{"a": "b"}' - /'{"a": "b"}'] + +index-constraints vars=(jsonb) inverted-index=@1 +@1->'0' = 'true' +---- +[/'{"0": true}' - /'{"0": true}'] + +# Do not create a constraint when the var is not an indexed column. +index-constraints vars=(jsonb,jsonb) inverted-index=@2 +@1->'a' = '1' +---- +[ - ] +Remaining filter: (@1->'a') = '1' + +# Do not create a constraint when the fetch key is not a constant. +index-constraints vars=(jsonb,string) inverted-index=@1 +@1->@2 = '1' +---- +[ - ] +Remaining filter: (@1->@2) = '1' + +# Do not create a constraint when the fetch key is an integer. +index-constraints vars=(jsonb) inverted-index=@1 +@1->0 = '1' +---- +[ - ] +Remaining filter: (@1->0) = '1' + +# Do not create a constraint when the RHS of the equality is not a constant. +# Note: @2 is wrapped in a function to avoid normalizing @2 to the RHS of the +# equality. This normalization would result in an expression that fails to match +# the j->'key' = 'val' pattern that is required for building this type of +# inverted index constraint. Therefore, the wrapping function ensures that the +# code path of a non-const RHS is tested. +index-constraints vars=(jsonb,string) inverted-index=@1 +@1->'a' = lower(@2)::JSONB +---- +[ - ] +Remaining filter: (@1->'a') = lower(@2)::JSONB + +# Do not create a constraint when the RHS of the equality is a JSON array. +index-constraints vars=(jsonb) inverted-index=@1 +@1->'a' = '[1]' +---- +[ - ] +Remaining filter: (@1->'a') = '[1]' + +# Do not create a constraint when the RHS of the equality is a JSON object. +index-constraints vars=(jsonb) inverted-index=@1 +@1->'a' = '{"x": 1}' +---- +[ - ] +Remaining filter: (@1->'a') = '{"x": 1}' diff --git a/pkg/sql/opt/norm/scalar_funcs.go b/pkg/sql/opt/norm/scalar_funcs.go index c077a3d10b42..59dae7098d2c 100644 --- a/pkg/sql/opt/norm/scalar_funcs.go +++ b/pkg/sql/opt/norm/scalar_funcs.go @@ -18,7 +18,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/types" - "github.com/cockroachdb/cockroach/pkg/util/json" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/errors" ) @@ -98,24 +97,6 @@ func (c *CustomFuncs) SimplifyCoalesce(args memo.ScalarListExpr) opt.ScalarExpr return args[len(args)-1] } -// IsJSONScalar returns if the JSON value is a number, string, true, false, or null. -func (c *CustomFuncs) IsJSONScalar(value opt.ScalarExpr) bool { - v := value.(*memo.ConstExpr).Value.(*tree.DJSON) - return v.JSON.Type() != json.ObjectJSONType && v.JSON.Type() != json.ArrayJSONType -} - -// MakeSingleKeyJSONObject returns a JSON object with one entry, mapping key to value. -func (c *CustomFuncs) MakeSingleKeyJSONObject(key, value opt.ScalarExpr) opt.ScalarExpr { - k := key.(*memo.ConstExpr).Value.(*tree.DString) - v := value.(*memo.ConstExpr).Value.(*tree.DJSON) - - builder := json.NewObjectBuilder(1) - builder.Add(string(*k), v.JSON) - j := builder.Build() - - return c.f.ConstructConst(&tree.DJSON{JSON: j}, types.Jsonb) -} - // IsConstValueEqual returns whether const1 and const2 are equal. func (c *CustomFuncs) IsConstValueEqual(const1, const2 opt.ScalarExpr) bool { op1 := const1.Op()