diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_index b/pkg/sql/logictest/testdata/logic_test/inverted_index index f1b8b4318292..6e6b343d8df4 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_index +++ b/pkg/sql/logictest/testdata/logic_test/inverted_index @@ -705,6 +705,31 @@ DROP TABLE table_with_nulls statement ok DROP TABLE c +subtest json_fetch_val + +statement ok +CREATE TABLE f ( + k INT PRIMARY KEY, + j JSON, + INVERTED INDEX i (j) +) + +statement ok +INSERT INTO f VALUES + (0, '{"a": 1}'), + (1, '{"a": 10}'), + (2, '{"b": 2}'), + (3, '{"b": 2, "a": 1}'), + (4, '{"a": 1, "c": 3}'), + (5, '{"a": [1, 2]}') + +query T +SELECT j FROM f@i WHERE j->'a' = '1' ORDER BY k +---- +{"a": 1} +{"a": 1, "b": 2} +{"a": 1, "c": 3} + subtest arrays statement ok diff --git a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index index 269d4297141b..62ab59455524 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index @@ -273,23 +273,25 @@ filter · · (a, b) · · spans FULL SCAN · · -# TODO(mgartner): This query can scan the inverted index when the -> operator is -# supported by the inverted index constraint builder. +# TODO(mgartner): It should not be required to force the index scan. It is +# required until the statistics builder treats b->'a' = '"b"' similarly to the +# containment operator, @>. query TTTTT -EXPLAIN (VERBOSE) SELECT * from d where b->'a' = '"b"' +EXPLAIN (VERBOSE) SELECT * from d@foo_inv where b->'a' = '"b"' ---- -· distribution local · · -· vectorized true · · -filter · · (a, b) · - │ estimated row count 333 (missing stats) · · - │ filter (b->'a') = '"b"' · · - └── scan · · (a, b) · -· estimated row count 1000 (missing stats) · · -· table d@primary · · -· spans FULL SCAN · · +· distribution local · · +· vectorized true · · +index join · · (a, b) · + │ estimated row count 333 (missing stats) · · + │ table d@primary · · + │ key columns a · · + └── scan · · (a) · +· estimated row count 110 (missing stats) · · +· table d@foo_inv · · +· spans /"a"/"b"-/"a"/"b"/PrefixEnd · · -# TODO(mgartner): This query can scan the inverted index when the -> operator is -# supported by the inverted index constraint builder. +# TODO(mgartner): Add support for building inverted index constraints for chained JSON +# fetch operators. query TTTTT EXPLAIN (VERBOSE) SELECT * from d where b->'a'->'c' = '"b"' ---- @@ -310,20 +312,22 @@ EXPLAIN (VERBOSE) SELECT * from d where b->(NULL::STRING) = '"b"' · vectorized true · · norows · · (a, b) · -# TODO(mgartner): This query can scan the inverted index when the -> operator is -# supported by the inverted index constraint builder. +# TODO(mgartner): It should not be required to force the index scan. It is +# required until the statistics builder treats b->'a' = '"b"' similarly to the +# containment operator, @>. query TTTTT -EXPLAIN (VERBOSE) SELECT * from d where '"b"' = b->'a' +EXPLAIN (VERBOSE) SELECT * from d@foo_inv where '"b"' = b->'a' ---- -· distribution local · · -· vectorized true · · -filter · · (a, b) · - │ estimated row count 333 (missing stats) · · - │ filter (b->'a') = '"b"' · · - └── scan · · (a, b) · -· estimated row count 1000 (missing stats) · · -· table d@primary · · -· spans FULL SCAN · · +· distribution local · · +· vectorized true · · +index join · · (a, b) · + │ estimated row count 333 (missing stats) · · + │ table d@primary · · + │ key columns a · · + └── scan · · (a) · +· estimated row count 110 (missing stats) · · +· table d@foo_inv · · +· spans /"a"/"b"-/"a"/"b"/PrefixEnd · · # Make sure that querying for NULL equality doesn't use the inverted index. query TTTTT diff --git a/pkg/sql/opt/idxconstraint/index_constraints.go b/pkg/sql/opt/idxconstraint/index_constraints.go index 0dd05b3efdc6..bec604b92930 100644 --- a/pkg/sql/opt/idxconstraint/index_constraints.go +++ b/pkg/sql/opt/idxconstraint/index_constraints.go @@ -817,12 +817,12 @@ func (c *indexConstraintCtx) makeSpansForOr( return tightLeft && tightRight } -// makeInvertedIndexSpansForJSONExpr is the implementation of +// makeInvertedIndexSpansForJSONContainmentExpr is the implementation of // makeInvertedIndexSpans for JSON inverted indexes. The input datum is the JSON // to produce spans for. If allPaths is true, the slice is populated with // all constraints found. Otherwise, this function stops at the first // constraint. -func (c *indexConstraintCtx) makeInvertedIndexSpansForJSONExpr( +func (c *indexConstraintCtx) makeInvertedIndexSpansForJSONContainmentExpr( datum *tree.DJSON, constraints []*constraint.Constraint, allPaths bool, ) (bool, []*constraint.Constraint) { out := &constraint.Constraint{} @@ -902,12 +902,12 @@ func (c *indexConstraintCtx) makeInvertedIndexSpansForJSONExpr( return false, constraints } -// makeInvertedIndexSpansForArrayExpr is the implementation of +// makeInvertedIndexSpansForArrayContainmentExpr is the implementation of // makeInvertedIndexSpans for array inverted indexes. The input arr is the array // to produce spans for. If allPaths is true, the slice is populated with // all constraints found. Otherwise, this function stops at the first // constraint. -func (c *indexConstraintCtx) makeInvertedIndexSpansForArrayExpr( +func (c *indexConstraintCtx) makeInvertedIndexSpansForArrayContainmentExpr( arr *tree.DArray, constraints []*constraint.Constraint, allPaths bool, ) (bool, []*constraint.Constraint) { if len(arr.Array) == 0 { @@ -952,6 +952,70 @@ func (c *indexConstraintCtx) makeInvertedIndexSpansForArrayExpr( return len(arr.Array) == 1, constraints } +// makeInvertedIndexSpanForJSONFetchValEqExpr attempts to create an inverted +// index scan for filters of the form json->'key' = 'value'. An inverted index +// constraint is only generated when all of the following are true: +// +// 1. The fetch column is an index column. +// +// 2. The fetch key (RHS of ->) is a constant string. This is required because +// we build a JSON object that is used as the boundaries of the span. An +// integer fetch key fetches a value at an index in a JSON array, not a +// JSON object. +// +// 3. The RHS of the equality operator is a constant, scalar JSON value. +// Supporting non-scalar JSON values would require the filter to be +// re-applied to the JSON column after scanning the index. As an example of +// what would happen without re-applying the filter, a row with column j as +// {"a": [1, 2]} would be incorrectly returned from the query filter +// j->'a' = '[1]'. +// +func (c *indexConstraintCtx) makeInvertedIndexSpanForJSONFetchValEqExpr( + fetch *memo.FetchValExpr, rhs opt.Expr, constraints []*constraint.Constraint, +) (bool, []*constraint.Constraint) { + appendUnconstrained := func() []*constraint.Constraint { + out := &constraint.Constraint{} + c.unconstrained(0 /* offset */, out) + return append(constraints, out) + } + + if !c.isIndexColumn(fetch.Json, 0 /* index */) { + return false, appendUnconstrained() + } + + if !memo.CanExtractConstDatum(fetch.Index) { + return false, appendUnconstrained() + } + + key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString) + if !ok { + return false, appendUnconstrained() + } + + if !memo.CanExtractConstDatum(rhs) { + return false, appendUnconstrained() + } + + right, ok := memo.ExtractConstDatum(rhs).(*tree.DJSON) + if !ok { + return false, appendUnconstrained() + } + + if right.JSON.Type() == json.ObjectJSONType || right.JSON.Type() == json.ArrayJSONType { + return false, appendUnconstrained() + } + + // Build a new JSON object of the form: {: }. + b := json.NewObjectBuilder(1) + b.Add(string(*key), right.JSON) + obj := tree.NewDJSON(b.Build()) + + // Create an equality span with the JSON object as the value. + out := &constraint.Constraint{} + c.eqSpan(0 /* offset */, obj, out) + return true, append(constraints, out) +} + // makeInvertedIndexSpansForExpr is analogous to makeSpansForExpr, but it is // used for inverted indexes. If allPaths is true, the slice is populated with // all constraints found. Otherwise, this function stops at the first @@ -968,31 +1032,34 @@ func (c *indexConstraintCtx) makeInvertedIndexSpansForExpr( constrained := false switch nd.Op() { case opt.ContainsOp: - out := &constraint.Constraint{} lhs, rhs := nd.Child(0), nd.Child(1) if !c.isIndexColumn(lhs, 0 /* index */) || !opt.IsConstValueOp(rhs) { - c.unconstrained(0 /* offset */, out) - return false, append(constraints, out) + break } rightDatum := memo.ExtractConstDatum(rhs) if rightDatum == tree.DNull { - c.contradiction(0 /* offset */, out) - return false, append(constraints, out) + break } switch rightDatum.ResolvedType().Family() { case types.JsonFamily: - return c.makeInvertedIndexSpansForJSONExpr(rightDatum.(*tree.DJSON), constraints, allPaths) + return c.makeInvertedIndexSpansForJSONContainmentExpr(rightDatum.(*tree.DJSON), constraints, allPaths) case types.ArrayFamily: - return c.makeInvertedIndexSpansForArrayExpr(rightDatum.(*tree.DArray), constraints, allPaths) + return c.makeInvertedIndexSpansForArrayContainmentExpr(rightDatum.(*tree.DArray), constraints, allPaths) default: log.Errorf(context.TODO(), "unexpected type in inverted index: %s", rightDatum.ResolvedType()) } + case opt.EqOp: + lhs, rhs := nd.Child(0), nd.Child(1) + if fetch, ok := lhs.(*memo.FetchValExpr); ok { + return c.makeInvertedIndexSpanForJSONFetchValEqExpr(fetch, rhs, constraints) + } + case opt.AndOp, opt.FiltersOp: var out *constraint.Constraint for i, n := 0, nd.ChildCount(); i < n; i++ { diff --git a/pkg/sql/opt/idxconstraint/testdata/inverted b/pkg/sql/opt/idxconstraint/testdata/inverted index 033f06f311f3..d9e4c6a3756c 100644 --- a/pkg/sql/opt/idxconstraint/testdata/inverted +++ b/pkg/sql/opt/idxconstraint/testdata/inverted @@ -96,3 +96,80 @@ index-constraints vars=(int[]) inverted-index=@1 ---- [ - ] Remaining filter: NULL + +index-constraints vars=(jsonb) inverted-index=@1 +@1->'a' = 'true' +---- +[/'{"a": true}' - /'{"a": true}'] + +index-constraints vars=(jsonb) inverted-index=@1 +@1->'a' = 'false' +---- +[/'{"a": false}' - /'{"a": false}'] + +index-constraints vars=(jsonb) inverted-index=@1 +@1->'a' = 'null' +---- +[/'{"a": null}' - /'{"a": null}'] + +index-constraints vars=(jsonb) inverted-index=@1 +@1->'a' = '1' +---- +[/'{"a": 1}' - /'{"a": 1}'] + +index-constraints vars=(jsonb) inverted-index=@1 +@1->'a' = '"b"' +---- +[/'{"a": "b"}' - /'{"a": "b"}'] + +index-constraints vars=(jsonb) inverted-index=@1 +@1->'0' = 'true' +---- +[/'{"0": true}' - /'{"0": true}'] + +# Do not create a constraint when the var is not an indexed column. +index-constraints vars=(jsonb,jsonb) inverted-index=@2 +@1->'a' = '1' +---- +[ - ] +Remaining filter: (@1->'a') = '1' + +# Do not create a constraint when the fetch key is not a constant. +index-constraints vars=(jsonb,string) inverted-index=@1 +@1->@2 = '1' +---- +[ - ] +Remaining filter: (@1->@2) = '1' + +# Do not create a constraint when the fetch key is an integer. +index-constraints vars=(jsonb) inverted-index=@1 +@1->0 = '1' +---- +[ - ] +Remaining filter: (@1->0) = '1' + +# Do not create a constraint when the RHS of the equality is not a constant. +# Note: @2 is wrapped in a function to avoid normalizing @2 to the RHS of the +# equality. This normalization would result in an expression that fails to match +# the j->'key' = 'val' pattern that is required for building this type of +# inverted index constraint. Therefore, the wrapping function ensures that the +# code path of a non-const RHS is tested. +index-constraints vars=(jsonb,string) inverted-index=@1 +@1->'a' = lower(@2)::JSONB +---- +[ - ] +Remaining filter: (@1->'a') = lower(@2)::JSONB + +# Do not create a constraint when the RHS of the equality is a JSON array. +index-constraints vars=(jsonb) inverted-index=@1 +@1->'a' = '[1]' +---- +[ - ] +Remaining filter: (@1->'a') = '[1]' + +# Do not create a constraint when the RHS of the equality is a JSON object. +index-constraints vars=(jsonb) inverted-index=@1 +@1->'a' = '{"x": 1}' +---- +[ - ] +Remaining filter: (@1->'a') = '{"x": 1}' diff --git a/pkg/sql/opt/norm/scalar_funcs.go b/pkg/sql/opt/norm/scalar_funcs.go index c077a3d10b42..59dae7098d2c 100644 --- a/pkg/sql/opt/norm/scalar_funcs.go +++ b/pkg/sql/opt/norm/scalar_funcs.go @@ -18,7 +18,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/types" - "github.com/cockroachdb/cockroach/pkg/util/json" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/errors" ) @@ -98,24 +97,6 @@ func (c *CustomFuncs) SimplifyCoalesce(args memo.ScalarListExpr) opt.ScalarExpr return args[len(args)-1] } -// IsJSONScalar returns if the JSON value is a number, string, true, false, or null. -func (c *CustomFuncs) IsJSONScalar(value opt.ScalarExpr) bool { - v := value.(*memo.ConstExpr).Value.(*tree.DJSON) - return v.JSON.Type() != json.ObjectJSONType && v.JSON.Type() != json.ArrayJSONType -} - -// MakeSingleKeyJSONObject returns a JSON object with one entry, mapping key to value. -func (c *CustomFuncs) MakeSingleKeyJSONObject(key, value opt.ScalarExpr) opt.ScalarExpr { - k := key.(*memo.ConstExpr).Value.(*tree.DString) - v := value.(*memo.ConstExpr).Value.(*tree.DJSON) - - builder := json.NewObjectBuilder(1) - builder.Add(string(*k), v.JSON) - j := builder.Build() - - return c.f.ConstructConst(&tree.DJSON{JSON: j}, types.Jsonb) -} - // IsConstValueEqual returns whether const1 and const2 are equal. func (c *CustomFuncs) IsConstValueEqual(const1, const2 opt.ScalarExpr) bool { op1 := const1.Op()