Skip to content

Commit

Permalink
opt: index-accelerate equalities with JSON fetch expressions
Browse files Browse the repository at this point in the history
This commit allows inverted indexes to be scanned to satisfy query
filters in the form: `j->'a' = '1'`. The optimizer had previously
supported this by normalizing these expressions into expressions with
JSON containment operators, `@>`, but it lost this support when these
normalization rules were found to produce inequivalent expressions.

Note that query filters of several forms, which were previously
accelerated via the incorrect normalization rules, are not accelerated
as part of this commit, such as:

    j->'a' @> '{"x": "y"}
    j->'a'->'b' = '"c"'

Release note: None
  • Loading branch information
mgartner committed Oct 9, 2020
1 parent c176b31 commit 104e5e2
Show file tree
Hide file tree
Showing 5 changed files with 210 additions and 56 deletions.
25 changes: 25 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,31 @@ DROP TABLE table_with_nulls
statement ok
DROP TABLE c

subtest json_fetch_val

statement ok
CREATE TABLE f (
k INT PRIMARY KEY,
j JSON,
INVERTED INDEX i (j)
)

statement ok
INSERT INTO f VALUES
(0, '{"a": 1}'),
(1, '{"a": 10}'),
(2, '{"b": 2}'),
(3, '{"b": 2, "a": 1}'),
(4, '{"a": 1, "c": 3}'),
(5, '{"a": [1, 2]}')

query T
SELECT j FROM f@i WHERE j->'a' = '1' ORDER BY k
----
{"a": 1}
{"a": 1, "b": 2}
{"a": 1, "c": 3}

subtest arrays

statement ok
Expand Down
56 changes: 30 additions & 26 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -273,23 +273,25 @@ filter · · (a, b) ·
· spans FULL SCAN · ·


# TODO(mgartner): This query can scan the inverted index when the -> operator is
# supported by the inverted index constraint builder.
# TODO(mgartner): It should not be required to force the index scan. It is
# required until the statistics builder treats b->'a' = '"b"' similarly to the
# containment operator, @>.
query TTTTT
EXPLAIN (VERBOSE) SELECT * from d where b->'a' = '"b"'
EXPLAIN (VERBOSE) SELECT * from d@foo_inv where b->'a' = '"b"'
----
· distribution local · ·
· vectorized true · ·
filter · · (a, b) ·
│ estimated row count 333 (missing stats) · ·
│ filter (b->'a') = '"b"' · ·
└── scan · · (a, b) ·
· estimated row count 1000 (missing stats) · ·
· table d@primary · ·
· spans FULL SCAN · ·
· distribution local · ·
· vectorized true · ·
index join · · (a, b) ·
│ estimated row count 333 (missing stats) · ·
│ table d@primary · ·
│ key columns a · ·
└── scan · · (a) ·
· estimated row count 110 (missing stats) · ·
· table d@foo_inv · ·
· spans /"a"/"b"-/"a"/"b"/PrefixEnd · ·

# TODO(mgartner): This query can scan the inverted index when the -> operator is
# supported by the inverted index constraint builder.
# TODO(mgartner): Add support for building inverted index constraints for chained JSON
# fetch operators.
query TTTTT
EXPLAIN (VERBOSE) SELECT * from d where b->'a'->'c' = '"b"'
----
Expand All @@ -310,20 +312,22 @@ EXPLAIN (VERBOSE) SELECT * from d where b->(NULL::STRING) = '"b"'
· vectorized true · ·
norows · · (a, b) ·

# TODO(mgartner): This query can scan the inverted index when the -> operator is
# supported by the inverted index constraint builder.
# TODO(mgartner): It should not be required to force the index scan. It is
# required until the statistics builder treats b->'a' = '"b"' similarly to the
# containment operator, @>.
query TTTTT
EXPLAIN (VERBOSE) SELECT * from d where '"b"' = b->'a'
EXPLAIN (VERBOSE) SELECT * from d@foo_inv where '"b"' = b->'a'
----
· distribution local · ·
· vectorized true · ·
filter · · (a, b) ·
│ estimated row count 333 (missing stats) · ·
│ filter (b->'a') = '"b"' · ·
└── scan · · (a, b) ·
· estimated row count 1000 (missing stats) · ·
· table d@primary · ·
· spans FULL SCAN · ·
· distribution local · ·
· vectorized true · ·
index join · · (a, b) ·
│ estimated row count 333 (missing stats) · ·
│ table d@primary · ·
│ key columns a · ·
└── scan · · (a) ·
· estimated row count 110 (missing stats) · ·
· table d@foo_inv · ·
· spans /"a"/"b"-/"a"/"b"/PrefixEnd · ·

# Make sure that querying for NULL equality doesn't use the inverted index.
query TTTTT
Expand Down
89 changes: 78 additions & 11 deletions pkg/sql/opt/idxconstraint/index_constraints.go
Original file line number Diff line number Diff line change
Expand Up @@ -817,12 +817,12 @@ func (c *indexConstraintCtx) makeSpansForOr(
return tightLeft && tightRight
}

// makeInvertedIndexSpansForJSONExpr is the implementation of
// makeInvertedIndexSpansForJSONContainmentExpr is the implementation of
// makeInvertedIndexSpans for JSON inverted indexes. The input datum is the JSON
// to produce spans for. If allPaths is true, the slice is populated with
// all constraints found. Otherwise, this function stops at the first
// constraint.
func (c *indexConstraintCtx) makeInvertedIndexSpansForJSONExpr(
func (c *indexConstraintCtx) makeInvertedIndexSpansForJSONContainmentExpr(
datum *tree.DJSON, constraints []*constraint.Constraint, allPaths bool,
) (bool, []*constraint.Constraint) {
out := &constraint.Constraint{}
Expand Down Expand Up @@ -902,12 +902,12 @@ func (c *indexConstraintCtx) makeInvertedIndexSpansForJSONExpr(
return false, constraints
}

// makeInvertedIndexSpansForArrayExpr is the implementation of
// makeInvertedIndexSpansForArrayContainmentExpr is the implementation of
// makeInvertedIndexSpans for array inverted indexes. The input arr is the array
// to produce spans for. If allPaths is true, the slice is populated with
// all constraints found. Otherwise, this function stops at the first
// constraint.
func (c *indexConstraintCtx) makeInvertedIndexSpansForArrayExpr(
func (c *indexConstraintCtx) makeInvertedIndexSpansForArrayContainmentExpr(
arr *tree.DArray, constraints []*constraint.Constraint, allPaths bool,
) (bool, []*constraint.Constraint) {
if len(arr.Array) == 0 {
Expand Down Expand Up @@ -952,6 +952,70 @@ func (c *indexConstraintCtx) makeInvertedIndexSpansForArrayExpr(
return len(arr.Array) == 1, constraints
}

// makeInvertedIndexSpanForJSONFetchValEqExpr attempts to create an inverted
// index scan for filters of the form json->'key' = 'value'. An inverted index
// constraint is only generated when all of the following are true:
//
// 1. The fetch column is an index column.
//
// 2. The fetch key (RHS of ->) is a constant string. This is required because
// we build a JSON object that is used as the boundaries of the span. An
// integer fetch key fetches a value at an index in a JSON array, not a
// JSON object.
//
// 3. The RHS of the equality operator is a constant, scalar JSON value.
// Supporting non-scalar JSON values would require the filter to be
// re-applied to the JSON column after scanning the index. As an example of
// what would happen without re-applying the filter, a row with column j as
// {"a": [1, 2]} would be incorrectly returned from the query filter
// j->'a' = '[1]'.
//
func (c *indexConstraintCtx) makeInvertedIndexSpanForJSONFetchValEqExpr(
fetch *memo.FetchValExpr, rhs opt.Expr, constraints []*constraint.Constraint,
) (bool, []*constraint.Constraint) {
appendUnconstrained := func() []*constraint.Constraint {
out := &constraint.Constraint{}
c.unconstrained(0 /* offset */, out)
return append(constraints, out)
}

if !c.isIndexColumn(fetch.Json, 0 /* index */) {
return false, appendUnconstrained()
}

if !memo.CanExtractConstDatum(fetch.Index) {
return false, appendUnconstrained()
}

key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString)
if !ok {
return false, appendUnconstrained()
}

if !memo.CanExtractConstDatum(rhs) {
return false, appendUnconstrained()
}

right, ok := memo.ExtractConstDatum(rhs).(*tree.DJSON)
if !ok {
return false, appendUnconstrained()
}

if right.JSON.Type() == json.ObjectJSONType || right.JSON.Type() == json.ArrayJSONType {
return false, appendUnconstrained()
}

// Build a new JSON object of the form: {<key>: <right>}.
b := json.NewObjectBuilder(1)
b.Add(string(*key), right.JSON)
obj := tree.NewDJSON(b.Build())

// Create an equality span with the JSON object as the value.
out := &constraint.Constraint{}
c.eqSpan(0 /* offset */, obj, out)
return true, append(constraints, out)
}

// makeInvertedIndexSpansForExpr is analogous to makeSpansForExpr, but it is
// used for inverted indexes. If allPaths is true, the slice is populated with
// all constraints found. Otherwise, this function stops at the first
Expand All @@ -968,31 +1032,34 @@ func (c *indexConstraintCtx) makeInvertedIndexSpansForExpr(
constrained := false
switch nd.Op() {
case opt.ContainsOp:
out := &constraint.Constraint{}
lhs, rhs := nd.Child(0), nd.Child(1)

if !c.isIndexColumn(lhs, 0 /* index */) || !opt.IsConstValueOp(rhs) {
c.unconstrained(0 /* offset */, out)
return false, append(constraints, out)
break
}

rightDatum := memo.ExtractConstDatum(rhs)

if rightDatum == tree.DNull {
c.contradiction(0 /* offset */, out)
return false, append(constraints, out)
break
}

switch rightDatum.ResolvedType().Family() {
case types.JsonFamily:
return c.makeInvertedIndexSpansForJSONExpr(rightDatum.(*tree.DJSON), constraints, allPaths)
return c.makeInvertedIndexSpansForJSONContainmentExpr(rightDatum.(*tree.DJSON), constraints, allPaths)
case types.ArrayFamily:
return c.makeInvertedIndexSpansForArrayExpr(rightDatum.(*tree.DArray), constraints, allPaths)
return c.makeInvertedIndexSpansForArrayContainmentExpr(rightDatum.(*tree.DArray), constraints, allPaths)

default:
log.Errorf(context.TODO(), "unexpected type in inverted index: %s", rightDatum.ResolvedType())
}

case opt.EqOp:
lhs, rhs := nd.Child(0), nd.Child(1)
if fetch, ok := lhs.(*memo.FetchValExpr); ok {
return c.makeInvertedIndexSpanForJSONFetchValEqExpr(fetch, rhs, constraints)
}

case opt.AndOp, opt.FiltersOp:
var out *constraint.Constraint
for i, n := 0, nd.ChildCount(); i < n; i++ {
Expand Down
77 changes: 77 additions & 0 deletions pkg/sql/opt/idxconstraint/testdata/inverted
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,80 @@ index-constraints vars=(int[]) inverted-index=@1
----
[ - ]
Remaining filter: NULL

index-constraints vars=(jsonb) inverted-index=@1
@1->'a' = 'true'
----
[/'{"a": true}' - /'{"a": true}']

index-constraints vars=(jsonb) inverted-index=@1
@1->'a' = 'false'
----
[/'{"a": false}' - /'{"a": false}']

index-constraints vars=(jsonb) inverted-index=@1
@1->'a' = 'null'
----
[/'{"a": null}' - /'{"a": null}']

index-constraints vars=(jsonb) inverted-index=@1
@1->'a' = '1'
----
[/'{"a": 1}' - /'{"a": 1}']

index-constraints vars=(jsonb) inverted-index=@1
@1->'a' = '"b"'
----
[/'{"a": "b"}' - /'{"a": "b"}']

index-constraints vars=(jsonb) inverted-index=@1
@1->'0' = 'true'
----
[/'{"0": true}' - /'{"0": true}']

# Do not create a constraint when the var is not an indexed column.
index-constraints vars=(jsonb,jsonb) inverted-index=@2
@1->'a' = '1'
----
[ - ]
Remaining filter: (@1->'a') = '1'

# Do not create a constraint when the fetch key is not a constant.
index-constraints vars=(jsonb,string) inverted-index=@1
@1->@2 = '1'
----
[ - ]
Remaining filter: (@1->@2) = '1'

# Do not create a constraint when the fetch key is an integer.
index-constraints vars=(jsonb) inverted-index=@1
@1->0 = '1'
----
[ - ]
Remaining filter: (@1->0) = '1'

# Do not create a constraint when the RHS of the equality is not a constant.
# Note: @2 is wrapped in a function to avoid normalizing @2 to the RHS of the
# equality. This normalization would result in an expression that fails to match
# the j->'key' = 'val' pattern that is required for building this type of
# inverted index constraint. Therefore, the wrapping function ensures that the
# code path of a non-const RHS is tested.
index-constraints vars=(jsonb,string) inverted-index=@1
@1->'a' = lower(@2)::JSONB
----
[ - ]
Remaining filter: (@1->'a') = lower(@2)::JSONB

# Do not create a constraint when the RHS of the equality is a JSON array.
index-constraints vars=(jsonb) inverted-index=@1
@1->'a' = '[1]'
----
[ - ]
Remaining filter: (@1->'a') = '[1]'

# Do not create a constraint when the RHS of the equality is a JSON object.
index-constraints vars=(jsonb) inverted-index=@1
@1->'a' = '{"x": 1}'
----
[ - ]
Remaining filter: (@1->'a') = '{"x": 1}'
19 changes: 0 additions & 19 deletions pkg/sql/opt/norm/scalar_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/json"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
)
Expand Down Expand Up @@ -98,24 +97,6 @@ func (c *CustomFuncs) SimplifyCoalesce(args memo.ScalarListExpr) opt.ScalarExpr
return args[len(args)-1]
}

// IsJSONScalar returns if the JSON value is a number, string, true, false, or null.
func (c *CustomFuncs) IsJSONScalar(value opt.ScalarExpr) bool {
v := value.(*memo.ConstExpr).Value.(*tree.DJSON)
return v.JSON.Type() != json.ObjectJSONType && v.JSON.Type() != json.ArrayJSONType
}

// MakeSingleKeyJSONObject returns a JSON object with one entry, mapping key to value.
func (c *CustomFuncs) MakeSingleKeyJSONObject(key, value opt.ScalarExpr) opt.ScalarExpr {
k := key.(*memo.ConstExpr).Value.(*tree.DString)
v := value.(*memo.ConstExpr).Value.(*tree.DJSON)

builder := json.NewObjectBuilder(1)
builder.Add(string(*k), v.JSON)
j := builder.Build()

return c.f.ConstructConst(&tree.DJSON{JSON: j}, types.Jsonb)
}

// IsConstValueEqual returns whether const1 and const2 are equal.
func (c *CustomFuncs) IsConstValueEqual(const1, const2 opt.ScalarExpr) bool {
op1 := const1.Op()
Expand Down

0 comments on commit 104e5e2

Please sign in to comment.