Skip to content

Commit

Permalink
opt: build inverted index scan constraints from JSON fetch val operator
Browse files Browse the repository at this point in the history
GenerateInvertedIndexScans now generates InvertedConstraints from
equality expressions with a JSON FetchVal operator on the left side and
a constant on the right side, like `j->'a' = '1'`. Previously
Constraints were built for these expressions with the `idxconstraint`
package.

By building InvertedConstraints from FetchVal operators, the optimizer
now has the ability to generate inverted index scans with conjunctive
and disjunctive filters containing FetchVal operators. For example, the
optimizer will plan an inverted index scan with the query below.

    CREATE TABLE t (k INT PRIMARY KEY, j JSON, INVERTED INDEX (j))

    SELECT k FROM t WHERE j->'a' = '1' AND j->'b' = '2'

This change also brings us a step closer to cleaning up the
`idxconstraint` package by removing code related to inverted indexes.

Informs cockroachdb#47340

Release note (performance improvement): The query optimizer now plans
scans over inverted indexes on JSON columns for query filters that
constrain the JSON column with equality and fetch value operators (`->`)
inside conjunctions and disjunctions, like
`j->'a' = '1' AND j->'b' = '2'`.
  • Loading branch information
mgartner committed Jan 26, 2021
1 parent f236d53 commit d1ba023
Show file tree
Hide file tree
Showing 6 changed files with 317 additions and 13 deletions.
16 changes: 16 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,22 @@ SELECT j FROM f@i WHERE j->'a' = '1' ORDER BY k
{"a": 1, "b": 2}
{"a": 1, "c": 3}

query T
SELECT j FROM f@i WHERE j->'a' = '1' OR j->'b' = '2' ORDER BY k
----
{"a": 1}
{"b": 2}
{"a": 1, "b": 2}
{"a": 1, "c": 3}

query T
SELECT j FROM f@i WHERE j->'a' = '1' OR j @> '{"b": 2}' ORDER BY k
----
{"a": 1}
{"b": 2}
{"a": 1, "b": 2}
{"a": 1, "c": 3}

subtest arrays

statement ok
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ vectorized: true
└── • scan
columns: (a)
estimated row count: 110 (missing stats)
estimated row count: 111 (missing stats)
table: d@foo_inv
spans: /"a"/"b"-/"a"/"b"/PrefixEnd

Expand Down Expand Up @@ -399,7 +399,7 @@ vectorized: true
└── • scan
columns: (a)
estimated row count: 110 (missing stats)
estimated row count: 111 (missing stats)
table: d@foo_inv
spans: /"a"/"b"-/"a"/"b"/PrefixEnd

Expand Down
1 change: 1 addition & 0 deletions pkg/sql/opt/invertedidx/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ go_library(
"//pkg/sql/sem/tree",
"//pkg/sql/types",
"//pkg/util/encoding",
"//pkg/util/json",
"@com_github_cockroachdb_errors//:errors",
"@com_github_golang_geo//r1",
"@com_github_golang_geo//s1",
Expand Down
83 changes: 73 additions & 10 deletions pkg/sql/opt/invertedidx/json_array.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/rowenc"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/json"
"github.com/cockroachdb/errors"
)

Expand Down Expand Up @@ -270,24 +271,32 @@ func (j *jsonOrArrayFilterPlanner) extractInvertedFilterConditionFromLeaf(
_ *invertedexpr.PreFiltererStateForInvertedFilterer,
) {
switch t := expr.(type) {
// TODO(rytaft): Support JSON fetch val operator (->).
case *memo.ContainsExpr:
invertedExpr := j.extractJSONOrArrayContainsCondition(evalCtx, t.Left, t.Right)
if !invertedExpr.IsTight() {
remainingFilters = expr
invertedExpr = j.extractJSONOrArrayContainsCondition(evalCtx, t.Left, t.Right)
case *memo.EqExpr:
if fetch, ok := t.Left.(*memo.FetchValExpr); ok {
invertedExpr = j.extractJSONFetchValEqCondition(evalCtx, fetch, t.Right)
}
}

// We do not currently support pre-filtering for JSON and Array indexes, so
// the returned pre-filter state is nil.
return invertedExpr, remainingFilters, nil

default:
if invertedExpr == nil {
// An inverted expression could not be extracted.
return invertedexpr.NonInvertedColExpression{}, expr, nil
}

// If the extracted inverted expression is not tight then remaining filters
// must be applied after the inverted index scan.
if !invertedExpr.IsTight() {
remainingFilters = expr
}

// We do not currently support pre-filtering for JSON and Array indexes, so
// the returned pre-filter state is nil.
return invertedExpr, remainingFilters, nil
}

// extractJSONOrArrayContainsCondition extracts an InvertedExpression
// representing an inverted filter over the given inverted index, based
// representing an inverted filter over the planner's inverted index, based
// on the given left and right expression arguments. Returns an empty
// InvertedExpression if no inverted filter could be extracted.
func (j *jsonOrArrayFilterPlanner) extractJSONOrArrayContainsCondition(
Expand Down Expand Up @@ -316,3 +325,57 @@ func (j *jsonOrArrayFilterPlanner) extractJSONOrArrayContainsCondition(

return getSpanExprForJSONOrArrayIndex(evalCtx, d)
}

// extractJSONFetchValEqCondition extracts an InvertedExpression representing an
// inverted filter over the planner's inverted index, based on equality between
// a fetch val expression and a right scalar expression, e.g. j->'a' = '1'. If
// any of the following criteria is not met, a NonInvertedColExpression is
// returned to signal that no inverted filter could be extracted.
//
//   1. The fetch value operator's left expression must be a variable
//      referencing the inverted column in the index.
//   2. The fetch value operator's right expression must be a constant string.
//   3. The right expression in the equality expression must be a constant JSON
//      value that is not an object or an array.
//
// When all criteria hold, the condition is rewritten as the single-key JSON
// object {<key>: <right>} and handed to getSpanExprForJSONOrArrayIndex.
//
// TODO(mgartner): Support chained fetch val operators, e.g., j->'a'->'b' = '1'.
func (j *jsonOrArrayFilterPlanner) extractJSONFetchValEqCondition(
	evalCtx *tree.EvalContext, fetch *memo.FetchValExpr, right opt.ScalarExpr,
) invertedexpr.InvertedExpression {
	// The left side of the fetch val expression, the Json field, should be a
	// variable corresponding to the index column. Only the success flag is
	// needed here; the first result is discarded so that no unused local is
	// declared (which the Go compiler rejects).
	if _, ok := indexColumnVariable(j.tabID, j.index, fetch.Json); !ok {
		return invertedexpr.NonInvertedColExpression{}
	}

	// The right side of the fetch val expression, the Index field, should be a
	// constant string.
	if !memo.CanExtractConstDatum(fetch.Index) {
		return invertedexpr.NonInvertedColExpression{}
	}
	key, ok := memo.ExtractConstDatum(fetch.Index).(*tree.DString)
	if !ok {
		return invertedexpr.NonInvertedColExpression{}
	}

	// The right side of the equals expression should be a constant JSON value
	// that is not an object or array.
	if !memo.CanExtractConstDatum(right) {
		return invertedexpr.NonInvertedColExpression{}
	}
	val, ok := memo.ExtractConstDatum(right).(*tree.DJSON)
	if !ok {
		return invertedexpr.NonInvertedColExpression{}
	}
	switch val.JSON.Type() {
	case json.ObjectJSONType, json.ArrayJSONType:
		return invertedexpr.NonInvertedColExpression{}
	}

	// Build a new JSON object of the form: {<key>: <right>}.
	b := json.NewObjectBuilder(1)
	b.Add(string(*key), val.JSON)
	obj := tree.NewDJSON(b.Build())

	return getSpanExprForJSONOrArrayIndex(evalCtx, obj)
}
67 changes: 67 additions & 0 deletions pkg/sql/opt/invertedidx/json_array_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,73 @@ func TestTryFilterJsonOrArrayIndex(t *testing.T) {
unique: false,
remainingFilters: "j @> '[[1, 2]]'",
},
{
filters: "j->'a' = '1'",
indexOrd: jsonOrd,
ok: true,
tight: true,
unique: true,
},
{
// Integer indexes are not yet supported.
filters: "j->0 = '1'",
indexOrd: jsonOrd,
ok: false,
},
{
// Arrays on the right side of the equality are not yet supported.
filters: "j->'a' = '[1]'",
indexOrd: jsonOrd,
ok: false,
},
{
// Objects on the right side of the equality are not yet supported.
filters: `j->'a' = '{"b": "c"}'`,
indexOrd: jsonOrd,
ok: false,
},
{
// Wrong index ordinal.
filters: "j->'a' = '1'",
indexOrd: arrayOrd,
ok: false,
},
{
filters: "j->'a' = '1' AND j->'b' = '2'",
indexOrd: jsonOrd,
ok: true,
tight: true,
unique: false,
},
{
filters: "j->'a' = '1' OR j->'b' = '2'",
indexOrd: jsonOrd,
ok: true,
tight: true,
unique: false,
},
{
filters: `j->'a' = '1' AND j @> '{"b": "c"}'`,
indexOrd: jsonOrd,
ok: true,
tight: true,
unique: false,
},
{
filters: `j->'a' = '1' OR j @> '{"b": "c"}'`,
indexOrd: jsonOrd,
ok: true,
tight: true,
unique: false,
},
{
filters: `j->'a' = '1' AND j @> '[[1, 2]]'`,
indexOrd: jsonOrd,
ok: true,
tight: false,
unique: false,
remainingFilters: "j @> '[[1, 2]]'",
},
}

for _, tc := range testCases {
Expand Down
Loading

0 comments on commit d1ba023

Please sign in to comment.