diff --git a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index index 6d560e310ed2..c3be2181789e 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index @@ -1112,13 +1112,13 @@ vectorized: true └── • inverted filter │ columns: (a, b_inverted_key) │ inverted column: b_inverted_key - │ num spans: 10 + │ num spans: 8 │ └── • scan columns: (a, b_inverted_key) estimated row count: 111 (missing stats) table: d@foo_inv - spans: /"f"-/"f"/PrefixEnd /[]-/{} /Arr/"f"-/Arr/"f"/PrefixEnd /Arr/{}-/Arr/{}/PrefixEnd /Arr/"a"/"b"-/Arr/"a"/"b"/PrefixEnd /Arr/"c"/{}-/Arr/"c"/{}/PrefixEnd /Arr/"c"/{}-/Arr/"c"/{}/PrefixEnd /Arr/"c"/"d"/[]-/Arr/"c"/"d"/{} /Arr/"c"/"d"/[]-/Arr/"c"/"d"/{} /Arr/"c"/"d"/Arr/"e"-/Arr/"c"/"d"/Arr/"e"/PrefixEnd + spans: /"f"-/"f"/PrefixEnd /[]-/{} /Arr/"f"-/Arr/"f"/PrefixEnd /Arr/{}-/Arr/{}/PrefixEnd /Arr/"a"/"b"-/Arr/"a"/"b"/PrefixEnd /Arr/"c"/{}-/Arr/"c"/{}/PrefixEnd /Arr/"c"/"d"/[]-/Arr/"c"/"d"/{} /Arr/"c"/"d"/Arr/"e"-/Arr/"c"/"d"/Arr/"e"/PrefixEnd # Ensure that an inverted index with a composite primary key still encodes # the primary key data in the composite value. diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index 5de7717a8ee8..ff1858c6dd5a 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -2506,7 +2506,6 @@ select │ │ ├── ["7\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x01*\x02\x00"] │ │ ├── ["7\x00\x03\x00\x019", "7\x00\x03\x00\x019"] │ │ ├── ["7\x00\x03a\x00\x019", "7\x00\x03a\x00\x019"] - │ │ ├── ["7\x00\x03a\x00\x02\x00\x019", "7\x00\x03a\x00\x02\x00\x019"] │ │ └── ["7\x00\x03a\x00\x02d\x00\x01\n", "7\x00\x03a\x00\x02d\x00\x01\n"] │ ├── key: (1) │ └── scan b@j_inv_idx @@ -2520,7 +2519,6 @@ select │ │ ├── ["7\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x01*\x02\x00"] │ │ ├── ["7\x00\x03\x00\x019", "7\x00\x03\x00\x019"] │ │ ├── ["7\x00\x03a\x00\x019", "7\x00\x03a\x00\x019"] - │ │ ├── ["7\x00\x03a\x00\x02\x00\x019", "7\x00\x03a\x00\x02\x00\x019"] │ │ └── ["7\x00\x03a\x00\x02d\x00\x01\n", "7\x00\x03a\x00\x02d\x00\x01\n"] │ ├── key: (1) │ └── fd: (1)-->(6) @@ -2553,7 +2551,6 @@ select │ │ ├── ["7\x00\x03\x00\x01*\x04\x00", "7\x00\x03\x00\x01*\x04\x00"] │ │ ├── ["7\x00\x03\x00\x01*\x06\x00", "7\x00\x03\x00\x01*\x06\x00"] │ │ ├── ["7a\x00\x018", "7a\x00\x018"] - │ │ ├── ["7a\x00\x02\x00\x018", "7a\x00\x02\x00\x018"] │ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x06\x00", "7a\x00\x02\x00\x03\x00\x01*\x06\x00"] │ ├── key: (1) │ └── scan b@j_inv_idx @@ -2568,7 +2565,6 @@ select │ │ ├── ["7\x00\x03\x00\x01*\x04\x00", "7\x00\x03\x00\x01*\x04\x00"] │ │ ├── ["7\x00\x03\x00\x01*\x06\x00", "7\x00\x03\x00\x01*\x06\x00"] │ │ ├── ["7a\x00\x018", "7a\x00\x018"] - │ │ ├── ["7a\x00\x02\x00\x018", "7a\x00\x02\x00\x018"] │ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x06\x00", "7a\x00\x02\x00\x03\x00\x01*\x06\x00"] │ ├── key: (1) │ └── fd: (1)-->(6) @@ -2599,7 +2595,6 @@ select │ │ │ └── union spans │ │ │ ├── ["7\x00\x019", "7\x00\x019"] │ │ │ ├── ["7a\x00\x018", "7a\x00\x018"] - │ │ │ ├── ["7a\x00\x02\x00\x018", "7a\x00\x02\x00\x018"] │ │ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x06\x00", "7a\x00\x02\x00\x03\x00\x01*\x06\x00"] │ │ └── span expression │ │ ├── tight: false, unique: false @@ -2624,7 +2619,6 @@ select │ │ ├── ["7\x00\x03\x00\x01*\x04\x00", "7\x00\x03\x00\x01*\x04\x00"] │ │ ├── ["7\x00\x03\x00\x01*\x06\x00", "7\x00\x03\x00\x01*\x06\x00"] │ │ ├── ["7a\x00\x018", "7a\x00\x018"] - │ │ ├── ["7a\x00\x02\x00\x018", "7a\x00\x02\x00\x018"] │ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x06\x00", "7a\x00\x02\x00\x03\x00\x01*\x06\x00"] │ ├── key: (1) │ └── fd: (1)-->(6) diff --git a/pkg/util/json/encoded.go b/pkg/util/json/encoded.go index e9f316bb784a..f1a53e939d35 100644 --- a/pkg/util/json/encoded.go +++ b/pkg/util/json/encoded.go @@ -745,13 +745,13 @@ func (j *jsonEncoded) encodeContainingInvertedIndexSpans( } func (j *jsonEncoded) encodeContainedInvertedIndexSpans( - b []byte, isRoot bool, + b []byte, isRoot, isObjectValue bool, ) (inverted.Expression, error) { decoded, err := j.decode() if err != nil { return nil, err } - return decoded.encodeContainedInvertedIndexSpans(b, isRoot) + return decoded.encodeContainedInvertedIndexSpans(b, isRoot, isObjectValue) } // numInvertedIndexEntries implements the JSON interface. diff --git a/pkg/util/json/json.go b/pkg/util/json/json.go index e4cdef78451e..567e07e3368b 100644 --- a/pkg/util/json/json.go +++ b/pkg/util/json/json.go @@ -107,7 +107,7 @@ type JSON interface { // If isRoot is true, this function is being called at the root level of the // JSON hierarchy. encodeContainedInvertedIndexSpans( - b []byte, isRoot bool, + b []byte, isRoot, isObjectValue bool, ) (invertedExpr inverted.Expression, err error) // numInvertedIndexEntries returns the number of entries that will be @@ -814,7 +814,7 @@ func EncodeContainedInvertedIndexSpans( b []byte, json JSON, ) (invertedExpr inverted.Expression, err error) { invertedExpr, err = json.encodeContainedInvertedIndexSpans( - encoding.EncodeJSONAscending(b), true, /* isRoot */ + encoding.EncodeJSONAscending(b), true /* isRoot */, false, /* isObjectValue */ ) if err != nil { return nil, err @@ -843,7 +843,7 @@ func (j jsonNull) encodeContainingInvertedIndexSpans( } func (j jsonNull) encodeContainedInvertedIndexSpans( - b []byte, isRoot bool, + b []byte, isRoot, isObjectValue bool, ) (inverted.Expression, error) { invertedExpr, err := encodeContainedInvertedIndexSpansFromLeaf(j, b, isRoot) return invertedExpr, err @@ -861,7 +861,7 @@ func (j jsonTrue) encodeContainingInvertedIndexSpans( } func (j jsonTrue) encodeContainedInvertedIndexSpans( - b []byte, isRoot bool, + b []byte, isRoot, isObjectValue bool, ) (inverted.Expression, error) { invertedExpr, err := encodeContainedInvertedIndexSpansFromLeaf(j, b, isRoot) return invertedExpr, err @@ -879,7 +879,7 @@ func (j jsonFalse) encodeContainingInvertedIndexSpans( } func (j jsonFalse) encodeContainedInvertedIndexSpans( - b []byte, isRoot bool, + b []byte, isRoot, isObjectValue bool, ) (inverted.Expression, error) { invertedExpr, err := encodeContainedInvertedIndexSpansFromLeaf(j, b, isRoot) return invertedExpr, err @@ -897,7 +897,7 @@ func (j jsonString) encodeContainingInvertedIndexSpans( } func (j jsonString) encodeContainedInvertedIndexSpans( - b []byte, isRoot bool, + b []byte, isRoot, isObjectValue bool, ) (inverted.Expression, error) { invertedExpr, err := encodeContainedInvertedIndexSpansFromLeaf(j, b, isRoot) return invertedExpr, err @@ -916,7 +916,7 @@ func (j jsonNumber) encodeContainingInvertedIndexSpans( } func (j jsonNumber) encodeContainedInvertedIndexSpans( - b []byte, isRoot bool, + b []byte, isRoot, isObjectValue bool, ) (inverted.Expression, error) { invertedExpr, err := encodeContainedInvertedIndexSpansFromLeaf(j, b, isRoot) return invertedExpr, err @@ -992,29 +992,37 @@ func (j jsonArray) encodeContainingInvertedIndexSpans( } func (j jsonArray) encodeContainedInvertedIndexSpans( - b []byte, isRoot bool, + b []byte, isRoot, isObjectValue bool, ) (invertedExpr inverted.Expression, err error) { - // The empty array should always be added to the spans, since it is contained - // by everything. - emptyArrSpanExpr := inverted.ExprForSpan( - inverted.MakeSingleValSpan(encoding.EncodeJSONEmptyArray(b[:len(b):len(b)])), false, /* tight */ - ) - emptyArrSpanExpr.Unique = true + if !isObjectValue || len(j) == 0 { + // The empty array should always be added to the spans, since it is contained + // by everything. Empty array values are already accounted for when getting + // the spans for a non-empty object value, so they should be excluded. + emptyArrSpanExpr := inverted.ExprForSpan( + inverted.MakeSingleValSpan(encoding.EncodeJSONEmptyArray(b[:len(b):len(b)])), false, /* tight */ + ) + emptyArrSpanExpr.Unique = true + invertedExpr = emptyArrSpanExpr + } + // If the given jsonArray is empty, we return the SpanExpression. if len(j) == 0 { - return emptyArrSpanExpr, nil + return invertedExpr, nil } - invertedExpr = emptyArrSpanExpr prefix := encoding.EncodeArrayAscending(b[:len(b):len(b)]) for i := range j { childWithPrefix, err := j[i].encodeContainedInvertedIndexSpans( - prefix[:len(prefix):len(prefix)], false, /* isRoot */ + prefix[:len(prefix):len(prefix)], false /* isRoot */, false, /* isObjectValue */ ) if err != nil { return nil, err } - invertedExpr = inverted.Or(invertedExpr, childWithPrefix) + if invertedExpr == nil { + invertedExpr = childWithPrefix + } else { + invertedExpr = inverted.Or(invertedExpr, childWithPrefix) + } // Scalars inside the array should also be included in the spans // without the array prefix, since they are contained by the array. This @@ -1028,7 +1036,7 @@ func (j jsonArray) encodeContainedInvertedIndexSpans( // and empty arrays/objects. if isRoot && isEnd(j[i]) { childWithoutPrefix, err := j[i].encodeContainedInvertedIndexSpans( - b[:len(b):len(b)], false, /* isRoot */ + b[:len(b):len(b)], false /* isRoot */, false, /* isObjectValue */ ) if err != nil { return nil, err @@ -1112,34 +1120,41 @@ func (j jsonObject) encodeContainingInvertedIndexSpans( } func (j jsonObject) encodeContainedInvertedIndexSpans( - b []byte, isRoot bool, + b []byte, isRoot, isObjectValue bool, ) (invertedExpr inverted.Expression, err error) { // The empty object should always be added to the spans, since it is contained - // by everything. - emptyObjSpanExpr := inverted.ExprForSpan( - inverted.MakeSingleValSpan(encoding.EncodeJSONEmptyObject(b[:len(b):len(b)])), false, /* tight */ - ) - emptyObjSpanExpr.Unique = true + // by everything. Empty object values are already accounted for when getting + // the spans for a non-empty object value, so they should be excluded. + if !isObjectValue || len(j) == 0 { + emptyObjSpanExpr := inverted.ExprForSpan( + inverted.MakeSingleValSpan(encoding.EncodeJSONEmptyObject(b[:len(b):len(b)])), false, /* tight */ + ) + emptyObjSpanExpr.Unique = true + invertedExpr = emptyObjSpanExpr + } // If the given jsonObject is empty, we return the SpanExpression. if len(j) == 0 { - return emptyObjSpanExpr, nil + return invertedExpr, nil } - invertedExpr = emptyObjSpanExpr for i := range j { // We're trying to see if this is the end of the JSON path. If it is, then // we don't want to add an extra separator. end := isEnd(j[i].v) prefix := encoding.EncodeJSONKeyStringAscending(b[:len(b):len(b)], string(j[i].k), end) - child, err := j[i].v.encodeContainedInvertedIndexSpans( - prefix, false, /* isRoot */ + prefix, false /* isRoot */, true, /* isObjectValue */ ) if err != nil { return nil, err } - invertedExpr = inverted.Or(invertedExpr, child) + + if invertedExpr == nil { + invertedExpr = child + } else { + invertedExpr = inverted.Or(invertedExpr, child) + } // When we have a nested object or array, we want to include the empty // object or array span with the prefix. For example, '{"a": {"b": "c"}}' @@ -1150,7 +1165,7 @@ func (j jsonObject) encodeContainedInvertedIndexSpans( if v != nil { prefixWithEnd := encoding.EncodeJSONKeyStringAscending(b[:len(b):len(b)], string(j[i].k), true) childWithEnd, err := v.encodeContainedInvertedIndexSpans( - prefixWithEnd, false, /* isRoot */ + prefixWithEnd, false /* isRoot */, true, /* isObjectValue */ ) if err != nil { return nil, err @@ -1349,9 +1364,7 @@ func encodeContainingInvertedIndexSpansFromLeaf( // array spans are not encoded by this function. // // If isRoot is true, this function is being called at the root level of the -// JSON hierarchy. If isObjectValue is true, the given JSON is the value of a -// JSON object key. Note that isRoot and isObjectValue cannot both be true at -// the same time. +// JSON hierarchy. func encodeContainedInvertedIndexSpansFromLeaf( j JSON, b []byte, isRoot bool, ) (invertedExpr inverted.Expression, err error) {