Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opt: fix duplicate empty JSON value spans for <@ expressions #63355

Merged
merged 1 commit into from
Apr 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -1112,13 +1112,13 @@ vectorized: true
└── • inverted filter
│ columns: (a, b_inverted_key)
│ inverted column: b_inverted_key
│ num spans: 10
│ num spans: 8
└── • scan
columns: (a, b_inverted_key)
estimated row count: 111 (missing stats)
table: d@foo_inv
spans: /"f"-/"f"/PrefixEnd /[]-/{} /Arr/"f"-/Arr/"f"/PrefixEnd /Arr/{}-/Arr/{}/PrefixEnd /Arr/"a"/"b"-/Arr/"a"/"b"/PrefixEnd /Arr/"c"/{}-/Arr/"c"/{}/PrefixEnd /Arr/"c"/{}-/Arr/"c"/{}/PrefixEnd /Arr/"c"/"d"/[]-/Arr/"c"/"d"/{} /Arr/"c"/"d"/[]-/Arr/"c"/"d"/{} /Arr/"c"/"d"/Arr/"e"-/Arr/"c"/"d"/Arr/"e"/PrefixEnd
spans: /"f"-/"f"/PrefixEnd /[]-/{} /Arr/"f"-/Arr/"f"/PrefixEnd /Arr/{}-/Arr/{}/PrefixEnd /Arr/"a"/"b"-/Arr/"a"/"b"/PrefixEnd /Arr/"c"/{}-/Arr/"c"/{}/PrefixEnd /Arr/"c"/"d"/[]-/Arr/"c"/"d"/{} /Arr/"c"/"d"/Arr/"e"-/Arr/"c"/"d"/Arr/"e"/PrefixEnd

# Ensure that an inverted index with a composite primary key still encodes
# the primary key data in the composite value.
Expand Down
6 changes: 0 additions & 6 deletions pkg/sql/opt/xform/testdata/rules/select
Original file line number Diff line number Diff line change
Expand Up @@ -2506,7 +2506,6 @@ select
│ │ ├── ["7\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x01*\x02\x00"]
│ │ ├── ["7\x00\x03\x00\x019", "7\x00\x03\x00\x019"]
│ │ ├── ["7\x00\x03a\x00\x019", "7\x00\x03a\x00\x019"]
│ │ ├── ["7\x00\x03a\x00\x02\x00\x019", "7\x00\x03a\x00\x02\x00\x019"]
│ │ └── ["7\x00\x03a\x00\x02d\x00\x01\n", "7\x00\x03a\x00\x02d\x00\x01\n"]
│ ├── key: (1)
│ └── scan b@j_inv_idx
Expand All @@ -2520,7 +2519,6 @@ select
│ │ ├── ["7\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x01*\x02\x00"]
│ │ ├── ["7\x00\x03\x00\x019", "7\x00\x03\x00\x019"]
│ │ ├── ["7\x00\x03a\x00\x019", "7\x00\x03a\x00\x019"]
│ │ ├── ["7\x00\x03a\x00\x02\x00\x019", "7\x00\x03a\x00\x02\x00\x019"]
│ │ └── ["7\x00\x03a\x00\x02d\x00\x01\n", "7\x00\x03a\x00\x02d\x00\x01\n"]
│ ├── key: (1)
│ └── fd: (1)-->(6)
Expand Down Expand Up @@ -2553,7 +2551,6 @@ select
│ │ ├── ["7\x00\x03\x00\x01*\x04\x00", "7\x00\x03\x00\x01*\x04\x00"]
│ │ ├── ["7\x00\x03\x00\x01*\x06\x00", "7\x00\x03\x00\x01*\x06\x00"]
│ │ ├── ["7a\x00\x018", "7a\x00\x018"]
│ │ ├── ["7a\x00\x02\x00\x018", "7a\x00\x02\x00\x018"]
│ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x06\x00", "7a\x00\x02\x00\x03\x00\x01*\x06\x00"]
│ ├── key: (1)
│ └── scan b@j_inv_idx
Expand All @@ -2568,7 +2565,6 @@ select
│ │ ├── ["7\x00\x03\x00\x01*\x04\x00", "7\x00\x03\x00\x01*\x04\x00"]
│ │ ├── ["7\x00\x03\x00\x01*\x06\x00", "7\x00\x03\x00\x01*\x06\x00"]
│ │ ├── ["7a\x00\x018", "7a\x00\x018"]
│ │ ├── ["7a\x00\x02\x00\x018", "7a\x00\x02\x00\x018"]
│ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x06\x00", "7a\x00\x02\x00\x03\x00\x01*\x06\x00"]
│ ├── key: (1)
│ └── fd: (1)-->(6)
Expand Down Expand Up @@ -2599,7 +2595,6 @@ select
│ │ │ └── union spans
│ │ │ ├── ["7\x00\x019", "7\x00\x019"]
│ │ │ ├── ["7a\x00\x018", "7a\x00\x018"]
│ │ │ ├── ["7a\x00\x02\x00\x018", "7a\x00\x02\x00\x018"]
│ │ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x06\x00", "7a\x00\x02\x00\x03\x00\x01*\x06\x00"]
│ │ └── span expression
│ │ ├── tight: false, unique: false
Expand All @@ -2624,7 +2619,6 @@ select
│ │ ├── ["7\x00\x03\x00\x01*\x04\x00", "7\x00\x03\x00\x01*\x04\x00"]
│ │ ├── ["7\x00\x03\x00\x01*\x06\x00", "7\x00\x03\x00\x01*\x06\x00"]
│ │ ├── ["7a\x00\x018", "7a\x00\x018"]
│ │ ├── ["7a\x00\x02\x00\x018", "7a\x00\x02\x00\x018"]
│ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x06\x00", "7a\x00\x02\x00\x03\x00\x01*\x06\x00"]
│ ├── key: (1)
│ └── fd: (1)-->(6)
Expand Down
4 changes: 2 additions & 2 deletions pkg/util/json/encoded.go
Original file line number Diff line number Diff line change
Expand Up @@ -745,13 +745,13 @@ func (j *jsonEncoded) encodeContainingInvertedIndexSpans(
}

func (j *jsonEncoded) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (inverted.Expression, error) {
decoded, err := j.decode()
if err != nil {
return nil, err
}
return decoded.encodeContainedInvertedIndexSpans(b, isRoot)
return decoded.encodeContainedInvertedIndexSpans(b, isRoot, isObjectValue)
}

// numInvertedIndexEntries implements the JSON interface.
Expand Down
85 changes: 50 additions & 35 deletions pkg/util/json/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,11 @@ type JSON interface {
// comments in the SpanExpression definition for details.
//
// If isRoot is true, this function is being called at the root level of the
// JSON hierarchy.
// JSON hierarchy. If isObjectValue is true, the given JSON is the value of a
// JSON object key. Note that isRoot and isObjectValue cannot both be true at
// the same time.
encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (invertedExpr inverted.Expression, err error)

// numInvertedIndexEntries returns the number of entries that will be
Expand Down Expand Up @@ -814,7 +816,7 @@ func EncodeContainedInvertedIndexSpans(
b []byte, json JSON,
) (invertedExpr inverted.Expression, err error) {
invertedExpr, err = json.encodeContainedInvertedIndexSpans(
encoding.EncodeJSONAscending(b), true, /* isRoot */
encoding.EncodeJSONAscending(b), true /* isRoot */, false, /* isObjectValue */
)
if err != nil {
return nil, err
Expand Down Expand Up @@ -843,7 +845,7 @@ func (j jsonNull) encodeContainingInvertedIndexSpans(
}

func (j jsonNull) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (inverted.Expression, error) {
invertedExpr, err := encodeContainedInvertedIndexSpansFromLeaf(j, b, isRoot)
return invertedExpr, err
Expand All @@ -861,7 +863,7 @@ func (j jsonTrue) encodeContainingInvertedIndexSpans(
}

func (j jsonTrue) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (inverted.Expression, error) {
invertedExpr, err := encodeContainedInvertedIndexSpansFromLeaf(j, b, isRoot)
return invertedExpr, err
Expand All @@ -879,7 +881,7 @@ func (j jsonFalse) encodeContainingInvertedIndexSpans(
}

func (j jsonFalse) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (inverted.Expression, error) {
invertedExpr, err := encodeContainedInvertedIndexSpansFromLeaf(j, b, isRoot)
return invertedExpr, err
Expand All @@ -897,7 +899,7 @@ func (j jsonString) encodeContainingInvertedIndexSpans(
}

func (j jsonString) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (inverted.Expression, error) {
invertedExpr, err := encodeContainedInvertedIndexSpansFromLeaf(j, b, isRoot)
return invertedExpr, err
Expand All @@ -916,7 +918,7 @@ func (j jsonNumber) encodeContainingInvertedIndexSpans(
}

func (j jsonNumber) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (inverted.Expression, error) {
invertedExpr, err := encodeContainedInvertedIndexSpansFromLeaf(j, b, isRoot)
return invertedExpr, err
Expand Down Expand Up @@ -992,29 +994,37 @@ func (j jsonArray) encodeContainingInvertedIndexSpans(
}

func (j jsonArray) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (invertedExpr inverted.Expression, err error) {
// The empty array should always be added to the spans, since it is contained
// by everything.
emptyArrSpanExpr := inverted.ExprForSpan(
inverted.MakeSingleValSpan(encoding.EncodeJSONEmptyArray(b[:len(b):len(b)])), false, /* tight */
)
emptyArrSpanExpr.Unique = true
if !isObjectValue || len(j) == 0 {
// The empty array should always be added to the spans, since it is contained
// by everything. Empty array values are already accounted for when getting
// the spans for a non-empty object value, so they should be excluded.
emptyArrSpanExpr := inverted.ExprForSpan(
inverted.MakeSingleValSpan(encoding.EncodeJSONEmptyArray(b[:len(b):len(b)])), false, /* tight */
)
emptyArrSpanExpr.Unique = true
invertedExpr = emptyArrSpanExpr
}

// If the given jsonArray is empty, we return the SpanExpression.
if len(j) == 0 {
return emptyArrSpanExpr, nil
return invertedExpr, nil
}

invertedExpr = emptyArrSpanExpr
prefix := encoding.EncodeArrayAscending(b[:len(b):len(b)])
for i := range j {
childWithPrefix, err := j[i].encodeContainedInvertedIndexSpans(
prefix[:len(prefix):len(prefix)], false, /* isRoot */
prefix[:len(prefix):len(prefix)], false /* isRoot */, false, /* isObjectValue */
)
if err != nil {
return nil, err
}
invertedExpr = inverted.Or(invertedExpr, childWithPrefix)
if invertedExpr == nil {
invertedExpr = childWithPrefix
} else {
invertedExpr = inverted.Or(invertedExpr, childWithPrefix)
}

// Scalars inside the array should also be included in the spans
// without the array prefix, since they are contained by the array. This
Expand All @@ -1028,7 +1038,7 @@ func (j jsonArray) encodeContainedInvertedIndexSpans(
// and empty arrays/objects.
if isRoot && isEnd(j[i]) {
childWithoutPrefix, err := j[i].encodeContainedInvertedIndexSpans(
b[:len(b):len(b)], false, /* isRoot */
b[:len(b):len(b)], false /* isRoot */, false, /* isObjectValue */
)
if err != nil {
return nil, err
Expand Down Expand Up @@ -1112,34 +1122,41 @@ func (j jsonObject) encodeContainingInvertedIndexSpans(
}

func (j jsonObject) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (invertedExpr inverted.Expression, err error) {
// The empty object should always be added to the spans, since it is contained
// by everything.
emptyObjSpanExpr := inverted.ExprForSpan(
inverted.MakeSingleValSpan(encoding.EncodeJSONEmptyObject(b[:len(b):len(b)])), false, /* tight */
)
emptyObjSpanExpr.Unique = true
// by everything. Empty object values are already accounted for when getting
// the spans for a non-empty object value, so they should be excluded.
if !isObjectValue || len(j) == 0 {
emptyObjSpanExpr := inverted.ExprForSpan(
inverted.MakeSingleValSpan(encoding.EncodeJSONEmptyObject(b[:len(b):len(b)])), false, /* tight */
)
emptyObjSpanExpr.Unique = true
invertedExpr = emptyObjSpanExpr
}
// If the given jsonObject is empty, we return the SpanExpression.
if len(j) == 0 {
return emptyObjSpanExpr, nil
return invertedExpr, nil
}

invertedExpr = emptyObjSpanExpr
for i := range j {
// We're trying to see if this is the end of the JSON path. If it is, then
// we don't want to add an extra separator.
end := isEnd(j[i].v)

prefix := encoding.EncodeJSONKeyStringAscending(b[:len(b):len(b)], string(j[i].k), end)

child, err := j[i].v.encodeContainedInvertedIndexSpans(
prefix, false, /* isRoot */
prefix, false /* isRoot */, true, /* isObjectValue */
)
if err != nil {
return nil, err
}
invertedExpr = inverted.Or(invertedExpr, child)

if invertedExpr == nil {
invertedExpr = child
} else {
invertedExpr = inverted.Or(invertedExpr, child)
}

// When we have a nested object or array, we want to include the empty
// object or array span with the prefix. For example, '{"a": {"b": "c"}}'
Expand All @@ -1150,7 +1167,7 @@ func (j jsonObject) encodeContainedInvertedIndexSpans(
if v != nil {
prefixWithEnd := encoding.EncodeJSONKeyStringAscending(b[:len(b):len(b)], string(j[i].k), true)
childWithEnd, err := v.encodeContainedInvertedIndexSpans(
prefixWithEnd, false, /* isRoot */
prefixWithEnd, false /* isRoot */, true, /* isObjectValue */
)
if err != nil {
return nil, err
Expand Down Expand Up @@ -1349,9 +1366,7 @@ func encodeContainingInvertedIndexSpansFromLeaf(
// array spans are not encoded by this function.
//
// If isRoot is true, this function is being called at the root level of the
// JSON hierarchy. If isObjectValue is true, the given JSON is the value of a
// JSON object key. Note that isRoot and isObjectValue cannot both be true at
// the same time.
// JSON hierarchy.
func encodeContainedInvertedIndexSpansFromLeaf(
j JSON, b []byte, isRoot bool,
) (invertedExpr inverted.Expression, err error) {
Expand Down