Skip to content

Commit

Permalink
opt: fix duplicate empty JSON value spans for <@ expressions
Browse files Browse the repository at this point in the history
Previously, there were some duplicate spans produced for empty JSON values,
such as {"a": []} and {"a": {}}. This was discovered in the spans produced
from contained by and fetch/contained by expressions.

This commit fixes this issue, only allowing one empty object or array span to
be created per level of nesting, in encodedContainedInvertedIndexSpans.

Informs: #61430

Release note: None
  • Loading branch information
angelazxu committed Apr 9, 2021
1 parent 3941029 commit a72d1bb
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 45 deletions.
4 changes: 2 additions & 2 deletions pkg/sql/opt/exec/execbuilder/testdata/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -1112,13 +1112,13 @@ vectorized: true
└── • inverted filter
│ columns: (a, b_inverted_key)
│ inverted column: b_inverted_key
│ num spans: 10
│ num spans: 8
└── • scan
columns: (a, b_inverted_key)
estimated row count: 111 (missing stats)
table: d@foo_inv
spans: /"f"-/"f"/PrefixEnd /[]-/{} /Arr/"f"-/Arr/"f"/PrefixEnd /Arr/{}-/Arr/{}/PrefixEnd /Arr/"a"/"b"-/Arr/"a"/"b"/PrefixEnd /Arr/"c"/{}-/Arr/"c"/{}/PrefixEnd /Arr/"c"/{}-/Arr/"c"/{}/PrefixEnd /Arr/"c"/"d"/[]-/Arr/"c"/"d"/{} /Arr/"c"/"d"/[]-/Arr/"c"/"d"/{} /Arr/"c"/"d"/Arr/"e"-/Arr/"c"/"d"/Arr/"e"/PrefixEnd
spans: /"f"-/"f"/PrefixEnd /[]-/{} /Arr/"f"-/Arr/"f"/PrefixEnd /Arr/{}-/Arr/{}/PrefixEnd /Arr/"a"/"b"-/Arr/"a"/"b"/PrefixEnd /Arr/"c"/{}-/Arr/"c"/{}/PrefixEnd /Arr/"c"/"d"/[]-/Arr/"c"/"d"/{} /Arr/"c"/"d"/Arr/"e"-/Arr/"c"/"d"/Arr/"e"/PrefixEnd

# Ensure that an inverted index with a composite primary key still encodes
# the primary key data in the composite value.
Expand Down
6 changes: 0 additions & 6 deletions pkg/sql/opt/xform/testdata/rules/select
Original file line number Diff line number Diff line change
Expand Up @@ -2506,7 +2506,6 @@ select
│ │ ├── ["7\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x01*\x02\x00"]
│ │ ├── ["7\x00\x03\x00\x019", "7\x00\x03\x00\x019"]
│ │ ├── ["7\x00\x03a\x00\x019", "7\x00\x03a\x00\x019"]
│ │ ├── ["7\x00\x03a\x00\x02\x00\x019", "7\x00\x03a\x00\x02\x00\x019"]
│ │ └── ["7\x00\x03a\x00\x02d\x00\x01\n", "7\x00\x03a\x00\x02d\x00\x01\n"]
│ ├── key: (1)
│ └── scan b@j_inv_idx
Expand All @@ -2520,7 +2519,6 @@ select
│ │ ├── ["7\x00\x03\x00\x01*\x02\x00", "7\x00\x03\x00\x01*\x02\x00"]
│ │ ├── ["7\x00\x03\x00\x019", "7\x00\x03\x00\x019"]
│ │ ├── ["7\x00\x03a\x00\x019", "7\x00\x03a\x00\x019"]
│ │ ├── ["7\x00\x03a\x00\x02\x00\x019", "7\x00\x03a\x00\x02\x00\x019"]
│ │ └── ["7\x00\x03a\x00\x02d\x00\x01\n", "7\x00\x03a\x00\x02d\x00\x01\n"]
│ ├── key: (1)
│ └── fd: (1)-->(6)
Expand Down Expand Up @@ -2553,7 +2551,6 @@ select
│ │ ├── ["7\x00\x03\x00\x01*\x04\x00", "7\x00\x03\x00\x01*\x04\x00"]
│ │ ├── ["7\x00\x03\x00\x01*\x06\x00", "7\x00\x03\x00\x01*\x06\x00"]
│ │ ├── ["7a\x00\x018", "7a\x00\x018"]
│ │ ├── ["7a\x00\x02\x00\x018", "7a\x00\x02\x00\x018"]
│ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x06\x00", "7a\x00\x02\x00\x03\x00\x01*\x06\x00"]
│ ├── key: (1)
│ └── scan b@j_inv_idx
Expand All @@ -2568,7 +2565,6 @@ select
│ │ ├── ["7\x00\x03\x00\x01*\x04\x00", "7\x00\x03\x00\x01*\x04\x00"]
│ │ ├── ["7\x00\x03\x00\x01*\x06\x00", "7\x00\x03\x00\x01*\x06\x00"]
│ │ ├── ["7a\x00\x018", "7a\x00\x018"]
│ │ ├── ["7a\x00\x02\x00\x018", "7a\x00\x02\x00\x018"]
│ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x06\x00", "7a\x00\x02\x00\x03\x00\x01*\x06\x00"]
│ ├── key: (1)
│ └── fd: (1)-->(6)
Expand Down Expand Up @@ -2599,7 +2595,6 @@ select
│ │ │ └── union spans
│ │ │ ├── ["7\x00\x019", "7\x00\x019"]
│ │ │ ├── ["7a\x00\x018", "7a\x00\x018"]
│ │ │ ├── ["7a\x00\x02\x00\x018", "7a\x00\x02\x00\x018"]
│ │ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x06\x00", "7a\x00\x02\x00\x03\x00\x01*\x06\x00"]
│ │ └── span expression
│ │ ├── tight: false, unique: false
Expand All @@ -2624,7 +2619,6 @@ select
│ │ ├── ["7\x00\x03\x00\x01*\x04\x00", "7\x00\x03\x00\x01*\x04\x00"]
│ │ ├── ["7\x00\x03\x00\x01*\x06\x00", "7\x00\x03\x00\x01*\x06\x00"]
│ │ ├── ["7a\x00\x018", "7a\x00\x018"]
│ │ ├── ["7a\x00\x02\x00\x018", "7a\x00\x02\x00\x018"]
│ │ └── ["7a\x00\x02\x00\x03\x00\x01*\x06\x00", "7a\x00\x02\x00\x03\x00\x01*\x06\x00"]
│ ├── key: (1)
│ └── fd: (1)-->(6)
Expand Down
4 changes: 2 additions & 2 deletions pkg/util/json/encoded.go
Original file line number Diff line number Diff line change
Expand Up @@ -745,13 +745,13 @@ func (j *jsonEncoded) encodeContainingInvertedIndexSpans(
}

func (j *jsonEncoded) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (inverted.Expression, error) {
decoded, err := j.decode()
if err != nil {
return nil, err
}
return decoded.encodeContainedInvertedIndexSpans(b, isRoot)
return decoded.encodeContainedInvertedIndexSpans(b, isRoot, isObjectValue)
}

// numInvertedIndexEntries implements the JSON interface.
Expand Down
85 changes: 50 additions & 35 deletions pkg/util/json/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,11 @@ type JSON interface {
// comments in the SpanExpression definition for details.
//
// If isRoot is true, this function is being called at the root level of the
// JSON hierarchy.
// JSON hierarchy. If isObjectValue is true, the given JSON is the value of a
// JSON object key. Note that isRoot and isObjectValue cannot both be true at
// the same time.
encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (invertedExpr inverted.Expression, err error)

// numInvertedIndexEntries returns the number of entries that will be
Expand Down Expand Up @@ -814,7 +816,7 @@ func EncodeContainedInvertedIndexSpans(
b []byte, json JSON,
) (invertedExpr inverted.Expression, err error) {
invertedExpr, err = json.encodeContainedInvertedIndexSpans(
encoding.EncodeJSONAscending(b), true, /* isRoot */
encoding.EncodeJSONAscending(b), true /* isRoot */, false, /* isObjectValue */
)
if err != nil {
return nil, err
Expand Down Expand Up @@ -843,7 +845,7 @@ func (j jsonNull) encodeContainingInvertedIndexSpans(
}

func (j jsonNull) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (inverted.Expression, error) {
invertedExpr, err := encodeContainedInvertedIndexSpansFromLeaf(j, b, isRoot)
return invertedExpr, err
Expand All @@ -861,7 +863,7 @@ func (j jsonTrue) encodeContainingInvertedIndexSpans(
}

func (j jsonTrue) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (inverted.Expression, error) {
invertedExpr, err := encodeContainedInvertedIndexSpansFromLeaf(j, b, isRoot)
return invertedExpr, err
Expand All @@ -879,7 +881,7 @@ func (j jsonFalse) encodeContainingInvertedIndexSpans(
}

func (j jsonFalse) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (inverted.Expression, error) {
invertedExpr, err := encodeContainedInvertedIndexSpansFromLeaf(j, b, isRoot)
return invertedExpr, err
Expand All @@ -897,7 +899,7 @@ func (j jsonString) encodeContainingInvertedIndexSpans(
}

func (j jsonString) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (inverted.Expression, error) {
invertedExpr, err := encodeContainedInvertedIndexSpansFromLeaf(j, b, isRoot)
return invertedExpr, err
Expand All @@ -916,7 +918,7 @@ func (j jsonNumber) encodeContainingInvertedIndexSpans(
}

func (j jsonNumber) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (inverted.Expression, error) {
invertedExpr, err := encodeContainedInvertedIndexSpansFromLeaf(j, b, isRoot)
return invertedExpr, err
Expand Down Expand Up @@ -992,29 +994,37 @@ func (j jsonArray) encodeContainingInvertedIndexSpans(
}

func (j jsonArray) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (invertedExpr inverted.Expression, err error) {
// The empty array should always be added to the spans, since it is contained
// by everything.
emptyArrSpanExpr := inverted.ExprForSpan(
inverted.MakeSingleValSpan(encoding.EncodeJSONEmptyArray(b[:len(b):len(b)])), false, /* tight */
)
emptyArrSpanExpr.Unique = true
if !isObjectValue || len(j) == 0 {
// The empty array should always be added to the spans, since it is contained
// by everything. Empty array values are already accounted for when getting
// the spans for a non-empty object value, so they should be excluded.
emptyArrSpanExpr := inverted.ExprForSpan(
inverted.MakeSingleValSpan(encoding.EncodeJSONEmptyArray(b[:len(b):len(b)])), false, /* tight */
)
emptyArrSpanExpr.Unique = true
invertedExpr = emptyArrSpanExpr
}

// If the given jsonArray is empty, we return the SpanExpression.
if len(j) == 0 {
return emptyArrSpanExpr, nil
return invertedExpr, nil
}

invertedExpr = emptyArrSpanExpr
prefix := encoding.EncodeArrayAscending(b[:len(b):len(b)])
for i := range j {
childWithPrefix, err := j[i].encodeContainedInvertedIndexSpans(
prefix[:len(prefix):len(prefix)], false, /* isRoot */
prefix[:len(prefix):len(prefix)], false /* isRoot */, false, /* isObjectValue */
)
if err != nil {
return nil, err
}
invertedExpr = inverted.Or(invertedExpr, childWithPrefix)
if invertedExpr == nil {
invertedExpr = childWithPrefix
} else {
invertedExpr = inverted.Or(invertedExpr, childWithPrefix)
}

// Scalars inside the array should also be included in the spans
// without the array prefix, since they are contained by the array. This
Expand All @@ -1028,7 +1038,7 @@ func (j jsonArray) encodeContainedInvertedIndexSpans(
// and empty arrays/objects.
if isRoot && isEnd(j[i]) {
childWithoutPrefix, err := j[i].encodeContainedInvertedIndexSpans(
b[:len(b):len(b)], false, /* isRoot */
b[:len(b):len(b)], false /* isRoot */, false, /* isObjectValue */
)
if err != nil {
return nil, err
Expand Down Expand Up @@ -1112,34 +1122,41 @@ func (j jsonObject) encodeContainingInvertedIndexSpans(
}

func (j jsonObject) encodeContainedInvertedIndexSpans(
b []byte, isRoot bool,
b []byte, isRoot, isObjectValue bool,
) (invertedExpr inverted.Expression, err error) {
// The empty object should always be added to the spans, since it is contained
// by everything.
emptyObjSpanExpr := inverted.ExprForSpan(
inverted.MakeSingleValSpan(encoding.EncodeJSONEmptyObject(b[:len(b):len(b)])), false, /* tight */
)
emptyObjSpanExpr.Unique = true
// by everything. Empty object values are already accounted for when getting
// the spans for a non-empty object value, so they should be excluded.
if !isObjectValue || len(j) == 0 {
emptyObjSpanExpr := inverted.ExprForSpan(
inverted.MakeSingleValSpan(encoding.EncodeJSONEmptyObject(b[:len(b):len(b)])), false, /* tight */
)
emptyObjSpanExpr.Unique = true
invertedExpr = emptyObjSpanExpr
}
// If the given jsonObject is empty, we return the SpanExpression.
if len(j) == 0 {
return emptyObjSpanExpr, nil
return invertedExpr, nil
}

invertedExpr = emptyObjSpanExpr
for i := range j {
// We're trying to see if this is the end of the JSON path. If it is, then
// we don't want to add an extra separator.
end := isEnd(j[i].v)

prefix := encoding.EncodeJSONKeyStringAscending(b[:len(b):len(b)], string(j[i].k), end)

child, err := j[i].v.encodeContainedInvertedIndexSpans(
prefix, false, /* isRoot */
prefix, false /* isRoot */, true, /* isObjectValue */
)
if err != nil {
return nil, err
}
invertedExpr = inverted.Or(invertedExpr, child)

if invertedExpr == nil {
invertedExpr = child
} else {
invertedExpr = inverted.Or(invertedExpr, child)
}

// When we have a nested object or array, we want to include the empty
// object or array span with the prefix. For example, '{"a": {"b": "c"}}'
Expand All @@ -1150,7 +1167,7 @@ func (j jsonObject) encodeContainedInvertedIndexSpans(
if v != nil {
prefixWithEnd := encoding.EncodeJSONKeyStringAscending(b[:len(b):len(b)], string(j[i].k), true)
childWithEnd, err := v.encodeContainedInvertedIndexSpans(
prefixWithEnd, false, /* isRoot */
prefixWithEnd, false /* isRoot */, true, /* isObjectValue */
)
if err != nil {
return nil, err
Expand Down Expand Up @@ -1349,9 +1366,7 @@ func encodeContainingInvertedIndexSpansFromLeaf(
// array spans are not encoded by this function.
//
// If isRoot is true, this function is being called at the root level of the
// JSON hierarchy. If isObjectValue is true, the given JSON is the value of a
// JSON object key. Note that isRoot and isObjectValue cannot both be true at
// the same time.
// JSON hierarchy.
func encodeContainedInvertedIndexSpansFromLeaf(
j JSON, b []byte, isRoot bool,
) (invertedExpr inverted.Expression, err error) {
Expand Down

0 comments on commit a72d1bb

Please sign in to comment.