Skip to content

Commit

Permalink
sql: fix index out of range error in inverted filterer
Browse files Browse the repository at this point in the history
Prior to this commit, it was possible that the SpansToRead in
an inverted.SpanExpression were a superset of the spans that were
needed to evaluate the span expression. This could result in an index
out of range error in the invertedFilterer when a span from the input
didn't exist in its list of needed spans. This commit fixes the
problem by ensuring that SpansToRead does not contain spans that are
not needed to evaluate the span expression.

Fixes #63180

Release note (bug fix): Fixed an internal error that could occur
when executing queries using an inverted index. The error was an index
out of range error, and could occur in rare cases when a filter or
join predicate contained at least two JSON, Array, Geometry or
Geography expressions that were combined with AND. This has now been
fixed.
  • Loading branch information
rytaft committed Apr 17, 2021
1 parent a0008ec commit 7b985b6
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 15 deletions.
28 changes: 23 additions & 5 deletions pkg/sql/inverted/expression.go
Original file line number Diff line number Diff line change
Expand Up @@ -650,12 +650,17 @@ func opSpanExpressionAndDefault(

// Intersects two SpanExpressions.
func intersectSpanExpressions(left, right *SpanExpression) *SpanExpression {
// Since we simply union into SpansToRead, we can end up with
// FactoredUnionSpans as a subset of SpanToRead *and* both children pruned.
// TODO(sumeer): tighten the SpansToRead for this case.
expr := &SpanExpression{
Tight: left.Tight && right.Tight,
Unique: left.Unique && right.Unique,
Tight: left.Tight && right.Tight,
Unique: left.Unique && right.Unique,

// We calculate SpansToRead as the union of the left and right sides as a
// first approximation, but this may result in too many spans if either of
// the children are pruned below. SpansToRead will be recomputed in
// tryPruneChildren if needed. (It is important that SpansToRead be exactly
// what would be computed if a caller traversed the tree and explicitly
// unioned all the FactoredUnionSpans, and no looser, since the execution
// code path relies on this property.)
SpansToRead: unionSpans(left.SpansToRead, right.SpansToRead),
FactoredUnionSpans: intersectSpans(left.FactoredUnionSpans, right.FactoredUnionSpans),
Operator: SetIntersection,
Expand Down Expand Up @@ -709,6 +714,19 @@ func tryPruneChildren(expr *SpanExpression, op SetOperator) {
expr.Operator = child.Operator
expr.Left = child.Left
expr.Right = child.Right

// If child.FactoredUnionSpans is non-empty, we need to recalculate
// SpansToRead since it may have contained some spans that were removed by
// discarding child.FactoredUnionSpans.
if child.FactoredUnionSpans != nil {
expr.SpansToRead = expr.FactoredUnionSpans
if expr.Left != nil {
expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Left.(*SpanExpression).SpansToRead)
}
if expr.Right != nil {
expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Right.(*SpanExpression).SpansToRead)
}
}
}
promoteLeft := expr.Left != nil && expr.Right == nil
promoteRight := expr.Left == nil && expr.Right != nil
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/inverted/testdata/expression
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ and result=_ left=b right=ac
----
span expression
├── tight: true, unique: true
├── to read: ["a", "c")
├── to read: ["b", "b"]
└── union spans: ["b", "b"]

new-span-leaf name=bj tight=true unique=true span=b,j
Expand All @@ -265,7 +265,7 @@ and result=_ left=b right=bj
----
span expression
├── tight: true, unique: true
├── to read: ["b", "j")
├── to read: ["b", "b"]
└── union spans: ["b", "b"]

# [b, j) or [a, c) = [a, j)
Expand Down
24 changes: 17 additions & 7 deletions pkg/sql/logictest/testdata/logic_test/inverted_index
Original file line number Diff line number Diff line change
Expand Up @@ -1036,14 +1036,18 @@ SELECT j FROM f@i WHERE j->'a' @> '"b"' AND '["c"]' <@ j->'a' ORDER BY k
{"a": ["b", "c", "d", "e"]}
{"a": ["b", "e", "c", "d"]}

#TODO(angelazxu): Uncomment these tests once #63180 is fixed.
# query T
# SELECT j FROM f@i WHERE j->'a' <@ '{"b": [1, 2]}' AND j->'a'->'b' @> '[1]' ORDER BY k
# ----
query T
SELECT j FROM f@i WHERE j->'a' <@ '{"b": [1, 2]}' AND j->'a'->'b' @> '[1]' ORDER BY k
----
{"a": {"b": [1, 2]}}

# query T
# SELECT j FROM f@i WHERE j->'a' @> '"b"' AND j->'a' <@ '["b", "c", "d", "e"]' ORDER BY k
# ----
query T
SELECT j FROM f@i WHERE j->'a' @> '"b"' AND j->'a' <@ '["b", "c", "d", "e"]' ORDER BY k
----
{"a": ["b", "c", "d", "e"]}
{"a": ["b", "e", "c", "d"]}
{"a": "b", "x": ["c", "d", "e"]}
{"a": "b", "c": [{"d": 1}, {"e": 2}]}

query T
SELECT j FROM f@i WHERE j->'a' @> '{"d": 2}' AND '[1, 2]' @> j->'a'->'b' ORDER BY k
Expand Down Expand Up @@ -1084,6 +1088,12 @@ SELECT j FROM f@i WHERE j->'a'->'b' <@ '{"c": [1, 2], "d": 2}' OR j->'a'->'b' <@
{"a": {"b": "c"}, "d": "e"}
{"a": {"b": []}}

# Regression test for #63180. This query should not cause an index out of range
# error in the inverted filterer.
query T
SELECT j FROM f@i WHERE j @> '{"a": "c"}' AND (j @> '{"a": "b"}' OR j @> '{"a": "c"}')
----

subtest arrays

statement ok
Expand Down
32 changes: 31 additions & 1 deletion pkg/sql/opt/xform/testdata/rules/select
Original file line number Diff line number Diff line change
Expand Up @@ -3903,7 +3903,8 @@ project
│ ├── inverted constraint: /7/1
│ │ └── spans
│ │ ├── ["B\xfdL\x00\x00\x00\x00\x00\x00\x00", "B\xfdL\x00\x00\x00\x00\x00\x00\x00"]
│ │ └── ["B\xfdN\x00\x00\x00\x00\x00\x00\x01", "B\xfdP\x00\x00\x00\x00\x00\x00\x01")
│ │ ├── ["B\xfdO\x00\x00\x00\x00\x00\x00\x00", "B\xfdO\x00\x00\x00\x00\x00\x00\x00"]
│ │ └── ["B\xfdP\x00\x00\x00\x00\x00\x00\x00", "B\xfdP\x00\x00\x00\x00\x00\x00\x00"]
│ ├── key: (1)
│ └── fd: (1)-->(7)
└── filters
Expand Down Expand Up @@ -5135,6 +5136,35 @@ select
└── filters
└── st_dfullywithin(c:1, '0101000000000000000000F03F000000000000F03F', CAST(NULL AS FLOAT8)) [outer=(1), immutable, constraints=(/1: (/NULL - ])]

# Regression test for #63180. Ensure that we only scan spans needed by the
# inverted filter.
exec-ddl
CREATE TABLE t63180 (
j JSON,
INVERTED INDEX j_idx (j)
)
----

opt expect=GenerateInvertedIndexScans
SELECT j FROM t63180@j_idx WHERE j @> '{"a": "c"}' AND (j @> '{"a": "b"}' OR j @> '{"a": "c"}');
----
index-join t63180
├── columns: j:1!null
├── immutable
└── inverted-filter
├── columns: rowid:2!null
├── inverted expression: /4
│ ├── tight: true, unique: false
│ └── union spans: ["7a\x00\x01\x12c\x00\x01", "7a\x00\x01\x12c\x00\x01"]
├── key: (2)
└── scan t63180@j_idx
├── columns: rowid:2!null j_inverted_key:4!null
├── inverted constraint: /4/2
│ └── spans: ["7a\x00\x01\x12c\x00\x01", "7a\x00\x01\x12c\x00\x01"]
├── flags: force-index=j_idx
├── key: (2)
└── fd: (2)-->(4)

# --------------------------------------------------
# GenerateZigzagJoins
# --------------------------------------------------
Expand Down

0 comments on commit 7b985b6

Please sign in to comment.