From 7b985b6d6e5a5599fc6ff59231ae95f6fd19bd7b Mon Sep 17 00:00:00 2001 From: Rebecca Taft Date: Tue, 13 Apr 2021 13:55:48 -0500 Subject: [PATCH] sql: fix index out of range error in inverted filterer Prior to this commit, it was possible that the SpansToRead in an inverted.SpanExpression were a superset of the spans that were needed to evaluate the span expression. This could result in an index out of range error in the invertedFilterer when a span from the input didn't exist in its list of needed spans. This commit fixes the problem by ensuring that SpansToRead does not contain spans that are not needed to evaluate the span expression. Fixes #63180 Release note (bug fix): Fixed an internal error that could occur when executing queries using an inverted index. The error was an index out of range error, and could occur in rare cases when a filter or join predicate contained at least two JSON, Array, Geometry or Geography expressions that were combined with AND. This has now been fixed. --- pkg/sql/inverted/expression.go | 28 +++++++++++++--- pkg/sql/inverted/testdata/expression | 4 +-- .../testdata/logic_test/inverted_index | 24 ++++++++++---- pkg/sql/opt/xform/testdata/rules/select | 32 ++++++++++++++++++- 4 files changed, 73 insertions(+), 15 deletions(-) diff --git a/pkg/sql/inverted/expression.go b/pkg/sql/inverted/expression.go index b9b1d84c13d4..d45b09b16d53 100644 --- a/pkg/sql/inverted/expression.go +++ b/pkg/sql/inverted/expression.go @@ -650,12 +650,17 @@ func opSpanExpressionAndDefault( // Intersects two SpanExpressions. func intersectSpanExpressions(left, right *SpanExpression) *SpanExpression { - // Since we simply union into SpansToRead, we can end up with - // FactoredUnionSpans as a subset of SpanToRead *and* both children pruned. - // TODO(sumeer): tighten the SpansToRead for this case. expr := &SpanExpression{ - Tight: left.Tight && right.Tight, - Unique: left.Unique && right.Unique, + Tight: left.Tight && right.Tight, + Unique: left.Unique && right.Unique, + + // We calculate SpansToRead as the union of the left and right sides as a + // first approximation, but this may result in too many spans if either of + // the children are pruned below. SpansToRead will be recomputed in + // tryPruneChildren if needed. (It is important that SpansToRead be exactly + // what would be computed if a caller traversed the tree and explicitly + // unioned all the FactoredUnionSpans, and no looser, since the execution + // code path relies on this property.) SpansToRead: unionSpans(left.SpansToRead, right.SpansToRead), FactoredUnionSpans: intersectSpans(left.FactoredUnionSpans, right.FactoredUnionSpans), Operator: SetIntersection, @@ -709,6 +714,19 @@ func tryPruneChildren(expr *SpanExpression, op SetOperator) { expr.Operator = child.Operator expr.Left = child.Left expr.Right = child.Right + + // If child.FactoredUnionSpans is non-empty, we need to recalculate + // SpansToRead since it may have contained some spans that were removed by + // discarding child.FactoredUnionSpans. + if child.FactoredUnionSpans != nil { + expr.SpansToRead = expr.FactoredUnionSpans + if expr.Left != nil { + expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Left.(*SpanExpression).SpansToRead) + } + if expr.Right != nil { + expr.SpansToRead = unionSpans(expr.SpansToRead, expr.Right.(*SpanExpression).SpansToRead) + } + } } promoteLeft := expr.Left != nil && expr.Right == nil promoteRight := expr.Left == nil && expr.Right != nil diff --git a/pkg/sql/inverted/testdata/expression b/pkg/sql/inverted/testdata/expression index bb9e00070031..15b9f0ab4acb 100644 --- a/pkg/sql/inverted/testdata/expression +++ b/pkg/sql/inverted/testdata/expression @@ -242,7 +242,7 @@ and result=_ left=b right=ac ---- span expression ├── tight: true, unique: true - ├── to read: ["a", "c") + ├── to read: ["b", "b"] └── union spans: ["b", "b"] new-span-leaf name=bj tight=true unique=true span=b,j @@ -265,7 +265,7 @@ and result=_ left=b right=bj ---- span expression ├── tight: true, unique: true - ├── to read: ["b", "j") + ├── to read: ["b", "b"] └── union spans: ["b", "b"] # [b, j) or [a, c) = [a, j) diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_index b/pkg/sql/logictest/testdata/logic_test/inverted_index index a96fbadae9c7..bdee7df3ebc4 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_index +++ b/pkg/sql/logictest/testdata/logic_test/inverted_index @@ -1036,14 +1036,18 @@ SELECT j FROM f@i WHERE j->'a' @> '"b"' AND '["c"]' <@ j->'a' ORDER BY k {"a": ["b", "c", "d", "e"]} {"a": ["b", "e", "c", "d"]} -#TODO(angelazxu): Uncomment these tests once #63180 is fixed. -# query T -# SELECT j FROM f@i WHERE j->'a' <@ '{"b": [1, 2]}' AND j->'a'->'b' @> '[1]' ORDER BY k -# ---- +query T +SELECT j FROM f@i WHERE j->'a' <@ '{"b": [1, 2]}' AND j->'a'->'b' @> '[1]' ORDER BY k +---- +{"a": {"b": [1, 2]}} -# query T -# SELECT j FROM f@i WHERE j->'a' @> '"b"' AND j->'a' <@ '["b", "c", "d", "e"]' ORDER BY k -# ---- +query T +SELECT j FROM f@i WHERE j->'a' @> '"b"' AND j->'a' <@ '["b", "c", "d", "e"]' ORDER BY k +---- +{"a": ["b", "c", "d", "e"]} +{"a": ["b", "e", "c", "d"]} +{"a": "b", "x": ["c", "d", "e"]} +{"a": "b", "c": [{"d": 1}, {"e": 2}]} query T SELECT j FROM f@i WHERE j->'a' @> '{"d": 2}' AND '[1, 2]' @> j->'a'->'b' ORDER BY k @@ -1084,6 +1088,12 @@ SELECT j FROM f@i WHERE j->'a'->'b' <@ '{"c": [1, 2], "d": 2}' OR j->'a'->'b' <@ {"a": {"b": "c"}, "d": "e"} {"a": {"b": []}} +# Regression test for #63180. This query should not cause an index out of range +# error in the inverted filterer. +query T +SELECT j FROM f@i WHERE j @> '{"a": "c"}' AND (j @> '{"a": "b"}' OR j @> '{"a": "c"}') +---- + subtest arrays statement ok diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index c87a0e463a57..8e8edb811c82 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -3903,7 +3903,8 @@ project │ ├── inverted constraint: /7/1 │ │ └── spans │ │ ├── ["B\xfdL\x00\x00\x00\x00\x00\x00\x00", "B\xfdL\x00\x00\x00\x00\x00\x00\x00"] - │ │ └── ["B\xfdN\x00\x00\x00\x00\x00\x00\x01", "B\xfdP\x00\x00\x00\x00\x00\x00\x01") + │ │ ├── ["B\xfdO\x00\x00\x00\x00\x00\x00\x00", "B\xfdO\x00\x00\x00\x00\x00\x00\x00"] + │ │ └── ["B\xfdP\x00\x00\x00\x00\x00\x00\x00", "B\xfdP\x00\x00\x00\x00\x00\x00\x00"] │ ├── key: (1) │ └── fd: (1)-->(7) └── filters @@ -5135,6 +5136,35 @@ select └── filters └── st_dfullywithin(c:1, '0101000000000000000000F03F000000000000F03F', CAST(NULL AS FLOAT8)) [outer=(1), immutable, constraints=(/1: (/NULL - ])] +# Regression test for #63180. Ensure that we only scan spans needed by the +# inverted filter. +exec-ddl +CREATE TABLE t63180 ( + j JSON, + INVERTED INDEX j_idx (j) +) +---- + +opt expect=GenerateInvertedIndexScans +SELECT j FROM t63180@j_idx WHERE j @> '{"a": "c"}' AND (j @> '{"a": "b"}' OR j @> '{"a": "c"}'); +---- +index-join t63180 + ├── columns: j:1!null + ├── immutable + └── inverted-filter + ├── columns: rowid:2!null + ├── inverted expression: /4 + │ ├── tight: true, unique: false + │ └── union spans: ["7a\x00\x01\x12c\x00\x01", "7a\x00\x01\x12c\x00\x01"] + ├── key: (2) + └── scan t63180@j_idx + ├── columns: rowid:2!null j_inverted_key:4!null + ├── inverted constraint: /4/2 + │ └── spans: ["7a\x00\x01\x12c\x00\x01", "7a\x00\x01\x12c\x00\x01"] + ├── flags: force-index=j_idx + ├── key: (2) + └── fd: (2)-->(4) + # -------------------------------------------------- # GenerateZigzagJoins # --------------------------------------------------