Skip to content

Commit

Permalink
opt,invertedexpr: fix memory corruption issue in inverted join
Browse files Browse the repository at this point in the history
Prior to this commit, it was possible for an inverted join to return
incorrect results if it had an additional scalar filter as part of the
join condition. For example, a join condition such as

  ST_Contains(g1.geom, g2.geom)
  AND ST_Contains(g1.geom, ST_MakePolygon('LINESTRING(0 0, 0 5, 5 5, 0 0)'))

could be susceptible to this issue, because we pre-compute the SpanExpression
for the scalar filter and store it for re-use. The problem was that the
pre-computed SpanExpression was being modified and then reused, leading to
memory corruption.

This commit fixes the problem by making a copy of the pre-computed
SpanExpression before using it and modifying it.

Fixes cockroachdb#55648

Release note (bug fix): Fixed a bug that could occur for spatial queries
involving a join between two spatial columns, when there was an additional
filter on one of the spatial columns, and that column also had an inverted
index defined. This bug could cause incorrect results to be returned, in
which some rows were omitted from the output that should have been included.
  • Loading branch information
rytaft committed Oct 19, 2020
1 parent 973a6f3 commit 78a2311
Show file tree
Hide file tree
Showing 5 changed files with 221 additions and 3 deletions.
85 changes: 85 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial
Original file line number Diff line number Diff line change
Expand Up @@ -330,3 +330,88 @@ WHERE NOT EXISTS (
1
5
6

statement ok
CREATE TABLE g (
k INT PRIMARY KEY,
geom GEOMETRY
)

statement ok
CREATE INVERTED INDEX foo_inv ON g(geom)

statement ok
INSERT INTO g VALUES
(1, ST_MakePolygon('LINESTRING(0 0, 0 15, 15 15, 15 0, 0 0)'::geometry)),
(2, ST_MakePolygon('LINESTRING(0 0, 0 2, 2 2, 2 0, 0 0)'::geometry))

# This query performs an inverted join.
query II
SELECT g1.k, g2.k FROM g@foo_inv AS g1, g@primary AS g2 WHERE ST_Contains(g1.geom, g2.geom) ORDER BY g1.k, g2.k
----
1 1
1 2
2 2

# This query performs a cross join followed by a filter.
query II
SELECT g1.k, g2.k FROM g@primary AS g1, g@primary AS g2 WHERE ST_Contains(g1.geom, g2.geom) ORDER BY g1.k, g2.k
----
1 1
1 2
2 2

# This query is checking that the results of the previous two queries are identical.
# There should be no rows output.
query IIII
SELECT * FROM
(SELECT g1.k, g2.k FROM g@foo_inv AS g1, g@primary AS g2 WHERE ST_Contains(g1.geom, g2.geom)) AS inv_join(k1, k2)
FULL OUTER JOIN
(SELECT g1.k, g2.k FROM g@primary AS g1, g@primary AS g2 WHERE ST_Contains(g1.geom, g2.geom)) AS cross_join(k1, k2)
ON inv_join.k1 = cross_join.k1 AND inv_join.k2 = cross_join.k2
WHERE inv_join.k1 IS NULL OR cross_join.k1 IS NULL
----

# Regression test for #55648.
# This query performs an inverted join with an additional filter.
query II
SELECT g1.k, g2.k FROM g@foo_inv AS g1, g@primary AS g2
WHERE ST_Contains(g1.geom, g2.geom)
AND ST_Contains(g1.geom, ST_MakePolygon('LINESTRING(0 0, 0 5, 5 5, 5 0, 0 0)'::geometry))
AND g2.k < 20
ORDER BY g1.k, g2.k
----
1 1
1 2

# This query performs a cross join followed by a filter.
query II
SELECT g1.k, g2.k FROM g@primary AS g1, g@primary AS g2
WHERE ST_Contains(g1.geom, g2.geom)
AND ST_Contains(g1.geom, ST_MakePolygon('LINESTRING(0 0, 0 5, 5 5, 5 0, 0 0)'::geometry))
AND g2.k < 20
ORDER BY g1.k, g2.k
----
1 1
1 2

# This query is checking that the results of the previous two queries are identical.
# There should be no rows output.
query IIII
SELECT * FROM
(
SELECT g1.k, g2.k FROM g@foo_inv AS g1, g@primary AS g2
WHERE ST_Contains(g1.geom, g2.geom)
AND ST_Contains(g1.geom, ST_MakePolygon('LINESTRING(0 0, 0 5, 5 5, 5 0, 0 0)'::geometry))
AND g2.k < 20
) AS inv_join(k1, k2)
FULL OUTER JOIN
(
SELECT g1.k, g2.k FROM g@primary AS g1, g@primary AS g2
WHERE ST_Contains(g1.geom, g2.geom)
AND ST_Contains(g1.geom, ST_MakePolygon('LINESTRING(0 0, 0 5, 5 5, 5 0, 0 0)'::geometry))
AND g2.k < 20
) AS cross_join(k1, k2)
ON inv_join.k1 = cross_join.k1 AND inv_join.k2 = cross_join.k2
WHERE inv_join.k1 IS NULL OR cross_join.k1 IS NULL
----
Original file line number Diff line number Diff line change
Expand Up @@ -494,3 +494,100 @@ project · ·
· estimated row count 1000 (missing stats) · ·
· table ltable@primary · ·
· spans FULL SCAN · ·

statement ok
CREATE TABLE g (
k INT PRIMARY KEY,
geom GEOMETRY
)

statement ok
CREATE INVERTED INDEX foo_inv ON g(geom)

# This query performs an inverted join.
query TTT
EXPLAIN SELECT g1.k, g2.k FROM g@foo_inv AS g1, g@primary AS g2 WHERE ST_Contains(g1.geom, g2.geom) ORDER BY g1.k, g2.k
----
· distribution local
· vectorized true
sort · ·
│ order +k,+k
└── lookup join · ·
│ table g@primary
│ equality (k) = (k)
│ equality cols are key ·
│ pred st_contains(geom, geom)
└── inverted join · ·
│ table g@foo_inv
└── scan · ·
· missing stats ·
· table g@primary
· spans FULL SCAN

# This query performs a cross join followed by a filter.
query TTT
EXPLAIN SELECT g1.k, g2.k FROM g@primary AS g1, g@primary AS g2 WHERE ST_Contains(g1.geom, g2.geom) ORDER BY g1.k, g2.k
----
· distribution local
· vectorized true
sort · ·
│ order +k,+k
└── cross join · ·
│ pred st_contains(geom, geom)
├── scan · ·
│ missing stats ·
│ table g@primary
│ spans FULL SCAN
└── scan · ·
· missing stats ·
· table g@primary
· spans FULL SCAN

# This query performs an inverted join with an additional filter.
query TTT
EXPLAIN SELECT g1.k, g2.k FROM g@foo_inv AS g1, g@primary AS g2
WHERE ST_Contains(g1.geom, g2.geom)
AND ST_Contains(g1.geom, ST_MakePolygon('LINESTRING(0 0, 0 5, 5 5, 5 0, 0 0)'::geometry))
AND g2.k < 20
ORDER BY g1.k, g2.k
----
· distribution local
· vectorized true
sort · ·
│ order +k,+k
└── lookup join · ·
│ table g@primary
│ equality (k) = (k)
│ equality cols are key ·
│ pred st_contains(geom, geom) AND st_contains(geom, '010300000001000000050000000000000000000000000000000000000000000000000000000000000000001440000000000000144000000000000014400000000000001440000000000000000000000000000000000000000000000000')
└── inverted join · ·
│ table g@foo_inv
└── scan · ·
· missing stats ·
· table g@primary
· spans [ - /19]

# This query performs a cross join followed by a filter.
query TTT
EXPLAIN SELECT g1.k, g2.k FROM g@primary AS g1, g@primary AS g2
WHERE ST_Contains(g1.geom, g2.geom)
AND ST_Contains(g1.geom, ST_MakePolygon('LINESTRING(0 0, 0 5, 5 5, 5 0, 0 0)'::geometry))
AND g2.k < 20
ORDER BY g1.k, g2.k
----
· distribution local
· vectorized true
sort · ·
│ order +k,+k
└── cross join · ·
│ pred st_contains(geom, geom)
├── filter · ·
│ │ filter st_contains(geom, '010300000001000000050000000000000000000000000000000000000000000000000000000000000000001440000000000000144000000000000014400000000000001440000000000000000000000000000000000000000000000000')
│ └── scan · ·
│ missing stats ·
│ table g@primary
│ spans FULL SCAN
└── scan · ·
· missing stats ·
· table g@primary
· spans [ - /19]
36 changes: 34 additions & 2 deletions pkg/sql/opt/invertedexpr/expression.go
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,8 @@ type InvertedExpression interface {
IsTight() bool
// SetNotTight sets tight to false.
SetNotTight()
// Copy makes a copy of the inverted expression.
Copy() InvertedExpression
}

// SpanExpression is an implementation of InvertedExpression.
Expand Down Expand Up @@ -383,6 +385,29 @@ func (s *SpanExpression) SetNotTight() {
s.Tight = false
}

// Copy implements the InvertedExpression interface.
//
// Copy makes a copy of the SpanExpression and returns it. Copy recurses into
// the children and makes copies of them as well, so the new struct is
// independent from the old. It does *not* perform a deep copy of the
// SpansToRead or FactoredUnionSpans slices, however, because those slices are
// never modified in place and therefore are safe to reuse.
func (s *SpanExpression) Copy() InvertedExpression {
res := &SpanExpression{
Tight: s.Tight,
SpansToRead: s.SpansToRead,
FactoredUnionSpans: s.FactoredUnionSpans,
Operator: s.Operator,
}
if s.Left != nil {
res.Left = s.Left.Copy()
}
if s.Right != nil {
res.Right = s.Right.Copy()
}
return res
}

func (s *SpanExpression) String() string {
tp := treeprinter.New()
n := tp.Child("span expression")
Expand Down Expand Up @@ -470,6 +495,11 @@ func (n NonInvertedColExpression) IsTight() bool {
// SetNotTight implements the InvertedExpression interface.
func (n NonInvertedColExpression) SetNotTight() {}

// Copy implements the InvertedExpression interface.
func (n NonInvertedColExpression) Copy() InvertedExpression {
return NonInvertedColExpression{}
}

// ExprForInvertedSpan constructs a leaf-level SpanExpression
// for an inverted expression. Note that these leaf-level
// expressions may also have tight = false. Geospatial functions
Expand All @@ -493,7 +523,8 @@ func ExprForInvertedSpan(span InvertedSpan, tight bool) *SpanExpression {
}
}

// And of two boolean expressions.
// And of two boolean expressions. This function may modify both the left and
// right InvertedExpressions.
func And(left, right InvertedExpression) InvertedExpression {
switch l := left.(type) {
case *SpanExpression:
Expand Down Expand Up @@ -527,7 +558,8 @@ func And(left, right InvertedExpression) InvertedExpression {
}
}

// Or of two boolean expressions.
// Or of two boolean expressions. This function may modify both the left and
// right InvertedExpressions.
func Or(left, right InvertedExpression) InvertedExpression {
switch l := left.(type) {
case *SpanExpression:
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/opt/invertedexpr/expression_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ func (u UnknownExpression) SetNotTight() { u.tight = false }
func (u UnknownExpression) String() string {
return fmt.Sprintf("unknown expression: tight=%t", u.tight)
}
func (u UnknownExpression) Copy() InvertedExpression {
return UnknownExpression{tight: u.tight}
}

// Makes a (shallow) copy of the root node of the expression identified
// by name, since calls to And() and Or() can modify that root node, and
Expand Down
3 changes: 2 additions & 1 deletion pkg/sql/opt/invertedidx/geo.go
Original file line number Diff line number Diff line change
Expand Up @@ -1020,7 +1020,8 @@ func (g *geoDatumsToInvertedExpr) Convert(

case *geoInvertedExpr:
if t.spanExpr != nil {
return t.spanExpr, nil
// We call Copy so the caller can modify the returned expression.
return t.spanExpr.Copy(), nil
}
d, err := t.nonIndexParam.Eval(g.evalCtx)
if err != nil {
Expand Down

0 comments on commit 78a2311

Please sign in to comment.