diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial index 6bf262fa7a5e..8928ba04ae3f 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial +++ b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial @@ -393,3 +393,88 @@ WHERE ST_Intersects(ltable.geom1, rtable2.geom)) ORDER BY lk ---- 4 6 + +statement ok +CREATE TABLE g ( + k INT PRIMARY KEY, + geom GEOMETRY +) + +statement ok +CREATE INVERTED INDEX foo_inv ON g(geom) + +statement ok +INSERT INTO g VALUES + (1, ST_MakePolygon('LINESTRING(0 0, 0 15, 15 15, 15 0, 0 0)'::geometry)), + (2, ST_MakePolygon('LINESTRING(0 0, 0 2, 2 2, 2 0, 0 0)'::geometry)) + +# This query performs an inverted join. +query II +SELECT g1.k, g2.k FROM g@foo_inv AS g1, g@primary AS g2 WHERE ST_Contains(g1.geom, g2.geom) ORDER BY g1.k, g2.k +---- +1 1 +1 2 +2 2 + +# This query performs a cross join followed by a filter. +query II +SELECT g1.k, g2.k FROM g@primary AS g1, g@primary AS g2 WHERE ST_Contains(g1.geom, g2.geom) ORDER BY g1.k, g2.k +---- +1 1 +1 2 +2 2 + +# This query is checking that the results of the previous two queries are identical. +# There should be no rows output. +query IIII +SELECT * FROM +(SELECT g1.k, g2.k FROM g@foo_inv AS g1, g@primary AS g2 WHERE ST_Contains(g1.geom, g2.geom)) AS inv_join(k1, k2) +FULL OUTER JOIN +(SELECT g1.k, g2.k FROM g@primary AS g1, g@primary AS g2 WHERE ST_Contains(g1.geom, g2.geom)) AS cross_join(k1, k2) +ON inv_join.k1 = cross_join.k1 AND inv_join.k2 = cross_join.k2 +WHERE inv_join.k1 IS NULL OR cross_join.k1 IS NULL +---- + +# Regression test for #55648. +# This query performs an inverted join with an additional filter. +query II +SELECT g1.k, g2.k FROM g@foo_inv AS g1, g@primary AS g2 +WHERE ST_Contains(g1.geom, g2.geom) + AND ST_Contains(g1.geom, ST_MakePolygon('LINESTRING(0 0, 0 5, 5 5, 5 0, 0 0)'::geometry)) + AND g2.k < 20 +ORDER BY g1.k, g2.k +---- +1 1 +1 2 + +# This query performs a cross join followed by a filter. +query II +SELECT g1.k, g2.k FROM g@primary AS g1, g@primary AS g2 +WHERE ST_Contains(g1.geom, g2.geom) + AND ST_Contains(g1.geom, ST_MakePolygon('LINESTRING(0 0, 0 5, 5 5, 5 0, 0 0)'::geometry)) + AND g2.k < 20 +ORDER BY g1.k, g2.k +---- +1 1 +1 2 + +# This query is checking that the results of the previous two queries are identical. +# There should be no rows output. +query IIII +SELECT * FROM +( + SELECT g1.k, g2.k FROM g@foo_inv AS g1, g@primary AS g2 + WHERE ST_Contains(g1.geom, g2.geom) + AND ST_Contains(g1.geom, ST_MakePolygon('LINESTRING(0 0, 0 5, 5 5, 5 0, 0 0)'::geometry)) + AND g2.k < 20 +) AS inv_join(k1, k2) +FULL OUTER JOIN +( + SELECT g1.k, g2.k FROM g@primary AS g1, g@primary AS g2 + WHERE ST_Contains(g1.geom, g2.geom) + AND ST_Contains(g1.geom, ST_MakePolygon('LINESTRING(0 0, 0 5, 5 5, 5 0, 0 0)'::geometry)) + AND g2.k < 20 +) AS cross_join(k1, k2) +ON inv_join.k1 = cross_join.k1 AND inv_join.k2 = cross_join.k2 +WHERE inv_join.k1 IS NULL OR cross_join.k1 IS NULL +---- diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain index 69d7f0b8c3e5..19f588148863 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain +++ b/pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial_explain @@ -405,3 +405,100 @@ project · · · estimated row count 1000 (missing stats) · · · table ltable@primary · · · spans FULL SCAN · · + +statement ok +CREATE TABLE g ( + k INT PRIMARY KEY, + geom GEOMETRY +) + +statement ok +CREATE INVERTED INDEX foo_inv ON g(geom) + +# This query performs an inverted join. +query TTT +EXPLAIN SELECT g1.k, g2.k FROM g@foo_inv AS g1, g@primary AS g2 WHERE ST_Contains(g1.geom, g2.geom) ORDER BY g1.k, g2.k +---- +· distribution local +· vectorized true +sort · · + │ order +k,+k + └── lookup join · · + │ table g@primary + │ equality (k) = (k) + │ equality cols are key · + │ pred st_contains(geom, geom) + └── inverted join · · + │ table g@foo_inv + └── scan · · +· missing stats · +· table g@primary +· spans FULL SCAN + +# This query performs a cross join followed by a filter. +query TTT +EXPLAIN SELECT g1.k, g2.k FROM g@primary AS g1, g@primary AS g2 WHERE ST_Contains(g1.geom, g2.geom) ORDER BY g1.k, g2.k +---- +· distribution local +· vectorized true +sort · · + │ order +k,+k + └── cross join · · + │ pred st_contains(geom, geom) + ├── scan · · + │ missing stats · + │ table g@primary + │ spans FULL SCAN + └── scan · · +· missing stats · +· table g@primary +· spans FULL SCAN + +# This query performs an inverted join with an additional filter. +query TTT +EXPLAIN SELECT g1.k, g2.k FROM g@foo_inv AS g1, g@primary AS g2 +WHERE ST_Contains(g1.geom, g2.geom) + AND ST_Contains(g1.geom, ST_MakePolygon('LINESTRING(0 0, 0 5, 5 5, 5 0, 0 0)'::geometry)) + AND g2.k < 20 +ORDER BY g1.k, g2.k +---- +· distribution local +· vectorized true +sort · · + │ order +k,+k + └── lookup join · · + │ table g@primary + │ equality (k) = (k) + │ equality cols are key · + │ pred st_contains(geom, geom) AND st_contains(geom, '010300000001000000050000000000000000000000000000000000000000000000000000000000000000001440000000000000144000000000000014400000000000001440000000000000000000000000000000000000000000000000') + └── inverted join · · + │ table g@foo_inv + └── scan · · +· missing stats · +· table g@primary +· spans [ - /19] + +# This query performs a cross join followed by a filter. +query TTT +EXPLAIN SELECT g1.k, g2.k FROM g@primary AS g1, g@primary AS g2 +WHERE ST_Contains(g1.geom, g2.geom) + AND ST_Contains(g1.geom, ST_MakePolygon('LINESTRING(0 0, 0 5, 5 5, 5 0, 0 0)'::geometry)) + AND g2.k < 20 +ORDER BY g1.k, g2.k +---- +· distribution local +· vectorized true +sort · · + │ order +k,+k + └── cross join · · + │ pred st_contains(geom, geom) + ├── scan · · + │ missing stats · + │ table g@primary + │ spans [ - /19] + └── filter · · + │ filter st_contains(geom, '010300000001000000050000000000000000000000000000000000000000000000000000000000000000001440000000000000144000000000000014400000000000001440000000000000000000000000000000000000000000000000') + └── scan · · +· missing stats · +· table g@primary +· spans FULL SCAN diff --git a/pkg/sql/opt/invertedexpr/expression.go b/pkg/sql/opt/invertedexpr/expression.go index 05b5057bbad4..76ab40765208 100644 --- a/pkg/sql/opt/invertedexpr/expression.go +++ b/pkg/sql/opt/invertedexpr/expression.go @@ -336,6 +336,8 @@ type InvertedExpression interface { IsTight() bool // SetNotTight sets tight to false. SetNotTight() + // Copy makes a copy of the inverted expression. + Copy() InvertedExpression } // SpanExpression is an implementation of InvertedExpression. @@ -398,6 +400,29 @@ func (s *SpanExpression) SetNotTight() { s.Tight = false } +// Copy implements the InvertedExpression interface. +// +// Copy makes a copy of the SpanExpression and returns it. Copy recurses into +// the children and makes copies of them as well, so the new struct is +// independent from the old. It does *not* perform a deep copy of the +// SpansToRead or FactoredUnionSpans slices, however, because those slices are +// never modified in place and therefore are safe to reuse. +func (s *SpanExpression) Copy() InvertedExpression { + res := &SpanExpression{ + Tight: s.Tight, + SpansToRead: s.SpansToRead, + FactoredUnionSpans: s.FactoredUnionSpans, + Operator: s.Operator, + } + if s.Left != nil { + res.Left = s.Left.Copy() + } + if s.Right != nil { + res.Right = s.Right.Copy() + } + return res +} + func (s *SpanExpression) String() string { tp := treeprinter.New() n := tp.Child("span expression") @@ -485,6 +510,11 @@ func (n NonInvertedColExpression) IsTight() bool { // SetNotTight implements the InvertedExpression interface. func (n NonInvertedColExpression) SetNotTight() {} +// Copy implements the InvertedExpression interface. +func (n NonInvertedColExpression) Copy() InvertedExpression { + return NonInvertedColExpression{} +} + // ExprForInvertedSpan constructs a leaf-level SpanExpression // for an inverted expression. Note that these leaf-level // expressions may also have tight = false. Geospatial functions @@ -508,7 +538,8 @@ func ExprForInvertedSpan(span InvertedSpan, tight bool) *SpanExpression { } } -// And of two boolean expressions. +// And of two boolean expressions. This function may modify both the left and +// right InvertedExpressions. func And(left, right InvertedExpression) InvertedExpression { switch l := left.(type) { case *SpanExpression: @@ -542,7 +573,8 @@ func And(left, right InvertedExpression) InvertedExpression { } } -// Or of two boolean expressions. +// Or of two boolean expressions. This function may modify both the left and +// right InvertedExpressions. func Or(left, right InvertedExpression) InvertedExpression { switch l := left.(type) { case *SpanExpression: diff --git a/pkg/sql/opt/invertedexpr/expression_test.go b/pkg/sql/opt/invertedexpr/expression_test.go index 355737827e4e..c1484d4f2b01 100644 --- a/pkg/sql/opt/invertedexpr/expression_test.go +++ b/pkg/sql/opt/invertedexpr/expression_test.go @@ -83,6 +83,9 @@ func (u UnknownExpression) SetNotTight() { u.tight = false } func (u UnknownExpression) String() string { return fmt.Sprintf("unknown expression: tight=%t", u.tight) } +func (u UnknownExpression) Copy() InvertedExpression { + return UnknownExpression{tight: u.tight} +} // Makes a (shallow) copy of the root node of the expression identified // by name, since calls to And() and Or() can modify that root node, and diff --git a/pkg/sql/opt/invertedidx/geo.go b/pkg/sql/opt/invertedidx/geo.go index f5979cdb9c1a..77f50bb86ec7 100644 --- a/pkg/sql/opt/invertedidx/geo.go +++ b/pkg/sql/opt/invertedidx/geo.go @@ -1041,7 +1041,7 @@ func (g *geoDatumsToInvertedExpr) Convert( case *geoInvertedExpr: if t.spanExpr != nil { - return t.spanExpr, nil + return t.spanExpr.Copy(), nil } d, err := t.nonIndexParam.Eval(g.evalCtx) if err != nil {