Skip to content

Commit

Permalink
Merge #50912
Browse files Browse the repository at this point in the history
50912: xform,geoindex: use geo inverted indexes for DWithin and DFullyWithin r=sumeerbhola a=sumeerbhola

Release note: None

Co-authored-by: sumeerbhola <[email protected]>
  • Loading branch information
craig[bot] and sumeerbhola committed Jul 6, 2020
2 parents ec47cd9 + 7e833ea commit deb468b
Show file tree
Hide file tree
Showing 10 changed files with 387 additions and 24 deletions.
8 changes: 8 additions & 0 deletions pkg/geo/geoindex/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,14 @@ const (
// Intersects corresponds to the relationship in which one geospatial object
// intersects another geospatial object.
Intersects

// DWithin corresponds to a relationship where there exists a part of one
// geometry within d distance units of the other geometry.
DWithin

// DFullyWithin corresponds to a relationship where every pair of points in
// two geometries are within d distance units.
DFullyWithin
)

var geoRelationshipTypeStr = map[RelationshipType]string{
Expand Down
12 changes: 12 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/inverted_filter_geospatial
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,18 @@ SELECT k FROM geo_table WHERE ST_CoveredBy('POINT(4.0 4.5)'::geometry, geom) ORD
----
6

query I
SELECT k FROM geo_table WHERE ST_Intersects('POINT(2.5 2.5)'::geometry, geom) ORDER BY k
----
6

query I
SELECT k FROM geo_table WHERE ST_DWithin('POINT(2.5 2.5)'::geometry, geom, 1) ORDER BY k
----
2
3
6

statement ok
CREATE TABLE geo_table2(
k int,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# LogicTest: !fakedist-metadata !3node-tenant

# TODO(sumeer): move these to opt/exec/execbuilder/testdata since logic tests
# are not supposed to change when a plan changes.

# EXPLAIN test cases for using invertedFilterer on an inverted geospatial index.

statement ok
Expand Down Expand Up @@ -39,3 +42,22 @@ render · ·
└── scan · ·
· table geo_table2@geom_index
· spans /1152921504606846976-/1152921504606846977 /1152921573326323712-/1152921573326323713 /1152921574400065536-/1152921574400065537 /1152921574668500992-/1152921574668500993 /1152921574735609856-/1152921574735609857 /1152921574739804160-/1152921574739804161 /1152921574740066304-/1152921574740066305 /1152921574740070400-/1152921574740070401 /1152921574740070464-/1152921574740070465 /1152921574740070468-/1152921574740070469 /1152921574740070469-/1152921574740070470 /1152921574740070480-/1152921574740070481 /1152921574740070656-/1152921574740070657 /1152921574740071424-/1152921574740071425 /1152921574740082688-/1152921574740082689 /1152921574740131840-/1152921574740131841 /1152921574740852736-/1152921574740852737 /1152921574752387072-/1152921574752387073 /1152921577621291008-/1152921577621291009 /1152921590506192896-/1152921590506192897 /1152921779484753920-/1152921779484753921 /1152922604118474752-/1152922604118474753 /1152925902653358080-/1152925902653358081 /1152939096792891392-/1152939096792891393 /1152991873351024640-/1152991873351024641 /1153202979583557632-/1153202979583557633 /1154047404513689600-/1154047404513689601 /1157425104234217472-/1157425104234217473 /1170935903116328960-/1170935903116328961 /1224979098644774912-/1224979098644774913 /1441151880758558720-/1441151880758558721

query TTT
EXPLAIN SELECT k, k_plus_one FROM geo_table2 WHERE ST_DFullyWithin('POINT(3.0 3.0)'::geometry, geom, 1)
----
· distribution local
· vectorized false
render · ·
└── filter · ·
│ filter st_dfullywithin('010100000000000000000008400000000000000840', geom, 1.0)
└── index-join · ·
│ table geo_table2@primary
│ key columns k, k_plus_one
└── render · ·
└── inverted-filter · ·
│ inverted column 2
│ num spans 20
└── scan · ·
· table geo_table2@geom_index
· spans /1152921504606846976-/1152921504606846977 /1152921521786716160-/1152921521786716161 /1152921526081683456-/1152921526081683457 /1152921527155425280-/1152921527155425281 /1152921527155425281-/1152921527692296191/PrefixEnd /1152921538966585345-/1152921573326323711/PrefixEnd /1152921573326323712-/1152921573326323713 /1152921573326323713-/1152921607686062079/PrefixEnd /1152921607686062081-/1152921642045800447/PrefixEnd /1152921779484753920-/1152921779484753921 /1152922604118474752-/1152922604118474753 /1152925902653358080-/1152925902653358081 /1152939096792891392-/1152939096792891393 /1152991873351024640-/1152991873351024641 /1153202979583557632-/1153202979583557633 /1154047404513689600-/1154047404513689601 /1157425104234217472-/1157425104234217473 /1170935903116328960-/1170935903116328961 /1224979098644774912-/1224979098644774913 /1441151880758558720-/1441151880758558721
13 changes: 13 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/inverted_join_geospatial
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,16 @@ SELECT lk, rk FROM ltable JOIN rtable@geom_index ON ST_Intersects(ltable.geom, r
2 16
3 12
3 16

query II
SELECT lk, rk FROM ltable JOIN rtable@geom_index ON ST_DWithin(ltable.geom, rtable.geom, 2) ORDER BY (lk, rk)
----
1 12
1 13
1 14
1 16
2 14
2 16
3 11
3 12
3 16
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,27 @@ render · ·
└── scan · ·
· table ltable@primary
· spans FULL SCAN

query TTT
EXPLAIN SELECT lk, rk1, rk2, rtable.geom
FROM ltable JOIN rtable@geom_index ON ST_DWithin(ltable.geom, rtable.geom, 5)
----
· distribution local
· vectorized true
render · ·
└── lookup-join · ·
│ table rtable@primary
│ type inner
│ equality (rk1, rk2) = (rk1, rk2)
│ equality cols are key ·
│ parallel ·
│ pred st_dwithin(geom, geom, 5.0)
└── render · ·
└── inverted-join · ·
│ table rtable@geom_index
│ type inner
│ · st_dwithin(@2, @4, 5.0)
│ parallel ·
└── scan · ·
· table ltable@primary
· spans FULL SCAN
26 changes: 18 additions & 8 deletions pkg/sql/opt/xform/custom_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1984,16 +1984,26 @@ func (c *CustomFuncs) IsGeoIndexFunction(fn opt.ScalarExpr) bool {
return IsGeoIndexFunction(fn)
}

// HasAllVariableArgs returns true if all the arguments to the given function
// are variables.
func (c *CustomFuncs) HasAllVariableArgs(fn opt.ScalarExpr) bool {
// FirstArgIsVariable returns true if the first argument to the given
// function is a variable.
func (c *CustomFuncs) FirstArgIsVariable(fn opt.ScalarExpr) bool {
return argNumIsVariable(fn, 0)
}

// SecondArgIsVariable returns true if the second argument to the given
// function is a variable.
func (c *CustomFuncs) SecondArgIsVariable(fn opt.ScalarExpr) bool {
return argNumIsVariable(fn, 1)
}

func argNumIsVariable(fn opt.ScalarExpr, index int) bool {
function := fn.(*memo.FunctionExpr)
for i, n := 0, function.Args.ChildCount(); i < n; i++ {
if _, ok := function.Args.Child(i).(*memo.VariableExpr); !ok {
return false
}
numArgs := function.Args.ChildCount()
if index >= numArgs {
return false
}
return true
_, ok := function.Args.Child(index).(*memo.VariableExpr)
return ok
}

// findConstantFilter tries to find a filter that is exactly equivalent to
Expand Down
112 changes: 97 additions & 15 deletions pkg/sql/opt/xform/geo.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"context"
"fmt"

"github.com/cockroachdb/cockroach/pkg/geo/geogfn"
"github.com/cockroachdb/cockroach/pkg/geo/geoindex"
"github.com/cockroachdb/cockroach/pkg/sql/opt"
"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
Expand All @@ -37,14 +38,14 @@ import (
// Note that for all of these functions, a geospatial lookup join or constrained
// index scan may produce false positives. Therefore, the original function must
// be called on the output of the index operation to filter the results.
// TODO(rytaft): add ST_DFullyWithin (geoindex.Covers) and ST_DWithin
// (geoindex.Intersects) once we add support for extending a geometry.
var geoRelationshipMap = map[string]geoindex.RelationshipType{
"st_covers": geoindex.Covers,
"st_coveredby": geoindex.CoveredBy,
"st_contains": geoindex.Covers,
"st_containsproperly": geoindex.Covers,
"st_crosses": geoindex.Intersects,
"st_dwithin": geoindex.DWithin,
"st_dfullywithin": geoindex.DFullyWithin,
"st_equals": geoindex.Intersects,
"st_intersects": geoindex.Intersects,
"st_overlaps": geoindex.Intersects,
Expand All @@ -65,7 +66,7 @@ func IsGeoIndexFunction(fn opt.ScalarExpr) bool {
// geospatial relationship. It is implemented by getSpanExprForGeographyIndex
// and getSpanExprForGeometryIndex and used in constrainGeoIndex.
type getSpanExprForGeoIndexFn func(
context.Context, tree.Datum, geoindex.RelationshipType, *geoindex.Config,
context.Context, tree.Datum, []tree.Datum, geoindex.RelationshipType, *geoindex.Config,
) *invertedexpr.SpanExpression

// tryConstrainGeoIndex tries to derive an inverted index constraint for the
Expand Down Expand Up @@ -114,6 +115,7 @@ func tryConstrainGeoIndex(
func getSpanExprForGeographyIndex(
ctx context.Context,
d tree.Datum,
additionalParams []tree.Datum,
relationship geoindex.RelationshipType,
indexConfig *geoindex.Config,
) *invertedexpr.SpanExpression {
Expand All @@ -138,6 +140,35 @@ func getSpanExprForGeographyIndex(
panic(err)
}

case geoindex.DWithin:
// Parameters are type checked earlier. Keep this consistent with the definition
// in geo_builtins.go.
if len(additionalParams) != 1 && len(additionalParams) != 2 {
panic(errors.AssertionFailedf("unexpected param length %d", len(additionalParams)))
}
d, ok := additionalParams[0].(*tree.DFloat)
if !ok {
panic(errors.AssertionFailedf(
"parameter %s is not float", additionalParams[0].ResolvedType()))
}
distanceMeters := float64(*d)
useSpheroid := geogfn.UseSpheroid
if len(additionalParams) == 2 {
b, ok := additionalParams[1].(*tree.DBool)
if !ok {
panic(errors.AssertionFailedf(
"parameter %s is not bool", additionalParams[1].ResolvedType()))
}
if !*b {
useSpheroid = geogfn.UseSphere
}
}
unionKeySpans, err := geogIdx.DWithin(ctx, geog, distanceMeters, useSpheroid)
if err != nil {
panic(err)
}
spanExpr = invertedexpr.GeoUnionKeySpansToSpanExpr(unionKeySpans)

case geoindex.Intersects:
unionKeySpans, err := geogIdx.Intersects(ctx, geog)
if err != nil {
Expand All @@ -152,11 +183,26 @@ func getSpanExprForGeographyIndex(
return spanExpr
}

// Helper for DWithin and DFullyWithin.
func getDistanceParam(params []tree.Datum) float64 {
// Parameters are type checked earlier. Keep this consistent with the definition
// in geo_builtins.go.
if len(params) != 1 {
panic(errors.AssertionFailedf("unexpected param length %d", len(params)))
}
d, ok := params[0].(*tree.DFloat)
if !ok {
panic(errors.AssertionFailedf("parameter %s is not float", params[0].ResolvedType()))
}
return float64(*d)
}

// getSpanExprForGeometryIndex gets a SpanExpression that constrains the given
// geometry index according to the given constant and geospatial relationship.
func getSpanExprForGeometryIndex(
ctx context.Context,
d tree.Datum,
additionalParams []tree.Datum,
relationship geoindex.RelationshipType,
indexConfig *geoindex.Config,
) *invertedexpr.SpanExpression {
Expand All @@ -181,6 +227,22 @@ func getSpanExprForGeometryIndex(
panic(err)
}

case geoindex.DFullyWithin:
distance := getDistanceParam(additionalParams)
unionKeySpans, err := geomIdx.DFullyWithin(ctx, geom, distance)
if err != nil {
panic(err)
}
spanExpr = invertedexpr.GeoUnionKeySpansToSpanExpr(unionKeySpans)

case geoindex.DWithin:
distance := getDistanceParam(additionalParams)
unionKeySpans, err := geomIdx.DWithin(ctx, geom, distance)
if err != nil {
panic(err)
}
spanExpr = invertedexpr.GeoUnionKeySpansToSpanExpr(unionKeySpans)

case geoindex.Intersects:
unionKeySpans, err := geomIdx.Intersects(ctx, geom)
if err != nil {
Expand Down Expand Up @@ -246,27 +308,37 @@ func constrainGeoIndex(
variable, ok := fn.Args.Child(1).(*memo.VariableExpr)
if !ok {
// TODO(rytaft): Commute the geospatial function in this case.
// Covers <-> CoveredBy
// Intersects <-> Intersects
// Covers <-> CoveredBy
// DWithin <-> DWithin
// DFullyWithin <-> DFullyWithin
// Intersects <-> Intersects
return invertedexpr.NonInvertedColExpression{}
}
if variable.Col != tabID.ColumnID(index.Column(0).Ordinal) {
// The column in the function does not match the index column.
return invertedexpr.NonInvertedColExpression{}
}

// Any additional params must be constant.
var additionalParams []tree.Datum
for i := 2; i < fn.Args.ChildCount(); i++ {
if !memo.CanExtractConstDatum(fn.Args.Child(i)) {
return invertedexpr.NonInvertedColExpression{}
}
additionalParams = append(additionalParams, memo.ExtractConstDatum(fn.Args.Child(i)))
}
relationship := geoRelationshipMap[fn.Name]
return getSpanExpr(ctx, d, relationship, index.GeoConfig())
return getSpanExpr(ctx, d, additionalParams, relationship, index.GeoConfig())
}

// geoDatumToInvertedExpr implements invertedexpr.DatumToInvertedExpr for
// geospatial columns.
type geoDatumToInvertedExpr struct {
relationship geoindex.RelationshipType
indexConfig *geoindex.Config
typ *types.T
getSpanExpr getSpanExprForGeoIndexFn
alloc sqlbase.DatumAlloc
relationship geoindex.RelationshipType
additionalParams []tree.Datum
indexConfig *geoindex.Config
typ *types.T
getSpanExpr getSpanExprForGeoIndexFn
alloc sqlbase.DatumAlloc
}

var _ invertedexpr.DatumToInvertedExpr = &geoDatumToInvertedExpr{}
Expand All @@ -290,9 +362,19 @@ func NewGeoDatumToInvertedExpr(
return nil, fmt.Errorf("%s cannot be index-accelerated", name)
}

var additionalParams []tree.Datum
for i := 2; i < len(fn.Exprs); i++ {
datum, ok := fn.Exprs[i].(tree.Datum)
if !ok {
return nil, fmt.Errorf("non constant additional parameter for %s", name)
}
additionalParams = append(additionalParams, datum)
}

g := &geoDatumToInvertedExpr{
relationship: relationship,
indexConfig: config,
relationship: relationship,
additionalParams: additionalParams,
indexConfig: config,
}
if geoindex.IsGeographyConfig(config) {
g.typ = types.Geography
Expand All @@ -314,7 +396,7 @@ func (g *geoDatumToInvertedExpr) Convert(
if err := d.EnsureDecoded(g.typ, &g.alloc); err != nil {
return nil, err
}
spanExpr := g.getSpanExpr(ctx, d.Datum, g.relationship, g.indexConfig)
spanExpr := g.getSpanExpr(ctx, d.Datum, g.additionalParams, g.relationship, g.indexConfig)
return spanExpr.ToProto(), nil
}

Expand Down
3 changes: 2 additions & 1 deletion pkg/sql/opt/xform/rules/join.opt
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@
(FiltersItem
$fn:(Function) &
(IsGeoIndexFunction $fn) &
(HasAllVariableArgs $fn)
(FirstArgIsVariable $fn) &
(SecondArgIsVariable $fn)
)
...
]
Expand Down
26 changes: 26 additions & 0 deletions pkg/sql/opt/xform/testdata/rules/join
Original file line number Diff line number Diff line change
Expand Up @@ -1867,6 +1867,32 @@ memo (optimized, ~23KB, required=[presentation: name:13,popn_per_sqkm:16])
├── G32: (const 'Upper West Side')
└── G33: (const 'Upper East Side')

opt expect=GenerateGeoLookupJoins
SELECT
n.name, c.boroname
FROM nyc_census_blocks AS c
JOIN nyc_neighborhoods@nyc_neighborhoods_geo_idx AS n
ON ST_DWithin(c.geom, n.geom, 50)
----
project
├── columns: name:13 boroname:9
├── immutable
└── inner-join (lookup nyc_neighborhoods)
├── columns: c.boroname:9 c.geom:10 name:13 n.geom:14
├── key columns: [11] = [11]
├── lookup columns are key
├── immutable
├── inner-join (inverted-lookup nyc_neighborhoods@nyc_neighborhoods_geo_idx)
│ ├── columns: c.boroname:9 c.geom:10 n.gid:11!null
│ ├── inverted-expr
│ │ └── st_dwithin(c.geom:10, n.geom:14, 50.0)
│ ├── scan c
│ │ └── columns: c.boroname:9 c.geom:10
│ └── filters (true)
└── filters
└── st_dwithin(c.geom:10, n.geom:14, 50.0) [outer=(10,14), immutable]


# --------------------------------------------------
# GenerateZigZagJoins
# --------------------------------------------------
Expand Down
Loading

0 comments on commit deb468b

Please sign in to comment.