Skip to content

Commit

Permalink
geo: order geospatial objects by Hilbert Curve index
Browse files Browse the repository at this point in the history
This follows the [PostGIS way](https://info.crunchydata.com/blog/waiting-for-postgis-3-hilbert-geometry-sorting),
but does not follow the same encoding mechanism, so we cannot compare
results directly.

For Geography we re-use the S2 infrastructure and for Geometry we use
the google/hilbert library.

Release note (sql change): When ordering by geospatial columns, it will
now order by the Hilbert Space Curve index so that points which are
geographically similar are clustered together.
  • Loading branch information
otan committed Jul 28, 2020
1 parent 3c35836 commit 2d2dd6e
Show file tree
Hide file tree
Showing 8 changed files with 405 additions and 59 deletions.
96 changes: 89 additions & 7 deletions pkg/geo/geo.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ package geo
import (
"bytes"
"encoding/binary"
"math"

"github.com/cockroachdb/cockroach/pkg/geo/geographiclib"
"github.com/cockroachdb/cockroach/pkg/geo/geopb"
Expand Down Expand Up @@ -276,6 +277,61 @@ func (g *Geometry) CartesianBoundingBox() *CartesianBoundingBox {
return &CartesianBoundingBox{BoundingBox: *g.spatialObject.BoundingBox}
}

// SpaceCurveIndex returns an uint64 index to use representing an index into a space-filling curve.
// This will return 0 for empty spatial objects, and math.MaxUint64 for any object outside
// the defined bounds of the given SRID projection.
func (g *Geometry) SpaceCurveIndex() uint64 {
bbox := g.CartesianBoundingBox()
if bbox == nil {
return 0
}
centerX := (bbox.BoundingBox.LoX + bbox.BoundingBox.HiX) / 2
centerY := (bbox.BoundingBox.LoY + bbox.BoundingBox.HiY) / 2
// By default, bound by MaxInt32 (we have not typically seen bounds greater than 1B).
bounds := geoprojbase.Bounds{
MinX: math.MinInt32,
MaxX: math.MaxInt32,
MinY: math.MinInt32,
MaxY: math.MaxInt32,
}
if proj, ok := geoprojbase.Projection(g.SRID()); ok {
bounds = proj.Bounds
}
// If we're out of bounds, give up and return a large number.
if centerX > bounds.MaxX || centerY > bounds.MaxY || centerX < bounds.MinX || centerY < bounds.MinY {
return math.MaxUint64
}

// Add 1 to each dimension as the hilbert curve goes from [0, n-1].
xBounds := (bounds.MaxX - bounds.MinX) + 1
yBounds := (bounds.MaxY - bounds.MinY) + 1

// We need two compute N such that N is a power of 2 and the x, y coordinates are in [0, N-1].
// The hilbert curve value will be in the interval [0, N^2-1].
// The x, y coordinates will be first normalized below to the interval [0, 1].
// TODO(#geo): consider caching this value onto the projection for speedup.
biggestBoxLength := uint64(1) << uint64(math.Ceil(math.Log2(math.Max(xBounds, yBounds))))

xPos := uint64(((centerX - bounds.MinX) / (bounds.MaxX - bounds.MinX)) * float64(xBounds))
yPos := uint64(((centerY - bounds.MinY) / (bounds.MaxY - bounds.MinY)) * float64(yBounds))
return hilbertInverse(biggestBoxLength, xPos, yPos)
}

// Compare compares a Geometry against another.
// It compares using SpaceCurveIndex, followed by the byte representation of the Geometry.
// This must produce the same ordering as the index mechanism.
func (g *Geometry) Compare(o *Geometry) int {
lhs := g.SpaceCurveIndex()
rhs := o.SpaceCurveIndex()
if lhs > rhs {
return 1
}
if lhs < rhs {
return -1
}
return compareSpatialObjectBytes(g.SpatialObject(), o.SpatialObject())
}

//
// Geography
//
Expand Down Expand Up @@ -489,6 +545,35 @@ func (g *Geography) BoundingCap() s2.Cap {
return g.BoundingRect().CapBound()
}

// SpaceCurveIndex returns an uint64 index to use representing an index into a space-filling curve.
// This will return 0 for empty spatial objects.
func (g *Geography) SpaceCurveIndex() uint64 {
rect := g.BoundingRect()
if rect.IsEmpty() {
return 0
}
return uint64(s2.CellIDFromLatLng(rect.Center()))
}

// Compare compares a Geography against another.
// It compares using SpaceCurveIndex, followed by the byte representation of the Geography.
// This must produce the same ordering as the index mechanism.
func (g *Geography) Compare(o *Geography) int {
lhs := g.SpaceCurveIndex()
rhs := o.SpaceCurveIndex()
if lhs > rhs {
return 1
}
if lhs < rhs {
return -1
}
return compareSpatialObjectBytes(g.SpatialObject(), o.SpatialObject())
}

//
// Common
//

// IsLinearRingCCW returns whether a given linear ring is counter clock wise.
// See 2.07 of http://www.faqs.org/faqs/graphics/algorithms-faq/.
// "Find the lowest vertex (or, if there is more than one vertex with the same lowest coordinate,
Expand Down Expand Up @@ -618,10 +703,6 @@ func S2RegionsFromGeomT(geomRepr geom.T, emptyBehavior EmptyBehavior) ([]s2.Regi
return regions, nil
}

//
// Common
//

// normalizeLngLat normalizes geographical coordinates into a valid range.
func normalizeLngLat(lng float64, lat float64) (float64, float64) {
if lat > 90 || lat < -90 {
Expand Down Expand Up @@ -755,9 +836,10 @@ func shapeTypeFromGeomT(t geom.T) (geopb.ShapeType, error) {
}
}

// CompareSpatialObject compares the SpatialObject.
// This must match the byte ordering that is be produced by encoding.EncodeGeoAscending.
func CompareSpatialObject(lhs geopb.SpatialObject, rhs geopb.SpatialObject) int {
// compareSpatialObjectBytes compares the SpatialObject if they were serialized.
// This is used for comparison operations, and must be kept consistent with the indexing
// encoding.
func compareSpatialObjectBytes(lhs geopb.SpatialObject, rhs geopb.SpatialObject) int {
marshalledLHS, err := protoutil.Marshal(&lhs)
if err != nil {
panic(err)
Expand Down
100 changes: 100 additions & 0 deletions pkg/geo/geo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/geo/geopb"
"github.com/cockroachdb/errors"
"github.com/golang/geo/s2"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/twpayne/go-geom"
)
Expand Down Expand Up @@ -561,6 +562,105 @@ func TestGeographyAsS2(t *testing.T) {
}
}

func TestGeographyHash(t *testing.T) {
testCases := []struct {
orderedWKTs []string
srid geopb.SRID
}{
{
[]string{
"POINT EMPTY",
"POLYGON EMPTY",
"POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))",
"POINT(-80 80)",
"LINESTRING(0 0, -90 -80)",
},
4326,
},
{
[]string{
"POINT EMPTY",
"POLYGON EMPTY",
"POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))",
"POINT(-80 80)",
"LINESTRING(0 0, -90 -80)",
},
4004,
},
}
for i, tc := range testCases {
t.Run(strconv.Itoa(i+1), func(t *testing.T) {
previous := uint64(0)
for _, wkt := range tc.orderedWKTs {
t.Run(wkt, func(t *testing.T) {
g, err := ParseGeography(wkt)
require.NoError(t, err)
g, err = g.CloneWithSRID(tc.srid)
require.NoError(t, err)

h := g.SpaceCurveIndex()
assert.GreaterOrEqual(t, h, previous)
previous = h
})
}
})
}
}

func TestGeometryHash(t *testing.T) {
testCases := []struct {
orderedWKTs []string
srid geopb.SRID
}{
{
[]string{
"POINT EMPTY",
"POLYGON EMPTY",
"LINESTRING(0 0, -90 -80)",
"POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))",
"POINT(-80 80)",
},
4326,
},
{
[]string{
"POINT EMPTY",
"POLYGON EMPTY",
"POINT(-80 80)",
"LINESTRING(0 0, -90 -80)",
"POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))",
},
3857,
},
{
[]string{
"POINT EMPTY",
"POLYGON EMPTY",
"LINESTRING(0 0, -90 -80)",
"POINT(-80 80)",
"POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))",
},
0,
},
}
for i, tc := range testCases {
t.Run(strconv.Itoa(i+1), func(t *testing.T) {
previous := uint64(0)
for _, wkt := range tc.orderedWKTs {
t.Run(wkt, func(t *testing.T) {
g, err := ParseGeometry(wkt)
require.NoError(t, err)
g, err = g.CloneWithSRID(tc.srid)
require.NoError(t, err)
h := g.SpaceCurveIndex()
assert.GreaterOrEqual(t, h, previous)
previous = h
})
}
})
}
}

func TestGeometryAsGeography(t *testing.T) {
for _, tc := range []struct {
geom string
Expand Down
44 changes: 44 additions & 0 deletions pkg/geo/hilbert.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package geo

// hilbertInverse converts (x,y) to d on a Hilbert Curve.
// Adapted from `xy2d` from https://en.wikipedia.org/wiki/Hilbert_curve#Applications_and_mapping_algorithms.
func hilbertInverse(n, x, y uint64) uint64 {
var d uint64
for s := uint64(n / 2); s > 0; s /= 2 {
var rx uint64
if (x & s) > 0 {
rx = 1
}
var ry uint64
if (y & s) > 0 {
ry = 1
}
d += s * s * ((3 * rx) ^ ry)
x, y = hilbertRotate(n, x, y, rx, ry)
}
return d
}

// hilberRoate rotates/flips a quadrant appropriately.
// Adapted from `rot` in https://en.wikipedia.org/wiki/Hilbert_curve#Applications_and_mapping_algorithms.
func hilbertRotate(n, x, y, rx, ry uint64) (uint64, uint64) {
if ry == 0 {
if rx == 1 {
x = n - 1 - x
y = n - 1 - y
}

x, y = y, x
}
return x, y
}
4 changes: 2 additions & 2 deletions pkg/sql/sem/tree/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -2766,7 +2766,7 @@ func (d *DGeography) Compare(ctx *EvalContext, other Datum) int {
// NULL is less than any non-NULL value.
return 1
}
return geo.CompareSpatialObject(d.Geography.SpatialObject(), other.(*DGeography).SpatialObject())
return d.Geography.Compare(other.(*DGeography).Geography)
}

// Prev implements the Datum interface.
Expand Down Expand Up @@ -2874,7 +2874,7 @@ func (d *DGeometry) Compare(ctx *EvalContext, other Datum) int {
// NULL is less than any non-NULL value.
return 1
}
return geo.CompareSpatialObject(d.Geometry.SpatialObject(), other.(*DGeometry).SpatialObject())
return d.Geometry.Compare(other.(*DGeometry).Geometry)
}

// Prev implements the Datum interface.
Expand Down
8 changes: 4 additions & 4 deletions pkg/sql/sqlbase/column_type_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,15 @@ func EncodeTableKey(b []byte, val tree.Datum, dir encoding.Direction) ([]byte, e
case *tree.DGeography:
so := t.Geography.SpatialObject()
if dir == encoding.Ascending {
return encoding.EncodeGeoAscending(b, &so)
return encoding.EncodeGeoAscending(b, t.Geography.SpaceCurveIndex(), &so)
}
return encoding.EncodeGeoDescending(b, &so)
return encoding.EncodeGeoDescending(b, t.Geography.SpaceCurveIndex(), &so)
case *tree.DGeometry:
so := t.Geometry.SpatialObject()
if dir == encoding.Ascending {
return encoding.EncodeGeoAscending(b, &so)
return encoding.EncodeGeoAscending(b, t.Geometry.SpaceCurveIndex(), &so)
}
return encoding.EncodeGeoDescending(b, &so)
return encoding.EncodeGeoDescending(b, t.Geometry.SpaceCurveIndex(), &so)
case *tree.DDate:
if dir == encoding.Ascending {
return encoding.EncodeVarintAscending(b, t.UnixEpochDaysWithOrig()), nil
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/sqlbase/encoded_datum_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ func TestEncDatumCompare(t *testing.T) {

for _, typ := range types.OidToType {
switch typ.Family() {
case types.AnyFamily, types.UnknownFamily, types.ArrayFamily, types.JsonFamily, types.TupleFamily, types.GeometryFamily, types.GeographyFamily:
case types.AnyFamily, types.UnknownFamily, types.ArrayFamily, types.JsonFamily, types.TupleFamily:
continue
case types.CollatedStringFamily:
typ = types.MakeCollatedString(types.String, *RandCollationLocale(rng))
Expand Down
Loading

0 comments on commit 2d2dd6e

Please sign in to comment.