Skip to content

Commit

Permalink
geo: order geospatial objects by Hilbert Curve index
Browse files Browse the repository at this point in the history
This follows the [PostGIS way](https://info.crunchydata.com/blog/waiting-for-postgis-3-hilbert-geometry-sorting),
but does not follow the same encoding mechanism, so we cannot compare
results directly.

For Geography we re-use the S2 infrastructure and for Geometry we use
the google/hilbert library.

Release note (sql change): When ordering by geospatial columns, it will
now order by the Hilbert Space Curve index so that points which are
geographically similar are clustered together.
  • Loading branch information
otan committed Jul 27, 2020
1 parent a409703 commit b56e308
Show file tree
Hide file tree
Showing 10 changed files with 378 additions and 60 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ require (
github.com/google/flatbuffers v1.11.0
github.com/google/go-cmp v0.4.0
github.com/google/go-github v17.0.0+incompatible
github.com/google/hilbert v0.0.0-20181122061418-320f2e35a565
github.com/google/martian v2.1.0+incompatible // indirect
github.com/google/pprof v0.0.0-20190109223431-e84dfd68c163
github.com/googleapis/gax-go v2.0.2+incompatible // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,8 @@ github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+u
github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk=
github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/hilbert v0.0.0-20181122061418-320f2e35a565 h1:KBAlCAY6eLC44FiEwbzEbHnpVlw15iVM4ZK8QpRIp4U=
github.com/google/hilbert v0.0.0-20181122061418-320f2e35a565/go.mod h1:xn6EodFfRzV6j8NXQRPjngeHWlrpOrsZPKuuLRThU1k=
github.com/google/martian v2.1.0+incompatible h1:/CP5g8u/VJHijgedC/Legn3BAbAaWPgecwXBIDzw5no=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
github.com/google/pprof v0.0.0-20181127221834-b4f47329b966/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
Expand Down
109 changes: 102 additions & 7 deletions pkg/geo/geo.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ package geo
import (
"bytes"
"encoding/binary"
"math"

"github.com/cockroachdb/cockroach/pkg/geo/geographiclib"
"github.com/cockroachdb/cockroach/pkg/geo/geopb"
Expand All @@ -23,6 +24,7 @@ import (
"github.com/golang/geo/r1"
"github.com/golang/geo/s1"
"github.com/golang/geo/s2"
"github.com/google/hilbert"
"github.com/twpayne/go-geom"
"github.com/twpayne/go-geom/encoding/ewkb"
)
Expand Down Expand Up @@ -276,6 +278,73 @@ func (g *Geometry) CartesianBoundingBox() *CartesianBoundingBox {
return &CartesianBoundingBox{BoundingBox: *g.spatialObject.BoundingBox}
}

// SpaceCurveIndex returns an uint64 index to use representing an index into a space-filling curve.
// This will return 0 for empty spatial objects, and math.MaxUint64 for any object outside
// the defined bounds of the given SRID projection.
func (g *Geometry) SpaceCurveIndex() uint64 {
bbox := g.CartesianBoundingBox()
if bbox == nil {
return 0
}
centerX := (bbox.BoundingBox.LoX + bbox.BoundingBox.HiX) / 2
centerY := (bbox.BoundingBox.LoY + bbox.BoundingBox.HiY) / 2
// By default, bound by MaxInt32 (we have not typically seen bounds greater than 1B).
bounds := geoprojbase.Bounds{
MinX: -math.MaxInt32,
MaxX: math.MaxInt32,
MinY: -math.MaxInt32,
MaxY: math.MaxInt32,
}
if proj, ok := geoprojbase.Projection(g.SRID()); ok {
bounds = proj.Bounds
}
// If we're out of bounds, give up and return a large number.
if centerX > bounds.MaxX || centerY > bounds.MaxY || centerX < bounds.MinX || centerY < bounds.MinY {
return math.MaxUint64
}

// We need two compute N such that N is a power of 2 and the x, y coordinates are in [0, N-1].
// The hilbert curve value will be in the interval [0, 2^N-1].
// The x, y coordinates will be first normalized below to the interval [0, 1], then multiplied
// by the box size to get the projection.
boxSize := 1 << 40
// Find the longest dimension, add one here to include the maximum length of the box.
biggestBoxLength := math.Max(bounds.MaxX-bounds.MinX, bounds.MaxY-bounds.MinY) + 1
// If our max length is smaller than the maximum box size, find the biggest power of two
// after the biggestBoxLength.
// TODO(otan): investigate storing this calculation on the projection itself for perf.
if biggestBoxLength < float64(boxSize) {
boxSize = 1 << int(math.Ceil(math.Log2(biggestBoxLength)))
}
h, err := hilbert.NewHilbert(boxSize)
if err != nil {
panic(err)
}

xPos := int(((centerX - bounds.MinX) / (bounds.MaxX - bounds.MinX)) * float64(boxSize-1))
yPos := int(((centerY - bounds.MinY) / (bounds.MaxY - bounds.MinY)) * float64(boxSize-1))
r, err := h.MapInverse(xPos, yPos)
if err != nil {
panic(err)
}
return uint64(r)
}

// Compare compares a Geometry against another.
// It compares using SpaceCurveIndex, followed by the byte representation of the Geometry.
// This must produce the same ordering as the index mechanism.
func (g *Geometry) Compare(o *Geometry) int {
lhs := g.SpaceCurveIndex()
rhs := o.SpaceCurveIndex()
if lhs > rhs {
return 1
}
if lhs < rhs {
return -1
}
return compareSpatialObjectBytes(g.SpatialObject(), o.SpatialObject())
}

//
// Geography
//
Expand Down Expand Up @@ -489,6 +558,35 @@ func (g *Geography) BoundingCap() s2.Cap {
return g.BoundingRect().CapBound()
}

// SpaceCurveIndex returns an uint64 index to use representing an index into a space-filling curve.
// This will return 0 for empty spatial objects.
func (g *Geography) SpaceCurveIndex() uint64 {
rect := g.BoundingRect()
if rect.IsEmpty() {
return 0
}
return uint64(s2.CellIDFromLatLng(rect.Center()))
}

// Compare compares a Geography against another.
// It compares using SpaceCurveIndex, followed by the byte representation of the Geography.
// This must produce the same ordering as the index mechanism.
func (g *Geography) Compare(o *Geography) int {
lhs := g.SpaceCurveIndex()
rhs := o.SpaceCurveIndex()
if lhs > rhs {
return 1
}
if lhs < rhs {
return -1
}
return compareSpatialObjectBytes(g.SpatialObject(), o.SpatialObject())
}

//
// Common
//

// IsLinearRingCCW returns whether a given linear ring is counter clock wise.
// See 2.07 of http://www.faqs.org/faqs/graphics/algorithms-faq/.
// "Find the lowest vertex (or, if there is more than one vertex with the same lowest coordinate,
Expand Down Expand Up @@ -618,10 +716,6 @@ func S2RegionsFromGeomT(geomRepr geom.T, emptyBehavior EmptyBehavior) ([]s2.Regi
return regions, nil
}

//
// Common
//

// normalizeLngLat normalizes geographical coordinates into a valid range.
func normalizeLngLat(lng float64, lat float64) (float64, float64) {
if lat > 90 || lat < -90 {
Expand Down Expand Up @@ -755,9 +849,10 @@ func shapeTypeFromGeomT(t geom.T) (geopb.ShapeType, error) {
}
}

// CompareSpatialObject compares the SpatialObject.
// This must match the byte ordering that is be produced by encoding.EncodeGeoAscending.
func CompareSpatialObject(lhs geopb.SpatialObject, rhs geopb.SpatialObject) int {
// compareSpatialObjectBytes compares the SpatialObject if they were serialized.
// This is used for comparison operations, and must be kept consistent with the indexing
// encoding.
func compareSpatialObjectBytes(lhs geopb.SpatialObject, rhs geopb.SpatialObject) int {
marshalledLHS, err := protoutil.Marshal(&lhs)
if err != nil {
panic(err)
Expand Down
100 changes: 100 additions & 0 deletions pkg/geo/geo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/geo/geopb"
"github.com/cockroachdb/errors"
"github.com/golang/geo/s2"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/twpayne/go-geom"
)
Expand Down Expand Up @@ -561,6 +562,105 @@ func TestGeographyAsS2(t *testing.T) {
}
}

func TestGeographyHash(t *testing.T) {
testCases := []struct {
orderedWKTs []string
srid geopb.SRID
}{
{
[]string{
"POINT EMPTY",
"POLYGON EMPTY",
"POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))",
"POINT(-80 80)",
"LINESTRING(0 0, -90 -80)",
},
4326,
},
{
[]string{
"POINT EMPTY",
"POLYGON EMPTY",
"POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))",
"POINT(-80 80)",
"LINESTRING(0 0, -90 -80)",
},
4004,
},
}
for i, tc := range testCases {
t.Run(strconv.Itoa(i+1), func(t *testing.T) {
previous := uint64(0)
for _, wkt := range tc.orderedWKTs {
t.Run(wkt, func(t *testing.T) {
g, err := ParseGeography(wkt)
require.NoError(t, err)
g, err = g.CloneWithSRID(tc.srid)
require.NoError(t, err)

h := g.SpaceCurveIndex()
assert.GreaterOrEqual(t, h, previous)
previous = h
})
}
})
}
}

func TestGeometryHash(t *testing.T) {
testCases := []struct {
orderedWKTs []string
srid geopb.SRID
}{
{
[]string{
"POINT EMPTY",
"POLYGON EMPTY",
"LINESTRING(0 0, -90 -80)",
"POINT(-80 80)",
"POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))",
},
4326,
},
{
[]string{
"POINT EMPTY",
"POLYGON EMPTY",
"LINESTRING(0 0, -90 -80)",
"POINT(-80 80)",
"POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))",
},
3857,
},
{
[]string{
"POINT EMPTY",
"POLYGON EMPTY",
"LINESTRING(0 0, -90 -80)",
"POINT(-80 80)",
"POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))",
},
0,
},
}
for i, tc := range testCases {
t.Run(strconv.Itoa(i+1), func(t *testing.T) {
previous := uint64(0)
for _, wkt := range tc.orderedWKTs {
t.Run(wkt, func(t *testing.T) {
g, err := ParseGeometry(wkt)
require.NoError(t, err)
g, err = g.CloneWithSRID(tc.srid)
require.NoError(t, err)
h := g.SpaceCurveIndex()
assert.GreaterOrEqual(t, h, previous)
previous = h
})
}
})
}
}

func TestGeometryAsGeography(t *testing.T) {
for _, tc := range []struct {
geom string
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/sem/tree/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -2766,7 +2766,7 @@ func (d *DGeography) Compare(ctx *EvalContext, other Datum) int {
// NULL is less than any non-NULL value.
return 1
}
return geo.CompareSpatialObject(d.Geography.SpatialObject(), other.(*DGeography).SpatialObject())
return d.Geography.Compare(other.(*DGeography).Geography)
}

// Prev implements the Datum interface.
Expand Down Expand Up @@ -2874,7 +2874,7 @@ func (d *DGeometry) Compare(ctx *EvalContext, other Datum) int {
// NULL is less than any non-NULL value.
return 1
}
return geo.CompareSpatialObject(d.Geometry.SpatialObject(), other.(*DGeometry).SpatialObject())
return d.Geometry.Compare(other.(*DGeometry).Geometry)
}

// Prev implements the Datum interface.
Expand Down
8 changes: 4 additions & 4 deletions pkg/sql/sqlbase/column_type_encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,15 @@ func EncodeTableKey(b []byte, val tree.Datum, dir encoding.Direction) ([]byte, e
case *tree.DGeography:
so := t.Geography.SpatialObject()
if dir == encoding.Ascending {
return encoding.EncodeGeoAscending(b, &so)
return encoding.EncodeGeoAscending(b, t.Geography.SpaceCurveIndex(), &so)
}
return encoding.EncodeGeoDescending(b, &so)
return encoding.EncodeGeoDescending(b, t.Geography.SpaceCurveIndex(), &so)
case *tree.DGeometry:
so := t.Geometry.SpatialObject()
if dir == encoding.Ascending {
return encoding.EncodeGeoAscending(b, &so)
return encoding.EncodeGeoAscending(b, t.Geometry.SpaceCurveIndex(), &so)
}
return encoding.EncodeGeoDescending(b, &so)
return encoding.EncodeGeoDescending(b, t.Geometry.SpaceCurveIndex(), &so)
case *tree.DDate:
if dir == encoding.Ascending {
return encoding.EncodeVarintAscending(b, t.UnixEpochDaysWithOrig()), nil
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/sqlbase/encoded_datum_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ func TestEncDatumCompare(t *testing.T) {

for _, typ := range types.OidToType {
switch typ.Family() {
case types.AnyFamily, types.UnknownFamily, types.ArrayFamily, types.JsonFamily, types.TupleFamily, types.GeometryFamily, types.GeographyFamily:
case types.AnyFamily, types.UnknownFamily, types.ArrayFamily, types.JsonFamily, types.TupleFamily:
continue
case types.CollatedStringFamily:
typ = types.MakeCollatedString(types.String, *RandCollationLocale(rng))
Expand Down
Loading

0 comments on commit b56e308

Please sign in to comment.