Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
51898: geo: order geospatial objects by Hilbert Curve index r=sumeerbhola a=otan

This follows the [PostGIS way](https://info.crunchydata.com/blog/waiting-for-postgis-3-hilbert-geometry-sorting),
but does not follow the same encoding.

Visualisation for Geometry for 10000 random points: 
![image](https://user-images.githubusercontent.com/3646147/88688829-9a4f5a00-d0ae-11ea-9618-0179fb7ac327.png)

Visualisation for Geography for 10000 random points:
![image](https://user-images.githubusercontent.com/3646147/88449411-93e58780-cdfb-11ea-9e85-eff8c5a94787.png)

Release note (sql change): When ordering by geospatial columns, it will
now order by the Hilbert Space Curve index so that points which are
geographically similar are clustered together.

52168: sql: support synchronous_commit and enable_seqscan as dummy no-op r=rytaft a=otan

These vars are set by `osm2pgsql` and `ogr2ogr` respectively. Both
default to the ON state; the OFF state affects performance but not
correctness.

Touches #51818.

Release note (sql change): Support the setting and getting of the
`synchronous_commit` and `enable_seqscan` variables, which do not affect
any performance characteristics. These are no-ops enabled to allow
certain tools to work.

52265: build: avoid errors when tar closes curl's pipe r=dt a=dt

Release note: none.

Co-authored-by: Oliver Tan <[email protected]>
Co-authored-by: David Taylor <[email protected]>
  • Loading branch information
3 people committed Aug 3, 2020
4 parents 6ff0528 + 819e0c3 + 88fd7b4 + ccd534d commit 66a3c19
Show file tree
Hide file tree
Showing 19 changed files with 597 additions and 73 deletions.
3 changes: 2 additions & 1 deletion build/release/teamcity-make-and-publish-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ docker_login_with_google
gcr_repository="us.gcr.io/cockroach-cloud-images/cockroach"

# TODO: update publish-provisional-artifacts with option to leave one or more cockroach binaries in the local filesystem
curl -f -s -S -o- "https://${bucket}.s3.amazonaws.com/cockroach-${build_name}.linux-amd64.tgz" | tar xfz - --strip-components 1
# HACK: we pipe through tac twice to reverse/un-reverse since that will read the whole buffer and make curl happy, even if tar closes early.
curl -f -s -S -o- "https://${bucket}.s3.amazonaws.com/cockroach-${build_name}.linux-amd64.tgz" | tac | tac | tar xfz - --strip-components 1
cp cockroach build/deploy/cockroach

docker build --no-cache --tag="${gcr_repository}:${build_name}" build/deploy
Expand Down
92 changes: 85 additions & 7 deletions pkg/geo/geo.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ package geo
import (
"bytes"
"encoding/binary"
"math"

"github.com/cockroachdb/cockroach/pkg/geo/geographiclib"
"github.com/cockroachdb/cockroach/pkg/geo/geopb"
Expand Down Expand Up @@ -276,6 +277,57 @@ func (g *Geometry) CartesianBoundingBox() *CartesianBoundingBox {
return &CartesianBoundingBox{BoundingBox: *g.spatialObject.BoundingBox}
}

// SpaceCurveIndex computes the position of the Geometry along a Hilbert
// space-filling curve and returns it as a uint64.
//
// The position is taken from the center of the Cartesian bounding box, scaled
// onto a 2^32 x 2^32 grid covering the bounds of the SRID's projection. When the
// SRID has no registered projection, the int32 range is used as the bounds.
//
// It returns 0 when there is no bounding box (empty spatial objects), and
// math.MaxUint64 when the center falls outside the bounds in use.
func (g *Geometry) SpaceCurveIndex() uint64 {
	bbox := g.CartesianBoundingBox()
	if bbox == nil {
		return 0
	}
	box := &bbox.BoundingBox
	midX := (box.LoX + box.HiX) / 2
	midY := (box.LoY + box.HiY) / 2

	// Use the projection's own bounds when the SRID is known; otherwise fall
	// back to the int32 range (bounds beyond ~1B have not typically been seen).
	var limits geoprojbase.Bounds
	if proj, ok := geoprojbase.Projection(g.SRID()); ok {
		limits = proj.Bounds
	} else {
		limits = geoprojbase.Bounds{
			MinX: math.MinInt32,
			MaxX: math.MaxInt32,
			MinY: math.MinInt32,
			MaxY: math.MaxInt32,
		}
	}

	// A center outside the bounds cannot be placed on the grid; sort it last.
	outsideX := midX > limits.MaxX || midX < limits.MinX
	outsideY := midY > limits.MaxY || midY < limits.MinY
	if outsideX || outsideY {
		return math.MaxUint64
	}

	const gridSide = 1 << 32
	// Widening each span by 1 keeps the normalized coordinate in [0, 1), so that
	// scaling by gridSide yields integers in [0, gridSide-1].
	spanX := (limits.MaxX - limits.MinX) + 1
	spanY := (limits.MaxY - limits.MinY) + 1
	col := uint64(((midX - limits.MinX) / spanX) * gridSide)
	row := uint64(((midY - limits.MinY) / spanY) * gridSide)
	// hilbertInverse yields values in [0, gridSide^2-1], i.e. [0, 2^64-1].
	return hilbertInverse(gridSide, col, row)
}

// Compare orders this Geometry relative to o, returning 1, -1, or the result of
// a byte-wise comparison of the two spatial objects.
// The SpaceCurveIndex is the primary sort key; the serialized bytes of the
// spatial objects break ties.
// This must produce the same ordering as the index mechanism.
func (g *Geometry) Compare(o *Geometry) int {
	switch mine, theirs := g.SpaceCurveIndex(), o.SpaceCurveIndex(); {
	case mine > theirs:
		return 1
	case mine < theirs:
		return -1
	default:
		return compareSpatialObjectBytes(g.SpatialObject(), o.SpatialObject())
	}
}

//
// Geography
//
Expand Down Expand Up @@ -489,6 +541,35 @@ func (g *Geography) BoundingCap() s2.Cap {
return g.BoundingRect().CapBound()
}

// SpaceCurveIndex returns a uint64 position on a space-filling curve for the
// Geography: the S2 cell ID of the center of its bounding rect.
// A Geography whose bounding rect is empty yields 0.
func (g *Geography) SpaceCurveIndex() uint64 {
	if bounds := g.BoundingRect(); !bounds.IsEmpty() {
		return uint64(s2.CellIDFromLatLng(bounds.Center()))
	}
	return 0
}

// Compare orders this Geography relative to o, returning 1, -1, or the result
// of a byte-wise comparison of the two spatial objects.
// The SpaceCurveIndex is the primary sort key; the serialized bytes of the
// spatial objects break ties.
// This must produce the same ordering as the index mechanism.
func (g *Geography) Compare(o *Geography) int {
	switch mine, theirs := g.SpaceCurveIndex(), o.SpaceCurveIndex(); {
	case mine > theirs:
		return 1
	case mine < theirs:
		return -1
	default:
		return compareSpatialObjectBytes(g.SpatialObject(), o.SpatialObject())
	}
}

//
// Common
//

// IsLinearRingCCW returns whether a given linear ring is counter clock wise.
// See 2.07 of http://www.faqs.org/faqs/graphics/algorithms-faq/.
// "Find the lowest vertex (or, if there is more than one vertex with the same lowest coordinate,
Expand Down Expand Up @@ -618,10 +699,6 @@ func S2RegionsFromGeomT(geomRepr geom.T, emptyBehavior EmptyBehavior) ([]s2.Regi
return regions, nil
}

//
// Common
//

// normalizeLngLat normalizes geographical coordinates into a valid range.
func normalizeLngLat(lng float64, lat float64) (float64, float64) {
if lat > 90 || lat < -90 {
Expand Down Expand Up @@ -789,9 +866,10 @@ func GeomTContainsEmpty(g geom.T) bool {
return false
}

// CompareSpatialObject compares the SpatialObject.
// This must match the byte ordering that is be produced by encoding.EncodeGeoAscending.
func CompareSpatialObject(lhs geopb.SpatialObject, rhs geopb.SpatialObject) int {
// compareSpatialObjectBytes compares two SpatialObjects by their serialized bytes.
// This is used for comparison operations, and must be kept consistent with the indexing
// encoding.
func compareSpatialObjectBytes(lhs geopb.SpatialObject, rhs geopb.SpatialObject) int {
marshalledLHS, err := protoutil.Marshal(&lhs)
if err != nil {
panic(err)
Expand Down
139 changes: 139 additions & 0 deletions pkg/geo/geo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ package geo
import (
"encoding/hex"
"fmt"
"math"
"strconv"
"testing"

"github.com/cockroachdb/cockroach/pkg/geo/geopb"
"github.com/cockroachdb/errors"
"github.com/golang/geo/s2"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/twpayne/go-geom"
)
Expand Down Expand Up @@ -561,6 +563,143 @@ func TestGeographyAsS2(t *testing.T) {
}
}

// TestGeographySpaceCurveIndex verifies that, for each SRID under test, parsing
// the WKTs in their listed order produces non-decreasing space curve indexes.
func TestGeographySpaceCurveIndex(t *testing.T) {
	// Every SRID is exercised against the same ordered sequence of shapes.
	wktsInOrder := []string{
		"POINT EMPTY",
		"POLYGON EMPTY",
		"POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))",
		"POINT(-80 80)",
		"LINESTRING(0 0, -90 -80)",
	}
	type orderedCase struct {
		orderedWKTs []string
		srid        geopb.SRID
	}
	cases := []orderedCase{
		{orderedWKTs: wktsInOrder, srid: 4326},
		{orderedWKTs: wktsInOrder, srid: 4004},
	}

	for caseNum, c := range cases {
		t.Run(strconv.Itoa(caseNum+1), func(t *testing.T) {
			// Index of the preceding WKT; each new index must not be smaller.
			var last uint64
			for _, wkt := range c.orderedWKTs {
				t.Run(wkt, func(t *testing.T) {
					parsed, err := ParseGeography(wkt)
					require.NoError(t, err)
					withSRID, err := parsed.CloneWithSRID(c.srid)
					require.NoError(t, err)

					curveIdx := withSRID.SpaceCurveIndex()
					assert.GreaterOrEqual(t, curveIdx, last)
					last = curveIdx
				})
			}
		})
	}
}

// TestGeometrySpaceCurveIndex checks Geometry.SpaceCurveIndex in two ways:
// exact expected indexes for a handful of WKTs, and non-decreasing indexes for
// an ordered list of WKTs under several SRIDs.
func TestGeometrySpaceCurveIndex(t *testing.T) {
	// Part 1: exact index values.
	type valueCase struct {
		wkt      string
		expected uint64
	}
	exact := []valueCase{
		{"POINT EMPTY", 0},
		{"SRID=4326;POINT EMPTY", 0},
		{"POINT (100 80)", 9223372036854787504},
		{"SRID=4326;POINT(100 80)", 11895367802890724441},
		{"POINT (1000 800)", 9223372036855453930},
		// Outside the SRID 4326 bounds, so the sentinel maximum is expected.
		{"SRID=4326;POINT(1000 800)", math.MaxUint64},
	}
	for _, c := range exact {
		t.Run(c.wkt, func(t *testing.T) {
			parsed, err := ParseGeometry(c.wkt)
			require.NoError(t, err)
			require.Equal(t, c.expected, parsed.SpaceCurveIndex())
		})
	}

	// Part 2: relative ordering. Every SRID uses the same ordered shapes.
	wktsInOrder := []string{
		"POINT EMPTY",
		"POLYGON EMPTY",
		"LINESTRING(0 0, -90 -80)",
		"POINT(-80 80)",
		"POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))",
	}
	type orderedCase struct {
		orderedWKTs []string
		srid        geopb.SRID
	}
	ordered := []orderedCase{
		{orderedWKTs: wktsInOrder, srid: 4326},
		{orderedWKTs: wktsInOrder, srid: 3857},
		{orderedWKTs: wktsInOrder, srid: 0},
	}
	for caseNum, c := range ordered {
		t.Run(strconv.Itoa(caseNum+1), func(t *testing.T) {
			// Index of the preceding WKT; each new index must not be smaller.
			var last uint64
			for _, wkt := range c.orderedWKTs {
				t.Run(wkt, func(t *testing.T) {
					parsed, err := ParseGeometry(wkt)
					require.NoError(t, err)
					withSRID, err := parsed.CloneWithSRID(c.srid)
					require.NoError(t, err)
					curveIdx := withSRID.SpaceCurveIndex()
					assert.GreaterOrEqual(t, curveIdx, last)
					last = curveIdx
				})
			}
		})
	}
}

func TestGeometryAsGeography(t *testing.T) {
for _, tc := range []struct {
geom string
Expand Down
44 changes: 44 additions & 0 deletions pkg/geo/hilbert.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package geo

// hilbertInverse maps the grid cell (x, y) to its distance d along a Hilbert
// curve covering an n x n grid.
// The loop halves n down to 1, so it walks one bit of x and y per iteration
// when n is a power of two.
// Adapted from `xy2d` from https://en.wikipedia.org/wiki/Hilbert_curve#Applications_and_mapping_algorithms.
func hilbertInverse(n, x, y uint64) uint64 {
	var dist uint64
	for side := n >> 1; side != 0; side >>= 1 {
		// rx and ry are the bits of x and y selected by side (0 or 1).
		rx, ry := uint64(0), uint64(0)
		if x&side != 0 {
			rx = 1
		}
		if y&side != 0 {
			ry = 1
		}
		// (3*rx)^ry is the quadrant's ordinal (0..3) along the curve; each
		// quadrant at this level holds side*side cells.
		dist += side * side * ((3 * rx) ^ ry)
		x, y = hilbertRotate(n, x, y, rx, ry)
	}
	return dist
}

// hilbertRotate rotates/flips a quadrant appropriately and returns the
// resulting (x, y).
// When ry is non-zero the coordinates are returned untouched. Otherwise x and y
// are swapped, after first being reflected to n-1-x and n-1-y if rx is 1.
// Adapted from `rot` in https://en.wikipedia.org/wiki/Hilbert_curve#Applications_and_mapping_algorithms.
func hilbertRotate(n, x, y, rx, ry uint64) (uint64, uint64) {
	if ry != 0 {
		return x, y
	}
	if rx == 1 {
		x, y = n-1-x, n-1-y
	}
	return y, x
}
8 changes: 8 additions & 0 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -2037,6 +2037,14 @@ func (m *sessionDataMutator) SetDefaultReadOnly(val bool) {
m.data.DefaultReadOnly = val
}

// SetEnableSeqScan stores val in the session data's EnableSeqScan field.
func (m *sessionDataMutator) SetEnableSeqScan(val bool) {
m.data.EnableSeqScan = val
}

// SetSynchronousCommit stores val in the session data's SynchronousCommit field.
func (m *sessionDataMutator) SetSynchronousCommit(val bool) {
m.data.SynchronousCommit = val
}

// SetDistSQLMode stores val in the session data's DistSQLMode field.
func (m *sessionDataMutator) SetDistSQLMode(val sessiondata.DistSQLExecMode) {
m.data.DistSQLMode = val
}
Expand Down
Loading

0 comments on commit 66a3c19

Please sign in to comment.