From ccd534d67d92b2c5b7fcededb4e2ca9efe48475d Mon Sep 17 00:00:00 2001 From: David Taylor Date: Mon, 3 Aug 2020 18:35:57 +0000 Subject: [PATCH 1/3] build: avoid errors when tar closes curl's pipe Release note: none. --- build/release/teamcity-make-and-publish-build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build/release/teamcity-make-and-publish-build.sh b/build/release/teamcity-make-and-publish-build.sh index 8638a487a879..3d48ec25b345 100755 --- a/build/release/teamcity-make-and-publish-build.sh +++ b/build/release/teamcity-make-and-publish-build.sh @@ -45,7 +45,8 @@ docker_login_with_google gcr_repository="us.gcr.io/cockroach-cloud-images/cockroach" # TODO: update publish-provisional-artifacts with option to leave one or more cockroach binaries in the local filesystem -curl -f -s -S -o- "https://${bucket}.s3.amazonaws.com/cockroach-${build_name}.linux-amd64.tgz" | tar xfz - --strip-components 1 +# HACK: we pipe though tac twice to reverse/un-reverse since that will read the whole buffer and make curl happy, even if tar closes early. +curl -f -s -S -o- "https://${bucket}.s3.amazonaws.com/cockroach-${build_name}.linux-amd64.tgz" | tac | tac | tar xfz - --strip-components 1 cp cockroach build/deploy/cockroach docker build --no-cache --tag="${gcr_repository}:${build_name}" build/deploy From 819e0c3c509c004c602f2089a6217e1f0548d6dc Mon Sep 17 00:00:00 2001 From: Oliver Tan Date: Mon, 3 Aug 2020 11:38:02 -0700 Subject: [PATCH 2/3] geo: order geospatial objects by Hilbert Curve index This follows the [PostGIS way](https://info.crunchydata.com/blog/waiting-for-postgis-3-hilbert-geometry-sorting), but does not follow the same encoding mechanism, so we cannot compare results directly. For Geography we re-use the S2 infrastructure and for Geometry we use the google/hilbert library. 
Release note (sql change): When ordering by geospatial columns, it will now order by the Hilbert Space Curve index so that points which are geographically similar are clustered together. --- pkg/geo/geo.go | 92 ++++++++++++++-- pkg/geo/geo_test.go | 139 ++++++++++++++++++++++++ pkg/geo/hilbert.go | 44 ++++++++ pkg/sql/sem/tree/datum.go | 4 +- pkg/sql/sqlbase/column_type_encoding.go | 8 +- pkg/sql/sqlbase/encoded_datum_test.go | 2 +- pkg/util/encoding/encoding.go | 77 ++++++++++--- pkg/util/encoding/encoding_test.go | 133 ++++++++++++++++++----- 8 files changed, 440 insertions(+), 59 deletions(-) create mode 100644 pkg/geo/hilbert.go diff --git a/pkg/geo/geo.go b/pkg/geo/geo.go index 8db1641db9cd..6c1d7d0ba73d 100644 --- a/pkg/geo/geo.go +++ b/pkg/geo/geo.go @@ -14,6 +14,7 @@ package geo import ( "bytes" "encoding/binary" + "math" "github.com/cockroachdb/cockroach/pkg/geo/geographiclib" "github.com/cockroachdb/cockroach/pkg/geo/geopb" @@ -276,6 +277,57 @@ func (g *Geometry) CartesianBoundingBox() *CartesianBoundingBox { return &CartesianBoundingBox{BoundingBox: *g.spatialObject.BoundingBox} } +// SpaceCurveIndex returns an uint64 index to use representing an index into a space-filling curve. +// This will return 0 for empty spatial objects, and math.MaxUint64 for any object outside +// the defined bounds of the given SRID projection. +func (g *Geometry) SpaceCurveIndex() uint64 { + bbox := g.CartesianBoundingBox() + if bbox == nil { + return 0 + } + centerX := (bbox.BoundingBox.LoX + bbox.BoundingBox.HiX) / 2 + centerY := (bbox.BoundingBox.LoY + bbox.BoundingBox.HiY) / 2 + // By default, bound by MaxInt32 (we have not typically seen bounds greater than 1B). + bounds := geoprojbase.Bounds{ + MinX: math.MinInt32, + MaxX: math.MaxInt32, + MinY: math.MinInt32, + MaxY: math.MaxInt32, + } + if proj, ok := geoprojbase.Projection(g.SRID()); ok { + bounds = proj.Bounds + } + // If we're out of bounds, give up and return a large number. 
+ if centerX > bounds.MaxX || centerY > bounds.MaxY || centerX < bounds.MinX || centerY < bounds.MinY { + return math.MaxUint64 + } + + const boxLength = 1 << 32 + // Add 1 to each bound so that we normalize the coordinates to [0, 1) before + // multiplying by boxLength to give coordinates that are integers in the interval [0, boxLength-1]. + xBounds := (bounds.MaxX - bounds.MinX) + 1 + yBounds := (bounds.MaxY - bounds.MinY) + 1 + // hilbertInverse returns values in the interval [0, boxLength^2-1], so return [0, 2^64-1]. + xPos := uint64(((centerX - bounds.MinX) / xBounds) * boxLength) + yPos := uint64(((centerY - bounds.MinY) / yBounds) * boxLength) + return hilbertInverse(boxLength, xPos, yPos) +} + +// Compare compares a Geometry against another. +// It compares using SpaceCurveIndex, followed by the byte representation of the Geometry. +// This must produce the same ordering as the index mechanism. +func (g *Geometry) Compare(o *Geometry) int { + lhs := g.SpaceCurveIndex() + rhs := o.SpaceCurveIndex() + if lhs > rhs { + return 1 + } + if lhs < rhs { + return -1 + } + return compareSpatialObjectBytes(g.SpatialObject(), o.SpatialObject()) +} + // // Geography // @@ -489,6 +541,35 @@ func (g *Geography) BoundingCap() s2.Cap { return g.BoundingRect().CapBound() } +// SpaceCurveIndex returns an uint64 index to use representing an index into a space-filling curve. +// This will return 0 for empty spatial objects. +func (g *Geography) SpaceCurveIndex() uint64 { + rect := g.BoundingRect() + if rect.IsEmpty() { + return 0 + } + return uint64(s2.CellIDFromLatLng(rect.Center())) +} + +// Compare compares a Geography against another. +// It compares using SpaceCurveIndex, followed by the byte representation of the Geography. +// This must produce the same ordering as the index mechanism. 
+func (g *Geography) Compare(o *Geography) int { + lhs := g.SpaceCurveIndex() + rhs := o.SpaceCurveIndex() + if lhs > rhs { + return 1 + } + if lhs < rhs { + return -1 + } + return compareSpatialObjectBytes(g.SpatialObject(), o.SpatialObject()) +} + +// +// Common +// + // IsLinearRingCCW returns whether a given linear ring is counter clock wise. // See 2.07 of http://www.faqs.org/faqs/graphics/algorithms-faq/. // "Find the lowest vertex (or, if there is more than one vertex with the same lowest coordinate, @@ -618,10 +699,6 @@ func S2RegionsFromGeomT(geomRepr geom.T, emptyBehavior EmptyBehavior) ([]s2.Regi return regions, nil } -// -// Common -// - // normalizeLngLat normalizes geographical coordinates into a valid range. func normalizeLngLat(lng float64, lat float64) (float64, float64) { if lat > 90 || lat < -90 { @@ -789,9 +866,10 @@ func GeomTContainsEmpty(g geom.T) bool { return false } -// CompareSpatialObject compares the SpatialObject. -// This must match the byte ordering that is be produced by encoding.EncodeGeoAscending. -func CompareSpatialObject(lhs geopb.SpatialObject, rhs geopb.SpatialObject) int { +// compareSpatialObjectBytes compares the SpatialObject if they were serialized. +// This is used for comparison operations, and must be kept consistent with the indexing +// encoding. 
+func compareSpatialObjectBytes(lhs geopb.SpatialObject, rhs geopb.SpatialObject) int { marshalledLHS, err := protoutil.Marshal(&lhs) if err != nil { panic(err) diff --git a/pkg/geo/geo_test.go b/pkg/geo/geo_test.go index dcd637301862..dfe331692ebe 100644 --- a/pkg/geo/geo_test.go +++ b/pkg/geo/geo_test.go @@ -13,12 +13,14 @@ package geo import ( "encoding/hex" "fmt" + "math" "strconv" "testing" "github.com/cockroachdb/cockroach/pkg/geo/geopb" "github.com/cockroachdb/errors" "github.com/golang/geo/s2" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/twpayne/go-geom" ) @@ -561,6 +563,143 @@ func TestGeographyAsS2(t *testing.T) { } } +func TestGeographySpaceCurveIndex(t *testing.T) { + orderedTestCases := []struct { + orderedWKTs []string + srid geopb.SRID + }{ + { + []string{ + "POINT EMPTY", + "POLYGON EMPTY", + "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))", + "POINT(-80 80)", + "LINESTRING(0 0, -90 -80)", + }, + 4326, + }, + { + []string{ + "POINT EMPTY", + "POLYGON EMPTY", + "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))", + "POINT(-80 80)", + "LINESTRING(0 0, -90 -80)", + }, + 4004, + }, + } + for i, tc := range orderedTestCases { + t.Run(strconv.Itoa(i+1), func(t *testing.T) { + previous := uint64(0) + for _, wkt := range tc.orderedWKTs { + t.Run(wkt, func(t *testing.T) { + g, err := ParseGeography(wkt) + require.NoError(t, err) + g, err = g.CloneWithSRID(tc.srid) + require.NoError(t, err) + + h := g.SpaceCurveIndex() + assert.GreaterOrEqual(t, h, previous) + previous = h + }) + } + }) + } +} + +func TestGeometrySpaceCurveIndex(t *testing.T) { + valueTestCases := []struct { + wkt string + expected uint64 + }{ + { + wkt: "POINT EMPTY", + expected: 0, + }, + { + wkt: "SRID=4326;POINT EMPTY", + expected: 0, + }, + { + wkt: "POINT (100 80)", + expected: 9223372036854787504, + }, + { + wkt: "SRID=4326;POINT(100 80)", + expected: 11895367802890724441, + }, + { + wkt: "POINT (1000 800)", + expected: 9223372036855453930, + }, + { + wkt: 
"SRID=4326;POINT(1000 800)", + expected: math.MaxUint64, + }, + } + + for _, tc := range valueTestCases { + t.Run(tc.wkt, func(t *testing.T) { + g, err := ParseGeometry(tc.wkt) + require.NoError(t, err) + require.Equal(t, tc.expected, g.SpaceCurveIndex()) + }) + } + + orderedTestCases := []struct { + orderedWKTs []string + srid geopb.SRID + }{ + { + []string{ + "POINT EMPTY", + "POLYGON EMPTY", + "LINESTRING(0 0, -90 -80)", + "POINT(-80 80)", + "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))", + }, + 4326, + }, + { + []string{ + "POINT EMPTY", + "POLYGON EMPTY", + "LINESTRING(0 0, -90 -80)", + "POINT(-80 80)", + "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))", + }, + 3857, + }, + { + []string{ + "POINT EMPTY", + "POLYGON EMPTY", + "LINESTRING(0 0, -90 -80)", + "POINT(-80 80)", + "POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))", + }, + 0, + }, + } + for i, tc := range orderedTestCases { + t.Run(strconv.Itoa(i+1), func(t *testing.T) { + previous := uint64(0) + for _, wkt := range tc.orderedWKTs { + t.Run(wkt, func(t *testing.T) { + g, err := ParseGeometry(wkt) + require.NoError(t, err) + g, err = g.CloneWithSRID(tc.srid) + require.NoError(t, err) + h := g.SpaceCurveIndex() + assert.GreaterOrEqual(t, h, previous) + previous = h + }) + } + }) + } +} + func TestGeometryAsGeography(t *testing.T) { for _, tc := range []struct { geom string diff --git a/pkg/geo/hilbert.go b/pkg/geo/hilbert.go new file mode 100644 index 000000000000..a0a9c2ffb621 --- /dev/null +++ b/pkg/geo/hilbert.go @@ -0,0 +1,44 @@ +// Copyright 2020 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package geo + +// hilbertInverse converts (x,y) to d on a Hilbert Curve. 
+// Adapted from `xy2d` from https://en.wikipedia.org/wiki/Hilbert_curve#Applications_and_mapping_algorithms. +func hilbertInverse(n, x, y uint64) uint64 { + var d uint64 + for s := n / 2; s > 0; s /= 2 { + var rx uint64 + if (x & s) > 0 { + rx = 1 + } + var ry uint64 + if (y & s) > 0 { + ry = 1 + } + d += s * s * ((3 * rx) ^ ry) + x, y = hilbertRotate(n, x, y, rx, ry) + } + return d +} + +// hilberRoate rotates/flips a quadrant appropriately. +// Adapted from `rot` in https://en.wikipedia.org/wiki/Hilbert_curve#Applications_and_mapping_algorithms. +func hilbertRotate(n, x, y, rx, ry uint64) (uint64, uint64) { + if ry == 0 { + if rx == 1 { + x = n - 1 - x + y = n - 1 - y + } + + x, y = y, x + } + return x, y +} diff --git a/pkg/sql/sem/tree/datum.go b/pkg/sql/sem/tree/datum.go index 6ec4b1c0f0a9..107c9e6eb3f5 100644 --- a/pkg/sql/sem/tree/datum.go +++ b/pkg/sql/sem/tree/datum.go @@ -2766,7 +2766,7 @@ func (d *DGeography) Compare(ctx *EvalContext, other Datum) int { // NULL is less than any non-NULL value. return 1 } - return geo.CompareSpatialObject(d.Geography.SpatialObject(), other.(*DGeography).SpatialObject()) + return d.Geography.Compare(other.(*DGeography).Geography) } // Prev implements the Datum interface. @@ -2874,7 +2874,7 @@ func (d *DGeometry) Compare(ctx *EvalContext, other Datum) int { // NULL is less than any non-NULL value. return 1 } - return geo.CompareSpatialObject(d.Geometry.SpatialObject(), other.(*DGeometry).SpatialObject()) + return d.Geometry.Compare(other.(*DGeometry).Geometry) } // Prev implements the Datum interface. 
diff --git a/pkg/sql/sqlbase/column_type_encoding.go b/pkg/sql/sqlbase/column_type_encoding.go index 794a7a89bb17..48c07f617f57 100644 --- a/pkg/sql/sqlbase/column_type_encoding.go +++ b/pkg/sql/sqlbase/column_type_encoding.go @@ -97,15 +97,15 @@ func EncodeTableKey(b []byte, val tree.Datum, dir encoding.Direction) ([]byte, e case *tree.DGeography: so := t.Geography.SpatialObject() if dir == encoding.Ascending { - return encoding.EncodeGeoAscending(b, &so) + return encoding.EncodeGeoAscending(b, t.Geography.SpaceCurveIndex(), &so) } - return encoding.EncodeGeoDescending(b, &so) + return encoding.EncodeGeoDescending(b, t.Geography.SpaceCurveIndex(), &so) case *tree.DGeometry: so := t.Geometry.SpatialObject() if dir == encoding.Ascending { - return encoding.EncodeGeoAscending(b, &so) + return encoding.EncodeGeoAscending(b, t.Geometry.SpaceCurveIndex(), &so) } - return encoding.EncodeGeoDescending(b, &so) + return encoding.EncodeGeoDescending(b, t.Geometry.SpaceCurveIndex(), &so) case *tree.DDate: if dir == encoding.Ascending { return encoding.EncodeVarintAscending(b, t.UnixEpochDaysWithOrig()), nil diff --git a/pkg/sql/sqlbase/encoded_datum_test.go b/pkg/sql/sqlbase/encoded_datum_test.go index 35b883610358..1e45d52ad22c 100644 --- a/pkg/sql/sqlbase/encoded_datum_test.go +++ b/pkg/sql/sqlbase/encoded_datum_test.go @@ -208,7 +208,7 @@ func TestEncDatumCompare(t *testing.T) { for _, typ := range types.OidToType { switch typ.Family() { - case types.AnyFamily, types.UnknownFamily, types.ArrayFamily, types.JsonFamily, types.TupleFamily, types.GeometryFamily, types.GeographyFamily: + case types.AnyFamily, types.UnknownFamily, types.ArrayFamily, types.JsonFamily, types.TupleFamily: continue case types.CollatedStringFamily: typ = types.MakeCollatedString(types.String, *RandCollationLocale(rng)) diff --git a/pkg/util/encoding/encoding.go b/pkg/util/encoding/encoding.go index 56d40440c321..f1e37b647e4e 100644 --- a/pkg/util/encoding/encoding.go +++ 
b/pkg/util/encoding/encoding.go @@ -584,7 +584,7 @@ func EncodeBytesDescending(b []byte, data []byte) []byte { // are appended to r. The remainder of the input buffer and the // decoded []byte are returned. func DecodeBytesAscending(b []byte, r []byte) ([]byte, []byte, error) { - return decodeBytesInternal(b, r, ascendingBytesEscapes, true) + return decodeBytesInternal(b, r, ascendingBytesEscapes, true /* expectMarker */) } // DecodeBytesDescending decodes a []byte value from the input buffer @@ -597,7 +597,7 @@ func DecodeBytesDescending(b []byte, r []byte) ([]byte, []byte, error) { if r == nil { r = []byte{} } - b, r, err := decodeBytesInternal(b, r, descendingBytesEscapes, true) + b, r, err := decodeBytesInternal(b, r, descendingBytesEscapes, true /* expectMarker */) onesComplement(r) return b, r, err } @@ -978,39 +978,55 @@ func decodeTime(b []byte) (r []byte, sec int64, nsec int64, err error) { // EncodeGeoAscending encodes a geopb.SpatialObject value in ascending order and // returns the new buffer. -// TODO(otan): this should ideally just be encoded by {SRID,Shape,Raw Points}. -// EWKB is expensive to encode. However, we don't store this as a PRIMARY KEY -// (this is needed for GROUP BY only for now), so we ignore it for now. -func EncodeGeoAscending(b []byte, g *geopb.SpatialObject) ([]byte, error) { +// It is sorted by the given curve index, followed by the bytes of the spatial object. +func EncodeGeoAscending(b []byte, curveIndex uint64, g *geopb.SpatialObject) ([]byte, error) { + b = append(b, geoMarker) + b = EncodeUint64Ascending(b, curveIndex) + data, err := protoutil.Marshal(g) if err != nil { return nil, err } - b = encodeBytesAscendingWithTerminatorAndPrefix(b, data, ascendingGeoEscapes.escapedTerm, geoMarker) + b = encodeBytesAscendingWithTerminator(b, data, ascendingGeoEscapes.escapedTerm) return b, nil } // EncodeGeoDescending encodes a geopb.SpatialObject value in descending order and // returns the new buffer. 
-func EncodeGeoDescending(b []byte, g *geopb.SpatialObject) ([]byte, error) { +// It is sorted by the given curve index, followed by the bytes of the spatial object. +func EncodeGeoDescending(b []byte, curveIndex uint64, g *geopb.SpatialObject) ([]byte, error) { + b = append(b, geoDescMarker) + b = EncodeUint64Descending(b, curveIndex) + + data, err := protoutil.Marshal(g) + if err != nil { + return nil, err + } n := len(b) - var err error - b, err = EncodeGeoAscending(b, g) + b = encodeBytesAscendingWithTerminator(b, data, ascendingGeoEscapes.escapedTerm) if err != nil { return nil, err } - b[n] = geoDescMarker - onesComplement(b[n+1:]) + onesComplement(b[n:]) return b, nil } // DecodeGeoAscending decodes a geopb.SpatialObject value that was encoded // in ascending order back into a geopb.SpatialObject. func DecodeGeoAscending(b []byte) ([]byte, geopb.SpatialObject, error) { + if PeekType(b) != Geo { + return nil, geopb.SpatialObject{}, errors.Errorf("did not find Geo marker") + } + b = b[1:] + var err error + b, _, err = DecodeUint64Ascending(b) + if err != nil { + return nil, geopb.SpatialObject{}, err + } + var pbBytes []byte var ret geopb.SpatialObject - var err error - b, pbBytes, err = decodeBytesInternal(b, pbBytes, ascendingGeoEscapes, true) + b, pbBytes, err = decodeBytesInternal(b, pbBytes, ascendingGeoEscapes, false /* expectMarker */) if err != nil { return b, ret, err } @@ -1021,10 +1037,19 @@ func DecodeGeoAscending(b []byte) ([]byte, geopb.SpatialObject, error) { // DecodeGeoDescending decodes a geopb.SpatialObject value that was encoded // in descending order back into a geopb.SpatialObject. 
func DecodeGeoDescending(b []byte) ([]byte, geopb.SpatialObject, error) { + if PeekType(b) != GeoDesc { + return nil, geopb.SpatialObject{}, errors.Errorf("did not find Geo marker") + } + b = b[1:] + var err error + b, _, err = DecodeUint64Descending(b) + if err != nil { + return nil, geopb.SpatialObject{}, err + } + var pbBytes []byte var ret geopb.SpatialObject - var err error - b, pbBytes, err = decodeBytesInternal(b, pbBytes, descendingGeoEscapes, true) + b, pbBytes, err = decodeBytesInternal(b, pbBytes, descendingGeoEscapes, false /* expectMarker */) if err != nil { return b, ret, err } @@ -1527,13 +1552,29 @@ func PeekLength(b []byte) (int, error) { case bytesMarker: return getBytesLength(b, ascendingBytesEscapes) case geoMarker: - return getBytesLength(b, ascendingGeoEscapes) + // Expect to reserve at least 8 bytes for int64. + if len(b) < 8 { + return 0, errors.Errorf("slice too short for geospatial (%d)", len(b)) + } + ret, err := getBytesLength(b[8:], ascendingGeoEscapes) + if err != nil { + return 0, err + } + return 8 + ret, nil case jsonInvertedIndex: return getJSONInvertedIndexKeyLength(b) case bytesDescMarker: return getBytesLength(b, descendingBytesEscapes) case geoDescMarker: - return getBytesLength(b, descendingGeoEscapes) + // Expect to reserve at least 8 bytes for int64. 
+ if len(b) < 8 { + return 0, errors.Errorf("slice too short for geospatial (%d)", len(b)) + } + ret, err := getBytesLength(b[8:], descendingGeoEscapes) + if err != nil { + return 0, err + } + return 8 + ret, nil case timeMarker, timeTZMarker: return GetMultiVarintLen(b, 2) case durationBigNegMarker, durationMarker, durationBigPosMarker: diff --git a/pkg/util/encoding/encoding_test.go b/pkg/util/encoding/encoding_test.go index 8038612725a5..7b330edfb309 100644 --- a/pkg/util/encoding/encoding_test.go +++ b/pkg/util/encoding/encoding_test.go @@ -16,6 +16,7 @@ import ( "math" "math/rand" "regexp" + "strconv" "testing" "time" @@ -1147,37 +1148,115 @@ func TestEncodeDecodeTimeTZ(t *testing.T) { } } -func TestEncodeDecodeGeo(t *testing.T) { - testCases := []string{ - "SRID=4326;POINT(1.0 1.0)", - "POINT(2.0 2.0)", +func TestEncodeDecodeGeometry(t *testing.T) { + testCases := []struct { + orderedWKTs []string + }{ + { + orderedWKTs: []string{ + "SRID=4326;POLYGON EMPTY", + "SRID=4326;POINT EMPTY", + "SRID=4326;LINESTRING(0 0, -90 -80)", + "SRID=4326;POINT(-80 80)", + "SRID=4326;POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))", + }, + }, } - for _, tc := range testCases { - t.Run(tc, func(t *testing.T) { - for _, dir := range []Direction{Ascending, Descending} { + for i, tc := range testCases { + for _, dir := range []Direction{Ascending, Descending} { + t.Run(strconv.Itoa(i+1), func(t *testing.T) { t.Run(fmt.Sprintf("dir:%d", dir), func(t *testing.T) { - parsed, err := geo.ParseGeometry(tc) - require.NoError(t, err) - spatialObject := parsed.SpatialObject() - - var b []byte - var decoded geopb.SpatialObject - if dir == Ascending { - b, err = EncodeGeoAscending(b, &spatialObject) - require.NoError(t, err) - _, decoded, err = DecodeGeoAscending(b) - require.NoError(t, err) - } else { - b, err = EncodeGeoDescending(b, &spatialObject) + var lastEncoded []byte + for _, wkt := range tc.orderedWKTs { + parsed, err := geo.ParseGeometry(wkt) require.NoError(t, err) - _, decoded, err = 
DecodeGeoDescending(b) + spatialObject := parsed.SpatialObject() + + var b []byte + var decoded geopb.SpatialObject + if dir == Ascending { + b, err = EncodeGeoAscending(b, parsed.SpaceCurveIndex(), &spatialObject) + require.NoError(t, err) + _, decoded, err = DecodeGeoAscending(b) + require.NoError(t, err) + } else { + b, err = EncodeGeoDescending(b, parsed.SpaceCurveIndex(), &spatialObject) + require.NoError(t, err) + _, decoded, err = DecodeGeoDescending(b) + require.NoError(t, err) + } + require.Equal(t, spatialObject, decoded) + testPeekLength(t, b) + + if i > 0 { + if dir == Ascending { + assert.Truef(t, bytes.Compare(b, lastEncoded) > 0, "expected %s > %s", tc.orderedWKTs[i], tc.orderedWKTs[i-1]) + } else { + assert.Truef(t, bytes.Compare(b, lastEncoded) < 0, "expected %s < %s", tc.orderedWKTs[i], tc.orderedWKTs[i-1]) + } + } + + lastEncoded = b + } + }) + }) + } + } +} + +func TestEncodeDecodeGeography(t *testing.T) { + testCases := []struct { + orderedWKTs []string + }{ + { + orderedWKTs: []string{ + "SRID=4326;POLYGON EMPTY", + "SRID=4326;POINT EMPTY", + "SRID=4326;POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))", + "SRID=4326;POINT(-80 80)", + "SRID=4326;LINESTRING(0 0, -90 -80)", + }, + }, + } + for i, tc := range testCases { + for _, dir := range []Direction{Ascending, Descending} { + t.Run(strconv.Itoa(i+1), func(t *testing.T) { + t.Run(fmt.Sprintf("dir:%d", dir), func(t *testing.T) { + var lastEncoded []byte + for _, wkt := range tc.orderedWKTs { + parsed, err := geo.ParseGeography(wkt) require.NoError(t, err) + spatialObject := parsed.SpatialObject() + + var b []byte + var decoded geopb.SpatialObject + if dir == Ascending { + b, err = EncodeGeoAscending(b, parsed.SpaceCurveIndex(), &spatialObject) + require.NoError(t, err) + _, decoded, err = DecodeGeoAscending(b) + require.NoError(t, err) + } else { + b, err = EncodeGeoDescending(b, parsed.SpaceCurveIndex(), &spatialObject) + require.NoError(t, err) + _, decoded, err = DecodeGeoDescending(b) + 
require.NoError(t, err) + } + require.Equal(t, spatialObject, decoded) + testPeekLength(t, b) + + if i > 0 { + if dir == Ascending { + assert.Truef(t, bytes.Compare(b, lastEncoded) > 0, "expected %s > %s", tc.orderedWKTs[i], tc.orderedWKTs[i-1]) + } else { + assert.Truef(t, bytes.Compare(b, lastEncoded) < 0, "expected %s < %s", tc.orderedWKTs[i], tc.orderedWKTs[i-1]) + } + } + + lastEncoded = b } - require.Equal(t, spatialObject, decoded) - testPeekLength(t, b) }) - } - }) + }) + } } } @@ -1259,9 +1338,9 @@ func TestPeekType(t *testing.T) { require.NoError(t, err) encodedDurationDescending, err := EncodeDurationDescending(nil, duration.Duration{}) require.NoError(t, err) - encodedGeoAscending, err := EncodeGeoAscending(nil, &geopb.SpatialObject{}) + encodedGeoAscending, err := EncodeGeoAscending(nil, 0, &geopb.SpatialObject{}) require.NoError(t, err) - encodedGeoDescending, err := EncodeGeoDescending(nil, &geopb.SpatialObject{}) + encodedGeoDescending, err := EncodeGeoDescending(nil, 0, &geopb.SpatialObject{}) require.NoError(t, err) testCases := []struct { enc []byte From 88fd7b40d27aa9ca91c16f9816f4799b5e238985 Mon Sep 17 00:00:00 2001 From: Oliver Tan Date: Mon, 3 Aug 2020 11:40:44 -0700 Subject: [PATCH 3/3] sql: support synchronous_commit and enable_seqscan as dummy no-op These vars are set by `osm2pgsql` and `ogr2ogr` respectively. These default to the ON state, the OFF state affects performance but not correctness. Release note (sql change): Support the setting and getting of the `synchronous_commit` and `enable_seqscan` variables, which do not affect any performance characteristics. These are no-ops enabled to allow certain tools to work. 
--- pkg/sql/exec_util.go | 8 ++++ .../logictest/testdata/logic_test/pg_catalog | 6 +++ pkg/sql/logictest/testdata/logic_test/set | 38 +++++++++++++++ .../logictest/testdata/logic_test/show_source | 2 + pkg/sql/sessiondata/session_data.go | 5 ++ pkg/sql/set_var.go | 11 +++++ pkg/sql/sqltelemetry/session.go | 7 +++ pkg/sql/testdata/telemetry/set | 9 ++++ pkg/sql/unsupported_vars.go | 35 ++++++++++++-- pkg/sql/vars.go | 47 +++++++++++++++---- 10 files changed, 155 insertions(+), 13 deletions(-) create mode 100644 pkg/sql/testdata/telemetry/set diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index fbdc0d18ccf1..38e810f34fc6 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -2036,6 +2036,14 @@ func (m *sessionDataMutator) SetDefaultReadOnly(val bool) { m.data.DefaultReadOnly = val } +func (m *sessionDataMutator) SetEnableSeqScan(val bool) { + m.data.EnableSeqScan = val +} + +func (m *sessionDataMutator) SetSynchronousCommit(val bool) { + m.data.SynchronousCommit = val +} + func (m *sessionDataMutator) SetDistSQLMode(val sessiondata.DistSQLExecMode) { m.data.DistSQLMode = val } diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index 899f01d256dd..6ef8abd662cf 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -1760,6 +1760,7 @@ enable_experimental_alter_column_type_general off NULL NUL enable_implicit_select_for_update on NULL NULL NULL string enable_insert_fast_path on NULL NULL NULL string enable_interleaved_joins on NULL NULL NULL string +enable_seqscan on NULL NULL NULL string enable_zigzag_join on NULL NULL NULL string experimental_distsql_planning off NULL NULL NULL string experimental_enable_enums on NULL NULL NULL string @@ -1795,6 +1796,7 @@ sql_safe_updates off NULL NUL standard_conforming_strings on NULL NULL NULL string statement_timeout 0 NULL NULL NULL string synchronize_seqscans on NULL NULL NULL string 
+synchronous_commit on NULL NULL NULL string timezone UTC NULL NULL NULL string tracing off NULL NULL NULL string transaction_isolation serializable NULL NULL NULL string @@ -1830,6 +1832,7 @@ enable_experimental_alter_column_type_general off NULL user enable_implicit_select_for_update on NULL user NULL on on enable_insert_fast_path on NULL user NULL on on enable_interleaved_joins on NULL user NULL on on +enable_seqscan on NULL user NULL on on enable_zigzag_join on NULL user NULL on on experimental_distsql_planning off NULL user NULL off off experimental_enable_enums on NULL user NULL off off @@ -1865,6 +1868,7 @@ sql_safe_updates off NULL user standard_conforming_strings on NULL user NULL on on statement_timeout 0 NULL user NULL 0 0 synchronize_seqscans on NULL user NULL on on +synchronous_commit on NULL user NULL on on timezone UTC NULL user NULL UTC UTC tracing off NULL user NULL off off transaction_isolation serializable NULL user NULL serializable serializable @@ -1896,6 +1900,7 @@ enable_experimental_alter_column_type_general NULL NULL NULL NULL enable_implicit_select_for_update NULL NULL NULL NULL NULL enable_insert_fast_path NULL NULL NULL NULL NULL enable_interleaved_joins NULL NULL NULL NULL NULL +enable_seqscan NULL NULL NULL NULL NULL enable_zigzag_join NULL NULL NULL NULL NULL experimental_distsql_planning NULL NULL NULL NULL NULL experimental_enable_enums NULL NULL NULL NULL NULL @@ -1933,6 +1938,7 @@ sql_safe_updates NULL NULL NULL NULL standard_conforming_strings NULL NULL NULL NULL NULL statement_timeout NULL NULL NULL NULL NULL synchronize_seqscans NULL NULL NULL NULL NULL +synchronous_commit NULL NULL NULL NULL NULL timezone NULL NULL NULL NULL NULL tracing NULL NULL NULL NULL NULL transaction_isolation NULL NULL NULL NULL NULL diff --git a/pkg/sql/logictest/testdata/logic_test/set b/pkg/sql/logictest/testdata/logic_test/set index 0dafb615e049..64eb73d69704 100644 --- a/pkg/sql/logictest/testdata/logic_test/set +++ 
b/pkg/sql/logictest/testdata/logic_test/set @@ -355,3 +355,41 @@ statement ok SET experimental_distsql_planning = always; SET experimental_distsql_planning = on; SET experimental_distsql_planning = off + +subtest dummy_session_vars + +query T noticetrace +SET synchronous_commit = off; SET enable_seqscan = false +---- +WARNING: setting session var "synchronous_commit" is a no-op +WARNING: setting session var "enable_seqscan" is a no-op + +query T colnames +SHOW synchronous_commit +---- +synchronous_commit +off + +query T colnames +SHOW enable_seqscan +---- +enable_seqscan +off + +query T noticetrace +SET synchronous_commit = on; SET enable_seqscan = true +---- +WARNING: setting session var "synchronous_commit" is a no-op +WARNING: setting session var "enable_seqscan" is a no-op + +query T colnames +SHOW synchronous_commit +---- +synchronous_commit +on + +query T colnames +SHOW enable_seqscan +---- +enable_seqscan +on diff --git a/pkg/sql/logictest/testdata/logic_test/show_source b/pkg/sql/logictest/testdata/logic_test/show_source index bdd3f8789626..ff1af5c93b3d 100644 --- a/pkg/sql/logictest/testdata/logic_test/show_source +++ b/pkg/sql/logictest/testdata/logic_test/show_source @@ -43,6 +43,7 @@ enable_experimental_alter_column_type_general off enable_implicit_select_for_update on enable_insert_fast_path on enable_interleaved_joins on +enable_seqscan on enable_zigzag_join on experimental_distsql_planning off experimental_enable_enums off @@ -78,6 +79,7 @@ sql_safe_updates off standard_conforming_strings on statement_timeout 0 synchronize_seqscans on +synchronous_commit on timezone UTC tracing off transaction_isolation serializable diff --git a/pkg/sql/sessiondata/session_data.go b/pkg/sql/sessiondata/session_data.go index 355b311abaed..f41ac12fbc54 100644 --- a/pkg/sql/sessiondata/session_data.go +++ b/pkg/sql/sessiondata/session_data.go @@ -141,6 +141,11 @@ type SessionData struct { // AlterColumnTypeGeneralEnabled is true if ALTER TABLE ... ALTER COLUMN ... 
// TYPE x may be used for general conversions requiring online schema change/ AlterColumnTypeGeneralEnabled bool + + // SynchronousCommit is a dummy setting for the synchronous_commit var. + SynchronousCommit bool + // EnableSeqScan is a dummy setting for the enable_seqscan var. + EnableSeqScan bool } // DataConversionConfig contains the parameters that influence diff --git a/pkg/sql/set_var.go b/pkg/sql/set_var.go index ab022e12ebad..efc36f885b35 100644 --- a/pkg/sql/set_var.go +++ b/pkg/sql/set_var.go @@ -16,9 +16,12 @@ import ( "time" "github.com/cockroachdb/apd/v2" + "github.com/cockroachdb/cockroach/pkg/server/telemetry" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" + "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgnotice" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" @@ -104,6 +107,14 @@ func unresolvedNameToStrVal(expr tree.Expr) tree.Expr { func (n *setVarNode) startExec(params runParams) error { var strVal string + + if _, ok := DummyVars[n.name]; ok { + telemetry.Inc(sqltelemetry.DummySessionVarValueCounter(n.name)) + params.p.SendClientNotice( + params.ctx, + pgnotice.NewWithSeverityf("WARNING", "setting session var %q is a no-op", n.name), + ) + } if n.typedValues != nil { for i, v := range n.typedValues { d, err := v.Eval(params.EvalContext()) diff --git a/pkg/sql/sqltelemetry/session.go b/pkg/sql/sqltelemetry/session.go index 9bab011eca83..d06ab7989caf 100644 --- a/pkg/sql/sqltelemetry/session.go +++ b/pkg/sql/sqltelemetry/session.go @@ -31,3 +31,10 @@ var ForceSavepointRestartCounter = telemetry.GetCounterOnce("sql.force_savepoint func UnimplementedSessionVarValueCounter(varName, val string) telemetry.Counter { return 
telemetry.GetCounter(fmt.Sprintf("unimplemented.sql.session_var.%s.%s", varName, val)) } + +// DummySessionVarValueCounter is to be incremented every time +// a client attempts to set a compatibility session var to a +// dummy value. +func DummySessionVarValueCounter(varName string) telemetry.Counter { + return telemetry.GetCounter(fmt.Sprintf("sql.session_var.dummy.%s", varName)) +} diff --git a/pkg/sql/testdata/telemetry/set b/pkg/sql/testdata/telemetry/set new file mode 100644 index 000000000000..b8882c6c1dfc --- /dev/null +++ b/pkg/sql/testdata/telemetry/set @@ -0,0 +1,9 @@ +feature-allowlist +sql.session_var.dummy.* +---- + +feature-usage +SET synchronous_commit = off; SET enable_seqscan = false +---- +sql.session_var.dummy.enable_seqscan +sql.session_var.dummy.synchronous_commit diff --git a/pkg/sql/unsupported_vars.go b/pkg/sql/unsupported_vars.go index 792656e2e17e..6ab92a2f615d 100644 --- a/pkg/sql/unsupported_vars.go +++ b/pkg/sql/unsupported_vars.go @@ -10,6 +10,35 @@ package sql +import "github.com/cockroachdb/cockroach/pkg/settings" + +// DummyVars contains a list of dummy vars we do not support that +// PostgreSQL does, but are required as an easy fix to make certain +// tooling/ORMs work. These vars should not affect the correctness +// of results. 
+var DummyVars = map[string]sessionVar{ + "enable_seqscan": makeDummyBooleanSessionVar( + "enable_seqscan", + func(evalCtx *extendedEvalContext) string { + return formatBoolAsPostgresSetting(evalCtx.SessionData.EnableSeqScan) + }, + func(m *sessionDataMutator, v bool) { + m.SetEnableSeqScan(v) + }, + func(sv *settings.Values) string { return "on" }, + ), + "synchronous_commit": makeDummyBooleanSessionVar( + "synchronous_commit", + func(evalCtx *extendedEvalContext) string { + return formatBoolAsPostgresSetting(evalCtx.SessionData.SynchronousCommit) + }, + func(m *sessionDataMutator, v bool) { + m.SetSynchronousCommit(v) + }, + func(sv *settings.Values) string { return "on" }, + ), +} + // UnsupportedVars contains the set of PostgreSQL session variables // and client parameters that are not supported in CockroachDB. // These are used to produce error messages and telemetry. @@ -70,7 +99,7 @@ var UnsupportedVars = func(ss ...string) map[string]struct{} { "enable_material", "enable_mergejoin", "enable_nestloop", - "enable_seqscan", + // "enable_seqscan", "enable_sort", "enable_tidscan", "escape_string_warning", @@ -135,8 +164,8 @@ var UnsupportedVars = func(ss ...string) map[string]struct{} { // "ssl_renegotiation_limit", // "standard_conforming_strings", // "statement_timeout", - // "synchronize_seqscans", - "synchronous_commit", + // "synchronize_seqscans", + // "synchronous_commit", "tcp_keepalives_count", "tcp_keepalives_idle", "tcp_keepalives_interval", diff --git a/pkg/sql/vars.go b/pkg/sql/vars.go index 01b19e77d4bf..79963f56714a 100644 --- a/pkg/sql/vars.go +++ b/pkg/sql/vars.go @@ -114,6 +114,30 @@ func parseBoolVar(varName, val string) (bool, error) { return b, nil } +// makeDummyBooleanSessionVar generates a sessionVar for a bool session setting. +// These functions allow the setting to be changed, but their values are not used. +// They are logged to telemetry and output a notice that these are unused. 
+func makeDummyBooleanSessionVar( + name string, + getFunc func(*extendedEvalContext) string, + setFunc func(*sessionDataMutator, bool), + sv func(_ *settings.Values) string, +) sessionVar { + return sessionVar{ + GetStringVal: makePostgresBoolGetStringValFn(name), + Set: func(_ context.Context, m *sessionDataMutator, s string) error { + b, err := parseBoolVar(name, s) + if err != nil { + return err + } + setFunc(m, b) + return nil + }, + Get: getFunc, + GlobalDefault: sv, + } +} + // varGen is the main definition array for all session variables. // Note to maintainers: try to keep this sorted in the source code. var varGen = map[string]sessionVar{ @@ -1143,10 +1167,22 @@ var varGen = map[string]sessionVar{ const compatErrMsg = "this parameter is currently recognized only for compatibility and has no effect in CockroachDB." func init() { + for k, v := range DummyVars { + varGen[k] = v + } // Initialize delegate.ValidVars. for v := range varGen { delegate.ValidVars[v] = struct{}{} } + // Initialize varNames. + varNames = func() []string { + res := make([]string, 0, len(varGen)) + for vName := range varGen { + res = append(res, vName) + } + sort.Strings(res) + return res + }() } // makePostgresBoolGetStringValFn returns a function that evaluates and returns @@ -1280,14 +1316,7 @@ func IsSessionVariableConfigurable(varName string) (exists, configurable bool) { return exists, v.Set != nil } -var varNames = func() []string { - res := make([]string, 0, len(varGen)) - for vName := range varGen { - res = append(res, vName) - } - sort.Strings(res) - return res -}() +var varNames []string // getSingleBool returns the boolean if the input Datum is a DBool, // and returns a detailed error message if not. 
@@ -1317,7 +1346,6 @@ func getSessionVar(name string, missingOk bool) (bool, sessionVar, error) { return false, sessionVar{}, pgerror.Newf(pgcode.UndefinedObject, "unrecognized configuration parameter %q", name) } - return true, v, nil } @@ -1330,7 +1358,6 @@ func (p *planner) GetSessionVar( if err != nil || !ok { return ok, "", err } - return true, v.Get(&p.extendedEvalCtx), nil }