Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
59690: geo/wkt: implement parser for points with Z and M dimensions r=otan a=andyyang890

This patch adds a parser that is capable of parsing WKT
representations of points with Z and M dimensions.

Refs: cockroachdb#53091

Release note: None

Co-authored-by: Andy Yang <[email protected]>
  • Loading branch information
craig[bot] and Andy Yang committed Feb 2, 2021
2 parents 9cda595 + 6390043 commit a0236ac
Show file tree
Hide file tree
Showing 9 changed files with 1,011 additions and 1 deletion.
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,9 @@ SQLPARSER_TARGETS = \
pkg/sql/lex/keywords.go \
pkg/sql/lexbase/reserved_keywords.go

WKTPARSER_TARGETS = \
pkg/geo/wkt/wkt.go

PROTOBUF_TARGETS := bin/.go_protobuf_sources bin/.gw_protobuf_sources

DOCGEN_TARGETS := \
Expand Down Expand Up @@ -1135,7 +1138,7 @@ dupl: bin/.bootstrap

.PHONY: generate
generate: ## Regenerate generated code.
generate: protobuf $(DOCGEN_TARGETS) $(OPTGEN_TARGETS) $(LOG_TARGETS) $(SQLPARSER_TARGETS) $(SETTINGS_DOC_PAGE) bin/langgen bin/terraformgen
generate: protobuf $(DOCGEN_TARGETS) $(OPTGEN_TARGETS) $(LOG_TARGETS) $(SQLPARSER_TARGETS) $(WKTPARSER_TARGETS) $(SETTINGS_DOC_PAGE) bin/langgen bin/terraformgen
$(GO) generate $(GOFLAGS) $(GOMODVENDORFLAGS) -tags '$(TAGS)' -ldflags '$(LINKFLAGS)' $(PKG)
$(MAKE) execgen

Expand Down
1 change: 1 addition & 0 deletions build/variables.mk
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ define VALID_VARS
WEBPACK
WEBPACK_DASHBOARD
WEBPACK_DEV_SERVER
WKTPARSER_TARGETS
XCC
XCMAKE_SYSTEM_NAME
XCXX
Expand Down
1 change: 1 addition & 0 deletions pkg/geo/wkt/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
y.output
23 changes: 23 additions & 0 deletions pkg/geo/wkt/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

# Library target for the wkt package: the hand-written lexer (lex.go), the
# Unmarshal entry point (wkt.go) and the goyacc output (wkt_generated.go).
go_library(
name = "wkt",
srcs = [
"lex.go",
"wkt.go",
"wkt_generated.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/geo/wkt",
visibility = ["//visibility:public"],
deps = ["@com_github_twpayne_go_geom//:go-geom"],
)

# Unit tests for the wkt package. The library is embedded so the tests are
# compiled together with the library sources.
go_test(
name = "wkt_test",
srcs = ["wkt_test.go"],
embed = [":wkt"],
deps = [
"@com_github_stretchr_testify//require",
"@com_github_twpayne_go_geom//:go-geom",
],
)
195 changes: 195 additions & 0 deletions pkg/geo/wkt/lex.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
// Copyright 2021 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package wkt

import (
"fmt"
"strconv"
"strings"
"unicode"

"github.com/twpayne/go-geom"
)

// LexError reports a failure to split the input into tokens.
type LexError struct {
	// problem is a short description of what went wrong.
	problem string
	// pos is the offset into the input at which the problem was detected.
	pos int
}

// Error implements the error interface, rendering the problem together with
// the input position it was found at.
func (e *LexError) Error() string {
	const format = "lex error: %s at pos %d"
	return fmt.Sprintf(format, e.problem, e.pos)
}

// ParseError reports that the input was tokenized successfully but does not
// form a valid WKT geometry.
type ParseError struct {
	// line is the complete input string that failed to parse.
	line string
}

// Error implements the error interface. The offending input is quoted so that
// empty strings and unusual characters remain visible in the message.
func (e *ParseError) Error() string {
	const format = "parse error: could not parse %q"
	return fmt.Sprintf(format, e.line)
}

// eof is the token value the parser expects when the lexer reaches the end of
// the input. The lexer also returns it after recording an error in
// wktLex.lastErr, and peek returns it as its end-of-input sentinel rune.
const eof = 0

// wktLex holds the lexer state for a single WKT string. It is passed to
// wktParse and also carries the parse result back to Unmarshal.
type wktLex struct {
// line is the complete WKT input.
line string
// pos is the byte offset into line of the next unread character.
pos int
// ret is the parsed geometry; it is assigned by the grammar's start rule.
ret geom.T
// lastErr is the most recent error recorded while lexing, or nil.
lastErr error
}

// Lex returns the next token of the input, skipping any leading whitespace.
//
// Punctuation ('(', ')' and ',') is returned as its own character value,
// letters start a keyword and number runes start a numeric literal whose value
// is stored in yylval. At end of input eof is returned. Any other character
// records a LexError in lastErr and also returns eof so that parsing stops.
func (l *wktLex) Lex(yylval *wktSymType) int {
	l.trimLeft()

	c := l.peek()
	switch {
	case c == eof:
		return eof
	case c == '(' || c == ')' || c == ',':
		return int(l.next())
	case unicode.IsLetter(c):
		return l.keyword()
	case isNumRune(c):
		return l.num(yylval)
	}

	// Nothing matched: remember where lexing failed and stop.
	l.lastErr = &LexError{
		problem: "unrecognized character",
		pos:     l.pos,
	}
	return eof
}

func getKeywordToken(tokStr string) int {
switch tokStr {
case "EMPTY":
return EMPTY
case "POINT":
return POINT
case "POINTZ":
return POINTZ
case "POINTM":
return POINTM
case "POINTZM":
return POINTZM
default:
return eof
}
}

// keyword lexes a case-insensitive keyword and returns its token.
//
// For every keyword other than EMPTY, an optional dimension suffix ("Z", "M"
// or "ZM") is folded into the keyword, with or without whitespace in front of
// it, so "POINT ZM" and "POINTZM" yield the same token. The suffix letters are
// consumed one at a time and need not be followed by a separator.
//
// If the resulting string is not a known keyword, a LexError pointing at the
// start of the keyword is recorded in lastErr and eof is returned.
func (l *wktLex) keyword() int {
	begin := l.pos
	var sb strings.Builder

	// Collect the run of letters, normalized to upper case.
	for unicode.IsLetter(l.peek()) {
		sb.WriteRune(unicode.ToUpper(l.next()))
	}

	// Geometry keywords may carry extra dimensions: Z first, then M.
	if sb.String() != "EMPTY" {
		l.trimLeft()
		for _, dim := range "ZM" {
			if unicode.ToUpper(l.peek()) == dim {
				l.next()
				sb.WriteRune(dim)
			}
		}
	}

	tok := getKeywordToken(sb.String())
	if tok == eof {
		l.lastErr = &LexError{
			problem: "invalid keyword",
			pos:     begin,
		}
	}
	return tok
}

// isNumRune reports whether r can appear in a plain numeric literal: a decimal
// digit, a decimal point, or a sign.
//
// Both '-' and '+' are accepted because WKT numbers are signed numeric
// literals, so "POINT(+1 -2)" is valid input. Validation of the literal as a
// whole (for example rejecting "+-1" or "1.2.3") is left to the number parser.
func isNumRune(r rune) bool {
	switch r {
	case '+', '-', '.':
		return true
	default:
		return unicode.IsDigit(r)
	}
}

// num lexes a numeric literal, stores its value in yylval.coord and returns
// the NUM token.
//
// Besides the runes accepted by isNumRune, the literal may contain an exponent
// part ("1e5", "2.5E-3", "1e+3"), which is valid in WKT. An exponent marker is
// only accepted after at least one rune has been consumed, and a '+' is
// accepted directly after the marker. The collected text is validated as a
// whole by strconv.ParseFloat.
//
// If the text is not a valid number, a LexError pointing at the start of the
// literal is recorded in lastErr and eof is returned.
func (l *wktLex) num(yylval *wktSymType) int {
	startPos := l.pos
	var b strings.Builder
	var prev rune

	for {
		c := l.peek()
		accept := isNumRune(c) ||
			(b.Len() > 0 && (c == 'e' || c == 'E')) ||
			(c == '+' && (prev == 'e' || prev == 'E'))
		if !accept {
			break
		}
		b.WriteRune(l.next())
		prev = c
	}

	fl, err := strconv.ParseFloat(b.String(), 64)
	if err != nil {
		l.lastErr = &LexError{
			problem: "invalid number",
			pos:     startPos,
		}
		return eof
	}
	yylval.coord = fl
	return NUM
}

// peek returns the next character of the input without consuming it, or eof
// once the whole input has been read.
//
// The input is read one byte at a time, and every token is made of ASCII
// characters. Two kinds of bytes are therefore reported as
// unicode.ReplacementChar, which no token accepts, so they surface as an
// "unrecognized character" lex error:
//   - a NUL byte, which would otherwise be indistinguishable from eof and make
//     the lexer silently ignore the rest of the input;
//   - any byte above 0x7F, which would otherwise be misread as a Latin-1
//     letter or space.
func (l *wktLex) peek() rune {
	if l.pos >= len(l.line) {
		return eof
	}
	c := rune(l.line[l.pos])
	if c == eof || c > unicode.MaxASCII {
		return unicode.ReplacementChar
	}
	return c
}

// next consumes and returns the next character of the input. At end of input
// it returns eof and leaves the position unchanged.
func (l *wktLex) next() rune {
	c := l.peek()
	if c == eof {
		return eof
	}
	l.pos++
	return c
}

// trimLeft advances past any whitespace at the current position. It stops at
// the first non-space character or at end of input, since eof is not a space.
func (l *wktLex) trimLeft() {
	for unicode.IsSpace(l.peek()) {
		l.next()
	}
}

// Error receives an error message s and intentionally discards it.
// NOTE(review): presumably this is the callback the goyacc-generated parser
// invokes on a syntax error; confirm against wkt_generated.go.
func (l *wktLex) Error(s string) {
// Lex errors are recorded in lastErr by the lexing functions, so there is
// nothing to store here.
// TODO(ayang): improve parse error messages.
/* EMPTY */
}
28 changes: 28 additions & 0 deletions pkg/geo/wkt/wkt.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright 2021 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

//go:generate goyacc -o wkt_generated.go -p "wkt" wkt.y

package wkt

import "github.com/twpayne/go-geom"

// Unmarshal parses a WKT string into a geom.T.
//
// A lexing failure is reported with the error recorded by the lexer, which
// takes precedence over a generic ParseError. A ParseError is returned when
// the input lexes cleanly but the parser reports a non-zero status.
func Unmarshal(wkt string) (geom.T, error) {
	lexer := &wktLex{line: wkt}
	status := wktParse(lexer)

	switch {
	case lexer.lastErr != nil:
		return nil, lexer.lastErr
	case status != 0:
		return nil, &ParseError{line: wkt}
	}
	return lexer.ret, nil
}
104 changes: 104 additions & 0 deletions pkg/geo/wkt/wkt.y
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Copyright 2021 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

%{

package wkt

import "github.com/twpayne/go-geom"

%}

// Semantic value types that grammar symbols can carry.
%union {
str string
geom geom.T
coord float64
coordList []float64
}

// Keyword tokens. They are declared with the str type.
%token <str> POINT POINTZ POINTM POINTZM
%token <str> EMPTY
//%token <str> LINESTRING POLYGON MULTIPOINT MULTILINESTRING MULTIPOLYGON GEOMETRYCOLLECTION
// NUM carries the parsed coordinate value in coord.
%token <coord> NUM

// Non-terminals: geometries produce a geom.T, coordinate groups produce a flat
// []float64.
%type <geom> geometry
%type <geom> point
%type <coordList> two_coords three_coords four_coords

%%

// start is the entry rule. It stores the parsed geometry in the lexer's ret
// field.
start:
geometry
{
wktlex.(*wktLex).ret = $1
}

// geometry currently has a single alternative: point.
geometry:
point

// point covers every supported point form.
//
// A bare POINT takes its layout from the number of coordinates: 2 -> XY,
// 3 -> XYZ, 4 -> XYZM. POINTZ and POINTM each require exactly three
// coordinates and POINTZM exactly four. Any of the keywords followed by EMPTY
// yields an empty point with the layout named by the keyword (XY for POINT).
point:
POINT two_coords
{
$$ = geom.NewPointFlat(geom.XY, $2)
}
| POINT three_coords
{
$$ = geom.NewPointFlat(geom.XYZ, $2)
}
| POINT four_coords
{
$$ = geom.NewPointFlat(geom.XYZM, $2)
}
| POINTZ three_coords
{
$$ = geom.NewPointFlat(geom.XYZ, $2)
}
| POINTM three_coords
{
$$ = geom.NewPointFlat(geom.XYM, $2)
}
| POINTZM four_coords
{
$$ = geom.NewPointFlat(geom.XYZM, $2)
}
| POINT EMPTY
{
$$ = geom.NewPointEmpty(geom.XY)
}
| POINTZ EMPTY
{
$$ = geom.NewPointEmpty(geom.XYZ)
}
| POINTM EMPTY
{
$$ = geom.NewPointEmpty(geom.XYM)
}
| POINTZM EMPTY
{
$$ = geom.NewPointEmpty(geom.XYZM)
}

// Coordinate groups: a parenthesized run of NUM tokens with no comma between
// them. Each rule collects its values, in order, into a flat []float64.

// two_coords matches exactly two numbers.
two_coords:
'(' NUM NUM ')'
{
$$ = []float64{$2, $3}
}

// three_coords matches exactly three numbers.
three_coords:
'(' NUM NUM NUM ')'
{
$$ = []float64{$2, $3, $4}
}

// four_coords matches exactly four numbers.
four_coords:
'(' NUM NUM NUM NUM ')'
{
$$ = []float64{$2, $3, $4, $5}
}
Loading

0 comments on commit a0236ac

Please sign in to comment.