forked from cockroachdb/cockroach
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
59690: geo/wkt: implement parser for points with Z and M dimensions r=otan a=andyyang890 This patch adds a parser that is capable of parsing WKT representations of points with Z and M dimensions. Refs: cockroachdb#53091 Release note: None Co-authored-by: Andy Yang <[email protected]>
- Loading branch information
Showing
9 changed files
with
1,011 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
y.output |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") | ||
|
||
# Library target for the WKT lexer and parser. wkt_generated.go is the parser
# source that goyacc emits from the grammar.
go_library(
    name = "wkt",
    srcs = [
        "lex.go",
        "wkt.go",
        "wkt_generated.go",
    ],
    importpath = "github.com/cockroachdb/cockroach/pkg/geo/wkt",
    visibility = ["//visibility:public"],
    deps = ["@com_github_twpayne_go_geom//:go-geom"],
)
|
||
# Unit tests for the WKT package; they are compiled into the library target
# via embed.
go_test(
    name = "wkt_test",
    srcs = ["wkt_test.go"],
    embed = [":wkt"],
    deps = [
        "@com_github_stretchr_testify//require",
        "@com_github_twpayne_go_geom//:go-geom",
    ],
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,195 @@ | ||
// Copyright 2021 The Cockroach Authors. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the file licenses/BSL.txt. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0, included in the file | ||
// licenses/APL.txt. | ||
|
||
package wkt | ||
|
||
import (
	"fmt"
	"strconv"
	"strings"
	"unicode"
	"unicode/utf8"

	"github.com/twpayne/go-geom"
)
|
||
// LexError is the error reported when the lexer cannot turn the input into a
// token.
type LexError struct {
	// problem is a short description of what went wrong.
	problem string
	// pos is the offset into the input at which the problem was detected.
	pos int
}

// Error implements the error interface.
func (le *LexError) Error() string {
	const format = "lex error: %s at pos %d"
	return fmt.Sprintf(format, le.problem, le.pos)
}
|
||
// ParseError is the error reported when the input lexes cleanly but does not
// match the grammar.
type ParseError struct {
	// line is the complete input that failed to parse.
	line string
}

// Error implements the error interface. The offending input is quoted so that
// empty strings and special characters remain visible in the message.
func (pe *ParseError) Error() string {
	const format = "parse error: could not parse %q"
	return fmt.Sprintf(format, pe.line)
}
|
||
// eof is the token value the parser expects from the lexer once the input is
// exhausted. The lexer also returns it after recording a lex error.
const (
	eof = 0
)
|
||
type wktLex struct { | ||
line string | ||
pos int | ||
ret geom.T | ||
lastErr error | ||
} | ||
|
||
// Lex lexes a token from the input. | ||
func (l *wktLex) Lex(yylval *wktSymType) int { | ||
// Skip leading spaces. | ||
l.trimLeft() | ||
|
||
// Lex a token. | ||
switch c := l.peek(); c { | ||
case eof: | ||
return eof | ||
case '(', ')', ',': | ||
return int(l.next()) | ||
default: | ||
if unicode.IsLetter(c) { | ||
return l.keyword() | ||
} else if isNumRune(c) { | ||
return l.num(yylval) | ||
} else { | ||
l.lastErr = &LexError{ | ||
problem: "unrecognized character", | ||
pos: l.pos, | ||
} | ||
return eof | ||
} | ||
} | ||
} | ||
|
||
func getKeywordToken(tokStr string) int { | ||
switch tokStr { | ||
case "EMPTY": | ||
return EMPTY | ||
case "POINT": | ||
return POINT | ||
case "POINTZ": | ||
return POINTZ | ||
case "POINTM": | ||
return POINTM | ||
case "POINTZM": | ||
return POINTZM | ||
default: | ||
return eof | ||
} | ||
} | ||
|
||
// keyword lexes a string keyword. | ||
func (l *wktLex) keyword() int { | ||
startPos := l.pos | ||
var b strings.Builder | ||
|
||
for { | ||
c := l.peek() | ||
if !unicode.IsLetter(c) { | ||
break | ||
} | ||
// Add the uppercase letter to the string builder. | ||
b.WriteRune(unicode.ToUpper(l.next())) | ||
} | ||
|
||
// Check for extra dimensions for geometry types. | ||
if b.String() != "EMPTY" { | ||
l.trimLeft() | ||
if unicode.ToUpper(l.peek()) == 'Z' { | ||
l.next() | ||
b.WriteRune('Z') | ||
} | ||
if unicode.ToUpper(l.peek()) == 'M' { | ||
l.next() | ||
b.WriteRune('M') | ||
} | ||
} | ||
|
||
ret := getKeywordToken(b.String()) | ||
if ret == eof { | ||
l.lastErr = &LexError{ | ||
problem: "invalid keyword", | ||
pos: startPos, | ||
} | ||
} | ||
|
||
return ret | ||
} | ||
|
||
// isNumRune reports whether r may appear in a number: a digit, a minus sign or
// a decimal point.
func isNumRune(r rune) bool {
	return r == '-' || r == '.' || unicode.IsDigit(r)
}
|
||
// num lexes a number. | ||
func (l *wktLex) num(yylval *wktSymType) int { | ||
startPos := l.pos | ||
var b strings.Builder | ||
|
||
for { | ||
c := l.peek() | ||
if !isNumRune(c) { | ||
break | ||
} | ||
b.WriteRune(l.next()) | ||
} | ||
|
||
fl, err := strconv.ParseFloat(b.String(), 64) | ||
if err != nil { | ||
l.lastErr = &LexError{ | ||
problem: "invalid number", | ||
pos: startPos, | ||
} | ||
return eof | ||
} | ||
yylval.coord = fl | ||
return NUM | ||
} | ||
|
||
func (l *wktLex) peek() rune { | ||
if l.pos == len(l.line) { | ||
return eof | ||
} | ||
return rune(l.line[l.pos]) | ||
} | ||
|
||
func (l *wktLex) next() rune { | ||
c := l.peek() | ||
if c != eof { | ||
l.pos++ | ||
} | ||
return c | ||
} | ||
|
||
func (l *wktLex) trimLeft() { | ||
for { | ||
c := l.peek() | ||
if c == eof || !unicode.IsSpace(c) { | ||
break | ||
} | ||
l.next() | ||
} | ||
} | ||
|
||
func (l *wktLex) Error(s string) { | ||
// Lex errors are set in the Lex function. | ||
// todo (ayang) improve parse error messages | ||
/* EMPTY */ | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
// Copyright 2021 The Cockroach Authors. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the file licenses/BSL.txt. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0, included in the file | ||
// licenses/APL.txt. | ||
|
||
//go:generate goyacc -o wkt_generated.go -p "wkt" wkt.y | ||
|
||
package wkt | ||
|
||
import "github.com/twpayne/go-geom" | ||
|
||
// Unmarshal accepts a string and parses it to a geom.T. | ||
func Unmarshal(wkt string) (geom.T, error) { | ||
wktlex := &wktLex{line: wkt} | ||
ret := wktParse(wktlex) | ||
if wktlex.lastErr != nil { | ||
return nil, wktlex.lastErr | ||
} | ||
if ret != 0 { | ||
return nil, &ParseError{line: wkt} | ||
} | ||
return wktlex.ret, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
// Copyright 2021 The Cockroach Authors. | ||
// | ||
// Use of this software is governed by the Business Source License | ||
// included in the file licenses/BSL.txt. | ||
// | ||
// As of the Change Date specified in that file, in accordance with | ||
// the Business Source License, use of this software will be governed | ||
// by the Apache License, Version 2.0, included in the file | ||
// licenses/APL.txt. | ||
|
||
%{ | ||
|
||
package wkt | ||
|
||
import "github.com/twpayne/go-geom" | ||
|
||
%} | ||
|
||
// Semantic value types that tokens and nonterminals can carry.
%union {
	str       string
	geom      geom.T
	coord     float64
	coordList []float64
}
|
||
// Keyword tokens. The dimension suffix is already folded into the geometry
// keyword by the lexer.
%token <str> POINT POINTZ POINTM POINTZM
%token <str> EMPTY
//%token <str> LINESTRING POLYGON MULTIPOINT MULTILINESTRING MULTIPOLYGON GEOMETRYCOLLECTION

// Numeric literal; the lexer stores its value in the coord field.
%token <coord> NUM

// Value types of the nonterminals.
%type <geom> geometry
%type <geom> point
%type <coordList> two_coords three_coords four_coords
|
||
%% | ||
|
||
// start is the entry point of the grammar: exactly one geometry. The parsed
// value is handed back through the lexer's ret field.
start:
	geometry
	{
		wktlex.(*wktLex).ret = $1
	}
|
||
// geometry lists the geometry kinds the grammar accepts; currently only
// points.
geometry:
	point
|
||
// point builds a point geometry. For a bare POINT keyword the layout follows
// from the number of coordinates; with an explicit dimension suffix the
// coordinate count must match the suffix.
point:
	// Bare POINT: 2, 3 or 4 coordinates give XY, XYZ or XYZM.
	POINT two_coords
	{
		$$ = geom.NewPointFlat(geom.XY, $2)
	}
|	POINT three_coords
	{
		$$ = geom.NewPointFlat(geom.XYZ, $2)
	}
|	POINT four_coords
	{
		$$ = geom.NewPointFlat(geom.XYZM, $2)
	}
	// Explicit dimension suffix.
|	POINTZ three_coords
	{
		$$ = geom.NewPointFlat(geom.XYZ, $2)
	}
|	POINTM three_coords
	{
		$$ = geom.NewPointFlat(geom.XYM, $2)
	}
|	POINTZM four_coords
	{
		$$ = geom.NewPointFlat(geom.XYZM, $2)
	}
	// Empty points keep the layout named by the keyword.
|	POINT EMPTY
	{
		$$ = geom.NewPointEmpty(geom.XY)
	}
|	POINTZ EMPTY
	{
		$$ = geom.NewPointEmpty(geom.XYZ)
	}
|	POINTM EMPTY
	{
		$$ = geom.NewPointEmpty(geom.XYM)
	}
|	POINTZM EMPTY
	{
		$$ = geom.NewPointEmpty(geom.XYZM)
	}
|
||
// two_coords is a parenthesized list of exactly two numbers.
two_coords:
	'(' NUM NUM ')'
	{
		$$ = []float64{$2, $3}
	}
|
||
// three_coords is a parenthesized list of exactly three numbers.
three_coords:
	'(' NUM NUM NUM ')'
	{
		$$ = []float64{$2, $3, $4}
	}
|
||
// four_coords is a parenthesized list of exactly four numbers.
four_coords:
	'(' NUM NUM NUM NUM ')'
	{
		$$ = []float64{$2, $3, $4, $5}
	}
Oops, something went wrong.