diff --git a/src/common/geo/CMakeLists.txt b/src/common/geo/CMakeLists.txt index b55b7a8071d..e38eba7113b 100644 --- a/src/common/geo/CMakeLists.txt +++ b/src/common/geo/CMakeLists.txt @@ -8,4 +8,5 @@ nebula_add_library( GeoIndex.cpp ) +nebula_add_subdirectory(io) # nebula_add_subdirectory(test) diff --git a/src/common/geo/GeoParser.cpp b/src/common/geo/GeoParser.cpp deleted file mode 100644 index 2ab9cc62b19..00000000000 --- a/src/common/geo/GeoParser.cpp +++ /dev/null @@ -1,5 +0,0 @@ -/* Copyright (c) 2020 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License, - * attached with Common Clause Condition 1.0, found in the LICENSES directory. - */ diff --git a/src/common/geo/GeoParser.h b/src/common/geo/GeoParser.h deleted file mode 100644 index 2ab9cc62b19..00000000000 --- a/src/common/geo/GeoParser.h +++ /dev/null @@ -1,5 +0,0 @@ -/* Copyright (c) 2020 vesoft inc. All rights reserved. - * - * This source code is licensed under Apache 2.0 License, - * attached with Common Clause Condition 1.0, found in the LICENSES directory. - */ diff --git a/src/common/geo/io/CMakeLists.txt b/src/common/geo/io/CMakeLists.txt new file mode 100644 index 00000000000..5bdaeb1a528 --- /dev/null +++ b/src/common/geo/io/CMakeLists.txt @@ -0,0 +1,7 @@ +# Copyright (c) 2020 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License, +# attached with Common Clause Condition 1.0, found in the LICENSES directory. + +nebula_add_subdirectory(wkt) +# nebula_add_subdirectory(wkb) diff --git a/src/common/geo/io/Geometry.h b/src/common/geo/io/Geometry.h new file mode 100644 index 00000000000..0b86f09f27f --- /dev/null +++ b/src/common/geo/io/Geometry.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
#pragma once

// NOTE(review): the header names on the three #include lines were not visible
// in the reviewed text. <cstdint> and <vector> are what the declarations below
// need; <memory> is a guess at the third one -- confirm against the original.
#include <cstdint>
#include <memory>
#include <vector>

namespace nebula {

/// Tag identifying which concrete geometry a `Geometry` object is.
/// NOTE(review): the values coincide with the OGC WKB geometry type codes;
/// confirm that this is intentional before relying on it.
enum class ShapeType : uint8_t {
  Point = 1,
  LineString = 2,
  Polygon = 3,
};

/// Abstract base of every parsed shape. Objects are deleted through a pointer
/// to this type, hence the virtual destructor.
struct Geometry {
  virtual ~Geometry() = default;

  /// @return the tag of the concrete shape.
  virtual ShapeType shape() const = 0;
};

// The concrete shapes below deliberately declare no destructor of their own:
// the base already makes destruction virtual, and a user-declared destructor
// would suppress the implicit move operations, forcing a deep copy every time
// a std::vector of LineString/Polygon values reallocates.

/// A single coordinate pair.
struct Point : public Geometry {
  // Explicit initializers so a default-initialized Point never carries
  // indeterminate coordinates.
  double x{0.0};
  double y{0.0};

  ShapeType shape() const override { return ShapeType::Point; }
};

/// An ordered sequence of points.
struct LineString : public Geometry {
  std::vector<Point> points;

  ShapeType shape() const override { return ShapeType::LineString; }
};

/// A list of rings, each ring stored as a LineString.
struct Polygon : public Geometry {
  std::vector<LineString> rings;

  ShapeType shape() const override { return ShapeType::Polygon; }
};

}  // namespace nebula
+ +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +if(ENABLE_VERBOSE_BISON) + set(bison_flags "-Werror -v") +else() + set(bison_flags "-Werror") +endif() +bison_target(Parser wkt_parser.yy ${CMAKE_CURRENT_BINARY_DIR}/WKTParser.cpp COMPILE_FLAGS ${bison_flags}) +flex_target(Scanner wkt_scanner.lex ${CMAKE_CURRENT_BINARY_DIR}/WKTScanner.cpp) + +add_custom_target(wkt_parser_target DEPENDS ${FLEX_Scanner_OUTPUTS} ${BISON_Parser_OUTPUTS}) + +add_flex_bison_dependency(Scanner Parser) + +add_compile_options(-Wno-sign-compare -Wno-conversion-null -Wno-pedantic -Wno-extra) + +nebula_add_library( + wkt_parser_obj OBJECT + ${FLEX_Scanner_OUTPUTS} + ${BISON_Parser_OUTPUTS} +) + +# nebula_add_subdirectory(test) diff --git a/src/common/geo/io/wkt/WKTReader.cpp b/src/common/geo/io/wkt/WKTReader.cpp new file mode 100644 index 00000000000..82e360219ef --- /dev/null +++ b/src/common/geo/io/wkt/WKTReader.cpp @@ -0,0 +1,11 @@ +/* Copyright (c) 2018 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#include "common/geo/io/wkt/WKTReader.h" + +#include "common/base/Base.h" + +namespace nebula {} // namespace nebula diff --git a/src/common/geo/io/wkt/WKTReader.h b/src/common/geo/io/wkt/WKTReader.h new file mode 100644 index 00000000000..cf7164b1b94 --- /dev/null +++ b/src/common/geo/io/wkt/WKTReader.h @@ -0,0 +1,80 @@ +/* Copyright (c) 2018 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
+ */ + +#pragma once + +#include "common/base/Base.h" +#include "common/base/StatusOr.h" +#include "common/geo/io/Geometry.h" +#include "common/geo/io/wkt/WKTParser.hpp" +#include "common/geo/io/wkt/WKTScanner.h" + +namespace nebula { + +class WKTReader { + public: + WKTReader() : parser_(scanner_, error_, &geom_) { + // Callback invoked by WKTScanner + auto readBuffer = [this](char *buf, int maxSize) -> int { + // Reach the end + if (pos_ >= end_) { + pos_ = nullptr; + end_ = nullptr; + return 0; + } + int left = end_ - pos_; + auto n = maxSize > left ? left : maxSize; + ::memcpy(buf, pos_, n); + pos_ += n; + return n; // Number of bytes we actually filled in `buf' + }; + scanner_.setReadBuffer(std::move(readBuffer)); + } + + ~WKTReader() { + if (geom_ != nullptr) delete geom_; + } + + StatusOr> read(std::string wkt) { + // Since WKTScanner needs a writable buffer, we have to copy the query string + buffer_ = std::move(wkt); + pos_ = &buffer_[0]; + end_ = pos_ + buffer_.size(); + + scanner_.setWKT(&buffer_); + if (parser_.parse() != 0) { + pos_ = nullptr; + end_ = nullptr; + // To flush the internal buffer to recover from a failure + scanner_.flushBuffer(); + if (geom_ != nullptr) { + delete geom_; + geom_ = nullptr; + } + scanner_.setWKT(nullptr); + return Status::SyntaxError(error_); + } + + if (geom_ == nullptr) { + return Status::StatementEmpty(); // WKTEmpty() + } + auto *geom = geom_; + geom_ = nullptr; + scanner_.setWKT(nullptr); + return std::unique_ptr(geom); + } + + private: + std::string buffer_; + const char *pos_{nullptr}; + const char *end_{nullptr}; + nebula::WKTScanner scanner_; + nebula::WKTParser parser_; + std::string error_; + Geometry *geom_{nullptr}; +}; + +} // namespace nebula diff --git a/src/common/geo/io/wkt/WKTScanner.h b/src/common/geo/io/wkt/WKTScanner.h new file mode 100644 index 00000000000..5111ea1c66c --- /dev/null +++ b/src/common/geo/io/wkt/WKTScanner.h @@ -0,0 +1,63 @@ +/* Copyright (c) 2018 vesoft inc. All rights reserved. 
+ * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#pragma once + +#include "common/base/Base.h" + +// Only include FlexLexer.h if it hasn't been already included +#if !defined(yyFlexLexerOnce) +#include +#endif + +// Override the interface for yylex since we namespaced it +#undef YY_DECL +#define YY_DECL int nebula::WKTScanner::yylex() + +#include "common/geo/io/wkt/WKTParser.hpp" + +namespace nebula { + +class WKTScanner : public yyFlexLexer { + public: + int yylex(nebula::WKTParser::semantic_type *lval, nebula::WKTParser::location_type *loc) { + yylval = lval; + yylloc = loc; + return yylex(); + } + + public: + // Called by WKTReader to set the `readBuffer' callback, which would be + // invoked by LexerInput to fill the stream buffer. + void setReadBuffer(std::function readBuffer) { readBuffer_ = readBuffer; } + + // Manually invoked by WKTReader to recover from a failure state. + // This makes the scanner reentrant. + void flushBuffer() { + yy_flush_buffer(yy_buffer_stack ? 
yy_buffer_stack[yy_buffer_stack_top] : nullptr); + } + + void setWKT(std::string *wkt) { wkt_ = wkt; } + + std::string *wkt() { return wkt_; } + + protected: + // Called when YY_INPUT is invoked + int LexerInput(char *buf, int maxSize) override { return readBuffer_(buf, maxSize); } + + using TokenType = nebula::WKTParser::token; + + private: + // friend class Scanner_Basic_Test; + int yylex() override; + + nebula::WKTParser::semantic_type *yylval{nullptr}; + nebula::WKTParser::location_type *yylloc{nullptr}; + std::function readBuffer_; + std::string *wkt_{nullptr}; +}; + +} // namespace nebula diff --git a/src/common/geo/wkt/WKTWriter.h b/src/common/geo/io/wkt/WKTWriter.h similarity index 100% rename from src/common/geo/wkt/WKTWriter.h rename to src/common/geo/io/wkt/WKTWriter.h diff --git a/src/common/geo/io/wkt/wkt_parser.yy b/src/common/geo/io/wkt/wkt_parser.yy new file mode 100644 index 00000000000..9931f8c0f52 --- /dev/null +++ b/src/common/geo/io/wkt/wkt_parser.yy @@ -0,0 +1,169 @@ +%language "C++" +%skeleton "lalr1.cc" +%no-lines +%locations +%define api.namespace { nebula } +%define parser_class_name { WKTParser } +%lex-param { nebula::WKTScanner& scanner } +%parse-param { nebula::WKTScanner& scanner } +%parse-param { std::string &errmsg } +%parse-param { nebula::Geometry** geom } + +%code requires { +#include +#include +#include +#include +#include "common/geo/io/Geometry.h" + +namespace nebula { + +class WKTScanner; + +} + +} + +%code { + #include "WKTScanner.h" + static int yylex(nebula::WKTParser::semantic_type* yylval, + nebula::WKTParser::location_type *yylloc, + nebula::WKTScanner& scanner); +} + +%union { + double doubleval; + Geometry* geomval; + Point* pointval; + LineString* lineval; + Polygon* polygonval; +} + +/* destructors */ +%destructor {} +%destructor {} + +/* wkt shape type prefix */ +%token KW_POINT KW_LINESTRING KW_POLYGON + +/* symbols */ +%token L_PAREN R_PAREN COMMA + +/* token type specification */ +%token DOUBLE + +%type geometry 
%type <pointval> point coordinate
%type <lineval> linestring coordinate_list
%type <polygonval> polygon coordinate_list_list

%start geometry

%%

/* Top-level result. The finished object is handed to the caller through the
 * `geom` parse parameter -- nothing else ever writes it, so without this the
 * caller never sees a result. `$$` is cleared afterwards so the parser holds
 * no second reference to an object it no longer owns. */
geometry
  : point {
        *geom = $1;
        $$ = nullptr;
    }
  | linestring {
        *geom = $1;
        $$ = nullptr;
    }
  | polygon {
        *geom = $1;
        $$ = nullptr;
    }
;

point
  : KW_POINT L_PAREN coordinate R_PAREN {
        $$ = $3;
    }
  ;

linestring
  : KW_LINESTRING L_PAREN coordinate_list R_PAREN {
        $$ = $3;
    }
  ;

polygon
  : KW_POLYGON L_PAREN coordinate_list_list R_PAREN {
        $$ = $3;
    }
  ;

coordinate
  : DOUBLE DOUBLE {
        $$ = new Point();
        $$->x = $1;
        $$->y = $2;
    }
  ;

/* The containers store their elements by value, so each temporary node is
 * released as soon as its contents have been copied into the container. */
coordinate_list
  : coordinate {
        $$ = new LineString();
        $$->points.emplace_back(*$1);
        delete $1;
    }
  | coordinate_list COMMA coordinate {
        $$ = $1;
        $$->points.emplace_back(*$3);
        delete $3;
    }
  ;

coordinate_list_list
  : L_PAREN coordinate_list R_PAREN {
        $$ = new Polygon();
        $$->rings.emplace_back(*$2);
        delete $2;
    }
  | coordinate_list_list COMMA L_PAREN coordinate_list R_PAREN {
        $$ = $1;
        $$->rings.emplace_back(*$4);
        delete $4;
    }
  ;

%%

/* Builds the message stored in `errmsg`. When the scanner knows the text being
 * parsed and the location lies on a single line inside it, the message quotes
 * the offending part of the text; otherwise it prints the raw location. */
void nebula::WKTParser::error(const nebula::WKTParser::location_type& loc,
                              const std::string &msg) {
    std::ostringstream os;
    if (msg.empty()) {
        os << "syntax error";
    } else {
        os << msg;
    }

    auto *wkt = scanner.wkt();
    if (wkt == nullptr) {
        os << " at " << loc;
        errmsg = os.str();
        return;
    }

    // Columns are 1-based, string offsets 0-based.
    auto begin = loc.begin.column > 0 ? loc.begin.column - 1 : 0;
    if ((loc.end.filename
        && (!loc.begin.filename
            || *loc.begin.filename != *loc.end.filename))
        || loc.begin.line < loc.end.line
        || begin >= wkt->size()) {
        os << " at " << loc;
    } else if (loc.begin.column < (loc.end.column ? loc.end.column - 1 : 0)) {
        // Quote the token itself, capped at 80 characters.
        uint32_t len = loc.end.column - loc.begin.column;
        if (len > 80) {
            len = 80;
        }
        os << " near `" << wkt->substr(begin, len) << "'";
    } else {
        os << " near `" << wkt->substr(begin, 8) << "'";
    }

    errmsg = os.str();
}

/* Adapter between the free function the generated parser calls and the
 * scanner object. */
static int yylex(nebula::WKTParser::semantic_type* yylval,
                 nebula::WKTParser::location_type *yylloc,
                 nebula::WKTScanner& scanner) {
    return scanner.yylex(yylval, yylloc);
}

/* ======== src/common/geo/io/wkt/wkt_scanner.lex ======== */

%option c++
%option yyclass="WKTScanner"
%option nodefault noyywrap
%option never-interactive
%option yylineno
%option case-insensitive

%{
#include "common/geo/io/wkt/WKTReader.h"
#include "common/geo/io/wkt/WKTScanner.h"
#include "WKTParser.hpp"
/* NOTE(review): header name not visible in the reviewed text; <stdlib.h> is
 * what atof() below needs. */
#include <stdlib.h>

/* Advance the token location on every match, so that the parser's error
 * reporting can point at the offending token instead of always at the start
 * of the input. */
#define YY_USER_ACTION                  \
    yylloc->step();                     \
    yylloc->columns(yyleng);

%}

blanks                      ([ \t\n\r]+)

%%

 /* WKT shape type prefix */
"POINT"                     { return TokenType::KW_POINT; }
"LINESTRING"                { return TokenType::KW_LINESTRING; }
"POLYGON"                   { return TokenType::KW_POLYGON; }

","                         { return TokenType::COMMA; }
"("                         { return TokenType::L_PAREN; }
")"                         { return TokenType::R_PAREN; }

-?(([0-9]+\.?)|([0-9]*\.?[0-9]+)([eE][-+]?[0-9]+)?) {
                              yylval->doubleval = atof(yytext);
                              return TokenType::DOUBLE;
                            }

 /* A named definition must be written in braces to be expanded; a bare
  * `blanks` would match the literal word "blanks" and let real whitespace
  * fall through to the catch-all rule below. */
{blanks}                    {}

.                           {
                                /**
                                 * Any other unmatched byte sequences will get us here,
                                 * including the non-ascii ones, which are negative
                                 * in terms of type of `signed char'. At the same time, because
                                 * Bison translates all negative tokens to EOF(i.e. YY_NULL),
                                 * so we have to cast illegal characters to type of `unsigned char'
                                 * This will make Bison receive an unknown token, which leads to
                                 * a syntax error.
                                 *
                                 * Please note that it is not Flex but Bison to regard illegal
                                 * characters as errors, in such case.
                                 */
                                return static_cast<unsigned char>(yytext[0]);

                                /**
                                 * Alternatively, we could report illegal characters by
                                 * throwing a `syntax_error' exception.
                                 * In such a way, we could distinguish illegal characters
                                 * from normal syntax errors, but at cost of poor performance
                                 * incurred by the expensive exception handling.
                                 */
                                // throw WKTParser::syntax_error(*yylloc, "char illegal");
                            }

%%