From 1d36ecd171142c033aec83f6832d19731b6c7530 Mon Sep 17 00:00:00 2001 From: Shylock Hg <33566796+Shylock-Hg@users.noreply.github.com> Date: Wed, 20 Oct 2021 15:43:04 +0800 Subject: [PATCH 01/11] Implement datetime parser by bison. --- CMakeLists.txt | 1 + src/common/datatypes/Date.h | 13 + src/common/time/CMakeLists.txt | 2 + src/common/time/TimeUtils.h | 19 ++ src/common/time/parser/CMakeLists.txt | 27 ++ src/common/time/parser/DatetimeReader.cpp | 62 ++++ src/common/time/parser/DatetimeReader.h | 65 +++++ src/common/time/parser/DatetimeScanner.h | 70 +++++ src/common/time/parser/datetime_parser.yy | 249 ++++++++++++++++ src/common/time/parser/datetime_scanner.lex | 87 ++++++ src/common/time/parser/test/CMakeLists.txt | 21 ++ .../time/parser/test/DateTimeParserTest.cpp | 272 ++++++++++++++++++ 12 files changed, 888 insertions(+) create mode 100644 src/common/time/parser/CMakeLists.txt create mode 100644 src/common/time/parser/DatetimeReader.cpp create mode 100644 src/common/time/parser/DatetimeReader.h create mode 100644 src/common/time/parser/DatetimeScanner.h create mode 100644 src/common/time/parser/datetime_parser.yy create mode 100644 src/common/time/parser/datetime_scanner.lex create mode 100644 src/common/time/parser/test/CMakeLists.txt create mode 100644 src/common/time/parser/test/DateTimeParserTest.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c18ea010c9..b5fddb323f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,6 +87,7 @@ macro(nebula_add_library name type) # hbase_thrift_generator parser_target wkt_parser_target + datetime_parser_target ) endmacro() diff --git a/src/common/datatypes/Date.h b/src/common/datatypes/Date.h index 7afd65f4549..2d2d4cfaee1 100644 --- a/src/common/datatypes/Date.h +++ b/src/common/datatypes/Date.h @@ -165,6 +165,19 @@ struct DateTime { sec = 0; microsec = 0; } + explicit DateTime(const Date& date, const Time& time) { + year = date.year; + month = date.month; + day = date.day; + hour = time.hour; + minute = time.minute; + sec = time.sec; + microsec = time.microsec; + } + + Date date() const { return Date(year, month, day); } + + Time time() const { return Time(hour, minute, sec, microsec); } void clear() { year = 0; diff --git a/src/common/time/CMakeLists.txt b/src/common/time/CMakeLists.txt index 959efda8a8f..e1d0f8d3e02 100644 --- a/src/common/time/CMakeLists.txt +++ b/src/common/time/CMakeLists.txt @@ -17,4 +17,6 @@ nebula_add_library( TimeConversion.cpp ) +nebula_add_subdirectory(parser) + nebula_add_subdirectory(test) diff --git a/src/common/time/TimeUtils.h b/src/common/time/TimeUtils.h index 898aa24a358..f0b2108f803 100644 --- a/src/common/time/TimeUtils.h +++ b/src/common/time/TimeUtils.h @@ -47,6 +47,25 @@ class TimeUtils { return Status::OK(); } + template < + typename D, + typename = std::enable_if_t::value || std::is_same::value>> + static Status validateTime(const D &time) { + if (time.hour < 0 || time.hour >= 24) { + return Status::Error("Invalid hour number %d.", time.hour); + } + if (time.minute < 0 || time.minute >= 60) { + return Status::Error("Invalid minute number %d.", time.minute); + } + if (time.sec < 0 || time.sec >= 60) { + return Status::Error("Invalid second number %d.", time.sec); + } + if (time.microsec < 0 || time.microsec >= 1000000) { + return Status::Error("Invalid microsecond number %d.", time.microsec); + } + return Status::OK(); + } + // TODO(shylock) support more format static StatusOr parseDateTime(const std::string &str) { std::tm tm; diff --git a/src/common/time/parser/CMakeLists.txt b/src/common/time/parser/CMakeLists.txt new file mode 100644 index 00000000000..84de7f1e9e4 --- /dev/null +++ b/src/common/time/parser/CMakeLists.txt @@ -0,0 +1,27 @@ +# Copyright (c) 2020 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License, +# attached with Common Clause Condition 1.0, found in the LICENSES directory. + +if(ENABLE_VERBOSE_BISON) + set(bison_flags "-Werror -v") +else() + set(bison_flags "-Werror") +endif() +bison_target(Parser datetime_parser.yy ${CMAKE_CURRENT_BINARY_DIR}/DatetimeParser.cpp COMPILE_FLAGS ${bison_flags}) +flex_target(Scanner datetime_scanner.lex ${CMAKE_CURRENT_BINARY_DIR}/DatetimeScanner.cpp) + +add_custom_target(datetime_parser_target DEPENDS ${FLEX_Scanner_OUTPUTS} ${BISON_Parser_OUTPUTS}) + +add_flex_bison_dependency(Scanner Parser) + +add_compile_options(-Wno-sign-compare -Wno-conversion-null -Wno-pedantic -Wno-extra) + +nebula_add_library( + datetime_parser_obj OBJECT + ${FLEX_Scanner_OUTPUTS} + ${BISON_Parser_OUTPUTS} + DatetimeReader.cpp +) + +nebula_add_subdirectory(test) diff --git a/src/common/time/parser/DatetimeReader.cpp b/src/common/time/parser/DatetimeReader.cpp new file mode 100644 index 00000000000..79fc08c49c5 --- /dev/null +++ b/src/common/time/parser/DatetimeReader.cpp @@ -0,0 +1,62 @@ +/* Copyright (c) 2018 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#include "common/time/parser/DatetimeReader.h" + +namespace nebula { +namespace time { + +DatetimeReader::DatetimeReader(Type type) : parser_(scanner_, error_, &dt_, type) { + // Callback invoked by WKTScanner + auto readBuffer = [this](char *buf, int maxSize) -> int { + // Reach the end + if (pos_ >= end_) { + pos_ = nullptr; + end_ = nullptr; + return 0; + } + int left = end_ - pos_; + auto n = maxSize > left ? left : maxSize; + ::memcpy(buf, pos_, n); + pos_ += n; + return n; // Number of bytes we actually filled in `buf' + }; + scanner_.setReadBuffer(std::move(readBuffer)); +} + +StatusOr DatetimeReader::read(std::string input) { + // Since WKTScanner needs a writable buffer, we have to copy the query string + buffer_ = std::move(input); + pos_ = &buffer_[0]; + end_ = pos_ + buffer_.size(); + + scanner_.setInput(&buffer_); + if (parser_.parse() != 0) { + pos_ = nullptr; + end_ = nullptr; + // To flush the internal buffer to recover from a failure + scanner_.flushBuffer(); + if (dt_ != nullptr) { + delete dt_; + dt_ = nullptr; + } + scanner_.setInput(nullptr); + return Status::SyntaxError(error_); + } + + if (dt_ == nullptr) { + return Status::StatementEmpty(); // empty + } + auto dt = dt_; + dt_ = nullptr; + scanner_.setInput(nullptr); + auto tmp = std::move(*dt); + delete dt; + return tmp; +} + +} // namespace time +} // namespace nebula diff --git a/src/common/time/parser/DatetimeReader.h b/src/common/time/parser/DatetimeReader.h new file mode 100644 index 00000000000..fe8791360cd --- /dev/null +++ b/src/common/time/parser/DatetimeReader.h @@ -0,0 +1,65 @@ +/* Copyright (c) 2018 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#pragma once + +#include "common/base/Base.h" +#include "common/base/StatusOr.h" +#include "common/datatypes/Date.h" +#include "common/time/parser/DatetimeParser.hpp" +#include "common/time/parser/DatetimeScanner.h" + +namespace nebula { +namespace time { + +class DatetimeReader { + public: + // enum class Type { + // kDate, + // kTime, + // kDateTime, + // }; + + ~DatetimeReader() { + if (dt_ != nullptr) delete dt_; + } + + static inline DatetimeReader makeDateReader() { return DatetimeReader(Type::kDate); } + + static inline DatetimeReader makeTimeReader() { return DatetimeReader(Type::kTime); } + + static inline DatetimeReader makeDateTimeReader() { return DatetimeReader(Type::kDateTime); } + + StatusOr readDatetime(std::string input) { return read(std::move(input)); } + + StatusOr readDate(std::string input) { + auto result = read(std::move(input)); + NG_RETURN_IF_ERROR(result); + return result.value().date(); + } + + StatusOr