Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/bison datetime parser #3179

Merged
merged 36 commits into from
Dec 13, 2021
Merged
Changes from 1 commit
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
1d36ecd
Implement datetime parser by bison.
Shylock-Hg Oct 20, 2021
a76c8a2
Remove unused code.
Shylock-Hg Oct 20, 2021
d82f130
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg Oct 20, 2021
2cd7805
Add the time parser.
Shylock-Hg Aug 31, 2021
e09c809
Fix case.
Shylock-Hg Sep 27, 2021
477ecd3
Tune zone.
Shylock-Hg Oct 21, 2021
c927054
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg Oct 21, 2021
864ff5e
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Oct 22, 2021
1af62b6
Remove debug code.
Shylock-Hg Oct 22, 2021
2f15b7c
Fix leak.
Shylock-Hg Oct 22, 2021
1d3da50
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Oct 22, 2021
050e0a9
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Oct 25, 2021
7ee9a62
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg Oct 27, 2021
5eb1b5b
Fix comments.
Shylock-Hg Oct 27, 2021
1963693
Fix comments.
Shylock-Hg Oct 27, 2021
cfd5541
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Oct 28, 2021
d42f522
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 15, 2021
dab0dce
Fix license header
Shylock-Hg Nov 15, 2021
1f970ab
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg Nov 15, 2021
ccde82f
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 16, 2021
f015d99
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 17, 2021
3f4a189
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg Nov 19, 2021
fed7316
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 19, 2021
f970638
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg Nov 24, 2021
40303be
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 24, 2021
4f482b6
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 25, 2021
cb53a74
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 26, 2021
c554df0
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 30, 2021
29c40d3
Resolve conflict.
Shylock-Hg Nov 30, 2021
2b5164d
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg Dec 2, 2021
bcac0e0
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Dec 6, 2021
828cc3e
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Dec 8, 2021
7a39eff
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Dec 10, 2021
3991f39
Merge branch 'master' into feature/bison-datetime-parser
yixinglu Dec 13, 2021
f1bae68
Merge branch 'master' into feature/bison-datetime-parser
yixinglu Dec 13, 2021
34cb561
Merge branch 'master' into feature/bison-datetime-parser
yixinglu Dec 13, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Implement datetime parser by bison.
Shylock-Hg committed Oct 20, 2021
commit 1d36ecd171142c033aec83f6832d19731b6c7530
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -87,6 +87,7 @@ macro(nebula_add_library name type)
# hbase_thrift_generator
parser_target
wkt_parser_target
datetime_parser_target
)
endmacro()

13 changes: 13 additions & 0 deletions src/common/datatypes/Date.h
Original file line number Diff line number Diff line change
@@ -165,6 +165,19 @@ struct DateTime {
sec = 0;
microsec = 0;
}
// Builds a DateTime by copying the calendar fields (year, month, day) from
// `date` and the clock fields (hour, minute, sec, microsec) from `time`.
explicit DateTime(const Date& date, const Time& time) {
  year = date.year;
  month = date.month;
  day = date.day;
  hour = time.hour;
  minute = time.minute;
  sec = time.sec;
  microsec = time.microsec;
}

// Returns the calendar part of this value (year, month, day) as a Date.
Date date() const { return Date(year, month, day); }

// Returns the clock part of this value (hour, minute, sec, microsec) as a Time.
Time time() const { return Time(hour, minute, sec, microsec); }

void clear() {
year = 0;
2 changes: 2 additions & 0 deletions src/common/time/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -17,4 +17,6 @@ nebula_add_library(
TimeConversion.cpp
)

nebula_add_subdirectory(parser)

nebula_add_subdirectory(test)
19 changes: 19 additions & 0 deletions src/common/time/TimeUtils.h
Original file line number Diff line number Diff line change
@@ -47,6 +47,25 @@ class TimeUtils {
return Status::OK();
}

// Range-checks the clock fields of a Time or DateTime value.
//
// Accepted ranges: hour in [0, 24), minute in [0, 60), sec in [0, 60) and
// microsec in [0, 1000000). The fields are examined in that order and the
// first one found outside its range is reported through an error Status;
// Status::OK() is returned when every field is acceptable.
template <
    typename D,
    typename = std::enable_if_t<std::is_same<D, Time>::value || std::is_same<D, DateTime>::value>>
static Status validateTime(const D &time) {
  if (!(time.hour >= 0 && time.hour < 24)) {
    return Status::Error("Invalid hour number %d.", time.hour);
  }
  if (!(time.minute >= 0 && time.minute < 60)) {
    return Status::Error("Invalid minute number %d.", time.minute);
  }
  if (!(time.sec >= 0 && time.sec < 60)) {
    return Status::Error("Invalid second number %d.", time.sec);
  }
  if (!(time.microsec >= 0 && time.microsec < 1000000)) {
    return Status::Error("Invalid microsecond number %d.", time.microsec);
  }
  return Status::OK();
}

// TODO(shylock) support more format
static StatusOr<DateTime> parseDateTime(const std::string &str) {
std::tm tm;
27 changes: 27 additions & 0 deletions src/common/time/parser/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright (c) 2020 vesoft inc. All rights reserved.
#
# This source code is licensed under Apache 2.0 License,
# attached with Common Clause Condition 1.0, found in the LICENSES directory.

# Bison always runs with -Werror; ENABLE_VERBOSE_BISON additionally passes -v.
if(ENABLE_VERBOSE_BISON)
    set(bison_flags "-Werror -v")
else()
    set(bison_flags "-Werror")
endif()
# Generate DatetimeParser.cpp from the grammar and DatetimeScanner.cpp from
# the lexer spec; both outputs are written to the build directory.
bison_target(Parser datetime_parser.yy ${CMAKE_CURRENT_BINARY_DIR}/DatetimeParser.cpp COMPILE_FLAGS ${bison_flags})
flex_target(Scanner datetime_scanner.lex ${CMAKE_CURRENT_BINARY_DIR}/DatetimeScanner.cpp)

# Named target that depends on all generated parser/scanner outputs.
add_custom_target(datetime_parser_target DEPENDS ${FLEX_Scanner_OUTPUTS} ${BISON_Parser_OUTPUTS})

# Tie the scanner target to the parser target's generated outputs.
add_flex_bison_dependency(Scanner Parser)

# Turn these warning classes off for everything compiled in this directory
# (presumably because the generated sources trigger them -- TODO confirm).
add_compile_options(-Wno-sign-compare -Wno-conversion-null -Wno-pedantic -Wno-extra)

# Object library holding the generated sources plus the hand-written reader.
nebula_add_library(
    datetime_parser_obj OBJECT
    ${FLEX_Scanner_OUTPUTS}
    ${BISON_Parser_OUTPUTS}
    DatetimeReader.cpp
)

nebula_add_subdirectory(test)
62 changes: 62 additions & 0 deletions src/common/time/parser/DatetimeReader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/* Copyright (c) 2018 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/

#include "common/time/parser/DatetimeReader.h"

namespace nebula {
namespace time {

// Wires the parser to this reader's scanner, error string and output slot,
// then installs the callback through which DatetimeScanner pulls its input.
DatetimeReader::DatetimeReader(Type type) : parser_(scanner_, error_, &dt_, type) {
  // The callback copies at most `maxSize` bytes of the unread range
  // [pos_, end_) into `buf` and returns how many bytes it copied; 0 means
  // the input is exhausted.
  scanner_.setReadBuffer([this](char *buf, int maxSize) -> int {
    if (pos_ >= end_) {
      // Nothing left: drop the cursors and signal end of input.
      pos_ = nullptr;
      end_ = nullptr;
      return 0;
    }
    int remaining = end_ - pos_;
    int count = remaining < maxSize ? remaining : maxSize;
    ::memcpy(buf, pos_, count);
    pos_ += count;
    return count;
  });
}

// Parses `input` and returns the resulting DateTime.
//
// Returns a SyntaxError status carrying the parser's message when parsing
// fails, and StatementEmpty when parsing succeeds without producing a value.
// Whatever the outcome, the reader is detached from the input text and the
// parser's output slot is emptied before returning, so the same reader can
// be used for another call.
StatusOr<DateTime> DatetimeReader::read(std::string input) {
  // Keep the text in a member: the read callback consumes it through
  // pos_/end_, and the error reporter quotes it through scanner_.input().
  buffer_ = std::move(input);
  pos_ = buffer_.data();
  end_ = pos_ + buffer_.size();
  scanner_.setInput(&buffer_);

  const bool parsed = parser_.parse() == 0;

  // Detach from the buffer and take ownership of whatever the parser
  // produced, identically on every exit path.
  pos_ = nullptr;
  end_ = nullptr;
  scanner_.setInput(nullptr);
  DateTime *produced = dt_;
  dt_ = nullptr;

  if (!parsed) {
    // Flush the scanner's internal buffer to recover from the failure.
    scanner_.flushBuffer();
    delete produced;  // deleting a null pointer is a no-op
    return Status::SyntaxError(error_);
  }

  if (produced == nullptr) {
    return Status::StatementEmpty();  // empty
  }

  DateTime value = std::move(*produced);
  delete produced;
  return value;
}

} // namespace time
} // namespace nebula
65 changes: 65 additions & 0 deletions src/common/time/parser/DatetimeReader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/* Copyright (c) 2018 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/

#pragma once

#include "common/base/Base.h"
#include "common/base/StatusOr.h"
#include "common/datatypes/Date.h"
#include "common/time/parser/DatetimeParser.hpp"
#include "common/time/parser/DatetimeScanner.h"

namespace nebula {
namespace time {

// Front end of the generated date/time parser.
//
// A reader is created for exactly one expected shape through one of the
// make*Reader() factories and is then asked to parse a string. `Type`
// (kDate / kTime / kDateTime) is declared in the generated parser header
// included by this file.
class DatetimeReader {
 public:
  ~DatetimeReader() { delete dt_; }  // deleting a null pointer is a no-op

  // Copying or moving a reader would be unsafe: the read callback installed
  // by the constructor captures `this`, the parser is constructed from
  // scanner_, error_ and &dt_, and dt_ is a raw owning pointer. The
  // factories below return prvalues, so they do not need either operation.
  DatetimeReader(const DatetimeReader &) = delete;
  DatetimeReader &operator=(const DatetimeReader &) = delete;

  // Creates a reader that expects a date.
  static inline DatetimeReader makeDateReader() { return DatetimeReader(Type::kDate); }

  // Creates a reader that expects a time.
  static inline DatetimeReader makeTimeReader() { return DatetimeReader(Type::kTime); }

  // Creates a reader that expects a combined date and time.
  static inline DatetimeReader makeDateTimeReader() { return DatetimeReader(Type::kDateTime); }

  // Parses `input` and returns the full DateTime, or the failure status.
  StatusOr<DateTime> readDatetime(std::string input) { return read(std::move(input)); }

  // Parses `input` and returns only the date part of the result.
  StatusOr<Date> readDate(std::string input) {
    auto result = read(std::move(input));
    NG_RETURN_IF_ERROR(result);
    return result.value().date();
  }

  // Parses `input` and returns only the time part of the result.
  StatusOr<Time> readTime(std::string input) {
    auto result = read(std::move(input));
    NG_RETURN_IF_ERROR(result);
    return result.value().time();
  }

 private:
  explicit DatetimeReader(Type type);

  StatusOr<DateTime> read(std::string input);

  std::string buffer_;         // text currently being parsed
  const char *pos_{nullptr};   // next unread byte of buffer_
  const char *end_{nullptr};   // one past the last byte of buffer_
  DatetimeScanner scanner_;
  DatetimeParser parser_;      // constructed from scanner_, error_ and &dt_
  std::string error_;          // message returned with a SyntaxError status
  DateTime *dt_{nullptr};      // parser output slot; owned by the reader
};

} // namespace time
} // namespace nebula
70 changes: 70 additions & 0 deletions src/common/time/parser/DatetimeScanner.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/* Copyright (c) 2018 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/

#pragma once

#include "common/base/Base.h"

// This macro must be defined before #include <FlexLexer.h> !!!
#define yyFlexLexer datetimeFlexLexer

// Only include FlexLexer.h if it hasn't been already included
#if !defined(yyFlexLexerOnce)
#include <FlexLexer.h>
#endif

// Override the interface for yylex since we namespaced it
#undef YY_DECL
#define YY_DECL int nebula::time::DatetimeScanner::yylex()

#include "common/time/parser/DatetimeParser.hpp"

namespace nebula {
namespace time {

// TODO(jie) Try to reuse the class GraphScanner
Shylock-Hg marked this conversation as resolved.
Show resolved Hide resolved
// Flex-generated scanner for date/time text, adapted to the interface the
// bison parser expects and fed through a caller-supplied read callback.
class DatetimeScanner : public yyFlexLexer {
 public:
  // Entry point used by the parser: remembers where the semantic value and
  // the location of the next token must be stored, then runs the scanner.
  int yylex(nebula::time::DatetimeParser::semantic_type *lval,
            nebula::time::DatetimeParser::location_type *loc) {
    yylval = lval;
    yylloc = loc;
    return yylex();
  }

 public:
  // Called by DatetimeReader to set the `readBuffer' callback, which is
  // invoked by LexerInput to fill the stream buffer. The by-value argument
  // is swapped in rather than copied a second time.
  void setReadBuffer(std::function<int(char *, int)> readBuffer) { readBuffer_.swap(readBuffer); }

  // Manually invoked by DatetimeReader to recover from a failure state.
  // This makes the scanner reentrant.
  void flushBuffer() {
    yy_flush_buffer(yy_buffer_stack ? yy_buffer_stack[yy_buffer_stack_top] : nullptr);
  }

  // Text being scanned; the parser's error reporter reads it to quote the
  // offending part of the input. May be null.
  void setInput(std::string *input) { input_ = input; }

  std::string *input() { return input_; }

 protected:
  // Called when YY_INPUT is invoked. With no callback installed the scanner
  // simply sees end of input instead of calling an empty std::function.
  int LexerInput(char *buf, int maxSize) override {
    return readBuffer_ ? readBuffer_(buf, maxSize) : 0;
  }

  using TokenType = nebula::time::DatetimeParser::token;

 private:
  // friend class Scanner_Basic_Test; TODO(jie) add it
  int yylex() override;

  nebula::time::DatetimeParser::semantic_type *yylval{nullptr};
  nebula::time::DatetimeParser::location_type *yylloc{nullptr};
  std::function<int(char *, int)> readBuffer_;
  std::string *input_{nullptr};
};

} // namespace time
} // namespace nebula
249 changes: 249 additions & 0 deletions src/common/time/parser/datetime_parser.yy
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
%language "C++"
%skeleton "lalr1.cc"
%no-lines
%locations
%define api.namespace { nebula::time }
%define parser_class_name { DatetimeParser }
%lex-param { nebula::time::DatetimeScanner& scanner }
%parse-param { nebula::time::DatetimeScanner& scanner }
%parse-param { std::string &errmsg }
%parse-param { nebula::DateTime** output }
%parse-param { nebula::time::Type outputType }

%code requires {
// Bison copies this section into the generated parser header, so these
// includes and declarations are visible to every file that includes it.
#include <iostream>
#include <sstream>
#include <string>
#include <cstddef>
#include "common/datatypes/Date.h"
#include "common/time/TimeConversion.h"
#include "common/time/TimezoneInfo.h"
#include "common/time/TimeUtils.h"

namespace nebula {
namespace time {
// Forward declaration only: the scanner header includes the generated
// parser header, so it cannot be included from here.
class DatetimeScanner;

// The shape of input the caller expects. The top-level grammar rule rejects
// input whose shape differs from the requested one.
enum class Type {
    kDate,
    kTime,
    kDateTime,
};
}
}

}

%code {
#include "common/time/parser/DatetimeScanner.h"
static int yylex(nebula::time::DatetimeParser::semantic_type* yylval,
nebula::time::DatetimeParser::location_type *yylloc,
nebula::time::DatetimeScanner& scanner);
}

/* Semantic value shared by every token and nonterminal. The pointer members
 * refer to objects allocated with new by the scanner or by grammar actions. */
%union {
    int64_t intVal;
    double doubleVal;
    nebula::DateTime *dtVal;
    nebula::Date *dVal;
    nebula::Time *tVal;
    std::string *strVal;
}

/* destructors */
/* Plain scalars need no cleanup. */
%destructor {} <intVal> <doubleVal>
/* The datetime result is also stored through the output parameter, so the
 * parser itself must not free it. */
%destructor {} <dtVal> // for output
/* Every other typed symbol (dVal, tVal, strVal) is deleted when bison
 * discards it. */
%destructor { delete $$; } <*>

/* keyword */
%token KW_TIME_ID

/* symbols */
%token TIME_DELIMITER SPACE POSITIVE NEGATIVE

/* token type specification */
%token <intVal> INTEGER
%token <strVal> TIME_ZONE_NAME
%token <doubleVal> DOUBLE

%type <dtVal> datetime
%type <dVal> date
%type <tVal> time
%type <intVal> opt_time_zone opt_time_zone_name time_zone_offset

%define api.prefix {datetime}

%start datetime

%%

/* Start symbol. Accepts a date-time, a bare date or a bare time, but only the
 * one shape requested through outputType; any other shape is reported as a
 * syntax error. The result is always built as a DateTime: a bare time is
 * placed on 1970-01-01, and a UTC offset, when present, is removed by
 * shifting the value by the negated offset. The result is also stored
 * through the output parameter.
 *
 * Bison does not reclaim the right-hand-side symbols of a rule whose action
 * raises an error, so every branch frees its operands before throwing. */
datetime
  : date date_time_delimiter time opt_time_zone {
    if (outputType != nebula::time::Type::kDateTime) {
      delete $1;
      delete $3;
      throw DatetimeParser::syntax_error(@1, "Mismatched date time type.");
    }
    $$ = new DateTime(TimeConversion::dateTimeShift(DateTime(*$1, *$3), -$4));
    *output = $$;
    delete $1;
    delete $3;
  }
  | date {
    if (outputType != nebula::time::Type::kDate) {
      delete $1;
      throw DatetimeParser::syntax_error(@1, "Mismatched date time type.");
    }
    $$ = new DateTime(*$1);
    *output = $$;
    delete $1;
  }
  | time opt_time_zone {
    if (outputType != nebula::time::Type::kTime) {
      delete $1;
      throw DatetimeParser::syntax_error(@1, "Mismatched date time type.");
    }
    $$ = new DateTime(TimeConversion::dateTimeShift(DateTime(1970, 1, 1, $1->hour, $1->minute, $1->sec, $1->microsec), -$2));
    *output = $$;
    delete $1;
  }
  ;

date_time_delimiter
: KW_TIME_ID
| SPACE
;

date
: INTEGER NEGATIVE INTEGER NEGATIVE INTEGER {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not use the parser to validate the date value? It would be clearer to replace INTEGER with a meaningful YEAR token to validate the format. You can define YEAR with the regex {DEC}{1,4} to do the same thing.

Copy link
Contributor Author

@Shylock-Hg Shylock-Hg Oct 27, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still need to check the value range, e.g. 99 is still an invalid minute number, so the validation is done in one place for now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minutes could also be validated with a regex. You are already using the parser, so why not let it do more?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's too complicated.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And different months have different numbers of days, so that can't be validated by a lex/parse rule.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minutes could also be validated with a regex. You are already using the parser, so why not let it do more?

Where is this check? I don't find it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See validateDate function

// Year-month-day form. Validate a stack copy first and allocate the result
// only once it is known to be valid, so nothing leaks when the action throws.
nebula::Date d($1, $3, $5);
auto result = nebula::time::TimeUtils::validateDate(d);
if (!result.ok()) {
  throw DatetimeParser::syntax_error(@1, result.toString());
}
$$ = new nebula::Date(d);
}
| INTEGER NEGATIVE INTEGER {
    // Year-month form: the day defaults to 1. Validate a stack copy first and
    // allocate the result only once it is known to be valid, so nothing
    // leaks when the action throws.
    nebula::Date d($1, $3, 1);
    auto result = nebula::time::TimeUtils::validateDate(d);
    if (!result.ok()) {
      throw DatetimeParser::syntax_error(@1, result.toString());
    }
    $$ = new nebula::Date(d);
  }
;

/* Clock time in one of three forms: hour:minute:second.fraction,
 * hour:minute:second, or hour:minute. Missing fields default to 0.
 *
 * Each alternative validates a stack copy and allocates the result only after
 * validation succeeded, so nothing leaks when the action throws.
 * NOTE(review): the integer operands are 64-bit; if the fields of
 * nebula::Time are narrower, an out-of-range value may be truncated before it
 * is validated -- confirm against the Time constructor. */
time
  : INTEGER TIME_DELIMITER INTEGER TIME_DELIMITER DOUBLE {
    // Split the seconds into a whole part and a fraction; the fraction is
    // converted to microseconds.
    double integer = 0;
    auto fraction = std::modf($5, &integer);
    nebula::Time t($1, $3, static_cast<int>(integer), std::round(fraction * 1000 * 1000));
    auto result = nebula::time::TimeUtils::validateTime(t);
    if (!result.ok()) {
      throw DatetimeParser::syntax_error(@1, result.toString());
    }
    $$ = new nebula::Time(t);
  }
  | INTEGER TIME_DELIMITER INTEGER TIME_DELIMITER INTEGER {
    nebula::Time t($1, $3, $5, 0);
    auto result = nebula::time::TimeUtils::validateTime(t);
    if (!result.ok()) {
      throw DatetimeParser::syntax_error(@1, result.toString());
    }
    $$ = new nebula::Time(t);
  }
  | INTEGER TIME_DELIMITER INTEGER {
    nebula::Time t($1, $3, 0, 0);
    auto result = nebula::time::TimeUtils::validateTime(t);
    if (!result.ok()) {
      throw DatetimeParser::syntax_error(@1, result.toString());
    }
    $$ = new nebula::Time(t);
  }
  ;

/* Optional UTC offset, as a signed number of seconds; 0 when absent.
 *
 * When a zone name follows the numeric offset, the two must agree. The
 * comparison uses the signed offset: comparing the unsigned magnitude, as was
 * done before for the negative form, rejects every correct negative offset.
 * This assumes the zone's offset is signed with east of UTC positive
 * -- TODO confirm against Timezone::utcOffsetSecs().
 * A zone offset of 0 is indistinguishable from "no zone name given", so a
 * named zone whose offset is 0 is never checked against the numeric offset. */
opt_time_zone
  : %empty {
    $$ = 0;
  }
  | POSITIVE time_zone_offset opt_time_zone_name {
    if ($3 != 0 && $2 != $3) {
      throw DatetimeParser::syntax_error(@1, "Mismatched timezone offset.");
    }
    $$ = $2;
  }
  | NEGATIVE time_zone_offset opt_time_zone_name {
    const auto signedOffset = -$2;
    if ($3 != 0 && signedOffset != $3) {
      throw DatetimeParser::syntax_error(@1, "Mismatched timezone offset.");
    }
    $$ = signedOffset;
  }
  ;

/* Magnitude of a UTC offset written as hour:minute, converted to seconds.
 * The two fields are range-checked as an ordinary clock time; the sign is
 * applied by the rule that uses this value. */
time_zone_offset
  : INTEGER TIME_DELIMITER INTEGER {
    nebula::Time offset($1, $3, 0, 0);
    auto status = nebula::time::TimeUtils::validateTime(offset);
    if (!status.ok()) {
      throw DatetimeParser::syntax_error(@1, status.toString());
    }
    $$ = nebula::time::TimeConversion::timeToSeconds(offset);
  }
  ;

/* Optional zone name, resolved to that zone's UTC offset in seconds; 0 when
 * no name is given. An unknown name is reported as a syntax error.
 *
 * The name string is freed on the error path as well: bison does not reclaim
 * the operands of a rule whose action raises an error. */
opt_time_zone_name
  : %empty {
    $$ = 0;
  }
  | TIME_ZONE_NAME {
    auto zone = nebula::time::Timezone();
    auto result = zone.loadFromDb(*$1);
    if (!result.ok()) {
      delete $1;
      throw DatetimeParser::syntax_error(@1, result.toString());
    }
    $$ = zone.utcOffsetSecs();
    delete $1;
  }
  ;

%%

// Error callback of the generated parser: builds the message stored in
// `errmsg`.
//
// The message starts with `msg` ("syntax error" when `msg` is empty). When the
// scanner has input text and the location lies on a single line inside it,
// the message continues with " near `<snippet>'", where the snippet is the
// located text (at most 80 characters) or, for a location that is not wider
// than one column, the next 8 characters. Otherwise it continues with
// " at <location>".
void nebula::time::DatetimeParser::error(const nebula::time::DatetimeParser::location_type& loc,
                                         const std::string &msg) {
  std::ostringstream os;
  os << (msg.empty() ? std::string("syntax error") : msg);

  auto *input = scanner.input();

  // Columns are 1-based; turn the start column into an offset into the text.
  const auto begin = loc.begin.column > 0 ? loc.begin.column - 1 : 0;
  const bool spansFiles =
      loc.end.filename && (!loc.begin.filename || *loc.begin.filename != *loc.end.filename);
  const bool spansLines = loc.begin.line < loc.end.line;

  if (input == nullptr || spansFiles || spansLines || begin >= input->size()) {
    os << " at " << loc;
  } else {
    uint32_t len = 8;
    if (loc.begin.column < (loc.end.column ? loc.end.column - 1 : 0)) {
      len = loc.end.column - loc.begin.column;
      if (len > 80) {
        len = 80;
      }
    }
    os << " near `" << input->substr(begin, len) << "'";
  }

  errmsg = os.str();
}

// Trampoline with the signature the generated parser calls: forwards the
// request for the next token to the scanner object.
static int yylex(nebula::time::DatetimeParser::semantic_type* yylval,
                 nebula::time::DatetimeParser::location_type *yylloc,
                 nebula::time::DatetimeScanner& scanner) {
  return scanner.yylex(yylval, yylloc);
}

87 changes: 87 additions & 0 deletions src/common/time/parser/datetime_scanner.lex
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
%option c++
%option yyclass="DatetimeScanner"
%option nodefault noyywrap
%option never-interactive
%option yylineno
%option case-sensitive
%option prefix="datetime"

%{
#include "common/time/parser/DatetimeScanner.h"
#include "DatetimeParser.hpp"
#include <stdlib.h>
#include <string>

// Executed before the action of every matched rule: restart the location at
// the end of the previous token, then extend it by the length of the match.
#define YY_USER_ACTION \
    yylloc->step(); \
    yylloc->columns(yyleng);

%}

DEC ([0-9])
L_BRACKET "["
R_BRACKET "]"

%%

/* date time shape type prefix */
"T" { return TokenType::KW_TIME_ID; }
":" { return TokenType::TIME_DELIMITER; }
" " { return TokenType::SPACE; }
"+" { return TokenType::POSITIVE; }
"-" { return TokenType::NEGATIVE; }


{DEC}+ {
    // Unsigned run of digits. Convert straight to the signed 64-bit type of
    // intVal, so that a value above INT64_MAX is rejected as an invalid
    // integer instead of wrapping to a negative number.
    try {
        folly::StringPiece text(yytext, yyleng);
        yylval->intVal = folly::to<int64_t>(text);
    } catch (...) {
        throw DatetimeParser::syntax_error(*yylloc, "Invalid integer:");
    }
    return TokenType::INTEGER;
}

{DEC}+\.{DEC}+ {
    // Digits, a dot, digits: a decimal number. Any conversion failure is
    // reported as a syntax error at the current location.
    try {
        yylval->doubleVal = folly::to<double>(folly::StringPiece(yytext, yyleng));
    } catch (...) {
        throw DatetimeParser::syntax_error(*yylloc, "Invalid double value:");
    }
    return TokenType::DOUBLE;
}

{L_BRACKET}[^\]]+{R_BRACKET} {
    // A zone name in square brackets. Named definitions are only expanded
    // when written in braces; the bare names matched the literal text
    // "L_BRACKET..." and never a bracketed name. The brackets themselves are
    // stripped from the value handed to the parser.
    std::string *str = new std::string(yytext + 1, yyleng - 2);
    yylval->strVal = str;
    return TokenType::TIME_ZONE_NAME;
}

. {
    /**
     * Any byte that no other rule matched ends up here, including
     * non-ASCII bytes, which are negative when read as `signed char'.
     * Because Bison translates all negative tokens to EOF (i.e. YY_NULL),
     * the byte is cast to `unsigned char' before it is returned. Bison
     * then receives an unknown token, which leads to a syntax error.
     *
     * Note that in this case it is Bison, not Flex, that treats the
     * illegal character as an error.
     */
    return static_cast<unsigned char>(yytext[0]);

    /**
     * Alternatively, illegal characters could be reported by throwing a
     * `syntax_error' exception. That would distinguish illegal characters
     * from ordinary syntax errors, at the cost of the expensive exception
     * handling.
     */
    // throw DatetimeParser::syntax_error(*yylloc, "char illegal");
}

%%
21 changes: 21 additions & 0 deletions src/common/time/parser/test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright (c) 2021 vesoft inc. All rights reserved.
#
# This source code is licensed under Apache 2.0 License,
# attached with Common Clause Condition 1.0, found in the LICENSES directory.

nebula_add_test(
NAME
datetime_parser_test
SOURCES
DateTimeParserTest.cpp
OBJECTS
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:thread_obj>
$<TARGET_OBJECTS:datatypes_obj>
$<TARGET_OBJECTS:base_obj>
$<TARGET_OBJECTS:fs_obj>
$<TARGET_OBJECTS:wkt_wkb_io_obj>
Shylock-Hg marked this conversation as resolved.
Show resolved Hide resolved
$<TARGET_OBJECTS:datetime_parser_obj>
LIBRARIES
gtest
)
272 changes: 272 additions & 0 deletions src/common/time/parser/test/DateTimeParserTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
/* Copyright (c) 2021 vesoft inc. All rights reserved.
*
* This source code is licensed under Apache 2.0 License,
* attached with Common Clause Condition 1.0, found in the LICENSES directory.
*/

#include <gtest/gtest.h>

#include "common/base/Base.h"
#include "common/time/TimezoneInfo.h"
#include "common/time/parser/DatetimeReader.h"

namespace nebula {

// Inputs a date-time reader must accept, each with the value it must yield.
TEST(DatetimeReader, DateTime) {
  struct Case {
    const char *text;
    nebula::DateTime expected;
  };
  const Case cases[] = {
      {"2019-01-03T22:22:3.2333", nebula::DateTime(2019, 1, 3, 22, 22, 3, 233300)},
      // with offset
      {"2019-01-03T22:22:3.2333+02:30", nebula::DateTime(2019, 1, 3, 19, 52, 3, 233300)},
      // lack day
      {"2019-01T22:22:3.2333", nebula::DateTime(2019, 1, 1, 22, 22, 3, 233300)},
      // lack us
      {"2019-1T22:22:3", nebula::DateTime(2019, 1, 1, 22, 22, 3, 0)},
      // lack second
      {"2019-1T22:22", nebula::DateTime(2019, 1, 1, 22, 22, 0, 0)},
      // TODO: inputs lacking the month ("2019T22:22:3.2333", expected
      // 2019-01-01 22:22:03.233300) or the minute ("2019-1T22", expected
      // 2019-01-01 22:00:00) are not covered yet.
  };
  for (const auto &c : cases) {
    auto parser = time::DatetimeReader::makeDateTimeReader();
    auto result = parser.readDatetime(c.text);
    ASSERT_TRUE(result.ok()) << result.status();
    EXPECT_EQ(c.expected, result.value());
  }
}

// Inputs a date-time reader must reject.
TEST(DatetimeReader, DateTimeFailed) {
  const char *rejected[] = {
      "2019-01-03T22:22:3.2333+24:30",  // out of range offset
      "22:22:3.2333",                   // only time
      "2019-01-03T22:22:3.2333x",       // with unexpected character
      "2019-01-T22:22:3.2333",          // delimiter without the field after it
      "2019-01-03T22:22:.2333",
      "2019-01-03T",                    // prefix without the part after it
      "2019-01-03T22:22:3.",
      "-01-03T22:22:3.2333",            // leading field missing
  };
  for (const char *text : rejected) {
    auto parser = time::DatetimeReader::makeDateTimeReader();
    auto result = parser.readDatetime(text);
    EXPECT_FALSE(result.ok()) << result.value();
  }
}

// Inputs a date reader must accept, each with the value it must yield.
TEST(DatetimeReader, Date) {
  struct Case {
    const char *text;
    nebula::Date expected;
  };
  const Case cases[] = {
      {"2019-01-03", nebula::Date(2019, 1, 3)},
      // lack day
      {"2019-01", nebula::Date(2019, 1, 1)},
      // TODO: an input lacking the month ("2019", expected 2019-01-01) is
      // not covered yet.
  };
  for (const auto &c : cases) {
    auto parser = time::DatetimeReader::makeDateReader();
    auto result = parser.readDate(c.text);
    ASSERT_TRUE(result.ok()) << result.status();
    EXPECT_EQ(c.expected, result.value());
  }
}

// Inputs a date reader must reject.
TEST(DatetimeReader, DateFailed) {
  const char *rejected[] = {
      "2019-01-03+23:00",         // don't support offset
      "2019-01-03*",              // with unexpected character
      "2019-01-03T22:22:3.2333",  // extra components
      "2019-01-",                 // delimiter without the field after it
      "-01-03",                   // leading field missing
  };
  for (const char *text : rejected) {
    auto parser = time::DatetimeReader::makeDateReader();
    auto result = parser.readDate(text);
    EXPECT_FALSE(result.ok()) << result.value();
  }
}

// Inputs a time reader must accept, each with the value it must yield.
// The second offset case used to repeat the negative-offset input; it now
// covers a positive offset instead.
TEST(DatetimeReader, Time) {
  struct Case {
    const char *text;
    nebula::Time expected;
  };
  const Case cases[] = {
      {"22:22:3.2333", nebula::Time(22, 22, 3, 233300)},
      // with negative offset: the result wraps past midnight
      {"22:22:3.2333-03:30", nebula::Time(1, 52, 3, 233300)},
      // with positive offset
      {"22:22:3.2333+03:30", nebula::Time(18, 52, 3, 233300)},
      // lack us
      {"22:22:3", nebula::Time(22, 22, 3, 0)},
      // lack second
      {"22:22", nebula::Time(22, 22, 0, 0)},
      // TODO: an input lacking the minute ("22", expected 22:00:00) is not
      // covered yet.
  };
  for (const auto &c : cases) {
    auto parser = time::DatetimeReader::makeTimeReader();
    auto result = parser.readTime(c.text);
    ASSERT_TRUE(result.ok()) << result.status();
    EXPECT_EQ(c.expected, result.value());
  }
}

// Inputs a time reader must reject.
TEST(DatetimeReader, TimeFailed) {
  const char *rejected[] = {
      "22:22:3.2333-03:60",       // out of range offset
      "22:22:3.2333x",            // unexpected character
      "2019-01-03T22:22:3.2333",  // extra components
      "22:22:.2333",              // delimiter without the field after it
      "22:22:3.",                 // decimal point without a fraction
      ":22:3.2333",               // leading field missing
  };
  for (const char *text : rejected) {
    auto parser = time::DatetimeReader::makeTimeReader();
    auto result = parser.readTime(text);
    EXPECT_FALSE(result.ok()) << result.value();
  }
}

} // namespace nebula

// Test entry point: initializes gtest, folly and logging, loads the global
// time zone, then runs every registered test.
int main(int argc, char **argv) {
  testing::InitGoogleTest(&argc, argv);
  folly::init(&argc, &argv, true);
  google::SetStderrLogging(google::INFO);
  // The global time zone is initialized before any test runs; a failure
  // here aborts the test binary.
  auto result = nebula::time::Timezone::initializeGlobalTimezone();
  if (!result.ok()) {
    LOG(FATAL) << result;
  }

  DLOG(INFO) << "Timezone: " << nebula::time::Timezone::getGlobalTimezone().stdZoneName();
  DLOG(INFO) << "Timezone offset: " << nebula::time::Timezone::getGlobalTimezone().utcOffsetSecs();

  return RUN_ALL_TESTS();
}