Skip to content

Commit

Permalink
Feature/bison datetime parser (#3179)
Browse files Browse the repository at this point in the history
* Implement the datetime parser with Bison.

* Remove unused code.

* Add the time parser.

* Fix case.

* Tune zone.

* Remove debug code.

* Fix leak.

* Fix comments.

* Fix comments.

* Fix license header

* Resolve conflict.
  • Loading branch information
Shylock-Hg authored Dec 13, 2021
1 parent 3e8e095 commit 393e169
Show file tree
Hide file tree
Showing 44 changed files with 1,062 additions and 75 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ macro(nebula_add_library name type)
# hbase_thrift_generator
parser_target
wkt_parser_target
datetime_parser_target
)
endmacro()

Expand Down
1 change: 1 addition & 0 deletions src/codec/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ set(CODEC_TEST_LIBS
$<TARGET_OBJECTS:wkt_wkb_io_obj>
$<TARGET_OBJECTS:agg_function_manager_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:version_obj>
$<TARGET_OBJECTS:ssl_obj>
)
Expand Down
15 changes: 12 additions & 3 deletions src/common/datatypes/Date.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@

namespace nebula {

// Extracts the fractional suffix of a formatted number, keeping the leading
// '.' (for example "0.000341000" yields ".000341000"). When the text contains
// no '.', the result is an empty string.
static inline std::string decimal(const std::string& number) {
  const auto dotPos = number.find('.');
  if (dotPos == std::string::npos) {
    return std::string();
  }
  return number.substr(dotPos);
}

const int64_t kDaysSoFar[] = {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365};
const int64_t kLeapDaysSoFar[] = {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366};

Expand Down Expand Up @@ -98,22 +103,26 @@ std::string Date::toString() const {
}

std::string Time::toString() const {
auto microsecStr = folly::stringPrintf("%.9f", static_cast<uint32_t>(microsec) / 1000000.0);
auto decimalPart = decimal(microsecStr);
// It's in current timezone already
return folly::stringPrintf("%02d:%02d:%02d.%06d", hour, minute, sec, microsec);
return folly::stringPrintf("%02d:%02d:%02d%s", hour, minute, sec, decimalPart.c_str());
}

std::string DateTime::toString() const {
auto microsecStr = folly::stringPrintf("%.9f", static_cast<uint32_t>(microsec) / 1000000.0);
auto decimalPart = decimal(microsecStr);
// It's in current timezone already
return folly::stringPrintf(
"%hd-%02hhu-%02hhu"
"T%02hhu:%02hhu:%02hhu.%u",
"T%02hhu:%02hhu:%02hhu%s",
static_cast<int16_t>(year),
static_cast<uint8_t>(month),
static_cast<uint8_t>(day),
static_cast<uint8_t>(hour),
static_cast<uint8_t>(minute),
static_cast<uint8_t>(sec),
static_cast<uint32_t>(microsec));
decimalPart.c_str());
}

} // namespace nebula
Expand Down
13 changes: 13 additions & 0 deletions src/common/datatypes/Date.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,19 @@ struct DateTime {
sec = 0;
microsec = 0;
}
// Composes a DateTime from separate date and time values: year/month/day are
// copied from `date`, and hour/minute/sec/microsec are copied from `time`.
explicit DateTime(const Date& date, const Time& time) {
year = date.year;
month = date.month;
day = date.day;
hour = time.hour;
minute = time.minute;
sec = time.sec;
microsec = time.microsec;
}

// Returns the calendar portion of this value (year, month, day) as a Date.
Date date() const { return Date(year, month, day); }

// Returns the clock portion of this value (hour, minute, sec, microsec) as a Time.
Time time() const { return Time(hour, minute, sec, microsec); }

void clear() {
year = 0;
Expand Down
3 changes: 3 additions & 0 deletions src/common/datatypes/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ nebula_add_test(
$<TARGET_OBJECTS:agg_function_manager_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:fs_obj>
$<TARGET_OBJECTS:wkt_wkb_io_obj>
LIBRARIES
Expand All @@ -65,6 +66,7 @@ nebula_add_test(
$<TARGET_OBJECTS:wkt_wkb_io_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:fs_obj>
LIBRARIES
gtest
Expand Down Expand Up @@ -110,6 +112,7 @@ nebula_add_test(
$<TARGET_OBJECTS:function_manager_obj>
$<TARGET_OBJECTS:agg_function_manager_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:fs_obj>
LIBRARIES
Expand Down
35 changes: 20 additions & 15 deletions src/common/datatypes/test/ValueToJsonTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ TEST(ValueToJson, list) {
Time(13, 30, 15, 0), // time
DateTime(2021, 12, 21, 13, 30, 15, 0)})); // datetime
dynamic expectedListJsonObj = dynamic::array(
2, 2.33, true, "str", "2021-12-21", "13:30:15.000000Z", "2021-12-21T13:30:15.0Z");
2, 2.33, true, "str", "2021-12-21", "13:30:15.000000000Z", "2021-12-21T13:30:15.000000000Z");
ASSERT_EQ(expectedListJsonObj, list1.toJson());

dynamic expectedListMetaObj = dynamic::array(nullptr,
Expand All @@ -158,7 +158,7 @@ TEST(ValueToJson, Set) {
Time(13, 30, 15, 0), // time
DateTime(2021, 12, 21, 13, 30, 15, 0)})); // datetime
dynamic expectedSetJsonObj = dynamic::array(
2, 2.33, true, "str", "2021-12-21", "13:30:15.000000Z", "2021-12-21T13:30:15.0Z");
2, 2.33, true, "str", "2021-12-21", "13:30:15.000000000Z", "2021-12-21T13:30:15.000000000Z");
// The underlying data structure is unordered_set, so sort before the comparison
auto actualJson = set.toJson();
std::sort(actualJson.begin(), actualJson.end());
Expand All @@ -179,7 +179,7 @@ TEST(ValueToJson, map) {
{"key7", DateTime(2021, 12, 21, 13, 30, 15, 0)}})); // datetime
dynamic expectedMapJsonObj =
dynamic::object("key1", 2)("key2", 2.33)("key3", true)("key4", "str")("key5", "2021-12-21")(
"key6", "13:30:15.000000Z")("key7", "2021-12-21T13:30:15.0Z");
"key6", "13:30:15.000000000Z")("key7", "2021-12-21T13:30:15.000000000Z");
ASSERT_EQ(expectedMapJsonObj, map.toJson());
// Skip meta JSON comparison since nested dynamic objects cannot be sorted, e.g. a dynamic::object
// inside a dynamic::array.
Expand All @@ -194,18 +194,23 @@ TEST(ValueToJson, dataset) {
Date(2021, 12, 21), // date
Time(13, 30, 15, 0), // time
DateTime(2021, 12, 21, 13, 30, 15, 0)}));
dynamic expectedDatasetJsonObj = dynamic::array(dynamic::object(
"row",
dynamic::array(
2, 2.33, true, "str", "2021-12-21", "13:30:15.000000Z", "2021-12-21T13:30:15.0Z"))(
"meta",
dynamic::array(nullptr,
nullptr,
nullptr,
nullptr,
dynamic::object("type", "date"),
dynamic::object("type", "time"),
dynamic::object("type", "datetime"))));
dynamic expectedDatasetJsonObj =
dynamic::array(dynamic::object("row",
dynamic::array(2,
2.33,
true,
"str",
"2021-12-21",
"13:30:15.000000000Z",
"2021-12-21T13:30:15.000000000Z"))(
"meta",
dynamic::array(nullptr,
nullptr,
nullptr,
nullptr,
dynamic::object("type", "date"),
dynamic::object("type", "time"),
dynamic::object("type", "datetime"))));
ASSERT_EQ(expectedDatasetJsonObj, dataset.toJson());
}

Expand Down
3 changes: 3 additions & 0 deletions src/common/expression/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set(expression_test_common_libs
$<TARGET_OBJECTS:stats_obj>
$<TARGET_OBJECTS:datatypes_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:common_thrift_obj>
$<TARGET_OBJECTS:thrift_obj>
$<TARGET_OBJECTS:thread_obj>
Expand Down Expand Up @@ -113,6 +114,7 @@ nebula_add_executable(
$<TARGET_OBJECTS:wkt_wkb_io_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:fs_obj>
$<TARGET_OBJECTS:agg_function_manager_obj>
LIBRARIES
Expand All @@ -136,6 +138,7 @@ nebula_add_executable(
$<TARGET_OBJECTS:agg_function_manager_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:fs_obj>
LIBRARIES
follybenchmark
Expand Down
1 change: 1 addition & 0 deletions src/common/function/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ nebula_add_test(
$<TARGET_OBJECTS:base_obj>
$<TARGET_OBJECTS:datatypes_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:fs_obj>
LIBRARIES
Expand Down
9 changes: 5 additions & 4 deletions src/common/function/test/FunctionManagerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ TEST_F(FunctionManagerTest, functionCall) {
TEST_FUNCTION(toString, args_["toString_bool"], "true");
TEST_FUNCTION(toString, args_["string"], "AbcDeFG");
TEST_FUNCTION(toString, args_["date"], "1984-10-11");
TEST_FUNCTION(toString, args_["datetime"], "1984-10-11T12:31:14.341");
TEST_FUNCTION(toString, args_["datetime"], "1984-10-11T12:31:14.000341000");
TEST_FUNCTION(toString, args_["nullvalue"], Value::kNullValue);
}
{
Expand All @@ -327,8 +327,9 @@ TEST_F(FunctionManagerTest, functionCall) {
DateTime dateTime(2021, 10, 31, 8, 5, 34, 29);
TEST_FUNCTION(concat, std::vector<Value>({"hello", 1, "world"}), "hello1world");
TEST_FUNCTION(concat, std::vector<Value>({true, 2, date}), "true22021-10-31");
TEST_FUNCTION(concat, std::vector<Value>({true, dateTime}), "true2021-10-31T08:05:34.29");
TEST_FUNCTION(concat, std::vector<Value>({2.3, time}), "2.309:39:21.000012");
TEST_FUNCTION(
concat, std::vector<Value>({true, dateTime}), "true2021-10-31T08:05:34.000029000");
TEST_FUNCTION(concat, std::vector<Value>({2.3, time}), "2.309:39:21.000012000");
TEST_FUNCTION(concat, args_["two"], "24");
TEST_FUNCTION(concat_ws, std::vector<Value>({",", 1}), "1");
TEST_FUNCTION(concat_ws, std::vector<Value>({"@", 1, "world"}), "1@world");
Expand All @@ -337,7 +338,7 @@ TEST_F(FunctionManagerTest, functionCall) {
"1ABtrueABworld");
TEST_FUNCTION(concat_ws,
std::vector<Value>({".", 1, true, Value::kNullValue, "world", time}),
"1.true.world.09:39:21.000012");
"1.true.world.09:39:21.000012000");
}
{
TEST_FUNCTION(toBoolean, args_["int"], Value::kNullBadType);
Expand Down
1 change: 1 addition & 0 deletions src/common/geo/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ nebula_add_test(
$<TARGET_OBJECTS:base_obj>
$<TARGET_OBJECTS:datatypes_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:time_obj>
LIBRARIES
gtest
Expand Down
2 changes: 2 additions & 0 deletions src/common/time/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,6 @@ nebula_add_library(
TimeConversion.cpp
)

nebula_add_subdirectory(parser)

nebula_add_subdirectory(test)
20 changes: 20 additions & 0 deletions src/common/time/TimeUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "common/fs/FileUtils.h"
#include "common/time/TimezoneInfo.h"
#include "common/time/parser/DatetimeReader.h"

namespace nebula {
namespace time {
Expand Down Expand Up @@ -170,5 +171,24 @@ StatusOr<Value> TimeUtils::toTimestamp(const Value &val) {
return timestamp;
}

// Parses `str` into a DateTime with the reader produced by
// DatetimeReader::makeDateTimeReader(). The read result is first handed to
// NG_RETURN_IF_ERROR (which, per its name, returns early on failure); on
// success the contained value is returned.
/*static*/ StatusOr<DateTime> TimeUtils::parseDateTime(const std::string &str) {
  auto reader = DatetimeReader::makeDateTimeReader();
  auto parsed = reader.readDatetime(str);
  NG_RETURN_IF_ERROR(parsed);
  return parsed.value();
}

// Parses `str` into a Date with the reader produced by
// DatetimeReader::makeDateReader(). The read result is first handed to
// NG_RETURN_IF_ERROR (which, per its name, returns early on failure); on
// success the contained value is returned.
/*static*/ StatusOr<Date> TimeUtils::parseDate(const std::string &str) {
  auto reader = DatetimeReader::makeDateReader();
  auto parsed = reader.readDate(str);
  NG_RETURN_IF_ERROR(parsed);
  return parsed.value();
}

// Parses `str` into a Time with the reader produced by
// DatetimeReader::makeTimeReader(); whatever readTime() yields (value or
// error) is returned to the caller unchanged.
/*static*/ StatusOr<Time> TimeUtils::parseTime(const std::string &str) {
  auto reader = DatetimeReader::makeTimeReader();
  return reader.readTime(str);
}

} // namespace time
} // namespace nebula
71 changes: 20 additions & 51 deletions src/common/time/TimeUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,30 +46,27 @@ class TimeUtils {
return Status::OK();
}

// TODO(shylock) support more format
static StatusOr<DateTime> parseDateTime(const std::string &str) {
std::tm tm;
std::istringstream ss(str);
ss >> std::get_time(&tm, "%Y-%m-%dT%H:%M:%S");
if (ss.fail()) {
std::istringstream ss2(str);
ss2 >> std::get_time(&tm, "%Y-%m-%d %H:%M:%S");
if (ss2.fail()) {
return Status::Error();
}
template <
typename D,
typename = std::enable_if_t<std::is_same<D, Time>::value || std::is_same<D, DateTime>::value>>
static Status validateTime(const D &time) {
if (time.hour < 0 || time.hour >= 24) {
return Status::Error("Invalid hour number %d.", time.hour);
}
DateTime dt;
dt.year = tm.tm_year + 1900;
dt.month = tm.tm_mon + 1;
dt.day = tm.tm_mday;
dt.hour = tm.tm_hour;
dt.minute = tm.tm_min;
dt.sec = tm.tm_sec;
dt.microsec = 0;
NG_RETURN_IF_ERROR(validateDate(dt));
return dt;
if (time.minute < 0 || time.minute >= 60) {
return Status::Error("Invalid minute number %d.", time.minute);
}
if (time.sec < 0 || time.sec >= 60) {
return Status::Error("Invalid second number %d.", time.sec);
}
if (time.microsec < 0 || time.microsec >= 1000000) {
return Status::Error("Invalid microsecond number %d.", time.microsec);
}
return Status::OK();
}

static StatusOr<DateTime> parseDateTime(const std::string &str);

static StatusOr<DateTime> dateTimeFromMap(const Map &m);

// utc + offset = local
Expand Down Expand Up @@ -119,21 +116,7 @@ class TimeUtils {

static StatusOr<Date> dateFromMap(const Map &m);

// TODO(shylock) support more format
static StatusOr<Date> parseDate(const std::string &str) {
std::tm tm;
std::istringstream ss(str);
ss >> std::get_time(&tm, "%Y-%m-%d");
if (ss.fail()) {
return Status::Error();
}
Date d;
d.year = tm.tm_year + 1900;
d.month = tm.tm_mon + 1;
d.day = tm.tm_mday;
NG_RETURN_IF_ERROR(validateDate(d));
return d;
}
static StatusOr<Date> parseDate(const std::string &str);

static StatusOr<Date> localDate() {
Date d;
Expand Down Expand Up @@ -169,21 +152,7 @@ class TimeUtils {

static StatusOr<Time> timeFromMap(const Map &m);

// TODO(shylock) support more format
static StatusOr<Time> parseTime(const std::string &str) {
std::tm tm;
std::istringstream ss(str);
ss >> std::get_time(&tm, "%H:%M:%S");
if (ss.fail()) {
return Status::Error();
}
Time t;
t.hour = tm.tm_hour;
t.minute = tm.tm_min;
t.sec = tm.tm_sec;
t.microsec = 0;
return t;
}
static StatusOr<Time> parseTime(const std::string &str);

// utc + offset = local
static Time timeToUTC(const Time &time) {
Expand Down
1 change: 0 additions & 1 deletion src/common/time/TimezoneInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ namespace time {
}
if (!FLAGS_timezone_name.empty()) {
if (FLAGS_timezone_name.front() == ':') {
NG_RETURN_IF_ERROR(Timezone::init());
return globalTimezone.loadFromDb(
std::string(FLAGS_timezone_name.begin() + 1, FLAGS_timezone_name.end()));
} else {
Expand Down
Loading

0 comments on commit 393e169

Please sign in to comment.