Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/bison datetime parser #3179

Merged
merged 36 commits into from
Dec 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
1d36ecd
Implement datetime parser by bison.
Shylock-Hg Oct 20, 2021
a76c8a2
Remove unused code.
Shylock-Hg Oct 20, 2021
d82f130
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg Oct 20, 2021
2cd7805
Add the time parser.
Shylock-Hg Aug 31, 2021
e09c809
Fix case.
Shylock-Hg Sep 27, 2021
477ecd3
Tune zone.
Shylock-Hg Oct 21, 2021
c927054
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg Oct 21, 2021
864ff5e
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Oct 22, 2021
1af62b6
Remove debug code.
Shylock-Hg Oct 22, 2021
2f15b7c
Fix leak.
Shylock-Hg Oct 22, 2021
1d3da50
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Oct 22, 2021
050e0a9
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Oct 25, 2021
7ee9a62
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg Oct 27, 2021
5eb1b5b
Fix comments.
Shylock-Hg Oct 27, 2021
1963693
Fix comments.
Shylock-Hg Oct 27, 2021
cfd5541
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Oct 28, 2021
d42f522
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 15, 2021
dab0dce
Fix license header
Shylock-Hg Nov 15, 2021
1f970ab
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg Nov 15, 2021
ccde82f
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 16, 2021
f015d99
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 17, 2021
3f4a189
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg Nov 19, 2021
fed7316
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 19, 2021
f970638
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg Nov 24, 2021
40303be
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 24, 2021
4f482b6
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 25, 2021
cb53a74
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 26, 2021
c554df0
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Nov 30, 2021
29c40d3
Resolve conflict.
Shylock-Hg Nov 30, 2021
2b5164d
Merge branch 'master' of https://github.com/vesoft-inc/nebula into fe…
Shylock-Hg Dec 2, 2021
bcac0e0
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Dec 6, 2021
828cc3e
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Dec 8, 2021
7a39eff
Merge branch 'master' into feature/bison-datetime-parser
Shylock-Hg Dec 10, 2021
3991f39
Merge branch 'master' into feature/bison-datetime-parser
yixinglu Dec 13, 2021
f1bae68
Merge branch 'master' into feature/bison-datetime-parser
yixinglu Dec 13, 2021
34cb561
Merge branch 'master' into feature/bison-datetime-parser
yixinglu Dec 13, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ macro(nebula_add_library name type)
# hbase_thrift_generator
parser_target
wkt_parser_target
datetime_parser_target
)
endmacro()

Expand Down
1 change: 1 addition & 0 deletions src/codec/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ set(CODEC_TEST_LIBS
$<TARGET_OBJECTS:wkt_wkb_io_obj>
$<TARGET_OBJECTS:agg_function_manager_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:version_obj>
$<TARGET_OBJECTS:ssl_obj>
)
Expand Down
15 changes: 12 additions & 3 deletions src/common/datatypes/Date.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@

namespace nebula {

// Returns the fractional tail of a printed number, starting at (and
// including) the first '.', e.g. "0.000341000" -> ".000341000".
// Yields an empty string when `number` contains no '.'.
static inline std::string decimal(const std::string& number) {
  const auto dotPos = number.find('.');
  if (dotPos == std::string::npos) {
    return std::string();
  }
  return number.substr(dotPos);
}

const int64_t kDaysSoFar[] = {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365};
const int64_t kLeapDaysSoFar[] = {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366};

Expand Down Expand Up @@ -98,22 +103,26 @@ std::string Date::toString() const {
}

std::string Time::toString() const {
auto microsecStr = folly::stringPrintf("%.9f", static_cast<uint32_t>(microsec) / 1000000.0);
auto decimalPart = decimal(microsecStr);
// It's in current timezone already
return folly::stringPrintf("%02d:%02d:%02d.%06d", hour, minute, sec, microsec);
return folly::stringPrintf("%02d:%02d:%02d%s", hour, minute, sec, decimalPart.c_str());
}

std::string DateTime::toString() const {
auto microsecStr = folly::stringPrintf("%.9f", static_cast<uint32_t>(microsec) / 1000000.0);
auto decimalPart = decimal(microsecStr);
// It's in current timezone already
return folly::stringPrintf(
"%hd-%02hhu-%02hhu"
"T%02hhu:%02hhu:%02hhu.%u",
"T%02hhu:%02hhu:%02hhu%s",
static_cast<int16_t>(year),
static_cast<uint8_t>(month),
static_cast<uint8_t>(day),
static_cast<uint8_t>(hour),
static_cast<uint8_t>(minute),
static_cast<uint8_t>(sec),
static_cast<uint32_t>(microsec));
decimalPart.c_str());
}

} // namespace nebula
Expand Down
13 changes: 13 additions & 0 deletions src/common/datatypes/Date.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,19 @@ struct DateTime {
sec = 0;
microsec = 0;
}
// Builds a DateTime by copying year/month/day from `date` and
// hour/minute/sec/microsec from `time`.
explicit DateTime(const Date& date, const Time& time) {
year = date.year;
month = date.month;
day = date.day;
hour = time.hour;
minute = time.minute;
sec = time.sec;
microsec = time.microsec;
}

// Returns a Date constructed from this value's year, month and day.
Date date() const { return Date(year, month, day); }

// Returns a Time constructed from this value's hour, minute, sec and microsec.
Time time() const { return Time(hour, minute, sec, microsec); }

void clear() {
year = 0;
Expand Down
3 changes: 3 additions & 0 deletions src/common/datatypes/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ nebula_add_test(
$<TARGET_OBJECTS:agg_function_manager_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:fs_obj>
$<TARGET_OBJECTS:wkt_wkb_io_obj>
LIBRARIES
Expand All @@ -65,6 +66,7 @@ nebula_add_test(
$<TARGET_OBJECTS:wkt_wkb_io_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:fs_obj>
LIBRARIES
gtest
Expand Down Expand Up @@ -110,6 +112,7 @@ nebula_add_test(
$<TARGET_OBJECTS:function_manager_obj>
$<TARGET_OBJECTS:agg_function_manager_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:fs_obj>
LIBRARIES
Expand Down
35 changes: 20 additions & 15 deletions src/common/datatypes/test/ValueToJsonTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ TEST(ValueToJson, list) {
Time(13, 30, 15, 0), // time
DateTime(2021, 12, 21, 13, 30, 15, 0)})); // datetime
dynamic expectedListJsonObj = dynamic::array(
2, 2.33, true, "str", "2021-12-21", "13:30:15.000000Z", "2021-12-21T13:30:15.0Z");
2, 2.33, true, "str", "2021-12-21", "13:30:15.000000000Z", "2021-12-21T13:30:15.000000000Z");
ASSERT_EQ(expectedListJsonObj, list1.toJson());

dynamic expectedListMetaObj = dynamic::array(nullptr,
Expand All @@ -158,7 +158,7 @@ TEST(ValueToJson, Set) {
Time(13, 30, 15, 0), // time
DateTime(2021, 12, 21, 13, 30, 15, 0)})); // datetime
dynamic expectedSetJsonObj = dynamic::array(
2, 2.33, true, "str", "2021-12-21", "13:30:15.000000Z", "2021-12-21T13:30:15.0Z");
2, 2.33, true, "str", "2021-12-21", "13:30:15.000000000Z", "2021-12-21T13:30:15.000000000Z");
// The underlying data structure is unordered_set, so sort before the comparison
auto actualJson = set.toJson();
std::sort(actualJson.begin(), actualJson.end());
Expand All @@ -179,7 +179,7 @@ TEST(ValueToJson, map) {
{"key7", DateTime(2021, 12, 21, 13, 30, 15, 0)}})); // datetime
dynamic expectedMapJsonObj =
dynamic::object("key1", 2)("key2", 2.33)("key3", true)("key4", "str")("key5", "2021-12-21")(
"key6", "13:30:15.000000Z")("key7", "2021-12-21T13:30:15.0Z");
"key6", "13:30:15.000000000Z")("key7", "2021-12-21T13:30:15.000000000Z");
ASSERT_EQ(expectedMapJsonObj, map.toJson());
// Skip meta json comparison since nested dynamic objects cannot be sorted, e.g. a dynamic::object
// inside a dynamic::array
Expand All @@ -194,18 +194,23 @@ TEST(ValueToJson, dataset) {
Date(2021, 12, 21), // date
Time(13, 30, 15, 0), // time
DateTime(2021, 12, 21, 13, 30, 15, 0)}));
dynamic expectedDatasetJsonObj = dynamic::array(dynamic::object(
"row",
dynamic::array(
2, 2.33, true, "str", "2021-12-21", "13:30:15.000000Z", "2021-12-21T13:30:15.0Z"))(
"meta",
dynamic::array(nullptr,
nullptr,
nullptr,
nullptr,
dynamic::object("type", "date"),
dynamic::object("type", "time"),
dynamic::object("type", "datetime"))));
dynamic expectedDatasetJsonObj =
dynamic::array(dynamic::object("row",
dynamic::array(2,
2.33,
true,
"str",
"2021-12-21",
"13:30:15.000000000Z",
"2021-12-21T13:30:15.000000000Z"))(
"meta",
dynamic::array(nullptr,
nullptr,
nullptr,
nullptr,
dynamic::object("type", "date"),
dynamic::object("type", "time"),
dynamic::object("type", "datetime"))));
ASSERT_EQ(expectedDatasetJsonObj, dataset.toJson());
}

Expand Down
3 changes: 3 additions & 0 deletions src/common/expression/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set(expression_test_common_libs
$<TARGET_OBJECTS:stats_obj>
$<TARGET_OBJECTS:datatypes_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:common_thrift_obj>
$<TARGET_OBJECTS:thrift_obj>
$<TARGET_OBJECTS:thread_obj>
Expand Down Expand Up @@ -113,6 +114,7 @@ nebula_add_executable(
$<TARGET_OBJECTS:wkt_wkb_io_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:fs_obj>
$<TARGET_OBJECTS:agg_function_manager_obj>
LIBRARIES
Expand All @@ -136,6 +138,7 @@ nebula_add_executable(
$<TARGET_OBJECTS:agg_function_manager_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:fs_obj>
LIBRARIES
follybenchmark
Expand Down
1 change: 1 addition & 0 deletions src/common/function/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ nebula_add_test(
$<TARGET_OBJECTS:base_obj>
$<TARGET_OBJECTS:datatypes_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:time_obj>
$<TARGET_OBJECTS:fs_obj>
LIBRARIES
Expand Down
9 changes: 5 additions & 4 deletions src/common/function/test/FunctionManagerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ TEST_F(FunctionManagerTest, functionCall) {
TEST_FUNCTION(toString, args_["toString_bool"], "true");
TEST_FUNCTION(toString, args_["string"], "AbcDeFG");
TEST_FUNCTION(toString, args_["date"], "1984-10-11");
TEST_FUNCTION(toString, args_["datetime"], "1984-10-11T12:31:14.341");
TEST_FUNCTION(toString, args_["datetime"], "1984-10-11T12:31:14.000341000");
TEST_FUNCTION(toString, args_["nullvalue"], Value::kNullValue);
}
{
Expand All @@ -327,8 +327,9 @@ TEST_F(FunctionManagerTest, functionCall) {
DateTime dateTime(2021, 10, 31, 8, 5, 34, 29);
TEST_FUNCTION(concat, std::vector<Value>({"hello", 1, "world"}), "hello1world");
TEST_FUNCTION(concat, std::vector<Value>({true, 2, date}), "true22021-10-31");
TEST_FUNCTION(concat, std::vector<Value>({true, dateTime}), "true2021-10-31T08:05:34.29");
TEST_FUNCTION(concat, std::vector<Value>({2.3, time}), "2.309:39:21.000012");
TEST_FUNCTION(
concat, std::vector<Value>({true, dateTime}), "true2021-10-31T08:05:34.000029000");
TEST_FUNCTION(concat, std::vector<Value>({2.3, time}), "2.309:39:21.000012000");
TEST_FUNCTION(concat, args_["two"], "24");
TEST_FUNCTION(concat_ws, std::vector<Value>({",", 1}), "1");
TEST_FUNCTION(concat_ws, std::vector<Value>({"@", 1, "world"}), "1@world");
Expand All @@ -337,7 +338,7 @@ TEST_F(FunctionManagerTest, functionCall) {
"1ABtrueABworld");
TEST_FUNCTION(concat_ws,
std::vector<Value>({".", 1, true, Value::kNullValue, "world", time}),
"1.true.world.09:39:21.000012");
"1.true.world.09:39:21.000012000");
}
{
TEST_FUNCTION(toBoolean, args_["int"], Value::kNullBadType);
Expand Down
1 change: 1 addition & 0 deletions src/common/geo/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ nebula_add_test(
$<TARGET_OBJECTS:base_obj>
$<TARGET_OBJECTS:datatypes_obj>
$<TARGET_OBJECTS:time_utils_obj>
$<TARGET_OBJECTS:datetime_parser_obj>
$<TARGET_OBJECTS:time_obj>
LIBRARIES
gtest
Expand Down
2 changes: 2 additions & 0 deletions src/common/time/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,6 @@ nebula_add_library(
TimeConversion.cpp
)

nebula_add_subdirectory(parser)

nebula_add_subdirectory(test)
20 changes: 20 additions & 0 deletions src/common/time/TimeUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "common/fs/FileUtils.h"
#include "common/time/TimezoneInfo.h"
#include "common/time/parser/DatetimeReader.h"

namespace nebula {
namespace time {
Expand Down Expand Up @@ -170,5 +171,24 @@ StatusOr<Value> TimeUtils::toTimestamp(const Value &val) {
return timestamp;
}

// Parses `str` into a DateTime with the reader obtained from
// DatetimeReader::makeDateTimeReader(). The read result is checked with
// NG_RETURN_IF_ERROR (expected to return early on failure) before its value
// is handed back.
/*static*/ StatusOr<DateTime> TimeUtils::parseDateTime(const std::string &str) {
  auto reader = DatetimeReader::makeDateTimeReader();
  auto parsed = reader.readDatetime(str);
  NG_RETURN_IF_ERROR(parsed);
  return parsed.value();
}

// Parses `str` into a Date with the reader obtained from
// DatetimeReader::makeDateReader(). The read result is checked with
// NG_RETURN_IF_ERROR (expected to return early on failure) before its value
// is handed back.
/*static*/ StatusOr<Date> TimeUtils::parseDate(const std::string &str) {
  auto reader = DatetimeReader::makeDateReader();
  auto parsed = reader.readDate(str);
  NG_RETURN_IF_ERROR(parsed);
  return parsed.value();
}

// Parses `str` into a Time with the reader obtained from
// DatetimeReader::makeTimeReader(); whatever readTime() produces (value or
// error) is returned to the caller as-is.
/*static*/ StatusOr<Time> TimeUtils::parseTime(const std::string &str) {
  auto reader = DatetimeReader::makeTimeReader();
  return reader.readTime(str);
}

} // namespace time
} // namespace nebula
71 changes: 20 additions & 51 deletions src/common/time/TimeUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,30 +46,27 @@ class TimeUtils {
return Status::OK();
}

// TODO(shylock) support more format
static StatusOr<DateTime> parseDateTime(const std::string &str) {
std::tm tm;
std::istringstream ss(str);
ss >> std::get_time(&tm, "%Y-%m-%dT%H:%M:%S");
if (ss.fail()) {
std::istringstream ss2(str);
ss2 >> std::get_time(&tm, "%Y-%m-%d %H:%M:%S");
if (ss2.fail()) {
return Status::Error();
}
template <
typename D,
typename = std::enable_if_t<std::is_same<D, Time>::value || std::is_same<D, DateTime>::value>>
static Status validateTime(const D &time) {
if (time.hour < 0 || time.hour >= 24) {
return Status::Error("Invalid hour number %d.", time.hour);
}
DateTime dt;
dt.year = tm.tm_year + 1900;
dt.month = tm.tm_mon + 1;
dt.day = tm.tm_mday;
dt.hour = tm.tm_hour;
dt.minute = tm.tm_min;
dt.sec = tm.tm_sec;
dt.microsec = 0;
NG_RETURN_IF_ERROR(validateDate(dt));
return dt;
if (time.minute < 0 || time.minute >= 60) {
return Status::Error("Invalid minute number %d.", time.minute);
}
if (time.sec < 0 || time.sec >= 60) {
return Status::Error("Invalid second number %d.", time.sec);
}
if (time.microsec < 0 || time.microsec >= 1000000) {
return Status::Error("Invalid microsecond number %d.", time.microsec);
}
return Status::OK();
}

static StatusOr<DateTime> parseDateTime(const std::string &str);

static StatusOr<DateTime> dateTimeFromMap(const Map &m);

// utc + offset = local
Expand Down Expand Up @@ -119,21 +116,7 @@ class TimeUtils {

static StatusOr<Date> dateFromMap(const Map &m);

// TODO(shylock) support more format
static StatusOr<Date> parseDate(const std::string &str) {
std::tm tm;
std::istringstream ss(str);
ss >> std::get_time(&tm, "%Y-%m-%d");
if (ss.fail()) {
return Status::Error();
}
Date d;
d.year = tm.tm_year + 1900;
d.month = tm.tm_mon + 1;
d.day = tm.tm_mday;
NG_RETURN_IF_ERROR(validateDate(d));
return d;
}
static StatusOr<Date> parseDate(const std::string &str);

static StatusOr<Date> localDate() {
Date d;
Expand Down Expand Up @@ -169,21 +152,7 @@ class TimeUtils {

static StatusOr<Time> timeFromMap(const Map &m);

// TODO(shylock) support more format
static StatusOr<Time> parseTime(const std::string &str) {
std::tm tm;
std::istringstream ss(str);
ss >> std::get_time(&tm, "%H:%M:%S");
if (ss.fail()) {
return Status::Error();
}
Time t;
t.hour = tm.tm_hour;
t.minute = tm.tm_min;
t.sec = tm.tm_sec;
t.microsec = 0;
return t;
}
static StatusOr<Time> parseTime(const std::string &str);

// utc + offset = local
static Time timeToUTC(const Time &time) {
Expand Down
1 change: 0 additions & 1 deletion src/common/time/TimezoneInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ namespace time {
}
if (!FLAGS_timezone_name.empty()) {
if (FLAGS_timezone_name.front() == ':') {
NG_RETURN_IF_ERROR(Timezone::init());
return globalTimezone.loadFromDb(
std::string(FLAGS_timezone_name.begin() + 1, FLAGS_timezone_name.end()));
} else {
Expand Down
Loading