From 2748f10c48feb255a61e48b48fd708a4338ec5f5 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 26 Jan 2022 21:58:44 +0800 Subject: [PATCH] Add unittests for str_to_date, fix #3556, #3557 (#3581) (#3934) --- dbms/src/Common/MyTime.cpp | 71 ++++++++----------- dbms/src/Common/tests/gtest_mytime.cpp | 89 ++++++++++++++++++++++++ dbms/src/Functions/FunctionsConversion.h | 17 ++++- 3 files changed, 133 insertions(+), 44 deletions(-) diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index 1436ad08342..4e9aacf5dbb 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -72,7 +72,7 @@ std::vector parseDateFormat(String format) { format = Poco::trimInPlace(format); - if (format.size() == 0) + if (format.empty()) return {}; if (!std::isdigit(format[0]) || !std::isdigit(format[format.size() - 1])) @@ -530,7 +530,7 @@ Field parseMyDateTime(const String & str, int8_t fsp) { // if tz_sign is empty, it's sure that the string literal contains timezone (e.g., 2010-10-10T10:10:10Z), // therefore we could safely skip this branch. - if (!noAbsorb(seps) && !(tz_minute != "" && tz_sep == "")) + if (!noAbsorb(seps) && !(!tz_minute.empty() && tz_sep.empty())) { // we can't absorb timezone if there is no separate between tz_hour and tz_minute if (!tz_hour.empty()) @@ -852,9 +852,8 @@ size_t maxFormattedDateTimeStringLength(const String & format) { size_t result = 0; bool in_pattern_match = false; - for (size_t i = 0; i < format.size(); i++) + for (char x : format) { - char x = format[i]; if (in_pattern_match) { switch (x) @@ -969,7 +968,6 @@ void MyTimeBase::check(bool allow_zero_in_date, bool allow_invalid_date) const { throw TiFlashException("Incorrect datetime value", Errors::Types::WrongValue); } - return; } bool toCoreTimeChecked(const UInt64 & year, const UInt64 & month, const UInt64 & day, const UInt64 & hour, const UInt64 & minute, @@ -990,9 +988,8 @@ bool toCoreTimeChecked(const UInt64 & year, const UInt64 & month, const UInt64 & MyDateTimeFormatter::MyDateTimeFormatter(const String & layout) { bool in_pattern_match = false; - for (size_t i = 0; i < layout.size(); i++) + for (char x : layout) { - char x = layout[i]; if (in_pattern_match) { switch (x) @@ -1227,7 +1224,9 @@ struct MyDateTimeParser::Context // The pos we are parsing from size_t pos = 0; - Context(StringRef view_) : view(std::move(view_)) {} + explicit Context(StringRef view_) + : view(std::move(view_)) + {} }; // Try to parse digits with number of `limit` starting from view[pos] @@ -1270,18 +1269,18 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) { // Use temp_pos instead of changing `ctx.pos` directly in case of parsing failure size_t temp_pos = ctx.pos; - auto checkIfEnd = [&temp_pos, &ctx]() -> ParseState { + auto check_if_end = [&temp_pos, &ctx]() -> ParseState { // To the end if (temp_pos == ctx.view.size) return ParseState::END_OF_FILE; return ParseState::NORMAL; }; - auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { + auto skipWhitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState { while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) ++temp_pos; - return checkIfEnd(); + return check_if_end(); }; - auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { + auto parse_sep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { if (skipWhitespaces() == ParseState::END_OF_FILE) return ParseState::END_OF_FILE; // parse ":" @@ -1290,7 +1289,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) temp_pos += 1; // move forward return ParseState::NORMAL; }; - auto tryParse = [&]() -> ParseState { + auto try_parse = [&]() -> ParseState { ParseState state = ParseState::NORMAL; /// Note that we should update `time` as soon as possible, or we /// can not get correct result for incomplete input like "12:13" @@ -1311,7 +1310,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) time.hour = hour; temp_pos += step; // move forward - if (state = parseSep(); state != ParseState::NORMAL) + if (state = parse_sep(); state != ParseState::NORMAL) return state; int32_t minute = 0; @@ -1323,7 +1322,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) time.minute = minute; temp_pos += step; // move forward - if (state = parseSep(); state != ParseState::NORMAL) + if (state = parse_sep(); state != ParseState::NORMAL) return state; int32_t second = 0; @@ -1362,7 +1361,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) temp_pos += 2; // move forward return ParseState::NORMAL; }; - if (auto state = tryParse(); state == ParseState::FAIL) + if (auto state = try_parse(); state == ParseState::FAIL) return false; // Other state, forward the `ctx.pos` and return true ctx.pos = temp_pos; @@ -1374,18 +1373,18 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) { // Use temp_pos instead of changing `ctx.pos` directly in case of parsing failure size_t temp_pos = ctx.pos; - auto checkIfEnd = [&temp_pos, &ctx]() -> ParseState { + auto check_if_end = [&temp_pos, &ctx]() -> ParseState { // To the end if (temp_pos == ctx.view.size) return ParseState::END_OF_FILE; return ParseState::NORMAL; }; - auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState { + auto skipWhitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState { while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos])) ++temp_pos; - return checkIfEnd(); + return check_if_end(); }; - auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { + auto parse_sep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState { if (skipWhitespaces() == ParseState::END_OF_FILE) return ParseState::END_OF_FILE; // parse ":" @@ -1394,7 +1393,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) temp_pos += 1; // move forward return ParseState::NORMAL; }; - auto tryParse = [&]() -> ParseState { + auto try_parse = [&]() -> ParseState { ParseState state = ParseState::NORMAL; /// Note that we should update `time` as soon as possible, or we /// can not get correct result for incomplete input like "12:13" @@ -1411,7 +1410,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) time.hour = hour; temp_pos += step; // move forward - if (state = parseSep(); state != ParseState::NORMAL) + if (state = parse_sep(); state != ParseState::NORMAL) return state; int32_t minute = 0; @@ -1423,7 +1422,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) time.minute = minute; temp_pos += step; // move forward - if (state = parseSep(); state != ParseState::NORMAL) + if (state = parse_sep(); state != ParseState::NORMAL) return state; int32_t second = 0; @@ -1437,7 +1436,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time) return ParseState::NORMAL; }; - if (auto state = tryParse(); state == ParseState::FAIL) + if (auto state = try_parse(); state == ParseState::FAIL) return false; // Other state, forward the `ctx.pos` and return true ctx.pos = temp_pos; @@ -1482,6 +1481,9 @@ MyDateTimeParser::MyDateTimeParser(String format_) : format(std::move(format_)) }); break; } + case 'm': + //"%m": Month, numeric (00..12) + [[fallthrough]]; case 'c': { //"%c": Month, numeric (0..12) @@ -1523,9 +1525,9 @@ MyDateTimeParser::MyDateTimeParser(String format_) : format(std::move(format_)) time.micro_second = 0; return true; } - // The siffix '0' can be ignored. + // The suffix '0' can be ignored. // "9" means 900000 - while (ms > 0 && ms * 10 < 1000000) + for (size_t i = step; i < 6; i++) { ms *= 10; } @@ -1621,19 +1623,6 @@ MyDateTimeParser::MyDateTimeParser(String format_) : format(std::move(format_)) }); break; } - case 'm': - { - //"%m": Month, numeric (00..12) - parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool { - auto [step, month] = parseNDigits(ctx.view, ctx.pos, 2); - if (step == 0 || month > 12) - return false; - time.month = month; - ctx.pos += step; - return true; - }); - break; - } case 'S': //"%S": Seconds (00..59) [[fallthrough]]; @@ -1880,7 +1869,7 @@ std::optional MyDateTimeParser::parseAsPackedUInt(const StringRef & str_ MyDateTimeParser::Context ctx(str_view); // TODO: can we return warnings to TiDB? - for (auto & f : parsers) + for (const auto & f : parsers) { // Ignore all prefix white spaces before each pattern match (TODO: handle unicode space?) while (ctx.pos < str_view.size && isWhitespaceASCII(str_view.data[ctx.pos])) @@ -1889,7 +1878,7 @@ std::optional MyDateTimeParser::parseAsPackedUInt(const StringRef & str_ if (ctx.pos == ctx.view.size) break; - if (f(ctx, my_time) != true) + if (!f(ctx, my_time)) { #ifndef NDEBUG LOG_TRACE(&Logger::get("MyDateTimeParser"), diff --git a/dbms/src/Common/tests/gtest_mytime.cpp b/dbms/src/Common/tests/gtest_mytime.cpp index 31647d0787c..376ec0bf534 100644 --- a/dbms/src/Common/tests/gtest_mytime.cpp +++ b/dbms/src/Common/tests/gtest_mytime.cpp @@ -393,7 +393,96 @@ try // {"Tuesday 52 2001", "%W %V %Y", std::nullopt}, // // {"Tuesday 52 2001", "%W %V %x", std::nullopt}, // // {"Tuesday 52 2001", "%W %u %x", std::nullopt}, // + + // Test cases for %b + {"10/JAN/2010", "%d/%b/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}}, // Right spill, case-insensitive + {"10/FeB/2010", "%d/%b/%Y", MyDateTime{2010, 2, 10, 0, 0, 0, 0}}, + {"10/MAr/2010", "%d/%b/%Y", MyDateTime{2010, 3, 10, 0, 0, 0, 0}}, + {"10/ApR/2010", "%d/%b/%Y", MyDateTime{2010, 4, 10, 0, 0, 0, 0}}, + {"10/mAY/2010", "%d/%b/%Y", MyDateTime{2010, 5, 10, 0, 0, 0, 0}}, + {"10/JuN/2010", "%d/%b/%Y", MyDateTime{2010, 6, 10, 0, 0, 0, 0}}, + {"10/JUL/2010", "%d/%b/%Y", MyDateTime{2010, 7, 10, 0, 0, 0, 0}}, + {"10/Aug/2010", "%d/%b/%Y", MyDateTime{2010, 8, 10, 0, 0, 0, 0}}, + {"10/seP/2010", "%d/%b/%Y", MyDateTime{2010, 9, 10, 0, 0, 0, 0}}, + {"10/Oct/2010", "%d/%b/%Y", MyDateTime{2010, 10, 10, 0, 0, 0, 0}}, + {"10/NOV/2010", "%d/%b/%Y", MyDateTime{2010, 11, 10, 0, 0, 0, 0}}, + {"10/DEC/2010", "%d/%b/%Y", MyDateTime{2010, 12, 10, 0, 0, 0, 0}}, + {"10/January/2010", "%d/%b/%Y", std::nullopt}, // Test full spilling + + // Test cases for %M + {"10/January/2010", "%d/%M/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}}, // Test full spilling + {"10/February/2010", "%d/%M/%Y", MyDateTime{2010, 2, 10, 0, 0, 0, 0}}, + {"10/March/2010", "%d/%M/%Y", MyDateTime{2010, 3, 10, 0, 0, 0, 0}}, + {"10/April/2010", "%d/%M/%Y", MyDateTime{2010, 4, 10, 0, 0, 0, 0}}, + {"10/May/2010", "%d/%M/%Y", MyDateTime{2010, 5, 10, 0, 0, 0, 0}}, + {"10/June/2010", "%d/%M/%Y", MyDateTime{2010, 6, 10, 0, 0, 0, 0}}, + {"10/July/2010", "%d/%M/%Y", MyDateTime{2010, 7, 10, 0, 0, 0, 0}}, + {"10/August/2010", "%d/%M/%Y", MyDateTime{2010, 8, 10, 0, 0, 0, 0}}, + {"10/September/2010", "%d/%M/%Y", MyDateTime{2010, 9, 10, 0, 0, 0, 0}}, + {"10/October/2010", "%d/%M/%Y", MyDateTime{2010, 10, 10, 0, 0, 0, 0}}, + {"10/November/2010", "%d/%M/%Y", MyDateTime{2010, 11, 10, 0, 0, 0, 0}}, + {"10/December/2010", "%d/%M/%Y", MyDateTime{2010, 12, 10, 0, 0, 0, 0}}, + + // Test cases for %c + // {"10/0/2010", "%d/%c/%Y", MyDateTime{2010, 0, 10, 0, 0, 0, 0}}, // TODO: Need Check NO_ZERO_DATE + {"10/1/2010", "%d/%c/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}}, + {"10/01/2010", "%d/%c/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}}, + {"10/001/2010", "%d/%c/%Y", std::nullopt}, + {"10/13/2010", "%d/%c/%Y", std::nullopt}, + {"10/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 10, 0, 0, 0, 0}}, + + // Test cases for %d, %e + // {"0/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 0, 0, 0, 0, 0}}, // TODO: Need Check NO_ZERO_DATE + {"1/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 1, 0, 0, 0, 0}}, + {"05/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 5, 0, 0, 0, 0}}, + {"05/12/2010", "%e/%c/%Y", MyDateTime{2010, 12, 5, 0, 0, 0, 0}}, + {"31/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 31, 0, 0, 0, 0}}, + {"32/12/2010", "%d/%c/%Y", std::nullopt}, + {"30/11/2010", "%d/%c/%Y", MyDateTime{2010, 11, 30, 0, 0, 0, 0}}, + {"31/11/2010", "%e/%c/%Y", MyDateTime{2010, 11, 31, 0, 0, 0, 0}}, + {"28/2/2010", "%e/%c/%Y", MyDateTime{2010, 2, 28, 0, 0, 0, 0}}, + {"29/2/2010", "%e/%c/%Y", MyDateTime{2010, 2, 29, 0, 0, 0, 0}}, + {"29/2/2020", "%e/%c/%Y", MyDateTime{2020, 2, 29, 0, 0, 0, 0}}, + + // Test cases for %Y + // {"1/12/0000", "%d/%c/%Y", MyDateTime{0000, 12, 1, 0, 0, 0, 0}}, // TODO: Need Check NO_ZERO_DATE + {"1/12/01", "%d/%c/%Y", MyDateTime{2001, 12, 1, 0, 0, 0, 0}}, + {"1/12/0001", "%d/%c/%Y", MyDateTime{0001, 12, 1, 0, 0, 0, 0}}, + {"1/12/2020", "%d/%c/%Y", MyDateTime{2020, 12, 1, 0, 0, 0, 0}}, + {"1/12/9999", "%d/%c/%Y", MyDateTime{9999, 12, 1, 0, 0, 0, 0}}, + + // Test cases for %f + {"01,5,2013 999999", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 999999}}, + {"01,5,2013 0", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 0}}, + {"01,5,2013 9999990000000", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 999999}}, + {"01,5,2013 1", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 100000}}, + {"01,5,2013 1230", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 123000}}, + {"01,5,2013 01", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 10000}}, // issue 3556 + + // Test cases for %h, %I, %l + {"00:11:12 ", "%h:%i:%S ", std::nullopt}, // 0 is not a valid number of %h + {"01:11:12 ", "%I:%i:%S ", MyDateTime{0, 0, 0, 01, 11, 12, 0}}, + {"12:11:12 ", "%l:%i:%S ", MyDateTime{0, 0, 0, 0, 11, 12, 0}}, + + // Test cases for %k, %H + {"00:11:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 11, 12, 0}}, + {"01:11:12 ", "%k:%i:%S ", MyDateTime{0, 0, 0, 01, 11, 12, 0}}, + {"12:11:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 12, 11, 12, 0}}, + {"24:11:12 ", "%k:%i:%S ", std::nullopt}, + + // Test cases for %i + {"00:00:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 00, 12, 0}}, + {"00:01:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 01, 12, 0}}, + {"00:59:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 59, 12, 0}}, + {"00:60:12 ", "%H:%i:%S ", std::nullopt}, + + // Test cases for %s, %S + {"00:00:00 ", "%H:%i:%s ", MyDateTime{0, 0, 0, 00, 00, 00, 0}}, + {"00:01:01 ", "%H:%i:%s ", MyDateTime{0, 0, 0, 00, 01, 01, 0}}, + {"00:59:59 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 59, 59, 0}}, + {"00:59:60 ", "%H:%i:%S ", std::nullopt}, }; + auto result_formatter = MyDateTimeFormatter("%Y/%m/%d %T.%f"); size_t idx = 0; for (const auto & [input, fmt, expected] : cases) diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index 3e7abeb2e55..3c1840b86d2 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -1652,7 +1652,7 @@ class FunctionStrToDate : public IFunction const ColumnString * input_raw_col = nullptr; if (input_column->isColumnNullable()) { - auto null_input_column = checkAndGetColumn(input_column.get()); + const auto * null_input_column = checkAndGetColumn(input_column.get()); input_raw_col = checkAndGetColumn(null_input_column->getNestedColumnPtr().get()); } else @@ -1702,7 +1702,7 @@ class FunctionStrToDate : public IFunction const ColumnString * format_raw_col = nullptr; if (format_column->isColumnNullable()) { - auto null_format_column = checkAndGetColumn(format_column.get()); + const auto * null_format_column = checkAndGetColumn(format_column.get()); format_raw_col = checkAndGetColumn(null_format_column->getNestedColumnPtr().get()); } else @@ -1710,6 +1710,14 @@ class FunctionStrToDate : public IFunction format_raw_col = checkAndGetColumn(format_column.get()); } + String str_input_const; + StringRef str_ref; + if (input_column->isColumnConst()) + { + const auto & input_const = checkAndGetColumnConst(input_column.get()); + str_input_const = input_const->getValue(); + str_ref = StringRef(str_input_const); + } for (size_t i = 0; i < num_rows; ++i) { // Set null for either null input or null format @@ -1734,7 +1742,10 @@ class FunctionStrToDate : public IFunction const auto format_ref = format_raw_col->getDataAt(i); auto parser = MyDateTimeParser(format_ref.toString()); - const auto str_ref = input_raw_col->getDataAt(i); + if (!input_column->isColumnConst()) + { + str_ref = input_raw_col->getDataAt(i); + } if (auto parse_res = parser.parseAsPackedUInt(str_ref); parse_res) { datetime_res[i] = *parse_res;