Skip to content

Commit

Permalink
Add unittests for str_to_date, fix #3556, #3557 (#3581) (#3934)
Browse files Browse the repository at this point in the history
  • Loading branch information
ti-chi-bot authored Jan 26, 2022
1 parent 6351388 commit 2748f10
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 44 deletions.
71 changes: 30 additions & 41 deletions dbms/src/Common/MyTime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ std::vector<String> parseDateFormat(String format)
{
format = Poco::trimInPlace(format);

if (format.size() == 0)
if (format.empty())
return {};

if (!std::isdigit(format[0]) || !std::isdigit(format[format.size() - 1]))
Expand Down Expand Up @@ -530,7 +530,7 @@ Field parseMyDateTime(const String & str, int8_t fsp)
{
// if tz_sign is empty, it's sure that the string literal contains timezone (e.g., 2010-10-10T10:10:10Z),
// therefore we could safely skip this branch.
if (!noAbsorb(seps) && !(tz_minute != "" && tz_sep == ""))
if (!noAbsorb(seps) && !(!tz_minute.empty() && tz_sep.empty()))
{
// we can't absorb timezone if there is no separate between tz_hour and tz_minute
if (!tz_hour.empty())
Expand Down Expand Up @@ -852,9 +852,8 @@ size_t maxFormattedDateTimeStringLength(const String & format)
{
size_t result = 0;
bool in_pattern_match = false;
for (size_t i = 0; i < format.size(); i++)
for (char x : format)
{
char x = format[i];
if (in_pattern_match)
{
switch (x)
Expand Down Expand Up @@ -969,7 +968,6 @@ void MyTimeBase::check(bool allow_zero_in_date, bool allow_invalid_date) const
{
throw TiFlashException("Incorrect datetime value", Errors::Types::WrongValue);
}
return;
}

bool toCoreTimeChecked(const UInt64 & year, const UInt64 & month, const UInt64 & day, const UInt64 & hour, const UInt64 & minute,
Expand All @@ -990,9 +988,8 @@ bool toCoreTimeChecked(const UInt64 & year, const UInt64 & month, const UInt64 &
MyDateTimeFormatter::MyDateTimeFormatter(const String & layout)
{
bool in_pattern_match = false;
for (size_t i = 0; i < layout.size(); i++)
for (char x : layout)
{
char x = layout[i];
if (in_pattern_match)
{
switch (x)
Expand Down Expand Up @@ -1227,7 +1224,9 @@ struct MyDateTimeParser::Context
// The pos we are parsing from
size_t pos = 0;

Context(StringRef view_) : view(std::move(view_)) {}
explicit Context(StringRef view_)
: view(std::move(view_))
{}
};

// Try to parse digits with number of `limit` starting from view[pos]
Expand Down Expand Up @@ -1270,18 +1269,18 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
{
// Use temp_pos instead of changing `ctx.pos` directly in case of parsing failure
size_t temp_pos = ctx.pos;
auto checkIfEnd = [&temp_pos, &ctx]() -> ParseState {
auto check_if_end = [&temp_pos, &ctx]() -> ParseState {
// To the end
if (temp_pos == ctx.view.size)
return ParseState::END_OF_FILE;
return ParseState::NORMAL;
};
auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState {
auto skipWhitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState {
while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos]))
++temp_pos;
return checkIfEnd();
return check_if_end();
};
auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState {
auto parse_sep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState {
if (skipWhitespaces() == ParseState::END_OF_FILE)
return ParseState::END_OF_FILE;
// parse ":"
Expand All @@ -1290,7 +1289,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
temp_pos += 1; // move forward
return ParseState::NORMAL;
};
auto tryParse = [&]() -> ParseState {
auto try_parse = [&]() -> ParseState {
ParseState state = ParseState::NORMAL;
/// Note that we should update `time` as soon as possible, or we
/// can not get correct result for incomplete input like "12:13"
Expand All @@ -1311,7 +1310,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
time.hour = hour;
temp_pos += step; // move forward

if (state = parseSep(); state != ParseState::NORMAL)
if (state = parse_sep(); state != ParseState::NORMAL)
return state;

int32_t minute = 0;
Expand All @@ -1323,7 +1322,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
time.minute = minute;
temp_pos += step; // move forward

if (state = parseSep(); state != ParseState::NORMAL)
if (state = parse_sep(); state != ParseState::NORMAL)
return state;

int32_t second = 0;
Expand Down Expand Up @@ -1362,7 +1361,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
temp_pos += 2; // move forward
return ParseState::NORMAL;
};
if (auto state = tryParse(); state == ParseState::FAIL)
if (auto state = try_parse(); state == ParseState::FAIL)
return false;
// Other state, forward the `ctx.pos` and return true
ctx.pos = temp_pos;
Expand All @@ -1374,18 +1373,18 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
{
// Use temp_pos instead of changing `ctx.pos` directly in case of parsing failure
size_t temp_pos = ctx.pos;
auto checkIfEnd = [&temp_pos, &ctx]() -> ParseState {
auto check_if_end = [&temp_pos, &ctx]() -> ParseState {
// To the end
if (temp_pos == ctx.view.size)
return ParseState::END_OF_FILE;
return ParseState::NORMAL;
};
auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState {
auto skipWhitespaces = [&temp_pos, &ctx, &check_if_end]() -> ParseState {
while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos]))
++temp_pos;
return checkIfEnd();
return check_if_end();
};
auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState {
auto parse_sep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState {
if (skipWhitespaces() == ParseState::END_OF_FILE)
return ParseState::END_OF_FILE;
// parse ":"
Expand All @@ -1394,7 +1393,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
temp_pos += 1; // move forward
return ParseState::NORMAL;
};
auto tryParse = [&]() -> ParseState {
auto try_parse = [&]() -> ParseState {
ParseState state = ParseState::NORMAL;
/// Note that we should update `time` as soon as possible, or we
/// can not get correct result for incomplete input like "12:13"
Expand All @@ -1411,7 +1410,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
time.hour = hour;
temp_pos += step; // move forward

if (state = parseSep(); state != ParseState::NORMAL)
if (state = parse_sep(); state != ParseState::NORMAL)
return state;

int32_t minute = 0;
Expand All @@ -1423,7 +1422,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
time.minute = minute;
temp_pos += step; // move forward

if (state = parseSep(); state != ParseState::NORMAL)
if (state = parse_sep(); state != ParseState::NORMAL)
return state;

int32_t second = 0;
Expand All @@ -1437,7 +1436,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)

return ParseState::NORMAL;
};
if (auto state = tryParse(); state == ParseState::FAIL)
if (auto state = try_parse(); state == ParseState::FAIL)
return false;
// Other state, forward the `ctx.pos` and return true
ctx.pos = temp_pos;
Expand Down Expand Up @@ -1482,6 +1481,9 @@ MyDateTimeParser::MyDateTimeParser(String format_) : format(std::move(format_))
});
break;
}
case 'm':
//"%m": Month, numeric (00..12)
[[fallthrough]];
case 'c':
{
//"%c": Month, numeric (0..12)
Expand Down Expand Up @@ -1523,9 +1525,9 @@ MyDateTimeParser::MyDateTimeParser(String format_) : format(std::move(format_))
time.micro_second = 0;
return true;
}
// The siffix '0' can be ignored.
// The suffix '0' can be ignored.
// "9" means 900000
while (ms > 0 && ms * 10 < 1000000)
for (size_t i = step; i < 6; i++)
{
ms *= 10;
}
Expand Down Expand Up @@ -1621,19 +1623,6 @@ MyDateTimeParser::MyDateTimeParser(String format_) : format(std::move(format_))
});
break;
}
case 'm':
{
//"%m": Month, numeric (00..12)
parsers.emplace_back([](MyDateTimeParser::Context & ctx, MyTimeBase & time) -> bool {
auto [step, month] = parseNDigits(ctx.view, ctx.pos, 2);
if (step == 0 || month > 12)
return false;
time.month = month;
ctx.pos += step;
return true;
});
break;
}
case 'S':
//"%S": Seconds (00..59)
[[fallthrough]];
Expand Down Expand Up @@ -1880,7 +1869,7 @@ std::optional<UInt64> MyDateTimeParser::parseAsPackedUInt(const StringRef & str_
MyDateTimeParser::Context ctx(str_view);

// TODO: can we return warnings to TiDB?
for (auto & f : parsers)
for (const auto & f : parsers)
{
// Ignore all prefix white spaces before each pattern match (TODO: handle unicode space?)
while (ctx.pos < str_view.size && isWhitespaceASCII(str_view.data[ctx.pos]))
Expand All @@ -1889,7 +1878,7 @@ std::optional<UInt64> MyDateTimeParser::parseAsPackedUInt(const StringRef & str_
if (ctx.pos == ctx.view.size)
break;

if (f(ctx, my_time) != true)
if (!f(ctx, my_time))
{
#ifndef NDEBUG
LOG_TRACE(&Logger::get("MyDateTimeParser"),
Expand Down
89 changes: 89 additions & 0 deletions dbms/src/Common/tests/gtest_mytime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,96 @@ try
// {"Tuesday 52 2001", "%W %V %Y", std::nullopt}, //
// {"Tuesday 52 2001", "%W %V %x", std::nullopt}, //
// {"Tuesday 52 2001", "%W %u %x", std::nullopt}, //

// Test cases for %b
{"10/JAN/2010", "%d/%b/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}}, // Right spill, case-insensitive
{"10/FeB/2010", "%d/%b/%Y", MyDateTime{2010, 2, 10, 0, 0, 0, 0}},
{"10/MAr/2010", "%d/%b/%Y", MyDateTime{2010, 3, 10, 0, 0, 0, 0}},
{"10/ApR/2010", "%d/%b/%Y", MyDateTime{2010, 4, 10, 0, 0, 0, 0}},
{"10/mAY/2010", "%d/%b/%Y", MyDateTime{2010, 5, 10, 0, 0, 0, 0}},
{"10/JuN/2010", "%d/%b/%Y", MyDateTime{2010, 6, 10, 0, 0, 0, 0}},
{"10/JUL/2010", "%d/%b/%Y", MyDateTime{2010, 7, 10, 0, 0, 0, 0}},
{"10/Aug/2010", "%d/%b/%Y", MyDateTime{2010, 8, 10, 0, 0, 0, 0}},
{"10/seP/2010", "%d/%b/%Y", MyDateTime{2010, 9, 10, 0, 0, 0, 0}},
{"10/Oct/2010", "%d/%b/%Y", MyDateTime{2010, 10, 10, 0, 0, 0, 0}},
{"10/NOV/2010", "%d/%b/%Y", MyDateTime{2010, 11, 10, 0, 0, 0, 0}},
{"10/DEC/2010", "%d/%b/%Y", MyDateTime{2010, 12, 10, 0, 0, 0, 0}},
{"10/January/2010", "%d/%b/%Y", std::nullopt}, // Test full spilling

// Test cases for %M
{"10/January/2010", "%d/%M/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}}, // Test full spilling
{"10/February/2010", "%d/%M/%Y", MyDateTime{2010, 2, 10, 0, 0, 0, 0}},
{"10/March/2010", "%d/%M/%Y", MyDateTime{2010, 3, 10, 0, 0, 0, 0}},
{"10/April/2010", "%d/%M/%Y", MyDateTime{2010, 4, 10, 0, 0, 0, 0}},
{"10/May/2010", "%d/%M/%Y", MyDateTime{2010, 5, 10, 0, 0, 0, 0}},
{"10/June/2010", "%d/%M/%Y", MyDateTime{2010, 6, 10, 0, 0, 0, 0}},
{"10/July/2010", "%d/%M/%Y", MyDateTime{2010, 7, 10, 0, 0, 0, 0}},
{"10/August/2010", "%d/%M/%Y", MyDateTime{2010, 8, 10, 0, 0, 0, 0}},
{"10/September/2010", "%d/%M/%Y", MyDateTime{2010, 9, 10, 0, 0, 0, 0}},
{"10/October/2010", "%d/%M/%Y", MyDateTime{2010, 10, 10, 0, 0, 0, 0}},
{"10/November/2010", "%d/%M/%Y", MyDateTime{2010, 11, 10, 0, 0, 0, 0}},
{"10/December/2010", "%d/%M/%Y", MyDateTime{2010, 12, 10, 0, 0, 0, 0}},

// Test cases for %c
// {"10/0/2010", "%d/%c/%Y", MyDateTime{2010, 0, 10, 0, 0, 0, 0}}, // TODO: Need Check NO_ZERO_DATE
{"10/1/2010", "%d/%c/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}},
{"10/01/2010", "%d/%c/%Y", MyDateTime{2010, 1, 10, 0, 0, 0, 0}},
{"10/001/2010", "%d/%c/%Y", std::nullopt},
{"10/13/2010", "%d/%c/%Y", std::nullopt},
{"10/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 10, 0, 0, 0, 0}},

// Test cases for %d, %e
// {"0/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 0, 0, 0, 0, 0}}, // TODO: Need Check NO_ZERO_DATE
{"1/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 1, 0, 0, 0, 0}},
{"05/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 5, 0, 0, 0, 0}},
{"05/12/2010", "%e/%c/%Y", MyDateTime{2010, 12, 5, 0, 0, 0, 0}},
{"31/12/2010", "%d/%c/%Y", MyDateTime{2010, 12, 31, 0, 0, 0, 0}},
{"32/12/2010", "%d/%c/%Y", std::nullopt},
{"30/11/2010", "%d/%c/%Y", MyDateTime{2010, 11, 30, 0, 0, 0, 0}},
{"31/11/2010", "%e/%c/%Y", MyDateTime{2010, 11, 31, 0, 0, 0, 0}},
{"28/2/2010", "%e/%c/%Y", MyDateTime{2010, 2, 28, 0, 0, 0, 0}},
{"29/2/2010", "%e/%c/%Y", MyDateTime{2010, 2, 29, 0, 0, 0, 0}},
{"29/2/2020", "%e/%c/%Y", MyDateTime{2020, 2, 29, 0, 0, 0, 0}},

// Test cases for %Y
// {"1/12/0000", "%d/%c/%Y", MyDateTime{0000, 12, 1, 0, 0, 0, 0}}, // TODO: Need Check NO_ZERO_DATE
{"1/12/01", "%d/%c/%Y", MyDateTime{2001, 12, 1, 0, 0, 0, 0}},
{"1/12/0001", "%d/%c/%Y", MyDateTime{0001, 12, 1, 0, 0, 0, 0}},
{"1/12/2020", "%d/%c/%Y", MyDateTime{2020, 12, 1, 0, 0, 0, 0}},
{"1/12/9999", "%d/%c/%Y", MyDateTime{9999, 12, 1, 0, 0, 0, 0}},

// Test cases for %f
{"01,5,2013 999999", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 999999}},
{"01,5,2013 0", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 0}},
{"01,5,2013 9999990000000", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 999999}},
{"01,5,2013 1", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 100000}},
{"01,5,2013 1230", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 123000}},
{"01,5,2013 01", "%d,%c,%Y %f", MyDateTime{2013, 5, 1, 0, 0, 0, 10000}}, // issue 3556

// Test cases for %h, %I, %l
{"00:11:12 ", "%h:%i:%S ", std::nullopt}, // 0 is not a valid number of %h
{"01:11:12 ", "%I:%i:%S ", MyDateTime{0, 0, 0, 01, 11, 12, 0}},
{"12:11:12 ", "%l:%i:%S ", MyDateTime{0, 0, 0, 0, 11, 12, 0}},

// Test cases for %k, %H
{"00:11:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 11, 12, 0}},
{"01:11:12 ", "%k:%i:%S ", MyDateTime{0, 0, 0, 01, 11, 12, 0}},
{"12:11:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 12, 11, 12, 0}},
{"24:11:12 ", "%k:%i:%S ", std::nullopt},

// Test cases for %i
{"00:00:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 00, 12, 0}},
{"00:01:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 01, 12, 0}},
{"00:59:12 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 59, 12, 0}},
{"00:60:12 ", "%H:%i:%S ", std::nullopt},

// Test cases for %s, %S
{"00:00:00 ", "%H:%i:%s ", MyDateTime{0, 0, 0, 00, 00, 00, 0}},
{"00:01:01 ", "%H:%i:%s ", MyDateTime{0, 0, 0, 00, 01, 01, 0}},
{"00:59:59 ", "%H:%i:%S ", MyDateTime{0, 0, 0, 00, 59, 59, 0}},
{"00:59:60 ", "%H:%i:%S ", std::nullopt},
};

auto result_formatter = MyDateTimeFormatter("%Y/%m/%d %T.%f");
size_t idx = 0;
for (const auto & [input, fmt, expected] : cases)
Expand Down
17 changes: 14 additions & 3 deletions dbms/src/Functions/FunctionsConversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -1652,7 +1652,7 @@ class FunctionStrToDate : public IFunction
const ColumnString * input_raw_col = nullptr;
if (input_column->isColumnNullable())
{
auto null_input_column = checkAndGetColumn<ColumnNullable>(input_column.get());
const auto * null_input_column = checkAndGetColumn<ColumnNullable>(input_column.get());
input_raw_col = checkAndGetColumn<ColumnString>(null_input_column->getNestedColumnPtr().get());
}
else
Expand Down Expand Up @@ -1702,14 +1702,22 @@ class FunctionStrToDate : public IFunction
const ColumnString * format_raw_col = nullptr;
if (format_column->isColumnNullable())
{
auto null_format_column = checkAndGetColumn<ColumnNullable>(format_column.get());
const auto * null_format_column = checkAndGetColumn<ColumnNullable>(format_column.get());
format_raw_col = checkAndGetColumn<ColumnString>(null_format_column->getNestedColumnPtr().get());
}
else
{
format_raw_col = checkAndGetColumn<ColumnString>(format_column.get());
}

String str_input_const;
StringRef str_ref;
if (input_column->isColumnConst())
{
const auto & input_const = checkAndGetColumnConst<ColumnString>(input_column.get());
str_input_const = input_const->getValue<String>();
str_ref = StringRef(str_input_const);
}
for (size_t i = 0; i < num_rows; ++i)
{
// Set null for either null input or null format
Expand All @@ -1734,7 +1742,10 @@ class FunctionStrToDate : public IFunction

const auto format_ref = format_raw_col->getDataAt(i);
auto parser = MyDateTimeParser(format_ref.toString());
const auto str_ref = input_raw_col->getDataAt(i);
if (!input_column->isColumnConst())
{
str_ref = input_raw_col->getDataAt(i);
}
if (auto parse_res = parser.parseAsPackedUInt(str_ref); parse_res)
{
datetime_res[i] = *parse_res;
Expand Down

0 comments on commit 2748f10

Please sign in to comment.