Skip to content

Commit

Permalink
fix date format identifies '\n' as invalid separator (#4046) (#4059)
Browse files Browse the repository at this point in the history
close #4036
  • Loading branch information
ti-chi-bot authored Feb 22, 2022
1 parent f281880 commit 5b86c55
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 31 deletions.
63 changes: 32 additions & 31 deletions dbms/src/Common/MyTime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ bool isValidSeperator(char c, int previous_parts)
if (isPunctuation(c))
return true;

return previous_parts == 2 && (c == ' ' || c == 'T');
// for https://github.com/pingcap/tics/issues/4036
return previous_parts == 2 && (c == 'T' || isWhitespaceASCII(c));
}

std::vector<String> parseDateFormat(String format)
Expand Down Expand Up @@ -515,8 +516,8 @@ Field parseMyDateTime(const String & str, int8_t fsp)

bool truncated_or_incorrect = false;

// noAbsorb tests if can absorb FSP or TZ
auto noAbsorb = [](const std::vector<String> & seps) {
// no_absorb tests if can absorb FSP or TZ
auto no_absorb = [](const std::vector<String> & seps) {
// if we have more than 5 parts (i.e. 6), the trailing part can't be absorbed
// or if we only have 1 part, but its length is longer than 4, then it is at least YYMMD, in this case, FSP can
// not be absorbed, and it will be handled later, and the leading sign prevents TZ from being absorbed, because
Expand All @@ -526,7 +527,7 @@ Field parseMyDateTime(const String & str, int8_t fsp)

if (!frac_str.empty())
{
if (!noAbsorb(seps))
if (!no_absorb(seps))
{
seps.push_back(frac_str);
frac_str = "";
Expand All @@ -537,7 +538,7 @@ Field parseMyDateTime(const String & str, int8_t fsp)
{
// if tz_sign is empty, it's sure that the string literal contains timezone (e.g., 2010-10-10T10:10:10Z),
// therefore we could safely skip this branch.
if (!noAbsorb(seps) && !(tz_minute != "" && tz_sep == ""))
if (!no_absorb(seps) && !(tz_minute != "" && tz_sep == ""))
{
// we can't absorb timezone if there is no separator between tz_hour and tz_minute
if (!tz_hour.empty())
Expand All @@ -562,51 +563,51 @@ Field parseMyDateTime(const String & str, int8_t fsp)
{
case 14: // YYYYMMDDHHMMSS
{
std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second);
std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); //NOLINT
hhmmss = true;
break;
}
case 12: // YYMMDDHHMMSS
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second);
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); //NOLINT
year = adjustYear(year);
hhmmss = true;
break;
}
case 11: // YYMMDDHHMMS
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute, &second);
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute, &second); //NOLINT
year = adjustYear(year);
hhmmss = true;
break;
}
case 10: // YYMMDDHHMM
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute);
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute); //NOLINT
year = adjustYear(year);
break;
}
case 9: // YYMMDDHHM
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute);
std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute); //NOLINT
year = adjustYear(year);
break;
}
case 8: // YYYYMMDD
{
std::sscanf(seps[0].c_str(), "%4d%2d%2d", &year, &month, &day);
std::sscanf(seps[0].c_str(), "%4d%2d%2d", &year, &month, &day); //NOLINT
break;
}
case 7: // YYMMDDH
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d%1d", &year, &month, &day, &hour);
std::sscanf(seps[0].c_str(), "%2d%2d%2d%1d", &year, &month, &day, &hour); //NOLINT
year = adjustYear(year);
break;
}
case 6: // YYMMDD
case 5: // YYMMD
{
std::sscanf(seps[0].c_str(), "%2d%2d%2d", &year, &month, &day);
std::sscanf(seps[0].c_str(), "%2d%2d%2d", &year, &month, &day); //NOLINT
year = adjustYear(year);
break;
}
Expand All @@ -630,18 +631,18 @@ Field parseMyDateTime(const String & str, int8_t fsp)
case 1:
case 2:
{
ret = std::sscanf(frac_str.c_str(), "%2d ", &hour);
ret = std::sscanf(frac_str.c_str(), "%2d ", &hour); //NOLINT
break;
}
case 3:
case 4:
{
ret = std::sscanf(frac_str.c_str(), "%2d%2d ", &hour, &minute);
ret = std::sscanf(frac_str.c_str(), "%2d%2d ", &hour, &minute); //NOLINT
break;
}
default:
{
ret = std::sscanf(frac_str.c_str(), "%2d%2d%2d ", &hour, &minute, &second);
ret = std::sscanf(frac_str.c_str(), "%2d%2d%2d ", &hour, &minute, &second); //NOLINT
break;
}
}
Expand All @@ -655,7 +656,7 @@ Field parseMyDateTime(const String & str, int8_t fsp)
}
else
{
truncated_or_incorrect = (std::sscanf(frac_str.c_str(), "%2d ", &second) == 0);
truncated_or_incorrect = (std::sscanf(frac_str.c_str(), "%2d ", &second) == 0); //NOLINT
}
}
if (truncated_or_incorrect)
Expand Down Expand Up @@ -1003,7 +1004,7 @@ void MyTimeBase::check(bool allow_zero_in_date, bool allow_invalid_date) const
static auto is_leap_year = [](UInt16 _year) {
return ((_year % 4 == 0) && (_year % 100 != 0)) || (_year % 400 == 0);
};
max_day = max_days_in_month[month - 1];
max_day = max_days_in_month[month - 1]; // NOLINT
if (month == 2 && is_leap_year(year))
{
max_day = 29;
Expand Down Expand Up @@ -1336,13 +1337,13 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
return ParseState::END_OF_FILE;
return ParseState::NORMAL;
};
auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState {
auto skip_whitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState {
while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos]))
++temp_pos;
return checkIfEnd();
};
auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState {
if (skipWhitespaces() == ParseState::END_OF_FILE)
auto parseSep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState {
if (skip_whitespaces() == ParseState::END_OF_FILE)
return ParseState::END_OF_FILE;
// parse ":"
if (ctx.view.data[temp_pos] != ':')
Expand All @@ -1359,7 +1360,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
// hh
size_t step = 0;
int32_t hour = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || hour > 12 || hour == 0)
Expand All @@ -1375,7 +1376,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
return state;

int32_t minute = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || minute > 59)
Expand All @@ -1387,7 +1388,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
return state;

int32_t second = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || second > 59)
Expand All @@ -1396,7 +1397,7 @@ static bool parseTime12Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
temp_pos += step; // move forward

int meridiem = 0; // 0 - invalid, 1 - am, 2 - pm
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
// "AM"/"PM" must be parsed as a single element
// "11:13:56a" is an invalid input for "%r".
Expand Down Expand Up @@ -1440,13 +1441,13 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
return ParseState::END_OF_FILE;
return ParseState::NORMAL;
};
auto skipWhitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState {
auto skip_whitespaces = [&temp_pos, &ctx, &checkIfEnd]() -> ParseState {
while (temp_pos < ctx.view.size && isWhitespaceASCII(ctx.view.data[temp_pos]))
++temp_pos;
return checkIfEnd();
};
auto parseSep = [&temp_pos, &ctx, &skipWhitespaces]() -> ParseState {
if (skipWhitespaces() == ParseState::END_OF_FILE)
auto parseSep = [&temp_pos, &ctx, &skip_whitespaces]() -> ParseState {
if (skip_whitespaces() == ParseState::END_OF_FILE)
return ParseState::END_OF_FILE;
// parse ":"
if (ctx.view.data[temp_pos] != ':')
Expand All @@ -1463,7 +1464,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
// hh
size_t step = 0;
int32_t hour = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, hour) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || hour > 23)
Expand All @@ -1475,7 +1476,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
return state;

int32_t minute = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, minute) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || minute > 59)
Expand All @@ -1487,7 +1488,7 @@ static bool parseTime24Hour(MyDateTimeParser::Context & ctx, MyTimeBase & time)
return state;

int32_t second = 0;
if (state = skipWhitespaces(); state != ParseState::NORMAL)
if (state = skip_whitespaces(); state != ParseState::NORMAL)
return state;
std::tie(step, second) = parseNDigits(ctx.view, temp_pos, 2);
if (step == 0 || second > 59)
Expand Down
31 changes: 31 additions & 0 deletions dbms/src/Functions/tests/gtest_tidb_conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,5 +172,36 @@ try
}
CATCH

// Regression test for https://github.com/pingcap/tics/issues/4036
// Casting a string to MyDateTime must give the same value whether the date part and the
// time part are separated by ' ', '\t', '\n', '\v', '\f' or '\r'.
// The cast is exercised with a vector column, a const column and a nullable column as input.
TEST_F(TestTidbConversion, castStringAsDateTime)
try
{
    auto input = std::vector<String>{"2012-12-12 12:12:12", "2012-12-12\t12:12:12", "2012-12-12\n12:12:12", "2012-12-12\v12:12:12", "2012-12-12\f12:12:12", "2012-12-12\r12:12:12"};
    auto to_column = createConstColumn<String>(1, "MyDateTime(6)");

    // Every input literal denotes the same instant, so a single expected value is enough.
    const UInt64 expected_packed = MyDateTime(2012, 12, 12, 12, 12, 12, 0).toPackedUInt();

    // vector
    auto from_column = createColumn<String>(input);
    auto vector_result = executeFunction("tidb_cast", {from_column, to_column});
    for (size_t i = 0; i < input.size(); ++i)
    {
        ASSERT_EQ(expected_packed, vector_result.column.get()->get64(i));
    }

    // const: pass the const column itself, otherwise this section only repeats the vector case
    auto const_from_column = createConstColumn<String>(1, "2012-12-12\n12:12:12");
    auto const_result = executeFunction("tidb_cast", {const_from_column, to_column});
    ASSERT_EQ(expected_packed, const_result.column.get()->get64(0));

    // nullable: pass the nullable column itself, otherwise this section only repeats the vector case
    auto nullable_from_column = createColumn<Nullable<String>>({"2012-12-12 12:12:12", "2012-12-12\t12:12:12", "2012-12-12\n12:12:12", "2012-12-12\v12:12:12", "2012-12-12\f12:12:12", "2012-12-12\r12:12:12"});
    auto nullable_result = executeFunction("tidb_cast", {nullable_from_column, to_column});
    for (size_t i = 0; i < input.size(); ++i)
    {
        ASSERT_EQ(expected_packed, nullable_result.column.get()->get64(i));
    }
}
CATCH

} // namespace tests
} // namespace DB

0 comments on commit 5b86c55

Please sign in to comment.