diff --git a/dbms/src/Common/MyTime.cpp b/dbms/src/Common/MyTime.cpp index 782ab4b84d1..195b1afabf6 100644 --- a/dbms/src/Common/MyTime.cpp +++ b/dbms/src/Common/MyTime.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -74,18 +75,153 @@ std::vector parseDateFormat(String format) return seps; } -std::pair, String> splitDatetime(String format) +// GetTimezone parses the trailing timezone information of a given time string literal. If idx = -1 is returned, it +// means timezone information not found, otherwise it indicates the index of the starting index of the timezone +// information. If the timezone contains sign, hour part and/or minute part, it will be returned as is, otherwise an +// empty string will be returned. +// +// Supported syntax: +// MySQL compatible: ((?P[-+])(?P[0-9]{2}):(?P[0-9]{2})){0,1}$, see +// https://dev.mysql.com/doc/refman/8.0/en/time-zone-support.html and https://dev.mysql.com/doc/refman/8.0/en/datetime.html +// the first link specified that timezone information should be in "[H]H:MM, prefixed with a + or -" while the +// second link specified that for string literal, "hour values less than than 10, a leading zero is required.". 
+// ISO-8601: Z|((((?P[-+])(?P[0-9]{2})(:(?P[0-9]{2}){0,1}){0,1})|((?P[0-9]{2}){0,1}){0,1}))$ +// see https://www.cl.cam.ac.uk/~mgk25/iso-time.html +std::tuple getTimeZone(String literal) { - int idx = getFracIndex(format); + static const std::map> valid_idx_combinations{ + {100, {0, 0}}, // 23:59:59Z + {30, {2, 0}}, // 23:59:59+08 + {50, {4, 2}}, // 23:59:59+0800 + {63, {5, 2}}, // 23:59:59+08:00 + // postgres supports the following additional syntax that deviates from ISO8601, although we won't support it + // currently, it will be fairly easy to add in the current parsing framework + // 23:59:59Z+08 + // 23:59:59Z+08:00 + }; + + String tz_sign, tz_hour, tz_sep, tz_minute; + + // idx is for the position of the starting of the timezone information + // zidx is for the z symbol + // sidx is for the sign + // spidx is for the separator + int idx = -1, zidx = -1, sidx = -1, spidx = -1; + + size_t l = literal.size(); + + for (int i = l - 1; i >= 0; i--) + { + if (literal[i] == 'Z') + { + zidx = i; + break; + } + if (sidx == -1 && (literal[i] == '-' || literal[i] == '+')) + { + sidx = i; + } + if (spidx == -1 && literal[i] == ':') + { + spidx = i; + } + } + // we could enumerate all valid combinations of these values and look it up in a table, see validIdxCombinations + // zidx can be -1 (23:59:59+08:00), l-1 (23:59:59Z) + // sidx can be -1, l-3, l-5, l-6 + // spidx can be -1, l-3 + int k = 0; + if (l - zidx == 1) + { + k += 100; + } + if (int t = l - sidx; t == 3 || t == 5 || t == 6) + { + k += t * 10; + } + if (l - spidx == 3) + { + k += 3; + } + if (auto tmp = valid_idx_combinations.find(k); tmp != valid_idx_combinations.end()) + { + auto [h, m] = valid_idx_combinations.at(k); + int hidx = l - h; + int midx = l - m; + auto validate = [](const String & v) { return '0' <= v[0] && v[0] <= '9' && '0' <= v[1] && v[1] <= '9'; }; + if (sidx != -1) + { + tz_sign = literal.substr(sidx, 1); + idx = sidx; + } + if (zidx != -1) + { + idx = zidx; + } + if ((l - spidx) == 3) + { 
+ tz_sep = literal.substr(spidx, 1); + } + if (h != 0) + { + tz_hour = literal.substr(hidx, 2); + if (!validate(tz_hour)) + { + return std::make_tuple(-1, "", "", "", ""); + } + } + if (m != 0) + { + tz_minute = literal.substr(midx, 2); + if (!validate(tz_minute)) + { + return std::make_tuple(-1, "", "", "", ""); + } + } + return std::make_tuple(idx, tz_sign, tz_hour, tz_sep, tz_minute); + } + return std::make_tuple(-1, "", "", "", ""); +} + +// TODO: make unified helper +bool isPunctuation(char c) +{ + return (c >= 0x21 && c <= 0x2F) || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B && c <= 0x7E); +} + +std::tuple, String, bool, String, String, String, String> splitDatetime(String format) +{ + std::vector seps; String frac; - if (idx > 0) + bool has_tz = false; + auto [tz_idx, tz_sign, tz_hour, tz_sep, tz_minute] = getTimeZone(format); + if (tz_idx > 0) { - frac = format.substr(idx + 1); - format = format.substr(0, idx); + has_tz = true; + while (tz_idx > 0 && isPunctuation(format[tz_idx - 1])) + { + // in case of multiple separators, e.g. 2020-10--10 + tz_idx--; + } + format = format.substr(0, tz_idx); + } + int frac_idx = getFracIndex(format); + if (frac_idx > 0) + { + frac = format.substr(frac_idx + 1); + while (frac_idx > 0 && isPunctuation(format[frac_idx - 1])) + { + // in case of multiple separators, e.g. 
2020-10--10 + frac_idx--; + } + format = format.substr(0, frac_idx); } - return std::make_pair(parseDateFormat(format), std::move(frac)); + seps = parseDateFormat(format); + return std::make_tuple(std::move(seps), std::move(frac), std::move(has_tz), std::move(tz_sign), std::move(tz_hour), std::move(tz_sep), + std::move(tz_minute)); } + MyTimeBase::MyTimeBase(UInt64 packed) { UInt64 ymdhms = packed >> 24; @@ -576,11 +712,57 @@ void MyTimeBase::convertDateFormat(char c, String & result) const } } -Field parseMyDateTime(const String & str) +// TODO: support parse time from float string +Field parseMyDateTime(const String & str, int8_t fsp) { - Int32 year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0; + // Since we only use DateLUTImpl as parameter placeholder of AddSecondsImpl::execute + // and it's costly to construct a DateLUTImpl, a shared static instance is enough. + static const DateLUTImpl lut = DateLUT::instance("UTC"); + + Int32 year = 0, month = 0, day = 0, hour = 0, minute = 0, second = 0, delta_hour = 0, delta_minute = 0; - auto [seps, frac_str] = splitDatetime(str); + bool hhmmss = false; + + auto [seps, frac_str, has_tz, tz_sign, tz_hour, tz_sep, tz_minute] = splitDatetime(str); + + bool truncated_or_incorrect = false; + + // noAbsorb tests if can absorb FSP or TZ + auto noAbsorb = [](std::vector seps) { + // if we have more than 5 parts (i.e. 6), the tailing part can't be absorbed + // or if we only have 1 part, but its length is longer than 4, then it is at least YYMMD, in this case, FSP can + // not be absorbed, and it will be handled later, and the leading sign prevents TZ from being absorbed, because + // if date part has no separators, we can't use -/+ as separators between date & time. 
+ return seps.size() > 5 || (seps.size() == 1 && seps[0].size() > 4); + }; + + if (!frac_str.empty()) + { + if (!noAbsorb(seps)) + { + seps.push_back(frac_str); + frac_str = ""; + } + } + + if (has_tz && !tz_sign.empty()) + { + // if tz_sign is empty, it's sure that the string literal contains timezone (e.g., 2010-10-10T10:10:10Z), + // therefore we could safely skip this branch. + if (!noAbsorb(seps) && !(tz_minute != "" && tz_sep == "")) + { + // we can't absorb timezone if there is no separate between tz_hour and tz_minute + if (!tz_hour.empty()) + { + seps.push_back(tz_hour); + } + if (!tz_minute.empty()) + { + seps.push_back(tz_minute); + } + has_tz = false; + } + } switch (seps.size()) { @@ -590,43 +772,51 @@ Field parseMyDateTime(const String & str) size_t l = seps[0].size(); switch (l) { - case 14: - // YYYYMMDDHHMMSS - { - std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); - break; - } - case 12: + case 14: // YYYYMMDDHHMMSS + { + std::sscanf(seps[0].c_str(), "%4d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); + hhmmss = true; + break; + } + case 12: // YYMMDDHHMMSS { std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute, &second); year = adjustYear(year); + hhmmss = true; break; } - case 11: + case 11: // YYMMDDHHMMS { std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute, &second); year = adjustYear(year); + hhmmss = true; break; } - case 10: + case 10: // YYMMDDHHMM { std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%2d", &year, &month, &day, &hour, &minute); year = adjustYear(year); break; } - case 9: + case 9: // YYMMDDHHM { std::sscanf(seps[0].c_str(), "%2d%2d%2d%2d%1d", &year, &month, &day, &hour, &minute); year = adjustYear(year); break; } - case 8: + case 8: // YYYYMMDD { std::sscanf(seps[0].c_str(), "%4d%2d%2d", &year, &month, &day); break; } - case 6: - case 5: + case 7: // YYMMDDH + { + std::sscanf(seps[0].c_str(), 
"%2d%2d%2d%1d", &year, &month, &day, &hour); + year = adjustYear(year); + break; + } + case 6: // YYMMDD + case 5: // YYMMD { std::sscanf(seps[0].c_str(), "%2d%2d%2d", &year, &month, &day); year = adjustYear(year); @@ -634,19 +824,82 @@ Field parseMyDateTime(const String & str) } default: { - throw Exception("Wrong datetime format"); + throw TiFlashException("Wrong datetime format: " + str, Errors::Types::WrongValue); + } + } + if (l == 5 || l == 6 || l == 8) + { + // YYMMDD or YYYYMMDD + // We must handle float => string => datetime, the difference is that fractional + // part of float type is discarded directly, while fractional part of string type + // is parsed to HH:MM:SS. + int ret = 0; + switch (frac_str.size()) + { + case 0: + ret = 1; + break; + case 1: + case 2: + { + ret = std::sscanf(frac_str.c_str(), "%2d ", &hour); + break; + } + case 3: + case 4: + { + ret = std::sscanf(frac_str.c_str(), "%2d%2d ", &hour, &minute); + break; + } + default: + { + ret = std::sscanf(frac_str.c_str(), "%2d%2d%2d ", &hour, &minute, &second); + break; + } + } + truncated_or_incorrect = (ret == 0); + } + if (l == 9 || l == 10) + { + if (frac_str.empty()) + { + second = 0; + } + else + { + truncated_or_incorrect = (std::sscanf(frac_str.c_str(), "%2d ", &second) == 0); } } + if (truncated_or_incorrect) + { + throw TiFlashException("Datetime truncated: " + str, Errors::Types::Truncated); + } break; } case 3: { + // YYYY-MM-DD scanTimeArgs(seps, {&year, &month, &day}); break; } + case 4: + { + // YYYY-MM-DD HH + scanTimeArgs(seps, {&year, &month, &day, &hour}); + break; + } + case 5: + { + // YYYY-MM-DD HH-MM + scanTimeArgs(seps, {&year, &month, &day, &hour, &minute}); + break; + } case 6: { + // We don't have fractional seconds part. 
+ // YYYY-MM-DD HH-MM-SS scanTimeArgs(seps, {&year, &month, &day, &hour, &minute, &second}); + hhmmss = true; break; } default: @@ -655,21 +908,92 @@ Field parseMyDateTime(const String & str) } } + // If str is sepereated by delimiters, the first one is year, and if the year is 2 digit, + // we should adjust it. + // TODO: adjust year is very complex, now we only consider the simplest way. + if (seps[0].size() == 2) + { + if (year == 0 && month == 0 && day == 0 && hour == 0 && minute == 0 && second == 0 && frac_str.empty()) + { + // Skip a special case "00-00-00". + } + else + { + year = adjustYear(year); + } + } + UInt32 micro_second = 0; - // TODO This is a simple implement, without processing overflow. - if (frac_str.size() > 6) + if (hhmmss && !frac_str.empty()) { - frac_str = frac_str.substr(0, 6); + // If input string is "20170118.999", without hhmmss, fsp is meaningless. + // TODO: this case is not only meaningless, but erroneous, please confirm. + if (static_cast(fsp) >= frac_str.size()) + { + micro_second = std::stoul(frac_str); + micro_second = micro_second * std::pow(10, 6 - frac_str.size()); + } + else + { + auto result_frac = frac_str.substr(0, fsp + 1); + micro_second = std::stoul(result_frac); + micro_second = (micro_second + 5) / 10; + // Overflow + if (micro_second >= std::pow(10, fsp)) + { + MyDateTime datetime(year, month, day, hour, minute, second, 0); + UInt64 result = AddSecondsImpl::execute(datetime.toPackedUInt(), 1, lut); + MyDateTime result_datetime(result); + year = result_datetime.year; + month = result_datetime.month; + day = result_datetime.day; + hour = result_datetime.hour; + minute = result_datetime.minute; + second = result_datetime.second; + micro_second = 0; + } + else + { + micro_second = micro_second * std::pow(10, 6 - fsp); + } + } } - if (frac_str.size() > 0) + MyDateTime result(year, month, day, hour, minute, second, micro_second); + + if (has_tz) { - micro_second = std::stoul(frac_str); - for (size_t i = frac_str.size(); i 
< 6; i++) - micro_second *= 10; + if (!hhmmss) + { + throw TiFlashException("Invalid datetime value: " + str, Errors::Types::WrongValue); + } + if (!tz_hour.empty()) + { + delta_hour = (tz_hour[0] - '0') * 10 + (tz_hour[1] - '0'); + } + if (!tz_minute.empty()) + { + delta_minute = (tz_minute[0] - '0') * 10 + (tz_minute[1] - '0'); + } + // allowed delta range is [-14:00, 14:00], and we will intentionally reject -00:00 + if (delta_hour > 14 || delta_minute > 59 || (delta_hour == 14 && delta_minute != 0) + || (tz_sign == "-" && delta_hour == 0 && delta_minute == 0)) + { + throw TiFlashException("Invalid datetime value: " + str, Errors::Types::WrongValue); + } + // by default, if the temporal string literal does not contain timezone information, it will be in the timezone + // specified by the time_zone system variable. However, if the timezone is specified in the string literal, we + // will use the specified timezone to interpret the string literal and convert it into the system timezone. 
+ int offset = delta_hour * 60 * 60 + delta_minute * 60; + if (tz_sign == "-") + { + offset = -offset; + } + auto tmp = AddSecondsImpl::execute(result.toPackedUInt(), -offset, lut); + result = MyDateTime(tmp); } - return MyDateTime(year, month, day, hour, minute, second, micro_second).toPackedUInt(); + return result.toPackedUInt(); } String MyDateTime::toString(int fsp) const @@ -846,7 +1170,7 @@ MyDateTime numberToDateTime(Int64 number) // check MMDD if (number < 101) { - throw Exception("Cannot convert " + std::to_string(number) + " to Datetime"); + throw TiFlashException("Cannot convert " + std::to_string(number) + " to Datetime", Errors::Types::WrongValue); } // check YYMMDD: 2000-2069 @@ -866,7 +1190,7 @@ MyDateTime numberToDateTime(Int64 number) // check YYYYMMDD if (number <= 10000101) { - throw Exception("Cannot convert " + std::to_string(number) + " to Datetime"); + throw TiFlashException("Cannot convert " + std::to_string(number) + " to Datetime", Errors::Types::WrongValue); } // check hhmmss @@ -879,7 +1203,7 @@ MyDateTime numberToDateTime(Int64 number) // check MMDDhhmmss if (number < 101000000) { - throw Exception("Cannot convert " + std::to_string(number) + " to Datetime"); + throw TiFlashException("Cannot convert " + std::to_string(number) + " to Datetime", Errors::Types::WrongValue); } // check YYMMDDhhmmss: 2000-2069 @@ -892,7 +1216,7 @@ MyDateTime numberToDateTime(Int64 number) // check YYMMDDhhmmss if (number < 70 * 10000000000 + 101000000) { - throw Exception("Cannot convert " + std::to_string(number) + " to Datetime"); + throw TiFlashException("Cannot convert " + std::to_string(number) + " to Datetime", Errors::Types::WrongValue); } if (number <= 991231235959) diff --git a/dbms/src/Common/MyTime.h b/dbms/src/Common/MyTime.h index 8b0423d54ea..4b78d896582 100644 --- a/dbms/src/Common/MyTime.h +++ b/dbms/src/Common/MyTime.h @@ -114,7 +114,7 @@ struct MyDate : public MyTimeBase String toString() const { return dateFormat("%Y-%m-%d"); } }; -Field 
parseMyDateTime(const String & str); +Field parseMyDateTime(const String & str, int8_t fsp = 6); void convertTimeZone(UInt64 from_time, UInt64 & to_time, const DateLUTImpl & time_zone_from, const DateLUTImpl & time_zone_to); diff --git a/dbms/src/Common/TiFlashException.h b/dbms/src/Common/TiFlashException.h index 6e9e579182a..7cfc7170a01 100644 --- a/dbms/src/Common/TiFlashException.h +++ b/dbms/src/Common/TiFlashException.h @@ -113,7 +113,8 @@ namespace DB E(Internal, "MPP internal error.", \ "Please contact with developer, \n" \ "better providing information about your cluster(log, topology information etc.).", \ - "");) + "");) \ + C(Types, E(Truncated, "Data is truncated during conversion.", "", ""); E(WrongValue, "Input value is in wrong format", "", "");) /// TiFlashError is core struct of standard error, /// which contains all information about an error except message. diff --git a/dbms/src/Common/tests/CMakeLists.txt b/dbms/src/Common/tests/CMakeLists.txt index e5d44f512d0..b786f7b878d 100644 --- a/dbms/src/Common/tests/CMakeLists.txt +++ b/dbms/src/Common/tests/CMakeLists.txt @@ -77,3 +77,6 @@ target_link_libraries (persisted_container clickhouse_common_io) add_executable(decimal_test_decimal_type gtest_decimal_type.cpp) target_link_libraries(decimal_test_decimal_type clickhouse_common_io gtest_main) + +add_executable(mytime_test gtest_mytime.cpp) +target_link_libraries(mytime_test clickhouse_common_io clickhouse_functions gtest_main) \ No newline at end of file diff --git a/dbms/src/Common/tests/gtest_mytime.cpp b/dbms/src/Common/tests/gtest_mytime.cpp new file mode 100644 index 00000000000..be4f6b217aa --- /dev/null +++ b/dbms/src/Common/tests/gtest_mytime.cpp @@ -0,0 +1,157 @@ +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace DB +{ +namespace tests +{ + +class TestMyTime : public testing::Test +{ +protected: + virtual void SetUp() override {} + virtual void TearDown() override {} + +public: + static void 
checkParseMyDateTime(const std::string & str, const std::string & expected, const DataTypeMyDateTime & type) + { + try + { + UInt64 res = parseMyDateTime(str, type.getFraction()).template safeGet(); + MyDateTime datetime(res); + std::string actual = datetime.toString(type.getFraction()); + EXPECT_EQ(actual, expected) << "Original datetime string: " << str; + } + catch (...) + { + std::cerr << "Error occurs when parsing: \"" << str << "\"" << std::endl; + throw; + } + } + + static void checkParseMyDateTime(const std::string & str, MyDateTime & expected, const DataTypeMyDateTime & type) + { + try + { + UInt64 res = parseMyDateTime(str, type.getFraction()).template safeGet(); + MyDateTime source(res); + EXPECT_EQ(source.year, expected.year) << "Original datetime string: " << str; + EXPECT_EQ(source.month, expected.month) << "Original datetime string: " << str; + EXPECT_EQ(source.day, expected.day) << "Original datetime string: " << str; + EXPECT_EQ(source.hour, expected.hour) << "Original datetime string: " << str; + EXPECT_EQ(source.minute, expected.minute) << "Original datetime string: " << str; + EXPECT_EQ(source.second, expected.second) << "Original datetime string: " << str; + EXPECT_EQ(source.micro_second, expected.micro_second) << "Original datetime string: " << str; + } + catch (...) 
+ { + std::cerr << "Error occurs when parsing: \"" << str << "\"" << std::endl; + throw; + } + } +}; + +TEST_F(TestMyTime, ParseMyDateTimeWithFraction) +try +{ + std::vector> cases_with_fsp{ + {"2020-12-10 11:11:11.123456", "2020-12-10 11:11:11.123456"}, // YYYY-MM-DD HH:MM:SS.mmmmmm + {"00-00-00 00:00:00.123", "2000-00-00 00:00:00.123000"}, + {"1701020304.1", "2017-01-02 03:04:01.000000"}, + {"1701020302.11", "2017-01-02 03:02:11.000000"}, + {"170102037.11", "2017-01-02 03:07:11.000000"}, + {"2018.01.01", "2018-01-01 00:00:00.000000"}, + {"2020.10.10 10.10.10", "2020-10-10 10:10:10.000000"}, + {"2020-10-10 10-10.10", "2020-10-10 10:10:10.000000"}, + {"2020-10-10 10.10", "2020-10-10 10:10:00.000000"}, + {"2018.01.01", "2018-01-01 00:00:00.000000"}, + }; + DataTypeMyDateTime type_with_fraction(6); + for (auto & [str, expected] : cases_with_fsp) + { + checkParseMyDateTime(str, expected, type_with_fraction); + } +} +catch (Exception & e) +{ + std::cerr << e.displayText() << std::endl; + GTEST_FAIL(); +} + +TEST_F(TestMyTime, ParseMyDateTimeWithoutFraction) +try +{ + std::vector> cases_without_fsp{ + {"2012-12-31 11:30:45", "2012-12-31 11:30:45"}, + {"0000-00-00 00:00:00", "0000-00-00 00:00:00"}, + {"0001-01-01 00:00:00", "0001-01-01 00:00:00"}, + {"00-12-31 11:30:45", "2000-12-31 11:30:45"}, + {"12-12-31 11:30:45", "2012-12-31 11:30:45"}, + {"2012-12-31", "2012-12-31 00:00:00"}, + {"20121231", "2012-12-31 00:00:00"}, + {"121231", "2012-12-31 00:00:00"}, + {"2012^12^31 11+30+45", "2012-12-31 11:30:45"}, + {"2012^12^31T11+30+45", "2012-12-31 11:30:45"}, + {"2012-2-1 11:30:45", "2012-02-01 11:30:45"}, + {"12-2-1 11:30:45", "2012-02-01 11:30:45"}, + {"20121231113045", "2012-12-31 11:30:45"}, + {"121231113045", "2012-12-31 11:30:45"}, + {"2012-02-29", "2012-02-29 00:00:00"}, + {"00-00-00", "0000-00-00 00:00:00"}, + {"11111111111", "2011-11-11 11:11:01"}, + {"1701020301.", "2017-01-02 03:01:00"}, + {"170102036", "2017-01-02 03:06:00"}, + {"170102039.", "2017-01-02 
03:09:00"}, + {"2018-01-01 18", "2018-01-01 18:00:00"}, + {"18-01-01 18", "2018-01-01 18:00:00"}, + {"2018.01.01 00:00:00", "2018-01-01 00:00:00"}, + {"2018/01/01-00:00:00", "2018-01-01 00:00:00"}, + {"4710072", "2047-10-07 02:00:00"}, + }; + DataTypeMyDateTime type_without_fraction(0); + for (auto & [str, expected] : cases_without_fsp) + { + checkParseMyDateTime(str, expected, type_without_fraction); + } +} +catch (Exception & e) +{ + std::cerr << e.displayText() << std::endl; + GTEST_FAIL(); +} + +TEST_F(TestMyTime, ParseMyDateTimeWithTimezone) +try +{ + std::vector> cases{ + {"2006-01-02T15:04:05Z", MyDateTime(2006, 1, 2, 15, 4, 5, 0)}, + {"2020-10-21T16:05:10Z", MyDateTime(2020, 10, 21, 16, 5, 10, 0)}, + {"2020-10-21T16:05:10.50+08", MyDateTime(2020, 10, 21, 8, 5, 10, 500 * 1000)}, + {"2020-10-21T16:05:10.50-0700", MyDateTime(2020, 10, 21, 23, 5, 10, 500 * 1000)}, + {"2020-10-21T16:05:10.50+09:00", MyDateTime(2020, 10, 21, 7, 5, 10, 500 * 1000)}, + {"2006-01-02T15:04:05+09:00", MyDateTime(2006, 1, 2, 6, 4, 5, 0)}, + {"2006-01-02T15:04:05-02:00", MyDateTime(2006, 1, 2, 17, 4, 5, 0)}, + {"2006-01-02T15:04:05-14:00", MyDateTime(2006, 1, 3, 5, 4, 5, 0)}, + }; + DataTypeMyDateTime type(6); + for (auto & [str, expected] : cases) + { + checkParseMyDateTime(str, expected, type); + } +} +catch (Exception & e) +{ + std::cerr << e.displayText() << std::endl; + GTEST_FAIL(); +} + +} // namespace tests + +} // namespace DB diff --git a/dbms/src/DataTypes/DataTypeMyDateTime.cpp b/dbms/src/DataTypes/DataTypeMyDateTime.cpp index 5eb6aba8cff..a5bd40813af 100644 --- a/dbms/src/DataTypes/DataTypeMyDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeMyDateTime.cpp @@ -1,16 +1,13 @@ -#include -#include - #include #include #include #include -#include - #include +#include #include - +#include #include +#include namespace DB @@ -102,7 +99,7 @@ bool DataTypeMyDateTime::equals(const IDataType & rhs) const { /// DateTime with different timezones are equal, because: /// "all types with 
different time zones are equivalent and may be used interchangingly." - return typeid(rhs) == typeid(*this); + return typeid(rhs) == typeid(*this) && getFraction() == dynamic_cast(&rhs)->getFraction(); } diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index f15fb815ca8..1448538332a 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -274,6 +274,22 @@ std::unordered_map func_name_to_sig({ {"cast_decimal_decimal", tipb::ScalarFuncSig::CastDecimalAsDecimal}, {"cast_time_decimal", tipb::ScalarFuncSig::CastTimeAsDecimal}, {"cast_string_decimal", tipb::ScalarFuncSig::CastStringAsDecimal}, + {"cast_int_string", tipb::ScalarFuncSig::CastIntAsString}, + {"cast_real_string", tipb::ScalarFuncSig::CastRealAsString}, + {"cast_decimal_string", tipb::ScalarFuncSig::CastDecimalAsString}, + {"cast_time_string", tipb::ScalarFuncSig::CastTimeAsString}, + {"cast_string_string", tipb::ScalarFuncSig::CastStringAsString}, + {"cast_int_date", tipb::ScalarFuncSig::CastIntAsTime}, + {"cast_real_date", tipb::ScalarFuncSig::CastRealAsTime}, + {"cast_decimal_date", tipb::ScalarFuncSig::CastDecimalAsTime}, + {"cast_time_date", tipb::ScalarFuncSig::CastTimeAsTime}, + {"cast_string_date", tipb::ScalarFuncSig::CastStringAsTime}, + {"cast_int_datetime", tipb::ScalarFuncSig::CastIntAsTime}, + {"cast_real_datetime", tipb::ScalarFuncSig::CastRealAsTime}, + {"cast_decimal_datetime", tipb::ScalarFuncSig::CastDecimalAsTime}, + {"cast_time_datetime", tipb::ScalarFuncSig::CastTimeAsTime}, + {"cast_string_datetime", tipb::ScalarFuncSig::CastStringAsTime}, + }); void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::unordered_set & referred_columns, @@ -399,6 +415,24 @@ void compileExpr(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, std::un expr->set_sig(tipb::ScalarFuncSig::DateFormatSig); expr->mutable_field_type()->set_tp(TiDB::TypeString); break; + case 
tipb::ScalarFuncSig::CastIntAsTime: + case tipb::ScalarFuncSig::CastRealAsTime: + case tipb::ScalarFuncSig::CastTimeAsTime: + case tipb::ScalarFuncSig::CastDecimalAsTime: + case tipb::ScalarFuncSig::CastStringAsTime: + { + expr->set_sig(it_sig->second); + auto * ft = expr->mutable_field_type(); + if (it_sig->first.find("datetime") != std::string::npos) + { + ft->set_tp(TiDB::TypeDatetime); + } + else + { + ft->set_tp(TiDB::TypeDate); + } + break; + } default: { expr->set_sig(it_sig->second); diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index 672b84950da..624c3ae06f3 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -431,6 +431,7 @@ String DAGExpressionAnalyzer::convertToUInt8(ExpressionActionsPtr & actions, con { /// use tidb_cast to make it compatible with TiDB tipb::FieldType field_type; + // TODO: Use TypeDouble as return type, to be compatible with TiDB field_type.set_tp(TiDB::TypeLongLong); tipb::Expr type_expr; constructStringLiteralTiExpr(type_expr, "Nullable(Int64)"); diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index 3929a910ce2..c0116d5c17e 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -438,6 +438,15 @@ struct FormatImpl } }; +template +struct FormatImpl> +{ + static void execute(const typename DataTypeDecimal::FieldType v, WriteBuffer & wb, const DataTypeDecimal * tp, const DateLUTImpl *) + { + writeText(v, tp->getScale(), wb); + } +}; + template struct FormatImpl> { diff --git a/dbms/src/Functions/FunctionsTiDBConversion.h b/dbms/src/Functions/FunctionsTiDBConversion.h index 94b68124847..a11c10a2107 100644 --- a/dbms/src/Functions/FunctionsTiDBConversion.h +++ b/dbms/src/Functions/FunctionsTiDBConversion.h @@ -101,7 +101,6 @@ struct TiDBConvertToString auto col_to = ColumnString::create(); ColumnString::Chars_t & 
data_to = col_to->getChars(); ColumnString::Offsets & offsets_to = col_to->getOffsets(); - WriteBufferFromVector write_buffer(data_to); if constexpr (std::is_same_v) { @@ -113,6 +112,8 @@ struct TiDBConvertToString offsets_to.resize(size); + WriteBufferFromVector write_buffer(data_to); + size_t current_offset = 0; for (size_t i = 0; i < size; i++) { @@ -148,6 +149,8 @@ struct TiDBConvertToString container_per_element.resize(decimal_max_prec); offsets_to.resize(size); + WriteBufferFromVector write_buffer(data_to); + for (size_t i = 0; i < size; ++i) { WriteBufferFromVector element_write_buffer(container_per_element); @@ -189,6 +192,8 @@ struct TiDBConvertToString } offsets_to.resize(size); + WriteBufferFromVector write_buffer(data_to); + for (size_t i = 0; i < size; ++i) { WriteBufferFromVector element_write_buffer(container_per_element); @@ -1172,6 +1177,13 @@ struct TiDBConvertToTime const auto & col_with_type_and_name = block.getByPosition(arguments[0]); const auto & type = static_cast(*col_with_type_and_name.type); + int to_fsp [[maybe_unused]] = 0; + if constexpr (std::is_same_v) + { + const auto * tp = dynamic_cast(removeNullable(block.getByPosition(result).type).get()); + to_fsp = tp->getFraction(); + } + if constexpr (return_nullable) { col_null_map_to = ColumnUInt8::create(size, 0); @@ -1195,7 +1207,7 @@ struct TiDBConvertToTime String string_value = string_ref.toString(); try { - Field packed_uint_value = parseMyDateTime(string_value); + Field packed_uint_value = parseMyDateTime(string_value, to_fsp); UInt64 packed_uint = packed_uint_value.template safeGet(); MyDateTime datetime(packed_uint); if constexpr (std::is_same_v) @@ -1219,6 +1231,7 @@ struct TiDBConvertToTime } else if constexpr (std::is_same_v || std::is_same_v) { + // cast time as time const auto * col_from = checkAndGetColumn(block.getByPosition(arguments[0]).column.get()); const ColumnUInt64::Container & vec_from = col_from->getData(); @@ -1233,7 +1246,33 @@ struct TiDBConvertToTime } else { - 
vec_to[i] = datetime.toPackedUInt(); + int from_fsp = 0; + if constexpr (std::is_same_v) + { + auto & from_type = static_cast(type); + from_fsp = from_type.getFraction(); + } + UInt32 micro_second = datetime.micro_second; + UInt64 packed_uint = vec_from[i]; + if (to_fsp < from_fsp) + { + micro_second = micro_second / std::pow(10, 6 - to_fsp - 1); + micro_second = (micro_second + 5) / 10; + // Overflow + if (micro_second >= std::pow(10, to_fsp)) + { + static const auto lut = DateLUT::instance("UTC"); + datetime.micro_second = 0; + packed_uint = datetime.toPackedUInt(); + packed_uint = AddSecondsImpl::execute(packed_uint, 1, lut); + } + else + { + datetime.micro_second = micro_second * std::pow(10, 6 - to_fsp); + packed_uint = datetime.toPackedUInt(); + } + } + vec_to[i] = packed_uint; } } } @@ -1292,7 +1331,7 @@ struct TiDBConvertToTime { try { - Field packed_uint_value = parseMyDateTime(value_str); + Field packed_uint_value = parseMyDateTime(value_str, to_fsp); UInt64 packed_uint = packed_uint_value.template safeGet(); MyDateTime datetime(packed_uint); if constexpr (std::is_same_v) @@ -1325,7 +1364,7 @@ struct TiDBConvertToTime String value_str = vec_from[i].toString(type.getScale()); try { - Field value = parseMyDateTime(value_str); + Field value = parseMyDateTime(value_str, to_fsp); MyDateTime datetime(value.template safeGet()); if constexpr (std::is_same_v) { diff --git a/errors.toml b/errors.toml index d254c3a855c..1bba324d673 100644 --- a/errors.toml +++ b/errors.toml @@ -63,6 +63,11 @@ error = ''' Encryption internal error. ''' +["FLASH:MPP:Internal"] +error = ''' +MPP internal error. +''' + ["FLASH:PageStorage:FileSizeNotMatch"] error = ''' Some files' size don't match their metadata. @@ -83,3 +88,13 @@ error = ''' Schema synchronize error. ''' +["FLASH:Types:Truncated"] +error = ''' +Data is truncated during conversion. 
+''' + +["FLASH:Types:WrongValue"] +error = ''' +Input value is in wrong format +''' + diff --git a/tests/fullstack-test/expr/cast_as_time.test b/tests/fullstack-test/expr/cast_as_time.test new file mode 100644 index 00000000000..5a6b8f35c9b --- /dev/null +++ b/tests/fullstack-test/expr/cast_as_time.test @@ -0,0 +1,36 @@ +mysql> drop table if exists test.t +mysql> create table test.t(a decimal(20, 6)) +mysql> alter table test.t set tiflash replica 1 location labels 'rack', 'host', 'abc' + +func> wait_table test t + +mysql> insert into test.t values(20201208111111.999999) +mysql> insert into test.t values(20201208111111.123456) +mysql> set @@tidb_isolation_read_engines='tiflash' +mysql> select * from test.t where cast(a as datetime(4)) = '2020-12-08 11:11:11.1235' ++-----------------------+ +| a | ++-----------------------+ +| 20201208111111.123456 | ++-----------------------+ +mysql> select * from test.t where cast(a as datetime(4)) = '2020-12-08 11:11:12.0000' ++-----------------------+ +| a | ++-----------------------+ +| 20201208111111.999999 | ++-----------------------+ + +mysql> drop table if exists test.t +mysql> create table test.t(a datetime(6)) +mysql> alter table test.t set tiflash replica 1 location labels 'rack', 'host', 'abc' + +func> wait_table test t + +mysql> insert into test.t values('2020-12-08 11:11:11.999999') +mysql> set @@tidb_isolation_read_engines='tiflash' +mysql> select * from test.t where cast(a as datetime(4)) = '2020-12-08 11:11:12.0000' ++----------------------------+ +| a | ++----------------------------+ +| 2020-12-08 11:11:11.999999 | ++----------------------------+ \ No newline at end of file diff --git a/tests/mutable-test/expr/cast_as_decimal.test b/tests/mutable-test/expr/cast_as_decimal.test index 2e81ef1d654..c7c72af3c2e 100644 --- a/tests/mutable-test/expr/cast_as_decimal.test +++ b/tests/mutable-test/expr/cast_as_decimal.test @@ -12,7 +12,6 @@ => DBGInvoke __put_region(4, 0, 100, default, test) => DBGInvoke 
__raft_insert_row(default, test, 4, 50, 1, 1.234, 1.234, '123', '2020-09-15 01:00:00') -# test date_format => DBGInvoke dag('select count(1) from default.test group by a, cast_int_decimal(a), cast_real_decimal(b), cast_decimal_decimal(c), cast_string_decimal(d), cast_time_decimal(e)', 4,'encode_type:chunk') ┌─count(1)─┬─a─┬─cast_int_decimal(a)─┬─cast_real_decimal(b)─┬─cast_decimal_decimal(c)─┬─cast_string_decimal(d)─┬─cast_time_decimal(e)─┐ │ 1 │ 1 │ 1 │ 1 │ 1 │ 123 │ 20200901501000 │ diff --git a/tests/mutable-test/expr/cast_as_int.test b/tests/mutable-test/expr/cast_as_int.test index 150e04bd728..02a55230878 100644 --- a/tests/mutable-test/expr/cast_as_int.test +++ b/tests/mutable-test/expr/cast_as_int.test @@ -12,7 +12,6 @@ => DBGInvoke __put_region(4, 0, 100, default, test) => DBGInvoke __raft_insert_row(default, test, 4, 50, 1, 1.234, 1.234, '123', '2020-09-15 01:00:00') -# test date_format => DBGInvoke dag('select count(1) from default.test group by a, cast_int_int(a), cast_real_int(b), cast_decimal_int(c), cast_string_int(d), cast_time_int(e)', 4,'encode_type:chunk') ┌─count(1)─┬─a─┬─cast_int_int(a)─┬─cast_real_int(b)─┬─cast_decimal_int(c)─┬─cast_string_int(d)─┬─cast_time_int(e)─┐ │ 1 │ 1 │ 1 │ 1 │ 1 │ 123 │ 20200901501000 │ diff --git a/tests/mutable-test/expr/cast_as_string.test b/tests/mutable-test/expr/cast_as_string.test new file mode 100644 index 00000000000..bc192360afa --- /dev/null +++ b/tests/mutable-test/expr/cast_as_string.test @@ -0,0 +1,22 @@ +# Preparation. +=> DBGInvoke __enable_schema_sync_service('true') + +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test + +=> DBGInvoke __set_flush_threshold(1000000, 1000000) + +# Data. 
+=> DBGInvoke __mock_tidb_table(default, test, 'a int, b float, c decimal(4, 3), d char, e MyDatetime') +=> DBGInvoke __refresh_schemas() +=> DBGInvoke __put_region(4, 0, 100, default, test) +=> DBGInvoke __raft_insert_row(default, test, 4, 50, 1, 1.234, 1.234, '123', '2020-09-15 01:00:00') + +=> DBGInvoke dag('select count(1) from default.test group by a, cast_int_string(a), cast_real_string(b), cast_decimal_string(c), cast_string_string(d)', 4,'encode_type:chunk') +┌─count(1)─┬─a─┬─cast_int_string(a)─┬─cast_real_string(b)─┬─cast_decimal_string(c)─┬─cast_string_string(d)─┐ +│ 1 │ 1 │ 1 │ 1 │ 1 │ 123 │ +└──────────┴───┴────────────────────┴─────────────────────┴────────────────────────┴───────────────────────┘ + +# Clean up. +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test diff --git a/tests/mutable-test/expr/cast_as_time.test b/tests/mutable-test/expr/cast_as_time.test new file mode 100644 index 00000000000..fe955a8bee3 --- /dev/null +++ b/tests/mutable-test/expr/cast_as_time.test @@ -0,0 +1,25 @@ +# Preparation. +=> DBGInvoke __enable_schema_sync_service('true') + +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test + +=> DBGInvoke __set_flush_threshold(1000000, 1000000) + +# Data. 
+=> DBGInvoke __mock_tidb_table(default, test, 'a int, b double, c decimal(20, 6), d char, e MyDatetime') +=> DBGInvoke __refresh_schemas() +=> DBGInvoke __put_region(4, 0, 100, default, test) +=> DBGInvoke __raft_insert_row(default, test, 4, 50, 20201203, 20201203010000, 20201203010000, '2020-12-03 01:00:00', '2020-09-15 01:00:00') + +=> DBGInvoke dag('select count(1) from default.test group by a, cast_int_datetime(a), cast_real_datetime(b), cast_decimal_datetime(c), cast_string_datetime(d), cast_time_datetime(e)', 4,'encode_type:chunk') +┌─count(1)─┬────────a─┬─cast_int_datetime(a)─┬─cast_real_datetime(b)─┬─cast_decimal_datetime(c)─┬─cast_string_datetime(d)─┬─cast_time_datetime(e)─┐ +│ 1 │ 20201203 │ 2020-12-03 00:00:00 │ 2020-12-03 01:00:00 │ 2020-12-03 01:00:00 │ 2020-12-03 01:00:00 │ 2020-09-15 01:00:00 │ +└──────────┴──────────┴──────────────────────┴───────────────────────┴──────────────────────────┴─────────────────────────┴───────────────────────┘ + +# TODO: +# => DBGInvoke dag('select count(1) from default.test group by a, cast_int_date(a), cast_real_date(b), cast_decimal_date(c), cast_string_date(d), cast_time_date(e)', 4,'encode_type:chunk') + +# Clean up. +=> DBGInvoke __drop_tidb_table(default, test) +=> drop table if exists default.test